/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu

Bug Summary

File:	dev/pci/drm/amd/amdgpu/amdgpu_ras.c
Warning:	line 1911, column 3 Value stored to 'ret' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name amdgpu_ras.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I 
/usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_ras.c

1	/*
2	* Copyright 2018 Advanced Micro Devices, Inc.
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice shall be included in
12	* all copies or substantial portions of the Software.
13	*
14	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20	* OTHER DEALINGS IN THE SOFTWARE.
21	*
22	*
23	*/
24	#include <linux/debugfs.h>
25	#include <linux/list.h>
26	#include <linux/module.h>
27	#include <linux/uaccess.h>
28	#include <linux/reboot.h>
29	#include <linux/syscalls.h>
30
31	#include "amdgpu.h"
32	#include "amdgpu_ras.h"
33	#include "amdgpu_atomfirmware.h"
34	#include "amdgpu_xgmi.h"
35	#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
36
37	static const char *RAS_FS_NAME = "ras";
38
39	const char *ras_error_string[] = {
40	"none",
41	"parity",
42	"single_correctable",
43	"multi_uncorrectable",
44	"poison",
45	};
46
47	const char *ras_block_string[] = {
48	"umc",
49	"sdma",
50	"gfx",
51	"mmhub",
52	"athub",
53	"pcie_bif",
54	"hdp",
55	"xgmi_wafl",
56	"df",
57	"smn",
58	"sem",
59	"mp0",
60	"mp1",
61	"fuse",
62	};
63
64	#define ras_err_str(i)(ras_error_string[ffs(i)]) (ras_error_string[ffs(i)])
65	#define ras_block_str(i)(ras_block_string[i]) (ras_block_string[i])
66
67	#define RAS_DEFAULT_FLAGS((0x1 << 0)) (AMDGPU_RAS_FLAG_INIT_BY_VBIOS(0x1 << 0))
68
69	/* inject address is 52 bits */
70	#define RAS_UMC_INJECT_ADDR_LIMIT(0x1ULL << 52) (0x1ULL << 52)
71
72	/* typical ECC bad page rate(1 bad page per 100MB VRAM) */
73	#define RAS_BAD_PAGE_RATE(100 * 1024 * 1024ULL) (100 * 1024 * 1024ULL)
74
75	enum amdgpu_ras_retire_page_reservation {
76	AMDGPU_RAS_RETIRE_PAGE_RESERVED,
77	AMDGPU_RAS_RETIRE_PAGE_PENDING,
78	AMDGPU_RAS_RETIRE_PAGE_FAULT,
79	};
80
81	atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0)(0);
82
83	static bool_Bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
84	uint64_t addr);
85
86	void amdgpu_ras_set_error_query_ready(struct amdgpu_device *adev, bool_Bool ready)
87	{
88	if (adev && amdgpu_ras_get_context(adev)((adev)->psp.ras.ras))
89	amdgpu_ras_get_context(adev)((adev)->psp.ras.ras)->error_query_ready = ready;
90	}
91
92	static bool_Bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev)
93	{
94	if (adev && amdgpu_ras_get_context(adev)((adev)->psp.ras.ras))
95	return amdgpu_ras_get_context(adev)((adev)->psp.ras.ras)->error_query_ready;
96
97	return false0;
98	}
99
100	#ifdef __linux__
101
102	static ssize_t amdgpu_ras_debugfs_read(struct file f, char __user buf,
103	size_t size, loff_t *pos)
104	{
105	struct ras_manager obj = (struct ras_manager )file_inode(f)->i_private;
106	struct ras_query_if info = {
107	.head = obj->head,
108	};
109	ssize_t s;
110	char val[128];
111
112	if (amdgpu_ras_error_query(obj->adev, &info))
113	return -EINVAL22;
114
115	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
116	"ue", info.ue_count,
117	"ce", info.ce_count);
118	if (*pos >= s)
119	return 0;
120
121	s -= *pos;
122	s = min_t(u64, s, size)({ u64 __min_a = (s); u64 __min_b = (size); __min_a < __min_b ? __min_a : __min_b; });
123
124
125	if (copy_to_user(buf, &val[*pos], s))
126	return -EINVAL22;
127
128	*pos += s;
129
130	return s;
131	}
132
133	static const struct file_operations amdgpu_ras_debugfs_ops = {
134	.owner = THIS_MODULE((void *)0),
135	.read = amdgpu_ras_debugfs_read,
136	.write = NULL((void *)0),
137	.llseek = default_llseek
138	};
139
140	static int amdgpu_ras_find_block_id_by_name(const char name, int block_id)
141	{
142	int i;
143
144	for (i = 0; i < ARRAY_SIZE(ras_block_string)(sizeof((ras_block_string)) / sizeof((ras_block_string)[0])); i++) {
145	*block_id = i;
146	if (strcmp(name, ras_block_str(i)(ras_block_string[i])) == 0)
147	return 0;
148	}
149	return -EINVAL22;
150	}
151
152	static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
153	const char __user *buf, size_t size,
154	loff_t pos, struct ras_debug_if data)
155	{
156	ssize_t s = min_t(u64, 64, size)({ u64 __min_a = (64); u64 __min_b = (size); __min_a < __min_b ? __min_a : __min_b; });
157	char str[65];
158	char block_name[33];
159	char err[9] = "ue";
160	int op = -1;
161	int block_id;
162	uint32_t sub_block;
163	u64 address, value;
164
165	if (*pos)
166	return -EINVAL22;
167	*pos = size;
168
169	memset(str, 0, sizeof(str))__builtin_memset((str), (0), (sizeof(str)));
170	memset(data, 0, sizeof(data))__builtin_memset((data), (0), (sizeof(data)));
171
172	if (copy_from_user(str, buf, s))
173	return -EINVAL22;
174
175	if (sscanf(str, "disable %32s", block_name) == 1)
176	op = 0;
177	else if (sscanf(str, "enable %32s %8s", block_name, err) == 2)
178	op = 1;
179	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
180	op = 2;
181	else if (str[0] && str[1] && str[2] && str[3])
182	/* ascii string, but commands are not matched. */
183	return -EINVAL22;
184
185	if (op != -1) {
186	if (amdgpu_ras_find_block_id_by_name(block_name, &block_id))
187	return -EINVAL22;
188
189	data->head.block = block_id;
190	/* only ue and ce errors are supported */
191	if (!memcmp("ue", err, 2)__builtin_memcmp(("ue"), (err), (2)))
192	data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
193	else if (!memcmp("ce", err, 2)__builtin_memcmp(("ce"), (err), (2)))
194	data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE;
195	else
196	return -EINVAL22;
197
198	data->op = op;
199
200	if (op == 2) {
201	if (sscanf(str, "%s %s %*s %u %llu %llu",
202	&sub_block, &address, &value) != 3)
203	if (sscanf(str, "%s %s %*s 0x%x 0x%llx 0x%llx",
204	&sub_block, &address, &value) != 3)
205	return -EINVAL22;
206	data->head.sub_block_index = sub_block;
207	data->inject.address = address;
208	data->inject.value = value;
209	}
210	} else {
211	if (size < sizeof(*data))
212	return -EINVAL22;
213
214	if (copy_from_user(data, buf, sizeof(*data)))
215	return -EINVAL22;
216	}
217
218	return 0;
219	}
220
221	/**
222	* DOC: AMDGPU RAS debugfs control interface
223	*
224	* It accepts struct ras_debug_if, which has two members.
225	*
226	* First member: ras_debug_if::head or ras_debug_if::inject.
227	*
228	* head is used to indicate which IP block will be under control.
229	*
230	* head has four members, they are block, type, sub_block_index, name.
231	* block: which IP will be under control.
232	* type: what kind of error will be enabled/disabled/injected.
233	* sub_block_index: some IPs have subcomponents, e.g. GFX, SDMA.
234	* name: the name of IP.
235	*
236	* inject has two more members than head, they are address, value.
237	* As their names indicate, inject operation will write the
238	* value to the address.
239	*
240	* The second member: struct ras_debug_if::op.
241	* It has three kinds of operations.
242	*
243	* - 0: disable RAS on the block. Take ::head as its data.
244	* - 1: enable RAS on the block. Take ::head as its data.
245	* - 2: inject errors on the block. Take ::inject as its data.
246	*
247	* How to use the interface?
248	*
249	* Programs
250	*
251	* Copy the struct ras_debug_if in your codes and initialize it.
252	* Write the struct to the control node.
253	*
254	* Shells
255	*
256	* .. code-block:: bash
257	*
258	* echo op block [error [sub_block address value]] > .../ras/ras_ctrl
259	*
260	* Parameters:
261	*
262	* op: disable, enable, inject
263	* disable: only block is needed
264	* enable: block and error are needed
265	* inject: error, address, value are needed
266	* block: umc, sdma, gfx, .........
267	* see ras_block_string[] for details
268	* error: ue, ce
269	* ue: multi_uncorrectable
270	* ce: single_correctable
271	* sub_block:
272	* sub block index, pass 0 if there is no sub block
273	*
274	* here are some examples for bash commands:
275	*
276	* .. code-block:: bash
277	*
278	* echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
279	* echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
280	* echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
281	*
282	* How to check the result?
283	*
284	* For disable/enable, please check ras features at
285	* /sys/class/drm/card[0/1/2...]/device/ras/features
286	*
287	* For inject, please check corresponding err count at
288	* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
289	*
290	* .. note::
291	* Operations are only allowed on blocks which are supported.
292	* Please check ras mask at /sys/module/amdgpu/parameters/ras_mask
293	* to see which blocks support RAS on a particular asic.
294	*
295	*/
296	static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file f, const char __user buf,
297	size_t size, loff_t *pos)
298	{
299	struct amdgpu_device adev = (struct amdgpu_device )file_inode(f)->i_private;
300	struct ras_debug_if data;
301	int ret = 0;
302
303	if (!amdgpu_ras_get_error_query_ready(adev)) {
304	dev_warn(adev->dev, "RAS WARN: error injection "printf("drm:pid%d:%s WARNING " "RAS WARN: error injection " "currently inaccessible\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__)
305	"currently inaccessible\n")printf("drm:pid%d:%s WARNING " "RAS WARN: error injection " "currently inaccessible\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__);
306	return size;
307	}
308
309	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
310	if (ret)
311	return -EINVAL22;
312
313	if (!amdgpu_ras_is_supported(adev, data.head.block))
314	return -EINVAL22;
315
316	switch (data.op) {
317	case 0:
318	ret = amdgpu_ras_feature_enable(adev, &data.head, 0);
319	break;
320	case 1:
321	ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
322	break;
323	case 2:
324	if ((data.inject.address >= adev->gmc.mc_vram_size) \|\|
325	(data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT(0x1ULL << 52))) {
326	dev_warn(adev->dev, "RAS WARN: input address "printf("drm:pid%d:%s WARNING " "RAS WARN: input address " "0x%llx is invalid." , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , data.inject .address)
327	"0x%llx is invalid.",printf("drm:pid%d:%s WARNING " "RAS WARN: input address " "0x%llx is invalid." , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , data.inject .address)
328	data.inject.address)printf("drm:pid%d:%s WARNING " "RAS WARN: input address " "0x%llx is invalid." , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , data.inject .address);
329	ret = -EINVAL22;
330	break;
331	}
332
333	/* umc ce/ue error injection for a bad page is not allowed */
334	if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
335	amdgpu_ras_check_bad_page(adev, data.inject.address)) {
336	dev_warn(adev->dev, "RAS WARN: 0x%llx has been marked "printf("drm:pid%d:%s WARNING " "RAS WARN: 0x%llx has been marked " "as bad before error injection!\n", ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p-> ps_pid, __func__ , data.inject.address)
337	"as bad before error injection!\n",printf("drm:pid%d:%s WARNING " "RAS WARN: 0x%llx has been marked " "as bad before error injection!\n", ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p-> ps_pid, __func__ , data.inject.address)
338	data.inject.address)printf("drm:pid%d:%s WARNING " "RAS WARN: 0x%llx has been marked " "as bad before error injection!\n", ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p-> ps_pid, __func__ , data.inject.address);
339	break;
340	}
341
342	/* data.inject.address is offset instead of absolute gpu address */
343	ret = amdgpu_ras_error_inject(adev, &data.inject);
344	break;
345	default:
346	ret = -EINVAL22;
347	break;
348	}
349
350	if (ret)
351	return -EINVAL22;
352
353	return size;
354	}
355
356	/**
357	* DOC: AMDGPU RAS debugfs EEPROM table reset interface
358	*
359	* Some boards contain an EEPROM which is used to persistently store a list of
360	* bad pages which experienced ECC errors in VRAM. This interface provides
361	* a way to reset the EEPROM, e.g., after testing error injection.
362	*
363	* Usage:
364	*
365	* .. code-block:: bash
366	*
367	* echo 1 > ../ras/ras_eeprom_reset
368	*
369	* will reset EEPROM table to 0 entries.
370	*
371	*/
372	static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file f, const char __user buf,
373	size_t size, loff_t *pos)
374	{
375	struct amdgpu_device *adev =
376	(struct amdgpu_device *)file_inode(f)->i_private;
377	int ret;
378
379	ret = amdgpu_ras_eeprom_reset_table(
380	&(amdgpu_ras_get_context(adev)((adev)->psp.ras.ras)->eeprom_control));
381
382	if (ret == 1) {
383	amdgpu_ras_get_context(adev)((adev)->psp.ras.ras)->flags = RAS_DEFAULT_FLAGS((0x1 << 0));
384	return size;
385	} else {
386	return -EIO5;
387	}
388	}
389
390	static const struct file_operations amdgpu_ras_debugfs_ctrl_ops = {
391	.owner = THIS_MODULE((void *)0),
392	.read = NULL((void *)0),
393	.write = amdgpu_ras_debugfs_ctrl_write,
394	.llseek = default_llseek
395	};
396
397	static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
398	.owner = THIS_MODULE((void *)0),
399	.read = NULL((void *)0),
400	.write = amdgpu_ras_debugfs_eeprom_write,
401	.llseek = default_llseek
402	};
403
404	/**
405	* DOC: AMDGPU RAS sysfs Error Count Interface
406	*
407	* It allows the user to read the error count for each IP block on the gpu through
408	* /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
409	*
410	* It outputs multiple lines which report the uncorrected (ue) and corrected
411	* (ce) error counts.
412	*
413	* The format of one line is below,
414	*
415	* [ce|ue]: count
416	*
417	* Example:
418	*
419	* .. code-block:: bash
420	*
421	* ue: 0
422	* ce: 1
423	*
424	*/
425	static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
426	struct device_attribute attr, char buf)
427	{
428	struct ras_manager obj = container_of(attr, struct ras_manager, sysfs_attr)({ const __typeof( ((struct ras_manager )0)->sysfs_attr ) __mptr = (attr); (struct ras_manager )( (char *)__mptr - __builtin_offsetof (struct ras_manager, sysfs_attr) );});
429	struct ras_query_if info = {
430	.head = obj->head,
431	};
432
433	if (!amdgpu_ras_get_error_query_ready(obj->adev))
434	return snprintf(buf, PAGE_SIZE(1 << 12),
435	"Query currently inaccessible\n");
436
437	if (amdgpu_ras_error_query(obj->adev, &info))
438	return -EINVAL22;
439
440	return snprintf(buf, PAGE_SIZE(1 << 12), "%s: %lu\n%s: %lu\n",
441	"ue", info.ue_count,
442	"ce", info.ce_count);
443	}
444
445	#endif /* __linux__ */
446
447	/* obj begin */
448
449	#define get_obj(obj)do { (obj)->use++; } while (0) do { (obj)->use++; } while (0)
450	#define alive_obj(obj)((obj)->use) ((obj)->use)
451
452	static inline void put_obj(struct ras_manager *obj)
453	{
454	if (obj && --obj->use == 0)
455	list_del(&obj->node);
456	if (obj && obj->use < 0) {
457	DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", obj->head.name)__drm_err("RAS ERROR: Unbalance obj(%s) use\n", obj->head. name);
458	}
459	}
460
461	/* make one obj and return it. */
462	static struct ras_manager amdgpu_ras_create_obj(struct amdgpu_device adev,
463	struct ras_common_if *head)
464	{
465	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
466	struct ras_manager *obj;
467
468	if (!con)
469	return NULL((void *)0);
470
471	if (head->block >= AMDGPU_RAS_BLOCK_COUNTAMDGPU_RAS_BLOCK__LAST)
472	return NULL((void *)0);
473
474	obj = &con->objs[head->block];
475	/* already exist. return obj? */
476	if (alive_obj(obj)((obj)->use))
477	return NULL((void *)0);
478
479	obj->head = *head;
480	obj->adev = adev;
481	list_add(&obj->node, &con->head);
482	get_obj(obj)do { (obj)->use++; } while (0);
483
484	return obj;
485	}
486
487	/* return an obj equal to head, or the first when head is NULL */
488	struct ras_manager amdgpu_ras_find_obj(struct amdgpu_device adev,
489	struct ras_common_if *head)
490	{
491	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
492	struct ras_manager *obj;
493	int i;
494
495	if (!con)
496	return NULL((void *)0);
497
498	if (head) {
499	if (head->block >= AMDGPU_RAS_BLOCK_COUNTAMDGPU_RAS_BLOCK__LAST)
500	return NULL((void *)0);
501
502	obj = &con->objs[head->block];
503
504	if (alive_obj(obj)((obj)->use)) {
505	WARN_ON(head->block != obj->head.block)({ int __ret = !!(head->block != obj->head.block); if ( __ret) printf("WARNING %s failed at %s:%d\n", "head->block != obj->head.block" , "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_ras.c", 505); __builtin_expect (!!(__ret), 0); });
506	return obj;
507	}
508	} else {
509	for (i = 0; i < AMDGPU_RAS_BLOCK_COUNTAMDGPU_RAS_BLOCK__LAST; i++) {
510	obj = &con->objs[i];
511	if (alive_obj(obj)((obj)->use)) {
512	WARN_ON(i != obj->head.block)({ int __ret = !!(i != obj->head.block); if (__ret) printf ("WARNING %s failed at %s:%d\n", "i != obj->head.block", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_ras.c" , 512); __builtin_expect(!!(__ret), 0); });
513	return obj;
514	}
515	}
516	}
517
518	return NULL((void *)0);
519	}
520	/* obj end */
521
522	static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
523	const char* invoke_type,
524	const char* block_name,
525	enum ta_ras_status ret)
526	{
527	switch (ret) {
528	case TA_RAS_STATUS__SUCCESS:
529	return;
530	case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
531	dev_warn(adev->dev,printf("drm:pid%d:%s WARNING " "RAS WARN: %s %s currently unavailable\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name)
532	"RAS WARN: %s %s currently unavailable\n",printf("drm:pid%d:%s WARNING " "RAS WARN: %s %s currently unavailable\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name)
533	invoke_type,printf("drm:pid%d:%s WARNING " "RAS WARN: %s %s currently unavailable\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name)
534	block_name)printf("drm:pid%d:%s WARNING " "RAS WARN: %s %s currently unavailable\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name);
535	break;
536	default:
537	dev_err(adev->dev,printf("drm:pid%d:%s ERROR " "RAS ERROR: %s %s error failed ret 0x%X\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name, ret)
538	"RAS ERROR: %s %s error failed ret 0x%X\n",printf("drm:pid%d:%s ERROR " "RAS ERROR: %s %s error failed ret 0x%X\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name, ret)
539	invoke_type,printf("drm:pid%d:%s ERROR " "RAS ERROR: %s %s error failed ret 0x%X\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name, ret)
540	block_name,printf("drm:pid%d:%s ERROR " "RAS ERROR: %s %s error failed ret 0x%X\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name, ret)
541	ret)printf("drm:pid%d:%s ERROR " "RAS ERROR: %s %s error failed ret 0x%X\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , invoke_type , block_name, ret);
542	}
543	}
544
545	/* feature ctl begin */
546	static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
547	struct ras_common_if *head)
548	{
549	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
550
551	return con->hw_supported & BIT(head->block)(1UL << (head->block));
552	}
553
554	static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
555	struct ras_common_if *head)
556	{
557	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
558
559	return con->features & BIT(head->block)(1UL << (head->block));
560	}
561
562	/*
563	* if obj is not created, then create one.
564	* set feature enable flag.
565	*/
566	static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
567	struct ras_common_if *head, int enable)
568	{
569	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
570	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
571
572	/* If hardware does not support ras, then do not create obj.
573	* But if hardware supports ras, we can create the obj.
574	* Ras framework checks con->hw_supported to see if it needs to do
575	* corresponding initialization.
576	* IP checks con->support to see if it needs to disable ras.
577	*/
578	if (!amdgpu_ras_is_feature_allowed(adev, head))
579	return 0;
580	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
581	return 0;
582
583	if (enable) {
584	if (!obj) {
585	obj = amdgpu_ras_create_obj(adev, head);
586	if (!obj)
587	return -EINVAL22;
588	} else {
589	/* In case we create obj somewhere else */
590	get_obj(obj)do { (obj)->use++; } while (0);
591	}
592	con->features \|= BIT(head->block)(1UL << (head->block));
593	} else {
594	if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
595	con->features &= ~BIT(head->block)(1UL << (head->block));
596	put_obj(obj);
597	}
598	}
599
600	return 0;
601	}
602
603	/* wrapper of psp_ras_enable_features */
604	int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
605	struct ras_common_if *head, bool_Bool enable)
606	{
607	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
608	union ta_ras_cmd_input *info;
609	int ret;
610
611	if (!con)
612	return -EINVAL22;
613
614	info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL(0x0001 \| 0x0004));
615	if (!info)
616	return -ENOMEM12;
617
618	if (!enable) {
619	info->disable_features = (struct ta_ras_disable_features_input) {
620	.block_id = amdgpu_ras_block_to_ta(head->block),
621	.error_type = amdgpu_ras_error_to_ta(head->type),
622	};
623	} else {
624	info->enable_features = (struct ta_ras_enable_features_input) {
625	.block_id = amdgpu_ras_block_to_ta(head->block),
626	.error_type = amdgpu_ras_error_to_ta(head->type),
627	};
628	}
629
630	/* Do not enable if it is not allowed. */
631	WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head))({ int __ret = !!(enable && !amdgpu_ras_is_feature_allowed (adev, head)); if (__ret) printf("WARNING %s failed at %s:%d\n" , "enable && !amdgpu_ras_is_feature_allowed(adev, head)" , "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_ras.c", 631); __builtin_expect (!!(__ret), 0); });
632	/* Are we already in the state we are going to set? */
633	if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) {
634	ret = 0;
635	goto out;
636	}
637
638	if (!amdgpu_ras_intr_triggered()) {
639	ret = psp_ras_enable_features(&adev->psp, info, enable);
640	if (ret) {
641	amdgpu_ras_parse_status_code(adev,
642	enable ? "enable":"disable",
643	ras_block_str(head->block)(ras_block_string[head->block]),
644	(enum ta_ras_status)ret);
645	if (ret == TA_RAS_STATUS__RESET_NEEDED)
646	ret = -EAGAIN35;
647	else
648	ret = -EINVAL22;
649
650	goto out;
651	}
652	}
653
654	/* setup the obj */
655	__amdgpu_ras_feature_enable(adev, head, enable);
656	ret = 0;
657	out:
658	kfree(info);
659	return ret;
660	}
661
662	/* Only used in device probe stage and called only once. */
663	int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
664	struct ras_common_if *head, bool_Bool enable)
665	{
666	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
667	int ret;
668
669	if (!con)
670	return -EINVAL22;
671
672	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS(0x1 << 0)) {
673	if (enable) {
674	/* There is no harm to issue a ras TA cmd regardless of
675	* the current ras state.
676	* If current state == target state, it will do nothing
677	* But sometimes it requests driver to reset and repost
678	* with error code -EAGAIN.
679	*/
680	ret = amdgpu_ras_feature_enable(adev, head, 1);
681	/* With old ras TA, we might fail to enable ras.
682	* Log it and just setup the object.
683	* TODO: remove this workaround (WA) in the future.
684	*/
685	if (ret == -EINVAL22) {
686	ret = __amdgpu_ras_feature_enable(adev, head, 1);
687	if (!ret)
688	dev_info(adev->dev,do { } while(0)
689	"RAS INFO: %s setup object\n",do { } while(0)
690	ras_block_str(head->block))do { } while(0);
691	}
692	} else {
693	/* setup the object then issue a ras TA disable cmd.*/
694	ret = __amdgpu_ras_feature_enable(adev, head, 1);
695	if (ret)
696	return ret;
697
698	ret = amdgpu_ras_feature_enable(adev, head, 0);
699	}
700	} else
701	ret = amdgpu_ras_feature_enable(adev, head, enable);
702
703	return ret;
704	}
705
706	static int amdgpu_ras_disable_all_features(struct amdgpu_device *adev,
707	bool_Bool bypass)
708	{
709	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
710	struct ras_manager obj, tmp;
711
712	list_for_each_entry_safe(obj, tmp, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}), tmp = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj->node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof(__typeof(obj), node) );}); &obj-> node != (&con->head); obj = tmp, tmp = ({ const __typeof ( ((__typeof(tmp) )0)->node ) __mptr = (tmp->node.next ); (__typeof(tmp) )( (char )__mptr - __builtin_offsetof(__typeof (*tmp), node) );})) {
713	/* bypass psp.
714	* aka just release the obj and corresponding flags
715	*/
716	if (bypass) {
717	if (__amdgpu_ras_feature_enable(adev, &obj->head, 0))
718	break;
719	} else {
720	if (amdgpu_ras_feature_enable(adev, &obj->head, 0))
721	break;
722	}
723	}
724
725	return con->features;
726	}
727
728	static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
729	bool_Bool bypass)
730	{
731	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
732	int ras_block_count = AMDGPU_RAS_BLOCK_COUNTAMDGPU_RAS_BLOCK__LAST;
733	int i;
734	const enum amdgpu_ras_error_type default_ras_type =
735	AMDGPU_RAS_ERROR__NONE;
736
737	for (i = 0; i < ras_block_count; i++) {
738	struct ras_common_if head = {
739	.block = i,
740	.type = default_ras_type,
741	.sub_block_index = 0,
742	};
743	strlcpy(head.name, ras_block_str(i)(ras_block_string[i]), sizeof(head.name));
744	if (bypass) {
745	/*
746	* bypass psp. vbios enable ras for us.
747	* so just create the obj
748	*/
749	if (__amdgpu_ras_feature_enable(adev, &head, 1))
750	break;
751	} else {
752	if (amdgpu_ras_feature_enable(adev, &head, 1))
753	break;
754	}
755	}
756
757	return con->features;
758	}
759	/* feature ctl end */
760
761	/* query/inject/cure begin */
762	int amdgpu_ras_error_query(struct amdgpu_device *adev,
763	struct ras_query_if *info)
764	{
765	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
766	struct ras_err_data err_data = {0, 0, 0, NULL((void *)0)};
767	int i;
768
769	if (!obj)
770	return -EINVAL22;
771
772	switch (info->head.block) {
773	case AMDGPU_RAS_BLOCK__UMC:
774	if (adev->umc.funcs->query_ras_error_count)
775	adev->umc.funcs->query_ras_error_count(adev, &err_data);
776	/* umc query_ras_error_address is also responsible for clearing
777	* error status
778	*/
779	if (adev->umc.funcs->query_ras_error_address)
780	adev->umc.funcs->query_ras_error_address(adev, &err_data);
781	break;
782	case AMDGPU_RAS_BLOCK__SDMA:
783	if (adev->sdma.funcs->query_ras_error_count) {
784	for (i = 0; i < adev->sdma.num_instances; i++)
785	adev->sdma.funcs->query_ras_error_count(adev, i,
786	&err_data);
787	}
788	break;
789	case AMDGPU_RAS_BLOCK__GFX:
790	if (adev->gfx.funcs->query_ras_error_count)
791	adev->gfx.funcs->query_ras_error_count(adev, &err_data);
792	break;
793	case AMDGPU_RAS_BLOCK__MMHUB:
794	if (adev->mmhub.funcs->query_ras_error_count)
795	adev->mmhub.funcs->query_ras_error_count(adev, &err_data);
796	break;
797	case AMDGPU_RAS_BLOCK__PCIE_BIF:
798	if (adev->nbio.funcs->query_ras_error_count)
799	adev->nbio.funcs->query_ras_error_count(adev, &err_data);
800	break;
801	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
802	amdgpu_xgmi_query_ras_error_count(adev, &err_data);
803	break;
804	default:
805	break;
806	}
807
808	obj->err_data.ue_count += err_data.ue_count;
809	obj->err_data.ce_count += err_data.ce_count;
810
811	info->ue_count = obj->err_data.ue_count;
812	info->ce_count = obj->err_data.ce_count;
813
814	if (err_data.ce_count) {
815	dev_info(adev->dev, "%ld correctable hardware errors "do { } while(0)
816	"detected in %s block, no user "do { } while(0)
817	"action is needed.\n",do { } while(0)
818	obj->err_data.ce_count,do { } while(0)
819	ras_block_str(info->head.block))do { } while(0);
820	}
821	if (err_data.ue_count) {
822	dev_info(adev->dev, "%ld uncorrectable hardware errors "do { } while(0)
823	"detected in %s block\n",do { } while(0)
824	obj->err_data.ue_count,do { } while(0)
825	ras_block_str(info->head.block))do { } while(0);
826	}
827
828	return 0;
829	}
830
831	/* Trigger XGMI/WAFL error */
832	static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
833	struct ta_ras_trigger_error_input *block_info)
834	{
835	int ret;
836
837	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
838	dev_warn(adev->dev, "Failed to disallow df cstate")printf("drm:pid%d:%s WARNING " "Failed to disallow df cstate" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
839
840	if (amdgpu_dpm_allow_xgmi_power_down(adev, false0))
841	dev_warn(adev->dev, "Failed to disallow XGMI power down")printf("drm:pid%d:%s WARNING " "Failed to disallow XGMI power down" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
842
843	ret = psp_ras_trigger_error(&adev->psp, block_info);
844
845	if (amdgpu_ras_intr_triggered())
846	return ret;
847
848	if (amdgpu_dpm_allow_xgmi_power_down(adev, true1))
849	dev_warn(adev->dev, "Failed to allow XGMI power down")printf("drm:pid%d:%s WARNING " "Failed to allow XGMI power down" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
850
851	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
852	dev_warn(adev->dev, "Failed to allow df cstate")printf("drm:pid%d:%s WARNING " "Failed to allow df cstate", ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
853
854	return ret;
855	}
856
857	/* wrapper of psp_ras_trigger_error */
858	int amdgpu_ras_error_inject(struct amdgpu_device *adev,
859	struct ras_inject_if *info)
860	{
861	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
862	struct ta_ras_trigger_error_input block_info = {
863	.block_id = amdgpu_ras_block_to_ta(info->head.block),
864	.inject_error_type = amdgpu_ras_error_to_ta(info->head.type),
865	.sub_block_index = info->head.sub_block_index,
866	.address = info->address,
867	.value = info->value,
868	};
869	int ret = 0;
870
871	if (!obj)
872	return -EINVAL22;
873
874	/* Calculate XGMI relative offset */
875	if (adev->gmc.xgmi.num_physical_nodes > 1) {
876	block_info.address =
877	amdgpu_xgmi_get_relative_phy_addr(adev,
878	block_info.address);
879	}
880
881	switch (info->head.block) {
882	case AMDGPU_RAS_BLOCK__GFX:
883	if (adev->gfx.funcs->ras_error_inject)
884	ret = adev->gfx.funcs->ras_error_inject(adev, info);
885	else
886	ret = -EINVAL22;
887	break;
888	case AMDGPU_RAS_BLOCK__UMC:
889	case AMDGPU_RAS_BLOCK__MMHUB:
890	case AMDGPU_RAS_BLOCK__PCIE_BIF:
891	ret = psp_ras_trigger_error(&adev->psp, &block_info);
892	break;
893	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
894	ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
895	break;
896	default:
897	dev_info(adev->dev, "%s error injection is not supported yet\n",do { } while(0)
898	ras_block_str(info->head.block))do { } while(0);
899	ret = -EINVAL22;
900	}
901
902	amdgpu_ras_parse_status_code(adev,
903	"inject",
904	ras_block_str(info->head.block)(ras_block_string[info->head.block]),
905	(enum ta_ras_status)ret);
906
907	return ret;
908	}
909
/*
 * Placeholder for curing an injected error.
 *
 * The psp fw has no cure interface for now, so both arguments are
 * ignored and 0 is always returned.
 */
int amdgpu_ras_error_cure(struct amdgpu_device *adev,
		struct ras_cure_if *info)
{
	return 0;
}
916
917	/* get the total error counts on all IPs */
918	unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
919	bool_Bool is_ce)
920	{
921	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
922	struct ras_manager *obj;
923	struct ras_err_data data = {0, 0};
924
925	if (!con)
926	return 0;
927
928	list_for_each_entry(obj, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}); &obj->node != (&con->head); obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj-> node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof (__typeof(obj), node) );})) {
929	struct ras_query_if info = {
930	.head = obj->head,
931	};
932
933	if (amdgpu_ras_error_query(adev, &info))
934	return 0;
935
936	data.ce_count += info.ce_count;
937	data.ue_count += info.ue_count;
938	}
939
940	return is_ce ? data.ce_count : data.ue_count;
941	}
942	/* query/inject/cure end */
943
944	#ifdef __linux__
945
946	/* sysfs begin */
947
948	static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
949	struct ras_badpage *bps, unsigned int count);
950
951	static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
952	{
953	switch (flags) {
954	case AMDGPU_RAS_RETIRE_PAGE_RESERVED:
955	return "R";
956	case AMDGPU_RAS_RETIRE_PAGE_PENDING:
957	return "P";
958	case AMDGPU_RAS_RETIRE_PAGE_FAULT:
959	default:
960	return "F";
961	};
962	}
963
964	/**
965	* DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
966	*
967	* It allows user to read the bad pages of vram on the gpu through
968	* /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
969	*
970	* It outputs multiple lines, and each line stands for one gpu page.
971	*
972	* The format of one line is below,
973	* gpu pfn : gpu page size : flags
974	*
975	* gpu pfn and gpu page size are printed in hex format.
976	* flags can be one of below character,
977	*
978	* R: reserved, this gpu page is reserved and not able to use.
979	*
980	* P: pending for reserve, this gpu page is marked as bad, will be reserved
981	* in next window of page_reserve.
982	*
983	* F: unable to reserve. this gpu page can't be reserved due to some reasons.
984	*
985	* Examples:
986	*
987	* .. code-block:: bash
988	*
989	* 0x00000001 : 0x00001000 : R
990	* 0x00000002 : 0x00001000 : P
991	*
992	*/
993
994	static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
995	struct kobject kobj, struct bin_attribute attr,
996	char *buf, loff_t ppos, size_t count)
997	{
998	struct amdgpu_ras *con =
999	container_of(attr, struct amdgpu_ras, badpages_attr)({ const __typeof( ((struct amdgpu_ras )0)->badpages_attr ) __mptr = (attr); (struct amdgpu_ras )( (char )__mptr - __builtin_offsetof (struct amdgpu_ras, badpages_attr) );});
1000	struct amdgpu_device *adev = con->adev;
1001	const unsigned int element_size =
1002	sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
1003	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
1004	unsigned int end = div64_ul(ppos + count - 1, element_size);
1005	ssize_t s = 0;
1006	struct ras_badpage bps = NULL((void )0);
1007	unsigned int bps_count = 0;
1008
1009	memset(buf, 0, count)__builtin_memset((buf), (0), (count));
1010
1011	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
1012	return 0;
1013
1014	for (; start < end && start < bps_count; start++)
1015	s += scnprintf(&buf[s], element_size + 1,snprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n" , bps[start].bp, bps[start].size, amdgpu_ras_badpage_flags_str (bps[start].flags))
1016	"0x%08x : 0x%08x : %1s\n",snprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n" , bps[start].bp, bps[start].size, amdgpu_ras_badpage_flags_str (bps[start].flags))
1017	bps[start].bp,snprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n" , bps[start].bp, bps[start].size, amdgpu_ras_badpage_flags_str (bps[start].flags))
1018	bps[start].size,snprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n" , bps[start].bp, bps[start].size, amdgpu_ras_badpage_flags_str (bps[start].flags))
1019	amdgpu_ras_badpage_flags_str(bps[start].flags))snprintf(&buf[s], element_size + 1, "0x%08x : 0x%08x : %1s\n" , bps[start].bp, bps[start].size, amdgpu_ras_badpage_flags_str (bps[start].flags));
1020
1021	kfree(bps);
1022
1023	return s;
1024	}
1025
1026	static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
1027	struct device_attribute attr, char buf)
1028	{
1029	struct amdgpu_ras *con =
1030	container_of(attr, struct amdgpu_ras, features_attr)({ const __typeof( ((struct amdgpu_ras )0)->features_attr ) __mptr = (attr); (struct amdgpu_ras )( (char )__mptr - __builtin_offsetof (struct amdgpu_ras, features_attr) );});
1031
1032	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features)snprintf(buf, (1 << 12), "feature mask: 0x%x\n", con-> features);
1033	}
1034
1035	static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
1036	{
1037	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1038
1039	sysfs_remove_file_from_group(&adev->dev->kobj,
1040	&con->badpages_attr.attr,
1041	RAS_FS_NAME);
1042	}
1043
1044	static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
1045	{
1046	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1047	struct attribute *attrs[] = {
1048	&con->features_attr.attr,
1049	NULL((void *)0)
1050	};
1051	struct attribute_group group = {
1052	.name = RAS_FS_NAME,
1053	.attrs = attrs,
1054	};
1055
1056	sysfs_remove_group(&adev->dev->kobj, &group);
1057
1058	return 0;
1059	}
1060
1061	#endif /* __linux__ */
1062
1063	int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
1064	struct ras_fs_if *head)
1065	{
1066	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
1067
1068	if (!obj \|\| obj->attr_inuse)
1069	return -EINVAL22;
1070
1071	STUB()do { printf("%s: stub\n", __func__); } while(0);
1072	return -ENOSYS78;
1073	#ifdef notyet
1074	get_obj(obj)do { (obj)->use++; } while (0);
1075
1076	memcpy(obj->fs_data.sysfs_name,__builtin_memcpy((obj->fs_data.sysfs_name), (head->sysfs_name ), (sizeof(obj->fs_data.sysfs_name)))
1077	head->sysfs_name,__builtin_memcpy((obj->fs_data.sysfs_name), (head->sysfs_name ), (sizeof(obj->fs_data.sysfs_name)))
1078	sizeof(obj->fs_data.sysfs_name))__builtin_memcpy((obj->fs_data.sysfs_name), (head->sysfs_name ), (sizeof(obj->fs_data.sysfs_name)));
1079
1080	obj->sysfs_attr = (struct device_attribute){
1081	.attr = {
1082	.name = obj->fs_data.sysfs_name,
1083	.mode = S_IRUGO,
1084	},
1085	.show = amdgpu_ras_sysfs_read,
1086	};
1087	sysfs_attr_init(&obj->sysfs_attr.attr);
1088
1089	if (sysfs_add_file_to_group(&adev->dev->kobj,
1090	&obj->sysfs_attr.attr,
1091	RAS_FS_NAME)) {
1092	put_obj(obj);
1093	return -EINVAL22;
1094	}
1095
1096	obj->attr_inuse = 1;
1097
1098	return 0;
1099	#endif
1100	}
1101
1102	int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
1103	struct ras_common_if *head)
1104	{
1105	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1106
1107	if (!obj \|\| !obj->attr_inuse)
1108	return -EINVAL22;
1109
1110	sysfs_remove_file_from_group(&adev->dev->kobj,
1111	&obj->sysfs_attr.attr,
1112	RAS_FS_NAME);
1113	obj->attr_inuse = 0;
1114	put_obj(obj);
1115
1116	return 0;
1117	}
1118
1119	#ifdef __linux__
1120
1121	static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
1122	{
1123	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1124	struct ras_manager obj, tmp;
1125
1126	list_for_each_entry_safe(obj, tmp, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}), tmp = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj->node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof(__typeof(obj), node) );}); &obj-> node != (&con->head); obj = tmp, tmp = ({ const __typeof ( ((__typeof(tmp) )0)->node ) __mptr = (tmp->node.next ); (__typeof(tmp) )( (char )__mptr - __builtin_offsetof(__typeof (*tmp), node) );})) {
1127	amdgpu_ras_sysfs_remove(adev, &obj->head);
1128	}
1129
1130	if (amdgpu_bad_page_threshold != 0)
1131	amdgpu_ras_sysfs_remove_bad_page_node(adev);
1132
1133	amdgpu_ras_sysfs_remove_feature_node(adev);
1134
1135	return 0;
1136	}
1137	/* sysfs end */
1138
1139	/**
1140	* DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors
1141	*
1142	* Normally when there is an uncorrectable error, the driver will reset
1143	* the GPU to recover. However, in the event of an unrecoverable error,
1144	* the driver provides an interface to reboot the system automatically
1145	* in that event.
1146	*
1147	* The following file in debugfs provides that interface:
1148	* /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
1149	*
1150	* Usage:
1151	*
1152	* .. code-block:: bash
1153	*
1154	* echo true > .../ras/auto_reboot
1155	*
1156	*/
1157	/* debugfs begin */
1158	static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
1159	{
1160	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1161	struct drm_minor *minor = adev_to_drm(adev)->primary;
1162
1163	con->dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
1164	debugfs_create_file("ras_ctrl", S_IWUGO \| S_IRUGO, con->dir,
1165	adev, &amdgpu_ras_debugfs_ctrl_ops);
1166	debugfs_create_file("ras_eeprom_reset", S_IWUGO \| S_IRUGO, con->dir,
1167	adev, &amdgpu_ras_debugfs_eeprom_ops);
1168
1169	/*
1170	* After one uncorrectable error happens, usually GPU recovery will
1171	* be scheduled. But due to the known problem in GPU recovery failing
1172	* to bring GPU back, below interface provides one direct way to
1173	* user to reboot system automatically in such case within
1174	* ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
1175	* will never be called.
1176	*/
1177	debugfs_create_bool("auto_reboot", S_IWUGO \| S_IRUGO, con->dir,
1178	&con->reboot);
1179
1180	/*
1181	* User could set this not to clean up hardware's error count register
1182	* of RAS IPs during ras recovery.
1183	*/
1184	debugfs_create_bool("disable_ras_err_cnt_harvest", 0644,
1185	con->dir, &con->disable_ras_err_cnt_harvest);
1186	}
1187
1188	static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
1189	struct ras_fs_if *head)
1190	{
1191	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1192	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
1193
1194	if (!obj \|\| obj->ent)
1195	return;
1196
1197	get_obj(obj)do { (obj)->use++; } while (0);
1198
1199	memcpy(obj->fs_data.debugfs_name,__builtin_memcpy((obj->fs_data.debugfs_name), (head->debugfs_name ), (sizeof(obj->fs_data.debugfs_name)))
1200	head->debugfs_name,__builtin_memcpy((obj->fs_data.debugfs_name), (head->debugfs_name ), (sizeof(obj->fs_data.debugfs_name)))
1201	sizeof(obj->fs_data.debugfs_name))__builtin_memcpy((obj->fs_data.debugfs_name), (head->debugfs_name ), (sizeof(obj->fs_data.debugfs_name)));
1202
1203	obj->ent = debugfs_create_file(obj->fs_data.debugfs_name,
1204	S_IWUGO \| S_IRUGO, con->dir, obj,
1205	&amdgpu_ras_debugfs_ops);
1206	}
1207
1208	void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
1209	{
1210	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1211	struct ras_manager *obj;
1212	struct ras_fs_if fs_info;
1213
1214	/*
1215	* it won't be called in resume path, no need to check
1216	* suspend and gpu reset status
1217	*/
1218	if (!IS_ENABLED(CONFIG_DEBUG_FS)0 \|\| !con)
1219	return;
1220
1221	amdgpu_ras_debugfs_create_ctrl_node(adev);
1222
1223	list_for_each_entry(obj, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}); &obj->node != (&con->head); obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj-> node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof (__typeof(obj), node) );})) {
1224	if (amdgpu_ras_is_supported(adev, obj->head.block) &&
1225	(obj->attr_inuse == 1)) {
1226	sprintf(fs_info.debugfs_name, "%s_err_inject",
1227	ras_block_str(obj->head.block)(ras_block_string[obj->head.block]));
1228	fs_info.head = obj->head;
1229	amdgpu_ras_debugfs_create(adev, &fs_info);
1230	}
1231	}
1232	}
1233
1234	static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev,
1235	struct ras_common_if *head)
1236	{
1237	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
1238
1239	if (!obj \|\| !obj->ent)
1240	return;
1241
1242	obj->ent = NULL((void *)0);
1243	put_obj(obj);
1244	}
1245
1246	static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev)
1247	{
1248	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1249	struct ras_manager obj, tmp;
1250
1251	list_for_each_entry_safe(obj, tmp, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}), tmp = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj->node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof(__typeof(obj), node) );}); &obj-> node != (&con->head); obj = tmp, tmp = ({ const __typeof ( ((__typeof(tmp) )0)->node ) __mptr = (tmp->node.next ); (__typeof(tmp) )( (char )__mptr - __builtin_offsetof(__typeof (*tmp), node) );})) {
1252	amdgpu_ras_debugfs_remove(adev, &obj->head);
1253	}
1254
1255	con->dir = NULL((void *)0);
1256	}
1257	/* debugfs end */
1258
1259	/* ras fs */
1260	static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO,
1261	amdgpu_ras_sysfs_badpages_read, NULL((void *)0), 0);
1262	#endif /* __linux__ */
1263	static DEVICE_ATTR(features, S_IRUGO,struct device_attribute dev_attr_features
1264	amdgpu_ras_sysfs_features_read, NULL)struct device_attribute dev_attr_features;
1265	static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
1266	{
1267	#ifdef __linux__
1268	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1269	struct attribute_group group = {
1270	.name = RAS_FS_NAME,
1271	};
1272	struct attribute *attrs[] = {
1273	&con->features_attr.attr,
1274	NULL((void *)0)
1275	};
1276	struct bin_attribute *bin_attrs[] = {
1277	NULL((void *)0),
1278	NULL((void *)0),
1279	};
1280	int r;
1281
1282	/* add features entry */
1283	con->features_attr = dev_attr_features;
1284	group.attrs = attrs;
1285	sysfs_attr_init(attrs[0]);
1286
1287	if (amdgpu_bad_page_threshold != 0) {
1288	/* add bad_page_features entry */
1289	bin_attr_gpu_vram_bad_pages.private = NULL((void *)0);
1290	con->badpages_attr = bin_attr_gpu_vram_bad_pages;
1291	bin_attrs[0] = &con->badpages_attr;
1292	group.bin_attrs = bin_attrs;
1293	sysfs_bin_attr_init(bin_attrs[0]);
1294	}
1295
1296	r = sysfs_create_group(&adev->dev->kobj, &group)0;
1297	if (r)
1298	dev_err(adev->dev, "Failed to create RAS sysfs group!")printf("drm:pid%d:%s ERROR " "Failed to create RAS sysfs group!" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1299	#endif
1300
1301	return 0;
1302	}
1303
1304	static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
1305	{
1306	#ifdef __linux__
1307	if (IS_ENABLED(CONFIG_DEBUG_FS)0)
1308	amdgpu_ras_debugfs_remove_all(adev);
1309	amdgpu_ras_sysfs_remove_all(adev);
1310	#endif
1311	return 0;
1312	}
1313	/* ras fs end */
1314
1315	/* ih begin */
1316	static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
1317	{
1318	struct ras_ih_data *data = &obj->ih_data;
1319	struct amdgpu_iv_entry entry;
1320	int ret;
1321	struct ras_err_data err_data = {0, 0, 0, NULL((void *)0)};
1322
1323	while (data->rptr != data->wptr) {
1324	rmb()do { __asm volatile("lfence" ::: "memory"); } while (0);
1325	memcpy(&entry, &data->ring[data->rptr],__builtin_memcpy((&entry), (&data->ring[data->rptr ]), (data->element_size))
1326	data->element_size)__builtin_memcpy((&entry), (&data->ring[data->rptr ]), (data->element_size));
1327
1328	wmb()do { __asm volatile("sfence" ::: "memory"); } while (0);
1329	data->rptr = (data->aligned_element_size +
1330	data->rptr) % data->ring_size;
1331
1332	/* Let IP handle its data, maybe we need get the output
1333	* from the callback to udpate the error type/count, etc
1334	*/
1335	if (data->cb) {
1336	ret = data->cb(obj->adev, &err_data, &entry);
1337	/* ue will trigger an interrupt, and in that case
1338	* we need do a reset to recovery the whole system.
1339	* But leave IP do that recovery, here we just dispatch
1340	* the error.
1341	*/
1342	if (ret == AMDGPU_RAS_SUCCESS) {
1343	/* these counts could be left as 0 if
1344	* some blocks do not count error number
1345	*/
1346	obj->err_data.ue_count += err_data.ue_count;
1347	obj->err_data.ce_count += err_data.ce_count;
1348	}
1349	}
1350	}
1351	}
1352
1353	static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
1354	{
1355	struct ras_ih_data *data =
1356	container_of(work, struct ras_ih_data, ih_work)({ const __typeof( ((struct ras_ih_data )0)->ih_work ) __mptr = (work); (struct ras_ih_data )( (char )__mptr - __builtin_offsetof (struct ras_ih_data, ih_work) );});
1357	struct ras_manager *obj =
1358	container_of(data, struct ras_manager, ih_data)({ const __typeof( ((struct ras_manager )0)->ih_data ) __mptr = (data); (struct ras_manager )( (char )__mptr - __builtin_offsetof (struct ras_manager, ih_data) );});
1359
1360	amdgpu_ras_interrupt_handler(obj);
1361	}
1362
1363	int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
1364	struct ras_dispatch_if *info)
1365	{
1366	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1367	struct ras_ih_data *data = &obj->ih_data;
1368
1369	if (!obj)
1370	return -EINVAL22;
1371
1372	if (data->inuse == 0)
1373	return 0;
1374
1375	/* Might be overflow... */
1376	memcpy(&data->ring[data->wptr], info->entry,__builtin_memcpy((&data->ring[data->wptr]), (info-> entry), (data->element_size))
1377	data->element_size)__builtin_memcpy((&data->ring[data->wptr]), (info-> entry), (data->element_size));
1378
1379	wmb()do { __asm volatile("sfence" ::: "memory"); } while (0);
1380	data->wptr = (data->aligned_element_size +
1381	data->wptr) % data->ring_size;
1382
1383	schedule_work(&data->ih_work);
1384
1385	return 0;
1386	}
1387
1388	int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev,
1389	struct ras_ih_if *info)
1390	{
1391	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1392	struct ras_ih_data *data;
1393
1394	if (!obj)
1395	return -EINVAL22;
1396
1397	data = &obj->ih_data;
1398	if (data->inuse == 0)
1399	return 0;
1400
1401	cancel_work_sync(&data->ih_work);
1402
1403	kfree(data->ring);
1404	memset(data, 0, sizeof(data))__builtin_memset((data), (0), (sizeof(data)));
1405	put_obj(obj);
1406
1407	return 0;
1408	}
1409
1410	int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev,
1411	struct ras_ih_if *info)
1412	{
1413	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
1414	struct ras_ih_data *data;
1415
1416	if (!obj) {
1417	/* in case we registe the IH before enable ras feature */
1418	obj = amdgpu_ras_create_obj(adev, &info->head);
1419	if (!obj)
1420	return -EINVAL22;
1421	} else
1422	get_obj(obj)do { (obj)->use++; } while (0);
1423
1424	data = &obj->ih_data;
1425	/* add the callback.etc */
1426	*data = (struct ras_ih_data) {
1427	.inuse = 0,
1428	.cb = info->cb,
1429	.element_size = sizeof(struct amdgpu_iv_entry),
1430	.rptr = 0,
1431	.wptr = 0,
1432	};
1433
1434	INIT_WORK(&data->ih_work, amdgpu_ras_interrupt_process_handler);
1435
1436	data->aligned_element_size = roundup2(data->element_size, 8)(((data->element_size) + ((8) - 1)) & (~((__typeof(data ->element_size))(8) - 1)));
1437	/* the ring can store 64 iv entries. */
1438	data->ring_size = 64 * data->aligned_element_size;
1439	data->ring = kmalloc(data->ring_size, GFP_KERNEL(0x0001 \| 0x0004));
1440	if (!data->ring) {
1441	put_obj(obj);
1442	return -ENOMEM12;
1443	}
1444
1445	/* IH is ready */
1446	data->inuse = 1;
1447
1448	return 0;
1449	}
1450
1451	static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
1452	{
1453	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1454	struct ras_manager obj, tmp;
1455
1456	list_for_each_entry_safe(obj, tmp, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}), tmp = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj->node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof(__typeof(obj), node) );}); &obj-> node != (&con->head); obj = tmp, tmp = ({ const __typeof ( ((__typeof(tmp) )0)->node ) __mptr = (tmp->node.next ); (__typeof(tmp) )( (char )__mptr - __builtin_offsetof(__typeof (*tmp), node) );})) {
1457	struct ras_ih_if info = {
1458	.head = obj->head,
1459	};
1460	amdgpu_ras_interrupt_remove_handler(adev, &info);
1461	}
1462
1463	return 0;
1464	}
1465	/* ih end */
1466
1467	/* traversal all IPs except NBIO to query error counter */
1468	static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
1469	{
1470	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1471	struct ras_manager *obj;
1472
1473	if (!con)
1474	return;
1475
1476	list_for_each_entry(obj, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}); &obj->node != (&con->head); obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj-> node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof (__typeof(obj), node) );})) {
1477	struct ras_query_if info = {
1478	.head = obj->head,
1479	};
1480
1481	/*
1482	* PCIE_BIF IP has one different isr by ras controller
1483	* interrupt, the specific ras counter query will be
1484	* done in that isr. So skip such block from common
1485	* sync flood interrupt isr calling.
1486	*/
1487	if (info.head.block == AMDGPU_RAS_BLOCK__PCIE_BIF)
1488	continue;
1489
1490	amdgpu_ras_error_query(adev, &info);
1491	}
1492	}
1493
1494	/* Parse RdRspStatus and WrRspStatus */
1495	void amdgpu_ras_error_status_query(struct amdgpu_device *adev,
1496	struct ras_query_if *info)
1497	{
1498	/*
1499	* Only two block need to query read/write
1500	* RspStatus at current state
1501	*/
1502	switch (info->head.block) {
1503	case AMDGPU_RAS_BLOCK__GFX:
1504	if (adev->gfx.funcs->query_ras_error_status)
1505	adev->gfx.funcs->query_ras_error_status(adev);
1506	break;
1507	case AMDGPU_RAS_BLOCK__MMHUB:
1508	if (adev->mmhub.funcs->query_ras_error_status)
1509	adev->mmhub.funcs->query_ras_error_status(adev);
1510	break;
1511	default:
1512	break;
1513	}
1514	}
1515
1516	static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
1517	{
1518	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1519	struct ras_manager *obj;
1520
1521	if (!con)
1522	return;
1523
1524	list_for_each_entry(obj, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}); &obj->node != (&con->head); obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj-> node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof (__typeof(obj), node) );})) {
1525	struct ras_query_if info = {
1526	.head = obj->head,
1527	};
1528
1529	amdgpu_ras_error_status_query(adev, &info);
1530	}
1531	}
1532
1533	/* recovery begin */
1534
1535	/* return 0 on success.
1536	* caller need free bps.
1537	*/
1538	static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
1539	struct ras_badpage *bps, unsigned int count)
1540	{
1541	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1542	struct ras_err_handler_data *data;
1543	int i = 0;
1544	int ret = 0;
1545
1546	if (!con \|\| !con->eh_data \|\| !bps \|\| !count)
1547	return -EINVAL22;
1548
1549	mutex_lock(&con->recovery_lock)rw_enter_write(&con->recovery_lock);
1550	data = con->eh_data;
1551	if (!data \|\| data->count == 0) {
1552	bps = NULL((void )0);
1553	ret = -EINVAL22;
1554	goto out;
1555	}
1556
1557	bps = kmalloc(sizeof(struct ras_badpage) data->count, GFP_KERNEL(0x0001 \| 0x0004));
1558	if (!*bps) {
1559	ret = -ENOMEM12;
1560	goto out;
1561	}
1562
1563	for (; i < data->count; i++) {
1564	(*bps)[i] = (struct ras_badpage){
1565	.bp = data->bps[i].retired_page,
1566	.size = AMDGPU_GPU_PAGE_SIZE4096,
1567	.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
1568	};
1569
1570	if (data->last_reserved <= i)
1571	(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
1572	else if (data->bps_bo[i] == NULL((void *)0))
1573	(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
1574	}
1575
1576	*count = data->count;
1577	out:
1578	mutex_unlock(&con->recovery_lock)rw_exit_write(&con->recovery_lock);
1579	return ret;
1580	}
1581
1582	static void amdgpu_ras_do_recovery(struct work_struct *work)
1583	{
1584	struct amdgpu_ras *ras =
1585	container_of(work, struct amdgpu_ras, recovery_work)({ const __typeof( ((struct amdgpu_ras )0)->recovery_work ) __mptr = (work); (struct amdgpu_ras )( (char )__mptr - __builtin_offsetof (struct amdgpu_ras, recovery_work) );});
1586	struct amdgpu_device remote_adev = NULL((void )0);
1587	struct amdgpu_device *adev = ras->adev;
1588	struct list_head device_list, device_list_handle = NULL((void )0);
1589
1590	if (!ras->disable_ras_err_cnt_harvest) {
1591	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
1592
1593	/* Build list of devices to query RAS related errors */
1594	if (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
1595	device_list_handle = &hive->device_list;
1596	} else {
1597	INIT_LIST_HEAD(&device_list);
1598	list_add_tail(&adev->gmc.xgmi.head, &device_list);
1599	device_list_handle = &device_list;
1600	}
1601
1602	list_for_each_entry(remote_adev,for (remote_adev = ({ const __typeof( ((__typeof(remote_adev ) )0)->gmc.xgmi.head ) __mptr = ((device_list_handle)-> next); (__typeof(remote_adev) )( (char )__mptr - __builtin_offsetof (__typeof(remote_adev), gmc.xgmi.head) );}); &remote_adev ->gmc.xgmi.head != (device_list_handle); remote_adev = ({ const __typeof( ((__typeof(remote_adev) )0)->gmc.xgmi.head ) __mptr = (remote_adev->gmc.xgmi.head.next); (__typeof(remote_adev ) )( (char )__mptr - __builtin_offsetof(__typeof(remote_adev ), gmc.xgmi.head) );}))
1603	device_list_handle, gmc.xgmi.head)for (remote_adev = ({ const __typeof( ((__typeof(remote_adev ) )0)->gmc.xgmi.head ) __mptr = ((device_list_handle)-> next); (__typeof(remote_adev) )( (char )__mptr - __builtin_offsetof (__typeof(remote_adev), gmc.xgmi.head) );}); &remote_adev ->gmc.xgmi.head != (device_list_handle); remote_adev = ({ const __typeof( ((__typeof(remote_adev) )0)->gmc.xgmi.head ) __mptr = (remote_adev->gmc.xgmi.head.next); (__typeof(remote_adev ) )( (char )__mptr - __builtin_offsetof(__typeof(remote_adev ), gmc.xgmi.head) );})) {
1604	amdgpu_ras_query_err_status(remote_adev);
1605	amdgpu_ras_log_on_err_counter(remote_adev);
1606	}
1607
1608	amdgpu_put_xgmi_hive(hive);
1609	}
1610
1611	if (amdgpu_device_should_recover_gpu(ras->adev))
1612	amdgpu_device_gpu_recover(ras->adev, NULL((void *)0));
1613	atomic_set(&ras->in_recovery, 0)({ typeof((&ras->in_recovery)) __tmp = ((0)); (volatile typeof((&ras->in_recovery)) )&(*(&ras->in_recovery )) = __tmp; __tmp; });
1614	}
1615
1616	/* alloc/realloc bps array */
1617	static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
1618	struct ras_err_handler_data *data, int pages)
1619	{
1620	unsigned int old_space = data->count + data->space_left;
1621	unsigned int new_space = old_space + pages;
1622	unsigned int align_space = roundup2(new_space, 512)(((new_space) + ((512) - 1)) & (~((__typeof(new_space))(512 ) - 1)));
1623	void bps = kmalloc(align_space sizeof(*data->bps), GFP_KERNEL(0x0001 \| 0x0004));
1624	struct amdgpu_bo **bps_bo =
1625	kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL(0x0001 \| 0x0004));
1626
1627	if (!bps \|\| !bps_bo) {
1628	kfree(bps);
1629	kfree(bps_bo);
1630	return -ENOMEM12;
1631	}
1632
1633	if (data->bps) {
1634	memcpy(bps, data->bps,__builtin_memcpy((bps), (data->bps), (data->count * sizeof (*data->bps)))
1635	data->count * sizeof(data->bps))__builtin_memcpy((bps), (data->bps), (data->count sizeof (*data->bps)));
1636	kfree(data->bps);
1637	}
1638	if (data->bps_bo) {
1639	memcpy(bps_bo, data->bps_bo,__builtin_memcpy((bps_bo), (data->bps_bo), (data->count * sizeof(*data->bps_bo)))
1640	data->count * sizeof(data->bps_bo))__builtin_memcpy((bps_bo), (data->bps_bo), (data->count sizeof(*data->bps_bo)));
1641	kfree(data->bps_bo);
1642	}
1643
1644	data->bps = bps;
1645	data->bps_bo = bps_bo;
1646	data->space_left += align_space - old_space;
1647	return 0;
1648	}
1649
1650	/* it deal with vram only. */
1651	int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
1652	struct eeprom_table_record *bps, int pages)
1653	{
1654	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1655	struct ras_err_handler_data *data;
1656	int ret = 0;
1657
1658	if (!con \|\| !con->eh_data \|\| !bps \|\| pages <= 0)
1659	return 0;
1660
1661	mutex_lock(&con->recovery_lock)rw_enter_write(&con->recovery_lock);
1662	data = con->eh_data;
1663	if (!data)
1664	goto out;
1665
1666	if (data->space_left <= pages)
1667	if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
1668	ret = -ENOMEM12;
1669	goto out;
1670	}
1671
1672	memcpy(&data->bps[data->count], bps, pages * sizeof(data->bps))__builtin_memcpy((&data->bps[data->count]), (bps), ( pages sizeof(*data->bps)));
1673	data->count += pages;
1674	data->space_left -= pages;
1675
1676	out:
1677	mutex_unlock(&con->recovery_lock)rw_exit_write(&con->recovery_lock);
1678
1679	return ret;
1680	}
1681
1682	/*
1683	* write error record array to eeprom, the function should be
1684	* protected by recovery_lock
1685	*/
1686	static int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
1687	{
1688	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1689	struct ras_err_handler_data *data;
1690	struct amdgpu_ras_eeprom_control *control;
1691	int save_count;
1692
1693	if (!con \|\| !con->eh_data)
1694	return 0;
1695
1696	control = &con->eeprom_control;
1697	data = con->eh_data;
1698	save_count = data->count - control->num_recs;
1699	/* only new entries are saved */
1700	if (save_count > 0) {
1701	if (amdgpu_ras_eeprom_process_recods(control,
1702	&data->bps[control->num_recs],
1703	true1,
1704	save_count)) {
1705	dev_err(adev->dev, "Failed to save EEPROM table data!")printf("drm:pid%d:%s ERROR " "Failed to save EEPROM table data!" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1706	return -EIO5;
1707	}
1708
1709	dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count)do { } while(0);
1710	}
1711
1712	return 0;
1713	}
1714
1715	/*
1716	* read error record array in eeprom and reserve enough space for
1717	* storing new bad pages
1718	*/
1719	static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
1720	{
1721	struct amdgpu_ras_eeprom_control *control =
1722	&adev->psp.ras.ras->eeprom_control;
1723	struct eeprom_table_record bps = NULL((void )0);
1724	int ret = 0;
1725
1726	/* no bad page record, skip eeprom access */
1727	if (!control->num_recs \|\| (amdgpu_bad_page_threshold == 0))
1728	return ret;
1729
1730	bps = kcalloc(control->num_recs, sizeof(*bps), GFP_KERNEL(0x0001 \| 0x0004));
1731	if (!bps)
1732	return -ENOMEM12;
1733
1734	if (amdgpu_ras_eeprom_process_recods(control, bps, false0,
1735	control->num_recs)) {
1736	dev_err(adev->dev, "Failed to load EEPROM table records!")printf("drm:pid%d:%s ERROR " "Failed to load EEPROM table records!" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1737	ret = -EIO5;
1738	goto out;
1739	}
1740
1741	ret = amdgpu_ras_add_bad_pages(adev, bps, control->num_recs);
1742
1743	out:
1744	kfree(bps);
1745	return ret;
1746	}
1747
1748	/*
1749	* check if an address belongs to bad page
1750	*
1751	* Note: this check is only for umc block
1752	*/
1753	static bool_Bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
1754	uint64_t addr)
1755	{
1756	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1757	struct ras_err_handler_data *data;
1758	int i;
1759	bool_Bool ret = false0;
1760
1761	if (!con \|\| !con->eh_data)
1762	return ret;
1763
1764	mutex_lock(&con->recovery_lock)rw_enter_write(&con->recovery_lock);
1765	data = con->eh_data;
1766	if (!data)
1767	goto out;
1768
1769	addr >>= AMDGPU_GPU_PAGE_SHIFT12;
1770	for (i = 0; i < data->count; i++)
1771	if (addr == data->bps[i].retired_page) {
1772	ret = true1;
1773	goto out;
1774	}
1775
1776	out:
1777	mutex_unlock(&con->recovery_lock)rw_exit_write(&con->recovery_lock);
1778	return ret;
1779	}
1780
1781	static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
1782	uint32_t max_length)
1783	{
1784	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1785	int tmp_threshold = amdgpu_bad_page_threshold;
1786	u64 val;
1787
1788	/*
1789	* Justification of value bad_page_cnt_threshold in ras structure
1790	*
1791	* Generally, -1 <= amdgpu_bad_page_threshold <= max record length
1792	* in eeprom, and introduce two scenarios accordingly.
1793	*
1794	* Bad page retirement enablement:
1795	* - If amdgpu_bad_page_threshold = -1,
1796	* bad_page_cnt_threshold = typical value by formula.
1797	*
1798	* - When the value from user is 0 < amdgpu_bad_page_threshold <
1799	* max record length in eeprom, use it directly.
1800	*
1801	* Bad page retirement disablement:
1802	* - If amdgpu_bad_page_threshold = 0, bad page retirement
1803	* functionality is disabled, and bad_page_cnt_threshold will
1804	* take no effect.
1805	*/
1806
1807	if (tmp_threshold < -1)
1808	tmp_threshold = -1;
1809	else if (tmp_threshold > max_length)
1810	tmp_threshold = max_length;
1811
1812	if (tmp_threshold == -1) {
1813	val = adev->gmc.mc_vram_size;
1814	do_div(val, RAS_BAD_PAGE_RATE)({ uint32_t __base = ((100 * 1024 * 1024ULL)); uint32_t __rem = ((uint64_t)(val)) % __base; (val) = ((uint64_t)(val)) / __base ; __rem; });
1815	con->bad_page_cnt_threshold = min(lower_32_bits(val),(((((u32)(val)))<(max_length))?(((u32)(val))):(max_length) )
1816	max_length)(((((u32)(val)))<(max_length))?(((u32)(val))):(max_length) );
1817	} else {
1818	con->bad_page_cnt_threshold = tmp_threshold;
1819	}
1820	}
1821
1822	/* called in gpu recovery/init */
1823	int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
1824	{
1825	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1826	struct ras_err_handler_data *data;
1827	uint64_t bp;
1828	struct amdgpu_bo bo = NULL((void )0);
1829	int i, ret = 0;
1830
1831	/* Not reserve bad page when amdgpu_bad_page_threshold == 0. */
1832	if (!con \|\| !con->eh_data \|\| (amdgpu_bad_page_threshold == 0))
1833	return 0;
1834
1835	mutex_lock(&con->recovery_lock)rw_enter_write(&con->recovery_lock);
1836	data = con->eh_data;
1837	if (!data)
1838	goto out;
1839	/* reserve vram at driver post stage. */
1840	for (i = data->last_reserved; i < data->count; i++) {
1841	bp = data->bps[i].retired_page;
1842
1843	/* There are two cases of reserve error should be ignored:
1844	* 1) a ras bad page has been allocated (used by someone);
1845	* 2) a ras bad page has been reserved (duplicate error injection
1846	* for one page);
1847	*/
1848	if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT12,
1849	AMDGPU_GPU_PAGE_SIZE4096,
1850	AMDGPU_GEM_DOMAIN_VRAM0x4,
1851	&bo, NULL((void *)0)))
1852	dev_warn(adev->dev, "RAS WARN: reserve vram for "printf("drm:pid%d:%s WARNING " "RAS WARN: reserve vram for " "retired page %llx fail\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__ , bp)
1853	"retired page %llx fail\n", bp)printf("drm:pid%d:%s WARNING " "RAS WARN: reserve vram for " "retired page %llx fail\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__ , bp);
1854
1855	data->bps_bo[i] = bo;
1856	data->last_reserved = i + 1;
1857	bo = NULL((void *)0);
1858	}
1859
1860	/* continue to save bad pages to eeprom even reesrve_vram fails */
1861	ret = amdgpu_ras_save_bad_pages(adev);
1862	out:
1863	mutex_unlock(&con->recovery_lock)rw_exit_write(&con->recovery_lock);
1864	return ret;
1865	}
1866
1867	/* called when driver unload */
1868	static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
1869	{
1870	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1871	struct ras_err_handler_data *data;
1872	struct amdgpu_bo *bo;
1873	int i;
1874
1875	if (!con \|\| !con->eh_data)
1876	return 0;
1877
1878	mutex_lock(&con->recovery_lock)rw_enter_write(&con->recovery_lock);
1879	data = con->eh_data;
1880	if (!data)
1881	goto out;
1882
1883	for (i = data->last_reserved - 1; i >= 0; i--) {
1884	bo = data->bps_bo[i];
1885
1886	amdgpu_bo_free_kernel(&bo, NULL((void )0), NULL((void )0));
1887
1888	data->bps_bo[i] = bo;
1889	data->last_reserved = i;
1890	}
1891	out:
1892	mutex_unlock(&con->recovery_lock)rw_exit_write(&con->recovery_lock);
1893	return 0;
1894	}
1895
1896	int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
1897	{
1898	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1899	struct ras_err_handler_data **data;
1900	uint32_t max_eeprom_records_len = 0;
1901	bool_Bool exc_err_limit = false0;
1902	int ret;
1903
1904	if (con)
1905	data = &con->eh_data;
1906	else
1907	return 0;
1908
1909	data = kmalloc(sizeof(*data), GFP_KERNEL(0x0001 \| 0x0004) \| __GFP_ZERO0x0008);
1910	if (!*data) {
1911	ret = -ENOMEM12;
	Value stored to 'ret' is never read
1912	goto out;
1913	}
1914
1915	rw_init(&con->recovery_lock, "rasrec")_rw_init_flags(&con->recovery_lock, "rasrec", 0, ((void *)0));
1916	INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery);
1917	atomic_set(&con->in_recovery, 0)({ typeof((&con->in_recovery)) __tmp = ((0)); (volatile typeof((&con->in_recovery)) )&(*(&con->in_recovery )) = __tmp; __tmp; });
1918	con->adev = adev;
1919
1920	max_eeprom_records_len = amdgpu_ras_eeprom_get_record_max_length();
1921	amdgpu_ras_validate_threshold(adev, max_eeprom_records_len);
1922
1923	ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
1924	/*
1925	* This calling fails when exc_err_limit is true or
1926	* ret != 0.
1927	*/
1928	if (exc_err_limit \|\| ret)
1929	goto free;
1930
1931	if (con->eeprom_control.num_recs) {
1932	ret = amdgpu_ras_load_bad_pages(adev);
1933	if (ret)
1934	goto free;
1935	ret = amdgpu_ras_reserve_bad_pages(adev);
1936	if (ret)
1937	goto release;
1938	}
1939
1940	return 0;
1941
1942	release:
1943	amdgpu_ras_release_bad_pages(adev);
1944	free:
1945	kfree((*data)->bps);
1946	kfree((*data)->bps_bo);
1947	kfree(*data);
1948	con->eh_data = NULL((void *)0);
1949	out:
1950	dev_warn(adev->dev, "Failed to initialize ras recovery!\n")printf("drm:pid%d:%s WARNING " "Failed to initialize ras recovery!\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1951
1952	/*
1953	* Except error threshold exceeding case, other failure cases in this
1954	* function would not fail amdgpu driver init.
1955	*/
1956	if (!exc_err_limit)
1957	ret = 0;
1958	else
1959	ret = -EINVAL22;
1960
1961	return ret;
1962	}
1963
1964	static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
1965	{
1966	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1967	struct ras_err_handler_data *data = con->eh_data;
1968
1969	/* recovery_init failed to init it, fini is useless */
1970	if (!data)
1971	return 0;
1972
1973	cancel_work_sync(&con->recovery_work);
1974	amdgpu_ras_release_bad_pages(adev);
1975
1976	mutex_lock(&con->recovery_lock)rw_enter_write(&con->recovery_lock);
1977	con->eh_data = NULL((void *)0);
1978	kfree(data->bps);
1979	kfree(data->bps_bo);
1980	kfree(data);
1981	mutex_unlock(&con->recovery_lock)rw_exit_write(&con->recovery_lock);
1982
1983	return 0;
1984	}
1985	/* recovery end */
1986
1987	/* return 0 if ras will reset gpu and repost.*/
1988	int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
1989	unsigned int block)
1990	{
1991	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
1992
1993	if (!ras)
1994	return -EINVAL22;
1995
1996	ras->flags \|= AMDGPU_RAS_FLAG_INIT_NEED_RESET(0x1 << 1);
1997	return 0;
1998	}
1999
2000	static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
2001	{
2002	if (adev->asic_type != CHIP_VEGA10 &&
2003	adev->asic_type != CHIP_VEGA20 &&
2004	adev->asic_type != CHIP_ARCTURUS &&
2005	adev->asic_type != CHIP_SIENNA_CICHLID)
2006	return 1;
2007	else
2008	return 0;
2009	}
2010
2011	/*
2012	* check hardware's ras ability which will be saved in hw_supported.
2013	* if hardware does not support ras, we can skip some ras initializtion and
2014	* forbid some ras operations from IP.
2015	* if software itself, say boot parameter, limit the ras ability. We still
2016	* need allow IP do some limited operations, like disable. In such case,
2017	* we have to initialize ras as normal. but need check if operation is
2018	* allowed or not in each function.
2019	*/
2020	static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
2021	uint32_t hw_supported, uint32_t supported)
2022	{
2023	*hw_supported = 0;
2024	*supported = 0;
2025
2026	if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) \|\| !adev->is_atom_fw \|\|
2027	amdgpu_ras_check_asic_type(adev))
2028	return;
2029
2030	if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
2031	dev_info(adev->dev, "HBM ECC is active.\n")do { } while(0);
2032	*hw_supported \|= (1 << AMDGPU_RAS_BLOCK__UMC \|
2033	1 << AMDGPU_RAS_BLOCK__DF);
2034	} else
2035	dev_info(adev->dev, "HBM ECC is not presented.\n")do { } while(0);
2036
2037	if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
2038	dev_info(adev->dev, "SRAM ECC is active.\n")do { } while(0);
2039	*hw_supported \|= ~(1 << AMDGPU_RAS_BLOCK__UMC \|
2040	1 << AMDGPU_RAS_BLOCK__DF);
2041	} else
2042	dev_info(adev->dev, "SRAM ECC is not presented.\n")do { } while(0);
2043
2044	/* hw_supported needs to be aligned with RAS block mask. */
2045	*hw_supported &= AMDGPU_RAS_BLOCK_MASK((1ULL << AMDGPU_RAS_BLOCK__LAST) - 1);
2046
2047	*supported = amdgpu_ras_enable == 0 ?
2048	0 : *hw_supported & amdgpu_ras_mask;
2049	adev->ras_features = *supported;
2050	}
2051
2052	int amdgpu_ras_init(struct amdgpu_device *adev)
2053	{
2054	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
2055	int r;
2056
2057	if (con)
2058	return 0;
2059
2060	con = kmalloc(sizeof(struct amdgpu_ras) +
2061	sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNTAMDGPU_RAS_BLOCK__LAST,
2062	GFP_KERNEL(0x0001 \| 0x0004)\|__GFP_ZERO0x0008);
2063	if (!con)
2064	return -ENOMEM12;
2065
2066	con->objs = (struct ras_manager *)(con + 1);
2067
2068	amdgpu_ras_set_context(adev, con)((adev)->psp.ras.ras = (con));
2069
2070	amdgpu_ras_check_supported(adev, &con->hw_supported,
2071	&con->supported);
2072	if (!con->hw_supported \|\| (adev->asic_type == CHIP_VEGA10)) {
2073	r = 0;
2074	goto release_con;
2075	}
2076
2077	con->features = 0;
2078	INIT_LIST_HEAD(&con->head);
2079	/* Might need get this flag from vbios. */
2080	con->flags = RAS_DEFAULT_FLAGS((0x1 << 0));
2081
2082	if (adev->nbio.funcs->init_ras_controller_interrupt) {
2083	r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
2084	if (r)
2085	goto release_con;
2086	}
2087
2088	if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
2089	r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
2090	if (r)
2091	goto release_con;
2092	}
2093
2094	if (amdgpu_ras_fs_init(adev)) {
2095	r = -EINVAL22;
2096	goto release_con;
2097	}
2098
2099	dev_info(adev->dev, "RAS INFO: ras initialized successfully, "do { } while(0)
2100	"hardware ability[%x] ras_mask[%x]\n",do { } while(0)
2101	con->hw_supported, con->supported)do { } while(0);
2102	return 0;
2103	release_con:
2104	amdgpu_ras_set_context(adev, NULL)((adev)->psp.ras.ras = (((void *)0)));
2105	kfree(con);
2106
2107	return r;
2108	}
2109
2110	/* helper function to handle common stuff in ip late init phase */
2111	int amdgpu_ras_late_init(struct amdgpu_device *adev,
2112	struct ras_common_if *ras_block,
2113	struct ras_fs_if *fs_info,
2114	struct ras_ih_if *ih_info)
2115	{
2116	int r;
2117
2118	/* disable RAS feature per IP block if it is not supported */
2119	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
2120	amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
2121	return 0;
2122	}
2123
2124	r = amdgpu_ras_feature_enable_on_boot(adev, ras_block, 1);
2125	if (r) {
2126	if (r == -EAGAIN35) {
2127	/* request gpu reset. will run again */
2128	amdgpu_ras_request_reset_on_boot(adev,
2129	ras_block->block);
2130	return 0;
2131	} else if (adev->in_suspend \|\| amdgpu_in_reset(adev)) {
2132	/* in resume phase, if fail to enable ras,
2133	* clean up all ras fs nodes, and disable ras */
2134	goto cleanup;
2135	} else
2136	return r;
2137	}
2138
2139	/* in resume phase, no need to create ras fs node */
2140	if (adev->in_suspend \|\| amdgpu_in_reset(adev))
2141	return 0;
2142
2143	if (ih_info->cb) {
2144	r = amdgpu_ras_interrupt_add_handler(adev, ih_info);
2145	if (r)
2146	goto interrupt;
2147	}
2148
2149	r = amdgpu_ras_sysfs_create(adev, fs_info);
2150	if (r)
2151	goto sysfs;
2152
2153	return 0;
2154	cleanup:
2155	amdgpu_ras_sysfs_remove(adev, ras_block);
2156	sysfs:
2157	if (ih_info->cb)
2158	amdgpu_ras_interrupt_remove_handler(adev, ih_info);
2159	interrupt:
2160	amdgpu_ras_feature_enable(adev, ras_block, 0);
2161	return r;
2162	}
2163
2164	/* helper function to remove ras fs node and interrupt handler */
2165	void amdgpu_ras_late_fini(struct amdgpu_device *adev,
2166	struct ras_common_if *ras_block,
2167	struct ras_ih_if *ih_info)
2168	{
2169	if (!ras_block \|\| !ih_info)
2170	return;
2171
2172	amdgpu_ras_sysfs_remove(adev, ras_block);
2173	if (ih_info->cb)
2174	amdgpu_ras_interrupt_remove_handler(adev, ih_info);
2175	amdgpu_ras_feature_enable(adev, ras_block, 0);
2176	}
2177
2178	/* do some init work after IP late init as dependence.
2179	* and it runs in resume/gpu reset/booting up cases.
2180	*/
2181	void amdgpu_ras_resume(struct amdgpu_device *adev)
2182	{
2183	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
2184	struct ras_manager obj, tmp;
2185
2186	if (!con)
2187	return;
2188
2189	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS(0x1 << 0)) {
2190	/* Set up all other IPs which are not implemented. There is a
2191	* tricky thing that IP's actual ras error type should be
2192	* MULTI_UNCORRECTABLE, but as driver does not handle it, so
2193	* ERROR_NONE make sense anyway.
2194	*/
2195	amdgpu_ras_enable_all_features(adev, 1);
2196
2197	/* We enable ras on all hw_supported block, but as boot
2198	* parameter might disable some of them and one or more IP has
2199	* not implemented yet. So we disable them on behalf.
2200	*/
2201	list_for_each_entry_safe(obj, tmp, &con->head, node)for (obj = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = ((&con->head)->next); (__typeof(obj) ) ( (char )__mptr - __builtin_offsetof(__typeof(obj), node) ) ;}), tmp = ({ const __typeof( ((__typeof(obj) )0)->node ) __mptr = (obj->node.next); (__typeof(obj) )( (char )__mptr - __builtin_offsetof(__typeof(obj), node) );}); &obj-> node != (&con->head); obj = tmp, tmp = ({ const __typeof ( ((__typeof(tmp) )0)->node ) __mptr = (tmp->node.next ); (__typeof(tmp) )( (char )__mptr - __builtin_offsetof(__typeof (*tmp), node) );})) {
2202	if (!amdgpu_ras_is_supported(adev, obj->head.block)) {
2203	amdgpu_ras_feature_enable(adev, &obj->head, 0);
2204	/* there should be no any reference. */
2205	WARN_ON(alive_obj(obj))({ int __ret = !!(((obj)->use)); if (__ret) printf("WARNING %s failed at %s:%d\n" , "((obj)->use)", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_ras.c" , 2205); __builtin_expect(!!(__ret), 0); });
2206	}
2207	}
2208	}
2209
2210	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET(0x1 << 1)) {
2211	con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET(0x1 << 1);
2212	/* setup ras obj state as disabled.
2213	* for init_by_vbios case.
2214	* if we want to enable ras, just enable it in a normal way.
2215	* If we want do disable it, need setup ras obj as enabled,
2216	* then issue another TA disable cmd.
2217	* See feature_enable_on_boot
2218	*/
2219	amdgpu_ras_disable_all_features(adev, 1);
2220	amdgpu_ras_reset_gpu(adev);
2221	}
2222	}
2223
2224	void amdgpu_ras_suspend(struct amdgpu_device *adev)
2225	{
2226	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
2227
2228	if (!con)
2229	return;
2230
2231	amdgpu_ras_disable_all_features(adev, 0);
2232	/* Make sure all ras objects are disabled. */
2233	if (con->features)
2234	amdgpu_ras_disable_all_features(adev, 1);
2235	}
2236
/*
 * Do some fini work before IP fini as dependence: disable RAS on all IPs
 * and tear down the recovery state. Always returns 0.
 */
int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	if (con) {
		/* Need disable ras on all IPs here before ip [hw/sw]fini */
		amdgpu_ras_disable_all_features(adev, 0);
		amdgpu_ras_recovery_fini(adev);
	}

	return 0;
}
2250
2251	int amdgpu_ras_fini(struct amdgpu_device *adev)
2252	{
2253	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
2254
2255	if (!con)
2256	return 0;
2257
2258	amdgpu_ras_fs_fini(adev);
2259	amdgpu_ras_interrupt_remove_all(adev);
2260
2261	WARN(con->features, "Feature mask is not cleared")({ int __ret = !!(con->features); if (__ret) printf("Feature mask is not cleared" ); __builtin_expect(!!(__ret), 0); });
2262
2263	if (con->features)
2264	amdgpu_ras_disable_all_features(adev, 1);
2265
2266	amdgpu_ras_set_context(adev, NULL)((adev)->psp.ras.ras = (((void *)0)));
2267	kfree(con);
2268
2269	return 0;
2270	}
2271
2272	void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
2273	{
2274	uint32_t hw_supported, supported;
2275
2276	amdgpu_ras_check_supported(adev, &hw_supported, &supported);
2277	if (!hw_supported)
2278	return;
2279
2280	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1)__sync_val_compare_and_swap(&amdgpu_ras_in_intr, 0, 1) == 0) {
2281	dev_info(adev->dev, "uncorrectable hardware error"do { } while(0)
2282	"(ERREVENT_ATHUB_INTERRUPT) detected!\n")do { } while(0);
2283
2284	amdgpu_ras_reset_gpu(adev);
2285	}
2286	}
2287
2288	bool_Bool amdgpu_ras_need_emergency_restart(struct amdgpu_device *adev)
2289	{
2290	if (adev->asic_type == CHIP_VEGA20 &&
2291	adev->pm.fw_version <= 0x283400) {
2292	return !(amdgpu_asic_reset_method(adev)(adev)->asic_funcs->reset_method((adev)) == AMD_RESET_METHOD_BACO) &&
2293	amdgpu_ras_intr_triggered();
2294	}
2295
2296	return false0;
2297	}
2298
2299	bool_Bool amdgpu_ras_check_err_threshold(struct amdgpu_device *adev)
2300	{
2301	struct amdgpu_ras *con = amdgpu_ras_get_context(adev)((adev)->psp.ras.ras);
2302	bool_Bool exc_err_limit = false0;
2303
2304	if (con && (amdgpu_bad_page_threshold != 0))
2305	amdgpu_ras_eeprom_check_err_threshold(&con->eeprom_control,
2306	&exc_err_limit);
2307
2308	/*
2309	* We are only interested in variable exc_err_limit,
2310	* as it says if GPU is in bad state or not.
2311	*/
2312	return exc_err_limit;
2313	}