Bug Summary

File: dev/pci/drm/amd/amdgpu/amdgpu_xgmi.c
Warning: line 416, column 24
Value stored to 'request_adev' during its initialization is never read

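The store is dead because amdgpu_xgmi_set_pstate() returns unconditionally at line 423 (pstate switching is temporarily disabled as a firmware-bug workaround), so the value computed at line 416 is never read on any path; the later uses at lines 443-450 are unreachable. Below is a minimal sketch of the flagged pattern and of one possible way to quiet the checker while the workaround is in place. It uses simplified stand-in types and a made-up function name, and is an illustration only, not the fix adopted upstream.

/*
 * Simplified stand-in for the flagged pattern: the initializer of
 * 'request_adev' is evaluated, but the unconditional early return means
 * the stored value is never read afterwards.
 */
struct dev { int id; };

int set_pstate_sketch(struct dev *hi_req_gpu, struct dev *adev)
{
	struct dev *request_adev = hi_req_gpu ? hi_req_gpu : adev; /* dead store */

	/* fw bug so temporarily disable pstate switching */
	return 0;
}

/*
 * One option (illustrative only) would be to defer the initialization
 * until after the temporary early return, declaring 'request_adev' where
 * it is first needed, so nothing is computed on the path that never
 * uses it.
 */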
Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name amdgpu_xgmi.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc 
-fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_xgmi.c
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 *
23 */
24#include <linux/list.h>
25#include "amdgpu.h"
26#include "amdgpu_xgmi.h"
27#include "amdgpu_smu.h"
28#include "amdgpu_ras.h"
29#include "soc15.h"
30#include "df/df_3_6_offset.h"
31#include "xgmi/xgmi_4_0_0_smn.h"
32#include "xgmi/xgmi_4_0_0_sh_mask.h"
33#include "wafl/wafl2_4_0_0_smn.h"
34#include "wafl/wafl2_4_0_0_sh_mask.h"
35
36static DEFINE_MUTEX(xgmi_mutex)struct rwlock xgmi_mutex = { 0, "xgmi_mutex" };
37
38#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE4 4
39
40static DRM_LIST_HEAD(xgmi_hive_list)struct list_head xgmi_hive_list = { &(xgmi_hive_list), &
(xgmi_hive_list) }
;
41
42static const int xgmi_pcs_err_status_reg_vg20[] = {
43 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210,
44 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x100000,
45};
46
47static const int wafl_pcs_err_status_reg_vg20[] = {
48 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210,
49 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210 + 0x100000,
50};
51
52static const int xgmi_pcs_err_status_reg_arct[] = {
53 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210,
54 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x100000,
55 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x500000,
56 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x600000,
57 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x700000,
58 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x800000,
59};
60
61/* same as vg20*/
62static const int wafl_pcs_err_status_reg_arct[] = {
63 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210,
64 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210 + 0x100000,
65};
66
67static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
68 {"XGMI PCS DataLossErr",
69 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)0x00000001L, 0x0},
70 {"XGMI PCS TrainingErr",
71 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TrainingErr)0x00000002L, 0x1},
72 {"XGMI PCS CRCErr",
73 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, CRCErr)0x00000020L, 0x5},
74 {"XGMI PCS BERExceededErr",
75 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, BERExceededErr)0x00000040L, 0x6},
76 {"XGMI PCS TxMetaDataErr",
77 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TxMetaDataErr)0x00000080L, 0x7},
78 {"XGMI PCS ReplayBufParityErr",
79 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayBufParityErr)0x00000100L, 0x8},
80 {"XGMI PCS DataParityErr",
81 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataParityErr)0x00000200L, 0x9},
82 {"XGMI PCS ReplayFifoOverflowErr",
83 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)0x00000400L, 0xa},
84 {"XGMI PCS ReplayFifoUnderflowErr",
85 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)0x00000800L, 0xb},
86 {"XGMI PCS ElasticFifoOverflowErr",
87 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)0x00001000L, 0xc},
88 {"XGMI PCS DeskewErr",
89 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DeskewErr)0x00002000L, 0xd},
90 {"XGMI PCS DataStartupLimitErr",
91 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataStartupLimitErr)0x00008000L, 0xf},
92 {"XGMI PCS FCInitTimeoutErr",
93 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, FCInitTimeoutErr)0x00010000L, 0x10},
94 {"XGMI PCS RecoveryTimeoutErr",
95 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryTimeoutErr)0x00020000L, 0x11},
96 {"XGMI PCS ReadySerialTimeoutErr",
97 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)0x00040000L, 0x12},
98 {"XGMI PCS ReadySerialAttemptErr",
99 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialAttemptErr)0x00080000L, 0x13},
100 {"XGMI PCS RecoveryAttemptErr",
101 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryAttemptErr)0x00100000L, 0x14},
102 {"XGMI PCS RecoveryRelockAttemptErr",
103 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)0x00200000L, 0x15},
104};
105
106static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
107 {"WAFL PCS DataLossErr",
108 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataLossErr)0x00000001L, 0x0},
109 {"WAFL PCS TrainingErr",
110 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TrainingErr)0x00000002L, 0x1},
111 {"WAFL PCS CRCErr",
112 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, CRCErr)0x00000020L, 0x5},
113 {"WAFL PCS BERExceededErr",
114 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, BERExceededErr)0x00000040L, 0x6},
115 {"WAFL PCS TxMetaDataErr",
116 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TxMetaDataErr)0x00000080L, 0x7},
117 {"WAFL PCS ReplayBufParityErr",
118 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayBufParityErr)0x00000100L, 0x8},
119 {"WAFL PCS DataParityErr",
120 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataParityErr)0x00000200L, 0x9},
121 {"WAFL PCS ReplayFifoOverflowErr",
122 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoOverflowErr)0x00000400L, 0xa},
123 {"WAFL PCS ReplayFifoUnderflowErr",
124 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)0x00000800L, 0xb},
125 {"WAFL PCS ElasticFifoOverflowErr",
126 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ElasticFifoOverflowErr)0x00001000L, 0xc},
127 {"WAFL PCS DeskewErr",
128 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DeskewErr)0x00002000L, 0xd},
129 {"WAFL PCS DataStartupLimitErr",
130 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataStartupLimitErr)0x00008000L, 0xf},
131 {"WAFL PCS FCInitTimeoutErr",
132 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, FCInitTimeoutErr)0x00010000L, 0x10},
133 {"WAFL PCS RecoveryTimeoutErr",
134 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryTimeoutErr)0x00020000L, 0x11},
135 {"WAFL PCS ReadySerialTimeoutErr",
136 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialTimeoutErr)0x00040000L, 0x12},
137 {"WAFL PCS ReadySerialAttemptErr",
138 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialAttemptErr)0x00080000L, 0x13},
139 {"WAFL PCS RecoveryAttemptErr",
140 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryAttemptErr)0x00100000L, 0x14},
141 {"WAFL PCS RecoveryRelockAttemptErr",
142 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)0x00200000L, 0x15},
143};
144
145/**
146 * DOC: AMDGPU XGMI Support
147 *
148 * XGMI is a high speed interconnect that joins multiple GPU cards
149 * into a homogeneous memory space that is organized by a collective
150 * hive ID and individual node IDs, both of which are 64-bit numbers.
151 *
152 * The file xgmi_device_id contains the unique per GPU device ID and
153 * is stored in the /sys/class/drm/card${cardno}/device/ directory.
154 *
155 * Inside the device directory a sub-directory 'xgmi_hive_info' is
156 * created which contains the hive ID and the list of nodes.
157 *
158 * The hive ID is stored in:
159 * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
160 *
161 * The node information is stored in numbered directories:
162 * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
163 *
164 * Each device has its own xgmi_hive_info directory with a mirror
165 * set of node sub-directories.
166 *
167 * The XGMI memory space is built by contiguously appending each node's
168 * power-of-two padded VRAM space to that of the previous node.
169 *
170 */
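The sysfs layout described in the DOC comment applies on Linux; in this port the attribute code below is stubbed out under #ifdef notyet. As a purely illustrative sketch, the hive ID could be read back from user space as follows, assuming card0 and a mounted sysfs; the program and its names are not part of the driver.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: read the hive ID that the DOC comment says is
 * exported at /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id.
 * Assumes card0 and a Linux sysfs mount; error handling kept minimal.
 */
int main(void)
{
	const char *path =
	    "/sys/class/drm/card0/device/xgmi_hive_info/xgmi_hive_id";
	FILE *f = fopen(path, "r");
	uint64_t hive_id;

	if (f == NULL)
		return 1;
	if (fscanf(f, "%" SCNu64, &hive_id) == 1)
		printf("hive id: %" PRIu64 "\n", hive_id);
	fclose(f);
	return 0;
}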
171
172static struct attribute amdgpu_xgmi_hive_id = {
173 .name = "xgmi_hive_id",
174#ifdef notyet
175 .mode = S_IRUGO
176#endif
177};
178
179static struct attribute *amdgpu_xgmi_hive_attrs[] = {
180 &amdgpu_xgmi_hive_id,
181 NULL((void *)0)
182};
183
184static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
185 struct attribute *attr, char *buf)
186{
187 struct amdgpu_hive_info *hive = container_of(({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
188 kobj, struct amdgpu_hive_info, kobj)({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
;
189
190 if (attr == &amdgpu_xgmi_hive_id)
191 return snprintf(buf, PAGE_SIZE(1 << 12), "%llu\n", hive->hive_id);
192
193 return 0;
194}
195
196static void amdgpu_xgmi_hive_release(struct kobject *kobj)
197{
198 struct amdgpu_hive_info *hive = container_of(({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
199 kobj, struct amdgpu_hive_info, kobj)({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
;
200
201 mutex_destroy(&hive->hive_lock);
202 kfree(hive);
203}
204
205#ifdef notyet
206static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
207 .show = amdgpu_xgmi_show_attrs,
208};
209#endif
210
211struct kobj_type amdgpu_xgmi_hive_type = {
212 .release = amdgpu_xgmi_hive_release,
213#ifdef notyet
214 .sysfs_ops = &amdgpu_xgmi_hive_ops,
215 .default_attrs = amdgpu_xgmi_hive_attrs,
216#endif
217};
218
219static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
220 struct device_attribute *attr,
221 char *buf)
222{
223 struct drm_device *ddev = dev_get_drvdata(dev)((void *)0);
224 struct amdgpu_device *adev = drm_to_adev(ddev);
225
226 return snprintf(buf, PAGE_SIZE(1 << 12), "%llu\n", adev->gmc.xgmi.node_id);
227
228}
229
230#define AMDGPU_XGMI_SET_FICAA(o)((o) | 0x456801) ((o) | 0x456801)
231static ssize_t amdgpu_xgmi_show_error(struct device *dev,
232 struct device_attribute *attr,
233 char *buf)
234{
235 struct drm_device *ddev = dev_get_drvdata(dev)((void *)0);
236 struct amdgpu_device *adev = drm_to_adev(ddev);
237 uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in;
238 uint64_t fica_out;
239 unsigned int error_count = 0;
240
241 ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200)((0x200) | 0x456801);
242 ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208)((0x208) | 0x456801);
243
244 fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
245 if (fica_out != 0x1f)
246 pr_err("xGMI error counters not enabled!\n")printk("\0013" "amdgpu: " "xGMI error counters not enabled!\n"
)
;
247
248 fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
249
250 if ((fica_out & 0xffff) == 2)
251 error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
252
253 adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
254
255 return snprintf(buf, PAGE_SIZE(1 << 12), "%d\n", error_count);
256}
257
258
259static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL)struct device_attribute dev_attr_xgmi_device_id;
260static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL)struct device_attribute dev_attr_xgmi_error;
261
262static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
263 struct amdgpu_hive_info *hive)
264{
265 STUB()do { printf("%s: stub\n", __func__); } while(0);
266 return -ENOSYS78;
267#ifdef notyet
268 int ret = 0;
269 char node[10] = { 0 };
270
271 /* Create xgmi device id file */
272 ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id)0;
273 if (ret) {
274 dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to create device file xgmi_device_id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
275 return ret;
276 }
277
278 /* Create xgmi error file */
279 ret = device_create_file(adev->dev, &dev_attr_xgmi_error)0;
280 if (ret)
281 pr_err("failed to create xgmi_error\n")printk("\0013" "amdgpu: " "failed to create xgmi_error\n");
282
283
284 /* Create sysfs link to hive info folder on the first device */
285 if (hive->kobj.parent != (&adev->dev->kobj)) {
286 ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,0
287 "xgmi_hive_info")0;
288 if (ret) {
289 dev_err(adev->dev, "XGMI: Failed to create link to hive info")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to create link to hive info"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
290 goto remove_file;
291 }
292 }
293
294 snprintf(node, sizeof(node), "node%d", atomic_read(&hive->number_devices)({ typeof(*(&hive->number_devices)) __tmp = *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)); membar_datadep_consumer(); __tmp; })
);
295 /* Create sysfs link from the hive folder to yourself */
296 ret = sysfs_create_link(&hive->kobj, &adev->dev->kobj, node)0;
297 if (ret) {
298 dev_err(adev->dev, "XGMI: Failed to create link from hive info")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to create link from hive info"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
299 goto remove_link;
300 }
301
302 goto success;
303
304
305remove_link:
306 sysfs_remove_link(&adev->dev->kobj, adev_to_drm(adev)->unique);
307
308remove_file:
309 device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
310
311success:
312 return ret;
313#endif
314}
315
316static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
317 struct amdgpu_hive_info *hive)
318{
319#ifdef __linux__
320 char node[10];
321 memset(node, 0, sizeof(node))__builtin_memset((node), (0), (sizeof(node)));
322
323 device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
324 device_remove_file(adev->dev, &dev_attr_xgmi_error);
325
326 if (hive->kobj.parent != (&adev->dev->kobj))
327 sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
328
329 sprintf(node, "node%d", atomic_read(&hive->number_devices)({ typeof(*(&hive->number_devices)) __tmp = *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)); membar_datadep_consumer(); __tmp; })
);
330 sysfs_remove_link(&hive->kobj, node);
331#endif
332}
333
334
335
336struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
337{
338 struct amdgpu_hive_info *hive = NULL((void *)0), *tmp = NULL((void *)0);
339 int ret;
340
341 if (!adev->gmc.xgmi.hive_id)
342 return NULL((void *)0);
343
344 STUB()do { printf("%s: stub\n", __func__); } while(0);
345 return NULL((void *)0);
346#ifdef notyet
347
348 if (adev->hive) {
349 kobject_get(&adev->hive->kobj);
350 return adev->hive;
351 }
352
353 mutex_lock(&xgmi_mutex)rw_enter_write(&xgmi_mutex);
354
355 if (!list_empty(&xgmi_hive_list)) {
356 list_for_each_entry_safe(hive, tmp, &xgmi_hive_list, node)for (hive = ({ const __typeof( ((__typeof(*hive) *)0)->node
) *__mptr = ((&xgmi_hive_list)->next); (__typeof(*hive
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*hive), node
) );}), tmp = ({ const __typeof( ((__typeof(*hive) *)0)->node
) *__mptr = (hive->node.next); (__typeof(*hive) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*hive), node) );}); &
hive->node != (&xgmi_hive_list); hive = tmp, tmp = ({ const
__typeof( ((__typeof(*tmp) *)0)->node ) *__mptr = (tmp->
node.next); (__typeof(*tmp) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp), node) );}))
{
357 if (hive->hive_id == adev->gmc.xgmi.hive_id)
358 goto pro_end;
359 }
360 }
361
362 hive = kzalloc(sizeof(*hive), GFP_KERNEL(0x0001 | 0x0004));
363 if (!hive) {
364 dev_err(adev->dev, "XGMI: allocation failed\n")printf("drm:pid%d:%s *ERROR* " "XGMI: allocation failed\n", (
{struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
365 hive = NULL((void *)0);
366 goto pro_end;
367 }
368
369 /* initialize new hive if it does not exist */
370 ret = kobject_init_and_add(&hive->kobj,
371 &amdgpu_xgmi_hive_type,
372 &adev->dev->kobj,
373 "%s", "xgmi_hive_info");
374 if (ret) {
375 dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n")printf("drm:pid%d:%s *ERROR* " "XGMI: failed initializing kobject for xgmi hive\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
376 kobject_put(&hive->kobj);
377 kfree(hive);
378 hive = NULL((void *)0);
379 goto pro_end;
380 }
381
382 hive->hive_id = adev->gmc.xgmi.hive_id;
383 INIT_LIST_HEAD(&hive->device_list);
384 INIT_LIST_HEAD(&hive->node);
385 rw_init(&hive->hive_lock, "aghive")_rw_init_flags(&hive->hive_lock, "aghive", 0, ((void *
)0))
;
386 atomic_set(&hive->in_reset, 0)({ typeof(*(&hive->in_reset)) __tmp = ((0)); *(volatile
typeof(*(&hive->in_reset)) *)&(*(&hive->in_reset
)) = __tmp; __tmp; })
;
387 atomic_set(&hive->number_devices, 0)({ typeof(*(&hive->number_devices)) __tmp = ((0)); *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)) = __tmp; __tmp; })
;
388 task_barrier_init(&hive->tb);
389 hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
390 hive->hi_req_gpu = NULL((void *)0);
391 /*
392 * hive pstate on boot is high in vega20 so we have to go to low
393 * pstate after boot.
394 */
395 hive->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE4;
396 list_add_tail(&hive->node, &xgmi_hive_list);
397
398pro_end:
399 if (hive)
400 kobject_get(&hive->kobj);
401 mutex_unlock(&xgmi_mutex)rw_exit_write(&xgmi_mutex);
402 return hive;
403#endif
404}
405
406void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive)
407{
408 if (hive)
409 kobject_put(&hive->kobj);
410}
411
412int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
413{
414 int ret = 0;
415 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
416 struct amdgpu_device *request_adev = hive->hi_req_gpu ?
Value stored to 'request_adev' during its initialization is never read
417 hive->hi_req_gpu : adev;
418 bool_Bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
419 bool_Bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
420
421 amdgpu_put_xgmi_hive(hive);
422 /* fw bug so temporarily disable pstate switching */
423 return 0;
424
425 if (!hive || adev->asic_type != CHIP_VEGA20)
426 return 0;
427
428 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
429
430 if (is_hi_req)
431 hive->hi_req_count++;
432 else
433 hive->hi_req_count--;
434
435 /*
436 * Vega20 only needs a single peer to request pstate high for the hive to
437 * go high but all peers must request pstate low for the hive to go low
438 */
439 if (hive->pstate == pstate ||
440 (!is_hi_req && hive->hi_req_count && !init_low))
441 goto out;
442
443 dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate)do { } while(0);
444
445 ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate);
446 if (ret) {
447 dev_err(request_adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
448 "XGMI: Set pstate failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
449 request_adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
450 request_adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
;
451 goto out;
452 }
453
454 if (init_low)
455 hive->pstate = hive->hi_req_count ?
456 hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
457 else {
458 hive->pstate = pstate;
459 hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
460 adev : NULL((void *)0);
461 }
462out:
463 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
464 return ret;
465}
466
467int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
468{
469 int ret;
470
471 /* Each psp needs to set the latest topology */
472 ret = psp_xgmi_set_topology_info(&adev->psp,
473 atomic_read(&hive->number_devices)({ typeof(*(&hive->number_devices)) __tmp = *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)); membar_datadep_consumer(); __tmp; })
,
474 &adev->psp.xgmi_context.top_info);
475 if (ret)
476 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
477 "XGMI: Set topology failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
478 adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
479 adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
;
480
481 return ret;
482}
483
484
485/*
486 * NOTE psp_xgmi_node_info.num_hops layout is as follows:
487 * num_hops[7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved)
488 * num_hops[5:3] = reserved
489 * num_hops[2:0] = number of hops
490 */
491int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
492 struct amdgpu_device *peer_adev)
493{
494 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
495 uint8_t num_hops_mask = 0x7;
496 int i;
497
498 for (i = 0 ; i < top->num_nodes; ++i)
499 if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
500 return top->nodes[i].num_hops & num_hops_mask;
501 return -EINVAL22;
502}
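The helper above only extracts the hop count; per the NOTE, bits [7:6] of num_hops also carry the link type. A small decode sketch follows; the function name and printed labels are illustrative and not part of the driver.

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative decode of psp_xgmi_node_info.num_hops per the NOTE above:
 * bits [7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved),
 * bits [2:0] = number of hops.  The helper name is made up.
 */
void decode_num_hops(uint8_t num_hops)
{
	uint8_t link_type = (num_hops >> 6) & 0x3;
	uint8_t hops = num_hops & 0x7;

	printf("link type %u (%s), %u hop(s)\n", (unsigned)link_type,
	    link_type == 0 ? "xGMI2" :
	    link_type == 1 ? "xGMI3" : "reserved", (unsigned)hops);
}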
503
504int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
505{
506 struct psp_xgmi_topology_info *top_info;
507 struct amdgpu_hive_info *hive;
508 struct amdgpu_xgmi *entry;
509 struct amdgpu_device *tmp_adev = NULL((void *)0);
510
511 int count = 0, ret = 0;
512
513 if (!adev->gmc.xgmi.supported)
514 return 0;
515
516 if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
517 ret = psp_xgmi_initialize(&adev->psp);
518 if (ret) {
519 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
520 "XGMI: Failed to initialize xgmi session\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
521 return ret;
522 }
523
524 ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
525 if (ret) {
526 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get hive id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
527 "XGMI: Failed to get hive id\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get hive id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
528 return ret;
529 }
530
531 ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
532 if (ret) {
533 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get node id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
534 "XGMI: Failed to get node id\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get node id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
535 return ret;
536 }
537 } else {
538 adev->gmc.xgmi.hive_id = 16;
539 adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
540 }
541
542 hive = amdgpu_get_xgmi_hive(adev);
543 if (!hive) {
544 ret = -EINVAL22;
545 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)
546 "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",printf("drm:pid%d:%s *ERROR* " "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)
547 adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)printf("drm:pid%d:%s *ERROR* " "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)
;
548 goto exit;
549 }
550 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
551
552 top_info = &adev->psp.xgmi_context.top_info;
553
554 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
555 list_for_each_entry(entry, &hive->device_list, head)for (entry = ({ const __typeof( ((__typeof(*entry) *)0)->head
) *__mptr = ((&hive->device_list)->next); (__typeof
(*entry) *)( (char *)__mptr - __builtin_offsetof(__typeof(*entry
), head) );}); &entry->head != (&hive->device_list
); entry = ({ const __typeof( ((__typeof(*entry) *)0)->head
) *__mptr = (entry->head.next); (__typeof(*entry) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*entry), head) );}))
556 top_info->nodes[count++].node_id = entry->node_id;
557 top_info->num_nodes = count;
558 atomic_set(&hive->number_devices, count)({ typeof(*(&hive->number_devices)) __tmp = ((count));
*(volatile typeof(*(&hive->number_devices)) *)&(*
(&hive->number_devices)) = __tmp; __tmp; })
;
559
560 task_barrier_add_task(&hive->tb);
561
562 if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
563 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
564 /* update node list for other device in the hive */
565 if (tmp_adev != adev) {
566 top_info = &tmp_adev->psp.xgmi_context.top_info;
567 top_info->nodes[count - 1].node_id =
568 adev->gmc.xgmi.node_id;
569 top_info->num_nodes = count;
570 }
571 ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
572 if (ret)
573 goto exit_unlock;
574 }
575
576 /* get latest topology info for each device from psp */
577 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
578 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
579 &tmp_adev->psp.xgmi_context.top_info);
580 if (ret) {
581 dev_err(tmp_adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
582 "XGMI: Get topology failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
583 tmp_adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
584 tmp_adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
;
585 /* TODO: continue when some nodes fail, or disable the whole hive */
586 goto exit_unlock;
587 }
588 }
589 }
590
591 if (!ret)
592 ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
593
594exit_unlock:
595 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
596exit:
597 if (!ret) {
598 adev->hive = hive;
599 dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",do { } while(0)
600 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id)do { } while(0);
601 } else {
602 amdgpu_put_xgmi_hive(hive);
603 dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n",printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret)
604 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret)
605 ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret)
;
606 }
607
608 return ret;
609}
610
611int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
612{
613 struct amdgpu_hive_info *hive = adev->hive;
614
615 if (!adev->gmc.xgmi.supported)
616 return -EINVAL22;
617
618 if (!hive)
619 return -EINVAL22;
620
621 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
622 task_barrier_rem_task(&hive->tb);
623 amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
624 if (hive->hi_req_gpu == adev)
625 hive->hi_req_gpu = NULL((void *)0);
626 list_del(&adev->gmc.xgmi.head);
627 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
628
629 amdgpu_put_xgmi_hive(hive);
630 adev->hive = NULL((void *)0);
631
632 if (atomic_dec_return(&hive->number_devices)__sync_sub_and_fetch((&hive->number_devices), 1) == 0) {
633 /* Remove the hive from global hive list */
634 mutex_lock(&xgmi_mutex)rw_enter_write(&xgmi_mutex);
635 list_del(&hive->node);
636 mutex_unlock(&xgmi_mutex)rw_exit_write(&xgmi_mutex);
637
638 amdgpu_put_xgmi_hive(hive);
639 }
640
641 return psp_xgmi_terminate(&adev->psp);
642}
643
644int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
645{
646 int r;
647 struct ras_ih_if ih_info = {
648 .cb = NULL((void *)0),
649 };
650 struct ras_fs_if fs_info = {
651 .sysfs_name = "xgmi_wafl_err_count",
652 };
653
654 if (!adev->gmc.xgmi.supported ||
655 adev->gmc.xgmi.num_physical_nodes == 0)
656 return 0;
657
658 amdgpu_xgmi_reset_ras_error_count(adev);
659
660 if (!adev->gmc.xgmi.ras_if) {
661 adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL(0x0001 | 0x0004));
662 if (!adev->gmc.xgmi.ras_if)
663 return -ENOMEM12;
664 adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
665 adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
666 adev->gmc.xgmi.ras_if->sub_block_index = 0;
667 strlcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl", sizeof(adev->gmc.xgmi.ras_if->name));
668 }
669 ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
670 r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
671 &fs_info, &ih_info);
672 if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
673 kfree(adev->gmc.xgmi.ras_if);
674 adev->gmc.xgmi.ras_if = NULL((void *)0);
675 }
676
677 return r;
678}
679
680void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
681{
682 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
683 adev->gmc.xgmi.ras_if) {
684 struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
685 struct ras_ih_if ih_info = {
686 .cb = NULL((void *)0),
687 };
688
689 amdgpu_ras_late_fini(adev, ras_if, &ih_info);
690 kfree(ras_if);
691 }
692}
693
694uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
695 uint64_t addr)
696{
697 struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi;
698 return (addr + xgmi->physical_node_id * xgmi->node_segment_size);
699}
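Per the DOC comment, each node's power-of-two padded VRAM is placed back to back in the XGMI space, so the relative address is just the local address plus this node's offset. A worked sketch with invented numbers (none of these values come from real hardware) is below.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative arithmetic behind amdgpu_xgmi_get_relative_phy_addr():
 * relative = addr + physical_node_id * node_segment_size.
 * The values below are made up for the example.
 */
int main(void)
{
	uint64_t node_segment_size = 32ULL << 30;	/* e.g. 32 GiB per node */
	uint64_t physical_node_id = 2;			/* third GPU in the hive */
	uint64_t addr = 0x1000;				/* local VRAM offset */
	uint64_t rel = addr + physical_node_id * node_segment_size;

	printf("relative addr: 0x%" PRIx64 "\n", rel);
	return 0;
}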
700
701static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg)
702{
703 WREG32_PCIE(pcs_status_reg, 0xFFFFFFFF)adev->pcie_wreg(adev, (pcs_status_reg), (0xFFFFFFFF));
704 WREG32_PCIE(pcs_status_reg, 0)adev->pcie_wreg(adev, (pcs_status_reg), (0));
705}
706
707void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
708{
709 uint32_t i;
710
711 switch (adev->asic_type) {
712 case CHIP_ARCTURUS:
713 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct)(sizeof((xgmi_pcs_err_status_reg_arct)) / sizeof((xgmi_pcs_err_status_reg_arct
)[0]))
; i++)
714 pcs_clear_status(adev,
715 xgmi_pcs_err_status_reg_arct[i]);
716 break;
717 case CHIP_VEGA20:
718 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20)(sizeof((xgmi_pcs_err_status_reg_vg20)) / sizeof((xgmi_pcs_err_status_reg_vg20
)[0]))
; i++)
719 pcs_clear_status(adev,
720 xgmi_pcs_err_status_reg_vg20[i]);
721 break;
722 default:
723 break;
724 }
725}
726
727static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
728 uint32_t value,
729 uint32_t *ue_count,
730 uint32_t *ce_count,
731 bool_Bool is_xgmi_pcs)
732{
733 int i;
734 int ue_cnt;
735
736 if (is_xgmi_pcs) {
737 /* query xgmi pcs error status,
738 * only ue is supported */
739 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_ras_fields)(sizeof((xgmi_pcs_ras_fields)) / sizeof((xgmi_pcs_ras_fields)
[0]))
; i ++) {
740 ue_cnt = (value &
741 xgmi_pcs_ras_fields[i].pcs_err_mask) >>
742 xgmi_pcs_ras_fields[i].pcs_err_shift;
743 if (ue_cnt) {
744 dev_info(adev->dev, "%s detected\n",do { } while(0)
745 xgmi_pcs_ras_fields[i].err_name)do { } while(0);
746 *ue_count += ue_cnt;
747 }
748 }
749 } else {
750 /* query wafl pcs error status,
751 * only ue is supported */
752 for (i = 0; i < ARRAY_SIZE(wafl_pcs_ras_fields)(sizeof((wafl_pcs_ras_fields)) / sizeof((wafl_pcs_ras_fields)
[0]))
; i++) {
753 ue_cnt = (value &
754 wafl_pcs_ras_fields[i].pcs_err_mask) >>
755 wafl_pcs_ras_fields[i].pcs_err_shift;
756 if (ue_cnt) {
757 dev_info(adev->dev, "%s detected\n",do { } while(0)
758 wafl_pcs_ras_fields[i].err_name)do { } while(0);
759 *ue_count += ue_cnt;
760 }
761 }
762 }
763
764 return 0;
765}
766
767int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
768 void *ras_error_status)
769{
770 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
771 int i;
772 uint32_t data;
773 uint32_t ue_cnt = 0, ce_cnt = 0;
774
775 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
776 return -EINVAL22;
777
778 err_data->ue_count = 0;
779 err_data->ce_count = 0;
780
781 switch (adev->asic_type) {
782 case CHIP_ARCTURUS:
783 /* check xgmi pcs error */
784 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct)(sizeof((xgmi_pcs_err_status_reg_arct)) / sizeof((xgmi_pcs_err_status_reg_arct
)[0]))
; i++) {
785 data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i])adev->pcie_rreg(adev, (xgmi_pcs_err_status_reg_arct[i]));
786 if (data)
787 amdgpu_xgmi_query_pcs_error_status(adev,
788 data, &ue_cnt, &ce_cnt, true1);
789 }
790 /* check wafl pcs error */
791 for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct)(sizeof((wafl_pcs_err_status_reg_arct)) / sizeof((wafl_pcs_err_status_reg_arct
)[0]))
; i++) {
792 data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i])adev->pcie_rreg(adev, (wafl_pcs_err_status_reg_arct[i]));
793 if (data)
794 amdgpu_xgmi_query_pcs_error_status(adev,
795 data, &ue_cnt, &ce_cnt, false0);
796 }
797 break;
798 case CHIP_VEGA20:
799 default:
800 /* check xgmi pcs error */
801 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20)(sizeof((xgmi_pcs_err_status_reg_vg20)) / sizeof((xgmi_pcs_err_status_reg_vg20
)[0]))
; i++) {
802 data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i])adev->pcie_rreg(adev, (xgmi_pcs_err_status_reg_vg20[i]));
803 if (data)
804 amdgpu_xgmi_query_pcs_error_status(adev,
805 data, &ue_cnt, &ce_cnt, true1);
806 }
807 /* check wafl pcs error */
808 for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20)(sizeof((wafl_pcs_err_status_reg_vg20)) / sizeof((wafl_pcs_err_status_reg_vg20
)[0]))
; i++) {
809 data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i])adev->pcie_rreg(adev, (wafl_pcs_err_status_reg_vg20[i]));
810 if (data)
811 amdgpu_xgmi_query_pcs_error_status(adev,
812 data, &ue_cnt, &ce_cnt, false0);
813 }
814 break;
815 }
816
817 amdgpu_xgmi_reset_ras_error_count(adev);
818
819 err_data->ue_count += ue_cnt;
820 err_data->ce_count += ce_cnt;
821
822 return 0;
823}