Bug Summary

File: dev/pci/drm/amd/amdgpu/amdgpu_xgmi.c
Warning: line 479, column 2
Value stored to 'init_low' is never read

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name amdgpu_xgmi.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_xgmi.c
1/*
2 * Copyright 2018 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 *
23 */
24#include <linux/list.h>
25#include "amdgpu.h"
26#include "amdgpu_xgmi.h"
27#include "amdgpu_ras.h"
28#include "soc15.h"
29#include "df/df_3_6_offset.h"
30#include "xgmi/xgmi_4_0_0_smn.h"
31#include "xgmi/xgmi_4_0_0_sh_mask.h"
32#include "wafl/wafl2_4_0_0_smn.h"
33#include "wafl/wafl2_4_0_0_sh_mask.h"
34
35#include "amdgpu_reset.h"
36
37#define smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c 0x11a0020c
38#define smnPCS_GOPX1_PCS_ERROR_STATUS0x12200210 0x12200210
39
40static DEFINE_MUTEX(xgmi_mutex)struct rwlock xgmi_mutex = { 0, "xgmi_mutex" };
41
42#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE4 4
43
44static DRM_LIST_HEAD(xgmi_hive_list)struct list_head xgmi_hive_list = { &(xgmi_hive_list), &
(xgmi_hive_list) }
;
45
46static const int xgmi_pcs_err_status_reg_vg20[] = {
47 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210,
48 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x100000,
49};
50
51static const int wafl_pcs_err_status_reg_vg20[] = {
52 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210,
53 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210 + 0x100000,
54};
55
56static const int xgmi_pcs_err_status_reg_arct[] = {
57 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210,
58 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x100000,
59 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x500000,
60 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x600000,
61 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x700000,
62 smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS0x11af0210 + 0x800000,
63};
64
65/* same as vg20*/
66static const int wafl_pcs_err_status_reg_arct[] = {
67 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210,
68 smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS0x11cf0210 + 0x100000,
69};
70
71static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
72 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c,
73 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x100000,
74 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x200000,
75 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x300000,
76 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x400000,
77 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x500000,
78 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x600000,
79 smnPCS_XGMI3X16_PCS_ERROR_STATUS0x11a0020c + 0x700000
80};
81
82static const int walf_pcs_err_status_reg_aldebaran[] = {
83 smnPCS_GOPX1_PCS_ERROR_STATUS0x12200210,
84 smnPCS_GOPX1_PCS_ERROR_STATUS0x12200210 + 0x100000
85};
86
87static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = {
88 {"XGMI PCS DataLossErr",
89 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)0x00000001L, 0x0},
90 {"XGMI PCS TrainingErr",
91 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TrainingErr)0x00000002L, 0x1},
92 {"XGMI PCS CRCErr",
93 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, CRCErr)0x00000020L, 0x5},
94 {"XGMI PCS BERExceededErr",
95 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, BERExceededErr)0x00000040L, 0x6},
96 {"XGMI PCS TxMetaDataErr",
97 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, TxMetaDataErr)0x00000080L, 0x7},
98 {"XGMI PCS ReplayBufParityErr",
99 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayBufParityErr)0x00000100L, 0x8},
100 {"XGMI PCS DataParityErr",
101 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataParityErr)0x00000200L, 0x9},
102 {"XGMI PCS ReplayFifoOverflowErr",
103 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)0x00000400L, 0xa},
104 {"XGMI PCS ReplayFifoUnderflowErr",
105 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)0x00000800L, 0xb},
106 {"XGMI PCS ElasticFifoOverflowErr",
107 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)0x00001000L, 0xc},
108 {"XGMI PCS DeskewErr",
109 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DeskewErr)0x00002000L, 0xd},
110 {"XGMI PCS DataStartupLimitErr",
111 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataStartupLimitErr)0x00008000L, 0xf},
112 {"XGMI PCS FCInitTimeoutErr",
113 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, FCInitTimeoutErr)0x00010000L, 0x10},
114 {"XGMI PCS RecoveryTimeoutErr",
115 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryTimeoutErr)0x00020000L, 0x11},
116 {"XGMI PCS ReadySerialTimeoutErr",
117 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)0x00040000L, 0x12},
118 {"XGMI PCS ReadySerialAttemptErr",
119 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, ReadySerialAttemptErr)0x00080000L, 0x13},
120 {"XGMI PCS RecoveryAttemptErr",
121 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryAttemptErr)0x00100000L, 0x14},
122 {"XGMI PCS RecoveryRelockAttemptErr",
123 SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)0x00200000L, 0x15},
124};
125
126static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
127 {"WAFL PCS DataLossErr",
128 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataLossErr)0x00000001L, 0x0},
129 {"WAFL PCS TrainingErr",
130 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TrainingErr)0x00000002L, 0x1},
131 {"WAFL PCS CRCErr",
132 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, CRCErr)0x00000020L, 0x5},
133 {"WAFL PCS BERExceededErr",
134 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, BERExceededErr)0x00000040L, 0x6},
135 {"WAFL PCS TxMetaDataErr",
136 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, TxMetaDataErr)0x00000080L, 0x7},
137 {"WAFL PCS ReplayBufParityErr",
138 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayBufParityErr)0x00000100L, 0x8},
139 {"WAFL PCS DataParityErr",
140 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataParityErr)0x00000200L, 0x9},
141 {"WAFL PCS ReplayFifoOverflowErr",
142 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoOverflowErr)0x00000400L, 0xa},
143 {"WAFL PCS ReplayFifoUnderflowErr",
144 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)0x00000800L, 0xb},
145 {"WAFL PCS ElasticFifoOverflowErr",
146 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ElasticFifoOverflowErr)0x00001000L, 0xc},
147 {"WAFL PCS DeskewErr",
148 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DeskewErr)0x00002000L, 0xd},
149 {"WAFL PCS DataStartupLimitErr",
150 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, DataStartupLimitErr)0x00008000L, 0xf},
151 {"WAFL PCS FCInitTimeoutErr",
152 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, FCInitTimeoutErr)0x00010000L, 0x10},
153 {"WAFL PCS RecoveryTimeoutErr",
154 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryTimeoutErr)0x00020000L, 0x11},
155 {"WAFL PCS ReadySerialTimeoutErr",
156 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialTimeoutErr)0x00040000L, 0x12},
157 {"WAFL PCS ReadySerialAttemptErr",
158 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, ReadySerialAttemptErr)0x00080000L, 0x13},
159 {"WAFL PCS RecoveryAttemptErr",
160 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryAttemptErr)0x00100000L, 0x14},
161 {"WAFL PCS RecoveryRelockAttemptErr",
162 SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)0x00200000L, 0x15},
163};
164
165/**
166 * DOC: AMDGPU XGMI Support
167 *
168 * XGMI is a high speed interconnect that joins multiple GPU cards
169 * into a homogeneous memory space that is organized by a collective
170 * hive ID and individual node IDs, both of which are 64-bit numbers.
171 *
172 * The file xgmi_device_id contains the unique per GPU device ID and
173 * is stored in the /sys/class/drm/card${cardno}/device/ directory.
174 *
175 * Inside the device directory a sub-directory 'xgmi_hive_info' is
176 * created which contains the hive ID and the list of nodes.
177 *
178 * The hive ID is stored in:
179 * /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
180 *
181 * The node information is stored in numbered directories:
182 * /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
183 *
184 * Each device has its own xgmi_hive_info directory with a mirror
185 * set of node sub-directories.
186 *
187 * The XGMI memory space is built by contiguously adding the power of
188 * two padded VRAM space from each node to each other.
189 *
190 */
191
192static struct attribute amdgpu_xgmi_hive_id = {
193 .name = "xgmi_hive_id",
194#ifdef notyet
195 .mode = S_IRUGO
196#endif
197};
198
199static struct attribute *amdgpu_xgmi_hive_attrs[] = {
200 &amdgpu_xgmi_hive_id,
201 NULL((void *)0)
202};
203ATTRIBUTE_GROUPS(amdgpu_xgmi_hive);
204
205static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
206 struct attribute *attr, char *buf)
207{
208 struct amdgpu_hive_info *hive = container_of(({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
209 kobj, struct amdgpu_hive_info, kobj)({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
;
210
211 if (attr == &amdgpu_xgmi_hive_id)
212 return snprintf(buf, PAGE_SIZE(1 << 12), "%llu\n", hive->hive_id);
213
214 return 0;
215}
216
217static void amdgpu_xgmi_hive_release(struct kobject *kobj)
218{
219 struct amdgpu_hive_info *hive = container_of(({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
220 kobj, struct amdgpu_hive_info, kobj)({ const __typeof( ((struct amdgpu_hive_info *)0)->kobj ) *
__mptr = (kobj); (struct amdgpu_hive_info *)( (char *)__mptr -
__builtin_offsetof(struct amdgpu_hive_info, kobj) );})
;
221
222 amdgpu_reset_put_reset_domain(hive->reset_domain);
223 hive->reset_domain = NULL((void *)0);
224
225 mutex_destroy(&hive->hive_lock);
226 kfree(hive);
227}
228
229#ifdef notyet
230static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
231 .show = amdgpu_xgmi_show_attrs,
232};
233#endif
234
235struct kobj_type amdgpu_xgmi_hive_type = {
236 .release = amdgpu_xgmi_hive_release,
237#ifdef notyet
238 .sysfs_ops = &amdgpu_xgmi_hive_ops,
239 .default_groups = amdgpu_xgmi_hive_groups,
240#endif
241};
242
243static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
244 struct device_attribute *attr,
245 char *buf)
246{
247 struct drm_device *ddev = dev_get_drvdata(dev);
248 struct amdgpu_device *adev = drm_to_adev(ddev);
249
250 return sysfs_emit(buf, "%llu\n", adev->gmc.xgmi.node_id);
251
252}
253
254#define AMDGPU_XGMI_SET_FICAA(o)((o) | 0x456801) ((o) | 0x456801)
255static ssize_t amdgpu_xgmi_show_error(struct device *dev,
256 struct device_attribute *attr,
257 char *buf)
258{
259 struct drm_device *ddev = dev_get_drvdata(dev);
260 struct amdgpu_device *adev = drm_to_adev(ddev);
261 uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in;
262 uint64_t fica_out;
263 unsigned int error_count = 0;
264
265 ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200)((0x200) | 0x456801);
266 ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208)((0x208) | 0x456801);
267
268 if ((!adev->df.funcs) ||
269 (!adev->df.funcs->get_fica) ||
270 (!adev->df.funcs->set_fica))
271 return -EINVAL22;
272
273 fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_ctl_in);
274 if (fica_out != 0x1f)
275 pr_err("xGMI error counters not enabled!\n")printk("\0013" "amdgpu: " "xGMI error counters not enabled!\n"
)
;
276
277 fica_out = adev->df.funcs->get_fica(adev, ficaa_pie_status_in);
278
279 if ((fica_out & 0xffff) == 2)
280 error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63);
281
282 adev->df.funcs->set_fica(adev, ficaa_pie_status_in, 0, 0);
283
284 return sysfs_emit(buf, "%u\n", error_count);
285}
286
287
288static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL)struct device_attribute dev_attr_xgmi_device_id;
289static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL)struct device_attribute dev_attr_xgmi_error;
290
291static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
292 struct amdgpu_hive_info *hive)
293{
294 STUB()do { printf("%s: stub\n", __func__); } while(0);
295 return -ENOSYS78;
296#ifdef notyet
297 int ret = 0;
298 char node[10] = { 0 };
299
300 /* Create xgmi device id file */
301 ret = device_create_file(adev->dev, &dev_attr_xgmi_device_id)0;
302 if (ret) {
303 dev_err(adev->dev, "XGMI: Failed to create device file xgmi_device_id\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to create device file xgmi_device_id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
304 return ret;
305 }
306
307 /* Create xgmi error file */
308 ret = device_create_file(adev->dev, &dev_attr_xgmi_error)0;
309 if (ret)
310 pr_err("failed to create xgmi_error\n")printk("\0013" "amdgpu: " "failed to create xgmi_error\n");
311
312
313 /* Create sysfs link to hive info folder on the first device */
314 if (hive->kobj.parent != (&adev->dev->kobj)) {
315 ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,0
316 "xgmi_hive_info")0;
317 if (ret) {
318 dev_err(adev->dev, "XGMI: Failed to create link to hive info")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to create link to hive info"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
319 goto remove_file;
320 }
321 }
322
323 snprintf(node, sizeof(node), "node%d", atomic_read(&hive->number_devices)({ typeof(*(&hive->number_devices)) __tmp = *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)); membar_datadep_consumer(); __tmp; })
);
324 /* Create sysfs link form the hive folder to yourself */
325 ret = sysfs_create_link(&hive->kobj, &adev->dev->kobj, node)0;
326 if (ret) {
327 dev_err(adev->dev, "XGMI: Failed to create link from hive info")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to create link from hive info"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
328 goto remove_link;
329 }
330
331 goto success;
332
333
334remove_link:
335 sysfs_remove_link(&adev->dev->kobj, adev_to_drm(adev)->unique);
336
337remove_file:
338 device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
339
340success:
341 return ret;
342#endif
343}
344
345static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
346 struct amdgpu_hive_info *hive)
347{
348#ifdef __linux__
349 char node[10];
350 memset(node, 0, sizeof(node))__builtin_memset((node), (0), (sizeof(node)));
351
352 device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
353 device_remove_file(adev->dev, &dev_attr_xgmi_error);
354
355 if (hive->kobj.parent != (&adev->dev->kobj))
356 sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
357
358 sprintf(node, "node%d", atomic_read(&hive->number_devices)({ typeof(*(&hive->number_devices)) __tmp = *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)); membar_datadep_consumer(); __tmp; })
);
359 sysfs_remove_link(&hive->kobj, node);
360#endif
361}
362
363
364
365struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
366{
367 struct amdgpu_hive_info *hive = NULL((void *)0);
368 int ret;
369
370 if (!adev->gmc.xgmi.hive_id)
371 return NULL((void *)0);
372
373 STUB()do { printf("%s: stub\n", __func__); } while(0);
374 return NULL((void *)0);
375#ifdef notyet
376
377 if (adev->hive) {
378 kobject_get(&adev->hive->kobj);
379 return adev->hive;
380 }
381
382 mutex_lock(&xgmi_mutex)rw_enter_write(&xgmi_mutex);
383
384 list_for_each_entry(hive, &xgmi_hive_list, node)for (hive = ({ const __typeof( ((__typeof(*hive) *)0)->node
) *__mptr = ((&xgmi_hive_list)->next); (__typeof(*hive
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*hive), node
) );}); &hive->node != (&xgmi_hive_list); hive = (
{ const __typeof( ((__typeof(*hive) *)0)->node ) *__mptr =
(hive->node.next); (__typeof(*hive) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*hive), node) );}))
{
385 if (hive->hive_id == adev->gmc.xgmi.hive_id)
386 goto pro_end;
387 }
388
389 hive = kzalloc(sizeof(*hive), GFP_KERNEL(0x0001 | 0x0004));
390 if (!hive) {
391 dev_err(adev->dev, "XGMI: allocation failed\n")printf("drm:pid%d:%s *ERROR* " "XGMI: allocation failed\n", (
{struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
392 hive = NULL((void *)0);
393 goto pro_end;
394 }
395
396 /* initialize new hive if not exist */
397 ret = kobject_init_and_add(&hive->kobj,
398 &amdgpu_xgmi_hive_type,
399 &adev->dev->kobj,
400 "%s", "xgmi_hive_info");
401 if (ret) {
402 dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n")printf("drm:pid%d:%s *ERROR* " "XGMI: failed initializing kobject for xgmi hive\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
403 kobject_put(&hive->kobj);
404 hive = NULL((void *)0);
405 goto pro_end;
406 }
407
408 /**
409 * Only init hive->reset_domain for none SRIOV configuration. For SRIOV,
410 * Host driver decide how to reset the GPU either through FLR or chain reset.
411 * Guest side will get individual notifications from the host for the FLR
412 * if necessary.
413 */
414 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
415 /**
416 * Avoid recreating reset domain when hive is reconstructed for the case
417 * of reset the devices in the XGMI hive during probe for passthrough GPU
418 * See https://www.spinics.net/lists/amd-gfx/msg58836.html
419 */
420 if (adev->reset_domain->type != XGMI_HIVE) {
421 hive->reset_domain =
422 amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
423 if (!hive->reset_domain) {
424 dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n")printf("drm:pid%d:%s *ERROR* " "XGMI: failed initializing reset domain for xgmi hive\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
425 ret = -ENOMEM12;
426 kobject_put(&hive->kobj);
427 hive = NULL((void *)0);
428 goto pro_end;
429 }
430 } else {
431 amdgpu_reset_get_reset_domain(adev->reset_domain);
432 hive->reset_domain = adev->reset_domain;
433 }
434 }
435
436 hive->hive_id = adev->gmc.xgmi.hive_id;
437 INIT_LIST_HEAD(&hive->device_list);
438 INIT_LIST_HEAD(&hive->node);
439 rw_init(&hive->hive_lock, "aghive")_rw_init_flags(&hive->hive_lock, "aghive", 0, ((void *
)0))
;
440 atomic_set(&hive->number_devices, 0)({ typeof(*(&hive->number_devices)) __tmp = ((0)); *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)) = __tmp; __tmp; })
;
441 task_barrier_init(&hive->tb);
442 hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
443 hive->hi_req_gpu = NULL((void *)0);
444
445 /*
446 * hive pstate on boot is high in vega20 so we have to go to low
447 * pstate on after boot.
448 */
449 hive->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE4;
450 list_add_tail(&hive->node, &xgmi_hive_list);
451
452pro_end:
453 if (hive)
454 kobject_get(&hive->kobj);
455 mutex_unlock(&xgmi_mutex)rw_exit_write(&xgmi_mutex);
456 return hive;
457#endif
458}
459
460void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive)
461{
462 if (hive)
463 kobject_put(&hive->kobj);
464}
465
466int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
467{
468 int ret = 0;
469 struct amdgpu_hive_info *hive;
470 struct amdgpu_device *request_adev;
471 bool_Bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
472 bool_Bool init_low;
473
474 hive = amdgpu_get_xgmi_hive(adev);
475 if (!hive)
476 return 0;
477
478 request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev;
479 init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
Value stored to 'init_low' is never read
480 amdgpu_put_xgmi_hive(hive);
481 /* fw bug so temporarily disable pstate switching */
482 return 0;
483
484 if (!hive || adev->asic_type != CHIP_VEGA20)
485 return 0;
486
487 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
488
489 if (is_hi_req)
490 hive->hi_req_count++;
491 else
492 hive->hi_req_count--;
493
494 /*
495 * Vega20 only needs single peer to request pstate high for the hive to
496 * go high but all peers must request pstate low for the hive to go low
497 */
498 if (hive->pstate == pstate ||
499 (!is_hi_req && hive->hi_req_count && !init_low))
500 goto out;
501
502 dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate)do { } while(0);
503
504 ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate);
505 if (ret) {
506 dev_err(request_adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
507 "XGMI: Set pstate failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
508 request_adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
509 request_adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Set pstate failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , request_adev
->gmc.xgmi.node_id, request_adev->gmc.xgmi.hive_id, ret
)
;
510 goto out;
511 }
512
513 if (init_low)
514 hive->pstate = hive->hi_req_count ?
515 hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
516 else {
517 hive->pstate = pstate;
518 hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
519 adev : NULL((void *)0);
520 }
521out:
522 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
523 return ret;
524}
525
526int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
527{
528 int ret;
529
530 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
531 return 0;
532
533 /* Each psp need to set the latest topology */
534 ret = psp_xgmi_set_topology_info(&adev->psp,
535 atomic_read(&hive->number_devices)({ typeof(*(&hive->number_devices)) __tmp = *(volatile
typeof(*(&hive->number_devices)) *)&(*(&hive->
number_devices)); membar_datadep_consumer(); __tmp; })
,
536 &adev->psp.xgmi_context.top_info);
537 if (ret)
538 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
539 "XGMI: Set topology failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
540 adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
541 adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Set topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret)
;
542
543 return ret;
544}
545
546
547/*
548 * NOTE psp_xgmi_node_info.num_hops layout is as follows:
549 * num_hops[7:6] = link type (0 = xGMI2, 1 = xGMI3, 2/3 = reserved)
550 * num_hops[5:3] = reserved
551 * num_hops[2:0] = number of hops
552 */
553int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
554 struct amdgpu_device *peer_adev)
555{
556 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
557 uint8_t num_hops_mask = 0x7;
558 int i;
559
560 for (i = 0 ; i < top->num_nodes; ++i)
561 if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
562 return top->nodes[i].num_hops & num_hops_mask;
563 return -EINVAL22;
564}
565
566int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev,
567 struct amdgpu_device *peer_adev)
568{
569 struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
570 int i;
571
572 for (i = 0 ; i < top->num_nodes; ++i)
573 if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
574 return top->nodes[i].num_links;
575 return -EINVAL22;
576}
577
578/*
579 * Devices that support extended data require the entire hive to initialize with
580 * the shared memory buffer flag set.
581 *
582 * Hive locks and conditions apply - see amdgpu_xgmi_add_device
583 */
584static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_info *hive,
585 bool_Bool set_extended_data)
586{
587 struct amdgpu_device *tmp_adev;
588 int ret;
589
590 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
591 ret = psp_xgmi_initialize(&tmp_adev->psp, set_extended_data, false0);
592 if (ret) {
593 dev_err(tmp_adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session for data partition %i\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , set_extended_data
)
594 "XGMI: Failed to initialize xgmi session for data partition %i\n",printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session for data partition %i\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , set_extended_data
)
595 set_extended_data)printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session for data partition %i\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , set_extended_data
)
;
596 return ret;
597 }
598
599 }
600
601 return 0;
602}
603
604int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
605{
606 struct psp_xgmi_topology_info *top_info;
607 struct amdgpu_hive_info *hive;
608 struct amdgpu_xgmi *entry;
609 struct amdgpu_device *tmp_adev = NULL((void *)0);
610
611 int count = 0, ret = 0;
612
613 if (!adev->gmc.xgmi.supported)
614 return 0;
615
616 if (!adev->gmc.xgmi.pending_reset &&
617 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
618 ret = psp_xgmi_initialize(&adev->psp, false0, true1);
619 if (ret) {
620 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
621 "XGMI: Failed to initialize xgmi session\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to initialize xgmi session\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
622 return ret;
623 }
624
625 ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
626 if (ret) {
627 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get hive id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
628 "XGMI: Failed to get hive id\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get hive id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
629 return ret;
630 }
631
632 ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
633 if (ret) {
634 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get node id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
635 "XGMI: Failed to get node id\n")printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to get node id\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
636 return ret;
637 }
638 } else {
639 adev->gmc.xgmi.hive_id = 16;
640 adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
641 }
642
643 hive = amdgpu_get_xgmi_hive(adev);
644 if (!hive) {
645 ret = -EINVAL22;
646 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)
647 "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",printf("drm:pid%d:%s *ERROR* " "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)
648 adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)printf("drm:pid%d:%s *ERROR* " "XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.node_id, adev->gmc.xgmi.hive_id)
;
649 goto exit;
650 }
651 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
652
653 top_info = &adev->psp.xgmi_context.top_info;
654
655 list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
656 list_for_each_entry(entry, &hive->device_list, head)for (entry = ({ const __typeof( ((__typeof(*entry) *)0)->head
) *__mptr = ((&hive->device_list)->next); (__typeof
(*entry) *)( (char *)__mptr - __builtin_offsetof(__typeof(*entry
), head) );}); &entry->head != (&hive->device_list
); entry = ({ const __typeof( ((__typeof(*entry) *)0)->head
) *__mptr = (entry->head.next); (__typeof(*entry) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*entry), head) );}))
657 top_info->nodes[count++].node_id = entry->node_id;
658 top_info->num_nodes = count;
659 atomic_set(&hive->number_devices, count)({ typeof(*(&hive->number_devices)) __tmp = ((count));
*(volatile typeof(*(&hive->number_devices)) *)&(*
(&hive->number_devices)) = __tmp; __tmp; })
;
660
661 task_barrier_add_task(&hive->tb);
662
663 if (!adev->gmc.xgmi.pending_reset &&
664 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) {
665 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
666 /* update node list for other device in the hive */
667 if (tmp_adev != adev) {
668 top_info = &tmp_adev->psp.xgmi_context.top_info;
669 top_info->nodes[count - 1].node_id =
670 adev->gmc.xgmi.node_id;
671 top_info->num_nodes = count;
672 }
673 ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
674 if (ret)
675 goto exit_unlock;
676 }
677
678 /* get latest topology info for each device from psp */
679 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
680 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
681 &tmp_adev->psp.xgmi_context.top_info, false0);
682 if (ret) {
683 dev_err(tmp_adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
684 "XGMI: Get topology failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
685 tmp_adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
686 tmp_adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
;
687 /* To do : continue with some node failed or disable the whole hive */
688 goto exit_unlock;
689 }
690 }
691
692 /* get topology again for hives that support extended data */
693 if (adev->psp.xgmi_context.supports_extended_data) {
694
695 /* initialize the hive to get extended data. */
696 ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, true1);
697 if (ret)
698 goto exit_unlock;
699
700 /* get the extended data. */
701 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
702 ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
703 &tmp_adev->psp.xgmi_context.top_info, true1);
704 if (ret) {
705 dev_err(tmp_adev->dev,printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
706 "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d",printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
707 tmp_adev->gmc.xgmi.node_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
708 tmp_adev->gmc.xgmi.hive_id, ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Get topology for extended data failure on device %llx, hive %llx, ret %d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , tmp_adev
->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret)
;
709 goto exit_unlock;
710 }
711 }
712
713 /* initialize the hive to get non-extended data for the next round. */
714 ret = amdgpu_xgmi_initialize_hive_get_data_partition(hive, false0);
715 if (ret)
716 goto exit_unlock;
717
718 }
719 }
720
721 if (!ret && !adev->gmc.xgmi.pending_reset)
722 ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
723
724exit_unlock:
725 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
726exit:
727 if (!ret) {
728 adev->hive = hive;
729 dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",do { } while(0)
730 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id)do { } while(0);
731 } else {
732 amdgpu_put_xgmi_hive(hive);
733 dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n",printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret)
734 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret)
735 ret)printf("drm:pid%d:%s *ERROR* " "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , adev->
gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id, ret)
;
736 }
737
738 return ret;
739}
740
741int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
742{
743 struct amdgpu_hive_info *hive = adev->hive;
744
745 if (!adev->gmc.xgmi.supported)
746 return -EINVAL22;
747
748 if (!hive)
749 return -EINVAL22;
750
751 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
752 task_barrier_rem_task(&hive->tb);
753 amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
754 if (hive->hi_req_gpu == adev)
755 hive->hi_req_gpu = NULL((void *)0);
756 list_del(&adev->gmc.xgmi.head);
757 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
758
759 amdgpu_put_xgmi_hive(hive);
760 adev->hive = NULL((void *)0);
761
762 if (atomic_dec_return(&hive->number_devices)__sync_sub_and_fetch((&hive->number_devices), 1) == 0) {
763 /* Remove the hive from global hive list */
764 mutex_lock(&xgmi_mutex)rw_enter_write(&xgmi_mutex);
765 list_del(&hive->node);
766 mutex_unlock(&xgmi_mutex)rw_exit_write(&xgmi_mutex);
767
768 amdgpu_put_xgmi_hive(hive);
769 }
770
771 return 0;
772}
773
774static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
775{
776 if (!adev->gmc.xgmi.supported ||
777 adev->gmc.xgmi.num_physical_nodes == 0)
778 return 0;
779
780 adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
781
782 return amdgpu_ras_block_late_init(adev, ras_block);
783}
784
785uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
786 uint64_t addr)
787{
788 struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi;
789 return (addr + xgmi->physical_node_id * xgmi->node_segment_size);
790}
791
792static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg)
793{
794 WREG32_PCIE(pcs_status_reg, 0xFFFFFFFF)adev->pcie_wreg(adev, (pcs_status_reg), (0xFFFFFFFF));
795 WREG32_PCIE(pcs_status_reg, 0)adev->pcie_wreg(adev, (pcs_status_reg), (0));
796}
797
798static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
799{
800 uint32_t i;
801
802 switch (adev->asic_type) {
803 case CHIP_ARCTURUS:
804 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct)(sizeof((xgmi_pcs_err_status_reg_arct)) / sizeof((xgmi_pcs_err_status_reg_arct
)[0]))
; i++)
805 pcs_clear_status(adev,
806 xgmi_pcs_err_status_reg_arct[i]);
807 break;
808 case CHIP_VEGA20:
809 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20)(sizeof((xgmi_pcs_err_status_reg_vg20)) / sizeof((xgmi_pcs_err_status_reg_vg20
)[0]))
; i++)
810 pcs_clear_status(adev,
811 xgmi_pcs_err_status_reg_vg20[i]);
812 break;
813 case CHIP_ALDEBARAN:
814 for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran)(sizeof((xgmi3x16_pcs_err_status_reg_aldebaran)) / sizeof((xgmi3x16_pcs_err_status_reg_aldebaran
)[0]))
; i++)
815 pcs_clear_status(adev,
816 xgmi3x16_pcs_err_status_reg_aldebaran[i]);
817 for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran)(sizeof((walf_pcs_err_status_reg_aldebaran)) / sizeof((walf_pcs_err_status_reg_aldebaran
)[0]))
; i++)
818 pcs_clear_status(adev,
819 walf_pcs_err_status_reg_aldebaran[i]);
820 break;
821 default:
822 break;
823 }
824}
825
826static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
827 uint32_t value,
828 uint32_t *ue_count,
829 uint32_t *ce_count,
830 bool_Bool is_xgmi_pcs)
831{
832 int i;
833 int ue_cnt;
834
835 if (is_xgmi_pcs) {
836 /* query xgmi pcs error status,
837 * only ue is supported */
838 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_ras_fields)(sizeof((xgmi_pcs_ras_fields)) / sizeof((xgmi_pcs_ras_fields)
[0]))
; i ++) {
839 ue_cnt = (value &
840 xgmi_pcs_ras_fields[i].pcs_err_mask) >>
841 xgmi_pcs_ras_fields[i].pcs_err_shift;
842 if (ue_cnt) {
843 dev_info(adev->dev, "%s detected\n",do { } while(0)
844 xgmi_pcs_ras_fields[i].err_name)do { } while(0);
845 *ue_count += ue_cnt;
846 }
847 }
848 } else {
849 /* query wafl pcs error status,
850 * only ue is supported */
851 for (i = 0; i < ARRAY_SIZE(wafl_pcs_ras_fields)(sizeof((wafl_pcs_ras_fields)) / sizeof((wafl_pcs_ras_fields)
[0]))
; i++) {
852 ue_cnt = (value &
853 wafl_pcs_ras_fields[i].pcs_err_mask) >>
854 wafl_pcs_ras_fields[i].pcs_err_shift;
855 if (ue_cnt) {
856 dev_info(adev->dev, "%s detected\n",do { } while(0)
857 wafl_pcs_ras_fields[i].err_name)do { } while(0);
858 *ue_count += ue_cnt;
859 }
860 }
861 }
862
863 return 0;
864}
865
866static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
867 void *ras_error_status)
868{
869 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
870 int i;
871 uint32_t data;
872 uint32_t ue_cnt = 0, ce_cnt = 0;
873
874 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
875 return ;
876
877 err_data->ue_count = 0;
878 err_data->ce_count = 0;
879
880 switch (adev->asic_type) {
881 case CHIP_ARCTURUS:
882 /* check xgmi pcs error */
883 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct)(sizeof((xgmi_pcs_err_status_reg_arct)) / sizeof((xgmi_pcs_err_status_reg_arct
)[0]))
; i++) {
884 data = RREG32_PCIE(xgmi_pcs_err_status_reg_arct[i])adev->pcie_rreg(adev, (xgmi_pcs_err_status_reg_arct[i]));
885 if (data)
886 amdgpu_xgmi_query_pcs_error_status(adev,
887 data, &ue_cnt, &ce_cnt, true1);
888 }
889 /* check wafl pcs error */
890 for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_arct)(sizeof((wafl_pcs_err_status_reg_arct)) / sizeof((wafl_pcs_err_status_reg_arct
)[0]))
; i++) {
891 data = RREG32_PCIE(wafl_pcs_err_status_reg_arct[i])adev->pcie_rreg(adev, (wafl_pcs_err_status_reg_arct[i]));
892 if (data)
893 amdgpu_xgmi_query_pcs_error_status(adev,
894 data, &ue_cnt, &ce_cnt, false0);
895 }
896 break;
897 case CHIP_VEGA20:
898 /* check xgmi pcs error */
899 for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20)(sizeof((xgmi_pcs_err_status_reg_vg20)) / sizeof((xgmi_pcs_err_status_reg_vg20
)[0]))
; i++) {
900 data = RREG32_PCIE(xgmi_pcs_err_status_reg_vg20[i])adev->pcie_rreg(adev, (xgmi_pcs_err_status_reg_vg20[i]));
901 if (data)
902 amdgpu_xgmi_query_pcs_error_status(adev,
903 data, &ue_cnt, &ce_cnt, true1);
904 }
905 /* check wafl pcs error */
906 for (i = 0; i < ARRAY_SIZE(wafl_pcs_err_status_reg_vg20)(sizeof((wafl_pcs_err_status_reg_vg20)) / sizeof((wafl_pcs_err_status_reg_vg20
)[0]))
; i++) {
907 data = RREG32_PCIE(wafl_pcs_err_status_reg_vg20[i])adev->pcie_rreg(adev, (wafl_pcs_err_status_reg_vg20[i]));
908 if (data)
909 amdgpu_xgmi_query_pcs_error_status(adev,
910 data, &ue_cnt, &ce_cnt, false0);
911 }
912 break;
913 case CHIP_ALDEBARAN:
914 /* check xgmi3x16 pcs error */
915 for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran)(sizeof((xgmi3x16_pcs_err_status_reg_aldebaran)) / sizeof((xgmi3x16_pcs_err_status_reg_aldebaran
)[0]))
; i++) {
916 data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i])adev->pcie_rreg(adev, (xgmi3x16_pcs_err_status_reg_aldebaran
[i]))
;
917 if (data)
918 amdgpu_xgmi_query_pcs_error_status(adev,
919 data, &ue_cnt, &ce_cnt, true1);
920 }
921 /* check wafl pcs error */
922 for (i = 0; i < ARRAY_SIZE(walf_pcs_err_status_reg_aldebaran)(sizeof((walf_pcs_err_status_reg_aldebaran)) / sizeof((walf_pcs_err_status_reg_aldebaran
)[0]))
; i++) {
923 data = RREG32_PCIE(walf_pcs_err_status_reg_aldebaran[i])adev->pcie_rreg(adev, (walf_pcs_err_status_reg_aldebaran[i
]))
;
924 if (data)
925 amdgpu_xgmi_query_pcs_error_status(adev,
926 data, &ue_cnt, &ce_cnt, false0);
927 }
928 break;
929 default:
930 dev_warn(adev->dev, "XGMI RAS error query not supported")printf("drm:pid%d:%s *WARNING* " "XGMI RAS error query not supported"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
931 break;
932 }
933
934 adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
935
936 err_data->ue_count += ue_cnt;
937 err_data->ce_count += ce_cnt;
938}
939
940/* Trigger XGMI/WAFL error */
941static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if)
942{
943 int ret = 0;
944 struct ta_ras_trigger_error_input *block_info =
945 (struct ta_ras_trigger_error_input *)inject_if;
946
947 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
948 dev_warn(adev->dev, "Failed to disallow df cstate")printf("drm:pid%d:%s *WARNING* " "Failed to disallow df cstate"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
949
950 if (amdgpu_dpm_allow_xgmi_power_down(adev, false0))
951 dev_warn(adev->dev, "Failed to disallow XGMI power down")printf("drm:pid%d:%s *WARNING* " "Failed to disallow XGMI power down"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
952
953 ret = psp_ras_trigger_error(&adev->psp, block_info);
954
955 if (amdgpu_ras_intr_triggered())
956 return ret;
957
958 if (amdgpu_dpm_allow_xgmi_power_down(adev, true1))
959 dev_warn(adev->dev, "Failed to allow XGMI power down")printf("drm:pid%d:%s *WARNING* " "Failed to allow XGMI power down"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
960
961 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
962 dev_warn(adev->dev, "Failed to allow df cstate")printf("drm:pid%d:%s *WARNING* " "Failed to allow df cstate",
({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
963
964 return ret;
965}
966
967struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
968 .query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
969 .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
970 .ras_error_inject = amdgpu_ras_error_inject_xgmi,
971};
972
973struct amdgpu_xgmi_ras xgmi_ras = {
974 .ras_block = {
975 .ras_comm = {
976 .name = "xgmi_wafl",
977 .block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
978 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
979 },
980 .hw_ops = &xgmi_ras_hw_ops,
981 .ras_late_init = amdgpu_xgmi_ras_late_init,
982 },
983};