Bug Summary

File: dev/pci/drm/amd/amdgpu/amdgpu_device.c
Warning: line 5498, column 3
Access to field 'next' results in a dereference of a null pointer

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name amdgpu_device.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
1/*
2 * Copyright 2008 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors: Dave Airlie
25 * Alex Deucher
26 * Jerome Glisse
27 */
28#include <linux/power_supply.h>
29#include <linux/kthread.h>
30#include <linux/module.h>
31#include <linux/console.h>
32#include <linux/slab.h>
33#include <linux/iommu.h>
34#include <linux/pci.h>
35#include <linux/devcoredump.h>
36#include <generated/utsrelease.h>
37#include <linux/pci-p2pdma.h>
38#include <linux/apple-gmux.h>
39
40#include <drm/drm_aperture.h>
41#include <drm/drm_atomic_helper.h>
42#include <drm/drm_probe_helper.h>
43#include <drm/amdgpu_drm.h>
44#include <linux/device.h>
45#include <linux/vgaarb.h>
46#include <linux/vga_switcheroo.h>
47#include <linux/efi.h>
48#include "amdgpu.h"
49#include "amdgpu_trace.h"
50#include "amdgpu_i2c.h"
51#include "atom.h"
52#include "amdgpu_atombios.h"
53#include "amdgpu_atomfirmware.h"
54#include "amd_pcie.h"
55#ifdef CONFIG_DRM_AMDGPU_SI
56#include "si.h"
57#endif
58#ifdef CONFIG_DRM_AMDGPU_CIK
59#include "cik.h"
60#endif
61#include "vi.h"
62#include "soc15.h"
63#include "nv.h"
64#include "bif/bif_4_1_d.h"
65#include <linux/firmware.h>
66#include "amdgpu_vf_error.h"
67
68#include "amdgpu_amdkfd.h"
69#include "amdgpu_pm.h"
70
71#include "amdgpu_xgmi.h"
72#include "amdgpu_ras.h"
73#include "amdgpu_pmu.h"
74#include "amdgpu_fru_eeprom.h"
75#include "amdgpu_reset.h"
76
77#include <linux/suspend.h>
78#include <drm/task_barrier.h>
79#include <linux/pm_runtime.h>
80
81#include <drm/drm_drv.h>
82
83#if IS_ENABLED(CONFIG_X86)1 && defined(__linux__)
84#include <asm/intel-family.h>
85#endif
86
87MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
88MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
89MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
90MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
91MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
92MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
93MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
94
95#define AMDGPU_RESUME_MS2000 2000
96#define AMDGPU_MAX_RETRY_LIMIT2 2
97#define AMDGPU_RETRY_SRIOV_RESET(r)((r) == -16 || (r) == -60 || (r) == -22) ((r) == -EBUSY16 || (r) == -ETIMEDOUT60 || (r) == -EINVAL22)
98
99static const struct drm_driver amdgpu_kms_driver;
100
101const char *amdgpu_asic_name[] = {
102 "TAHITI",
103 "PITCAIRN",
104 "VERDE",
105 "OLAND",
106 "HAINAN",
107 "BONAIRE",
108 "KAVERI",
109 "KABINI",
110 "HAWAII",
111 "MULLINS",
112 "TOPAZ",
113 "TONGA",
114 "FIJI",
115 "CARRIZO",
116 "STONEY",
117 "POLARIS10",
118 "POLARIS11",
119 "POLARIS12",
120 "VEGAM",
121 "VEGA10",
122 "VEGA12",
123 "VEGA20",
124 "RAVEN",
125 "ARCTURUS",
126 "RENOIR",
127 "ALDEBARAN",
128 "NAVI10",
129 "CYAN_SKILLFISH",
130 "NAVI14",
131 "NAVI12",
132 "SIENNA_CICHLID",
133 "NAVY_FLOUNDER",
134 "VANGOGH",
135 "DIMGREY_CAVEFISH",
136 "BEIGE_GOBY",
137 "YELLOW_CARP",
138 "IP DISCOVERY",
139 "LAST",
140};
141
142/**
143 * DOC: pcie_replay_count
144 *
145 * The amdgpu driver provides a sysfs API for reporting the total number
146 * of PCIe replays (NAKs)
147 * The file pcie_replay_count is used for this and returns the total
148 * number of replays as a sum of the NAKs generated and NAKs received
149 */
150
151static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
152 struct device_attribute *attr, char *buf)
153{
154 struct drm_device *ddev = dev_get_drvdata(dev);
155 struct amdgpu_device *adev = drm_to_adev(ddev);
156 uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev)((adev)->asic_funcs->get_pcie_replay_count((adev)));
157
158 return sysfs_emit(buf, "%llu\n", cnt);
159}
160
161static DEVICE_ATTR(pcie_replay_count, S_IRUGO,struct device_attribute dev_attr_pcie_replay_count
162 amdgpu_device_get_pcie_replay_count, NULL)struct device_attribute dev_attr_pcie_replay_count;
163
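/*
 * Minimal userspace sketch (not part of amdgpu_device.c): one way the
 * pcie_replay_count attribute described above might be read. The "card0"
 * path is an assumption and depends on the system; the sysfs interface
 * itself is only present on Linux hosts.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long count;
	FILE *f = fopen("/sys/class/drm/card0/device/pcie_replay_count", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%llu", &count) == 1)
		printf("PCIe replay count: %llu\n", count);
	fclose(f);
	return 0;
}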
164static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
165
166/**
167 * DOC: product_name
168 *
169 * The amdgpu driver provides a sysfs API for reporting the product name
170 * for the device
171 * The file product_name is used for this and returns the product name
172 * as returned from the FRU.
173 * NOTE: This is only available for certain server cards
174 */
175
176static ssize_t amdgpu_device_get_product_name(struct device *dev,
177 struct device_attribute *attr, char *buf)
178{
179 struct drm_device *ddev = dev_get_drvdata(dev);
180 struct amdgpu_device *adev = drm_to_adev(ddev);
181
182 return sysfs_emit(buf, "%s\n", adev->product_name);
183}
184
185static DEVICE_ATTR(product_name, S_IRUGO,struct device_attribute dev_attr_product_name
186 amdgpu_device_get_product_name, NULL)struct device_attribute dev_attr_product_name;
187
188/**
189 * DOC: product_number
190 *
191 * The amdgpu driver provides a sysfs API for reporting the part number
192 * for the device
193 * The file product_number is used for this and returns the part number
194 * as returned from the FRU.
195 * NOTE: This is only available for certain server cards
196 */
197
198static ssize_t amdgpu_device_get_product_number(struct device *dev,
199 struct device_attribute *attr, char *buf)
200{
201 struct drm_device *ddev = dev_get_drvdata(dev);
202 struct amdgpu_device *adev = drm_to_adev(ddev);
203
204 return sysfs_emit(buf, "%s\n", adev->product_number);
205}
206
207static DEVICE_ATTR(product_number, S_IRUGO,struct device_attribute dev_attr_product_number
208 amdgpu_device_get_product_number, NULL)struct device_attribute dev_attr_product_number;
209
210/**
211 * DOC: serial_number
212 *
213 * The amdgpu driver provides a sysfs API for reporting the serial number
214 * for the device
215 * The file serial_number is used for this and returns the serial number
216 * as returned from the FRU.
217 * NOTE: This is only available for certain server cards
218 */
219
220static ssize_t amdgpu_device_get_serial_number(struct device *dev,
221 struct device_attribute *attr, char *buf)
222{
223 struct drm_device *ddev = dev_get_drvdata(dev);
224 struct amdgpu_device *adev = drm_to_adev(ddev);
225
226 return sysfs_emit(buf, "%s\n", adev->serial);
227}
228
229static DEVICE_ATTR(serial_number, S_IRUGO,struct device_attribute dev_attr_serial_number
230 amdgpu_device_get_serial_number, NULL)struct device_attribute dev_attr_serial_number;
231
232/**
233 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
234 *
235 * @dev: drm_device pointer
236 *
237 * Returns true if the device is a dGPU with ATPX power control,
238 * otherwise return false.
239 */
240bool_Bool amdgpu_device_supports_px(struct drm_device *dev)
241{
242 struct amdgpu_device *adev = drm_to_adev(dev);
243
244 if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
245 return true1;
246 return false0;
247}
248
249/**
250 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
251 *
252 * @dev: drm_device pointer
253 *
254 * Returns true if the device is a dGPU with ACPI power control,
255 * otherwise return false.
256 */
257bool_Bool amdgpu_device_supports_boco(struct drm_device *dev)
258{
259 struct amdgpu_device *adev = drm_to_adev(dev);
260
261 if (adev->has_pr3 ||
262 ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
263 return true1;
264 return false0;
265}
266
267/**
268 * amdgpu_device_supports_baco - Does the device support BACO
269 *
270 * @dev: drm_device pointer
271 *
272 * Returns true if the device supports BACO,
273 * otherwise return false.
274 */
275bool_Bool amdgpu_device_supports_baco(struct drm_device *dev)
276{
277 struct amdgpu_device *adev = drm_to_adev(dev);
278
279 return amdgpu_asic_supports_baco(adev)(adev)->asic_funcs->supports_baco((adev));
280}
281
282/**
283 * amdgpu_device_supports_smart_shift - Is the device dGPU with
284 * smart shift support
285 *
286 * @dev: drm_device pointer
287 *
288 * Returns true if the device is a dGPU with Smart Shift support,
289 * otherwise returns false.
290 */
291bool_Bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
292{
293 return (amdgpu_device_supports_boco(dev) &&
294 amdgpu_acpi_is_power_shift_control_supported());
295}
296
297/*
298 * VRAM access helper functions
299 */
300
301/**
302 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
303 *
304 * @adev: amdgpu_device pointer
305 * @pos: offset of the buffer in vram
306 * @buf: virtual address of the buffer in system memory
307 * @size: read/write size, sizeof(@buf) must be > @size
308 * @write: true - write to vram, otherwise - read from vram
309 */
310void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
311 void *buf, size_t size, bool_Bool write)
312{
313 unsigned long flags;
314 uint32_t hi = ~0, tmp = 0;
315 uint32_t *data = buf;
316 uint64_t last;
317 int idx;
318
319 if (!drm_dev_enter(adev_to_drm(adev), &idx))
320 return;
321
322 BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4))((!(!(((pos) & ((4) - 1)) == 0) || !(((size) & ((4) -
1)) == 0))) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 322, "!(!(((pos) & ((4) - 1)) == 0) || !(((size) & ((4) - 1)) == 0))"
))
;
323
324 spin_lock_irqsave(&adev->mmio_idx_lock, flags)do { flags = 0; mtx_enter(&adev->mmio_idx_lock); } while
(0)
;
325 for (last = pos + size; pos < last; pos += 4) {
326 tmp = pos >> 31;
327
328 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000)amdgpu_device_wreg(adev, (0x0), (((uint32_t)pos) | 0x80000000
), (1<<1))
;
329 if (tmp != hi) {
330 WREG32_NO_KIQ(mmMM_INDEX_HI, tmp)amdgpu_device_wreg(adev, (0x6), (tmp), (1<<1));
331 hi = tmp;
332 }
333 if (write)
334 WREG32_NO_KIQ(mmMM_DATA, *data++)amdgpu_device_wreg(adev, (0x1), (*data++), (1<<1));
335 else
336 *data++ = RREG32_NO_KIQ(mmMM_DATA)amdgpu_device_rreg(adev, (0x1), (1<<1));
337 }
338
339 spin_unlock_irqrestore(&adev->mmio_idx_lock, flags)do { (void)(flags); mtx_leave(&adev->mmio_idx_lock); }
while (0)
;
340 drm_dev_exit(idx);
341}
342
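/*
 * Minimal usage sketch (illustrative only, not part of this file): read one
 * dword from VRAM through the MM_INDEX/MM_DATA path implemented above. Both
 * the offset and the size must be 4-byte aligned, as the BUG_ON in
 * amdgpu_device_mm_access() enforces. The helper name is hypothetical.
 */
static uint32_t example_mm_read_dword(struct amdgpu_device *adev, loff_t pos)
{
	uint32_t val = 0;

	amdgpu_device_mm_access(adev, pos, &val, sizeof(val), false);
	return val;
}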
343/**
344 * amdgpu_device_aper_access - access vram through the vram aperture
345 *
346 * @adev: amdgpu_device pointer
347 * @pos: offset of the buffer in vram
348 * @buf: virtual address of the buffer in system memory
349 * @size: read/write size, sizeof(@buf) must be > @size
350 * @write: true - write to vram, otherwise - read from vram
351 *
352 * The return value means how many bytes have been transferred.
353 */
354size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
355 void *buf, size_t size, bool_Bool write)
356{
357#ifdef CONFIG_64BIT1
358 void __iomem *addr;
359 size_t count = 0;
360 uint64_t last;
361
362 if (!adev->mman.aper_base_kaddr)
363 return 0;
364
365 last = min(pos + size, adev->gmc.visible_vram_size)(((pos + size)<(adev->gmc.visible_vram_size))?(pos + size
):(adev->gmc.visible_vram_size))
;
366 if (last > pos) {
367 addr = adev->mman.aper_base_kaddr + pos;
368 count = last - pos;
369
370 if (write) {
371 memcpy_toio(addr, buf, count)__builtin_memcpy((addr), (buf), (count));
372 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
373 amdgpu_device_flush_hdp(adev, NULL((void *)0));
374 } else {
375 amdgpu_device_invalidate_hdp(adev, NULL((void *)0));
376 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
377 memcpy_fromio(buf, addr, count)__builtin_memcpy((buf), (addr), (count));
378 }
379
380 }
381
382 return count;
383#else
384 return 0;
385#endif
386}
387
388/**
389 * amdgpu_device_vram_access - read/write a buffer in vram
390 *
391 * @adev: amdgpu_device pointer
392 * @pos: offset of the buffer in vram
393 * @buf: virtual address of the buffer in system memory
394 * @size: read/write size, sizeof(@buf) must be > @size
395 * @write: true - write to vram, otherwise - read from vram
396 */
397void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
398 void *buf, size_t size, bool_Bool write)
399{
400 size_t count;
401
402 /* try using the vram aperture to access vram first */
403 count = amdgpu_device_aper_access(adev, pos, buf, size, write);
404 size -= count;
405 if (size) {
406 /* use MM to access the rest of vram */
407 pos += count;
408 buf += count;
409 amdgpu_device_mm_access(adev, pos, buf, size, write);
410 }
411}
412
413/*
414 * register access helper functions.
415 */
416
417/* Check if hw access should be skipped because of hotplug or device error */
418bool_Bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
419{
420 if (adev->no_hw_access)
421 return true1;
422
423#ifdef CONFIG_LOCKDEP
424 /*
425 * This is a bit complicated to understand, so worth a comment. What we assert
426 * here is that the GPU reset is not running on another thread in parallel.
427 *
428 * For this we trylock the read side of the reset semaphore, if that succeeds
429 * we know that the reset is not running in parallel.
430 *
431 * If the trylock fails we assert that we are either already holding the read
432 * side of the lock or are the reset thread itself and hold the write side of
433 * the lock.
434 */
435 if (in_task()) {
436 if (down_read_trylock(&adev->reset_domain->sem)(rw_enter(&adev->reset_domain->sem, 0x0002UL | 0x0040UL
) == 0)
)
437 up_read(&adev->reset_domain->sem)rw_exit_read(&adev->reset_domain->sem);
438 else
439 lockdep_assert_held(&adev->reset_domain->sem)do { (void)(&adev->reset_domain->sem); } while(0);
440 }
441#endif
442 return false0;
443}
444
445/**
446 * amdgpu_device_rreg - read a memory mapped IO or indirect register
447 *
448 * @adev: amdgpu_device pointer
449 * @reg: dword aligned register offset
450 * @acc_flags: access flags which require special behavior
451 *
452 * Returns the 32 bit value from the offset specified.
453 */
454uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
455 uint32_t reg, uint32_t acc_flags)
456{
457 uint32_t ret;
458
459 if (amdgpu_device_skip_hw_access(adev))
460 return 0;
461
462 if ((reg * 4) < adev->rmmio_size) {
463 if (!(acc_flags & AMDGPU_REGS_NO_KIQ(1<<1)) &&
464 amdgpu_sriov_runtime(adev)((adev)->virt.caps & (1 << 4)) &&
465 down_read_trylock(&adev->reset_domain->sem)(rw_enter(&adev->reset_domain->sem, 0x0002UL | 0x0040UL
) == 0)
) {
466 ret = amdgpu_kiq_rreg(adev, reg);
467 up_read(&adev->reset_domain->sem)rw_exit_read(&adev->reset_domain->sem);
468 } else {
469 ret = readl(((void __iomem *)adev->rmmio) + (reg * 4))ioread32(((void *)adev->rmmio) + (reg * 4));
470 }
471 } else {
472 ret = adev->pcie_rreg(adev, reg * 4);
473 }
474
475 trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
476
477 return ret;
478}
479
480/*
481 * MMIO register byte read helper function
482 * @offset: byte offset from MMIO start
483 *
484*/
485
486/**
487 * amdgpu_mm_rreg8 - read a memory mapped IO register
488 *
489 * @adev: amdgpu_device pointer
490 * @offset: byte aligned register offset
491 *
492 * Returns the 8 bit value from the offset specified.
493 */
494uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
495{
496 if (amdgpu_device_skip_hw_access(adev))
497 return 0;
498
499 if (offset < adev->rmmio_size)
500 return (readb(adev->rmmio + offset)ioread8(adev->rmmio + offset));
501 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 501); } while (0)
;
502}
503
504/*
505 * MMIO register byte write helper function
506 * @offset: byte offset from MMIO start
507 * @value: the value to be written to the register
508 *
509*/
510/**
511 * amdgpu_mm_wreg8 - write a memory mapped IO register
512 *
513 * @adev: amdgpu_device pointer
514 * @offset: byte aligned register offset
515 * @value: 8 bit value to write
516 *
517 * Writes the value specified to the offset specified.
518 */
519void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
520{
521 if (amdgpu_device_skip_hw_access(adev))
522 return;
523
524 if (offset < adev->rmmio_size)
525 writeb(value, adev->rmmio + offset)iowrite8(value, adev->rmmio + offset);
526 else
527 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 527); } while (0)
;
528}
529
530/**
531 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
532 *
533 * @adev: amdgpu_device pointer
534 * @reg: dword aligned register offset
535 * @v: 32 bit value to write to the register
536 * @acc_flags: access flags which require special behavior
537 *
538 * Writes the value specified to the offset specified.
539 */
540void amdgpu_device_wreg(struct amdgpu_device *adev,
541 uint32_t reg, uint32_t v,
542 uint32_t acc_flags)
543{
544 if (amdgpu_device_skip_hw_access(adev))
545 return;
546
547 if ((reg * 4) < adev->rmmio_size) {
548 if (!(acc_flags & AMDGPU_REGS_NO_KIQ(1<<1)) &&
549 amdgpu_sriov_runtime(adev)((adev)->virt.caps & (1 << 4)) &&
550 down_read_trylock(&adev->reset_domain->sem)(rw_enter(&adev->reset_domain->sem, 0x0002UL | 0x0040UL
) == 0)
) {
551 amdgpu_kiq_wreg(adev, reg, v);
552 up_read(&adev->reset_domain->sem)rw_exit_read(&adev->reset_domain->sem);
553 } else {
554 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4))iowrite32(v, ((void *)adev->rmmio) + (reg * 4));
555 }
556 } else {
557 adev->pcie_wreg(adev, reg * 4, v);
558 }
559
560 trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
561}
562
563/**
564 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
565 *
566 * @adev: amdgpu_device pointer
567 * @reg: mmio/rlc register
568 * @v: value to write
569 *
570 * this function is invoked only for the debugfs register access
571 */
572void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
573 uint32_t reg, uint32_t v)
574{
575 if (amdgpu_device_skip_hw_access(adev))
576 return;
577
578 if (amdgpu_sriov_fullaccess(adev)((((adev))->virt.caps & (1 << 2)) && !((
(adev))->virt.caps & (1 << 4)))
&&
579 adev->gfx.rlc.funcs &&
580 adev->gfx.rlc.funcs->is_rlcg_access_range) {
581 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
582 return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
583 } else if ((reg * 4) >= adev->rmmio_size) {
584 adev->pcie_wreg(adev, reg * 4, v);
585 } else {
586 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4))iowrite32(v, ((void *)adev->rmmio) + (reg * 4));
587 }
588}
589
590/**
591 * amdgpu_mm_rdoorbell - read a doorbell dword
592 *
593 * @adev: amdgpu_device pointer
594 * @index: doorbell index
595 *
596 * Returns the value in the doorbell aperture at the
597 * requested doorbell index (CIK).
598 */
599u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
600{
601 if (amdgpu_device_skip_hw_access(adev))
602 return 0;
603
604 if (index < adev->doorbell.num_doorbells) {
605 return readl(adev->doorbell.ptr + index)ioread32(adev->doorbell.ptr + index);
606 } else {
607 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index)__drm_err("reading beyond doorbell aperture: 0x%08x!\n", index
)
;
608 return 0;
609 }
610}
611
612/**
613 * amdgpu_mm_wdoorbell - write a doorbell dword
614 *
615 * @adev: amdgpu_device pointer
616 * @index: doorbell index
617 * @v: value to write
618 *
619 * Writes @v to the doorbell aperture at the
620 * requested doorbell index (CIK).
621 */
622void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
623{
624 if (amdgpu_device_skip_hw_access(adev))
625 return;
626
627 if (index < adev->doorbell.num_doorbells) {
628 writel(v, adev->doorbell.ptr + index)iowrite32(v, adev->doorbell.ptr + index);
629 } else {
630 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index)__drm_err("writing beyond doorbell aperture: 0x%08x!\n", index
)
;
631 }
632}
633
634/**
635 * amdgpu_mm_rdoorbell64 - read a doorbell Qword
636 *
637 * @adev: amdgpu_device pointer
638 * @index: doorbell index
639 *
640 * Returns the value in the doorbell aperture at the
641 * requested doorbell index (VEGA10+).
642 */
643u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
644{
645 if (amdgpu_device_skip_hw_access(adev))
646 return 0;
647
648 if (index < adev->doorbell.num_doorbells) {
649 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index))({ typeof(*((atomic64_t *)(adev->doorbell.ptr + index))) __tmp
= *(volatile typeof(*((atomic64_t *)(adev->doorbell.ptr +
index))) *)&(*((atomic64_t *)(adev->doorbell.ptr + index
))); membar_datadep_consumer(); __tmp; })
;
650 } else {
651 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index)__drm_err("reading beyond doorbell aperture: 0x%08x!\n", index
)
;
652 return 0;
653 }
654}
655
656/**
657 * amdgpu_mm_wdoorbell64 - write a doorbell Qword
658 *
659 * @adev: amdgpu_device pointer
660 * @index: doorbell index
661 * @v: value to write
662 *
663 * Writes @v to the doorbell aperture at the
664 * requested doorbell index (VEGA10+).
665 */
666void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
667{
668 if (amdgpu_device_skip_hw_access(adev))
669 return;
670
671 if (index < adev->doorbell.num_doorbells) {
672 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v)({ typeof(*((atomic64_t *)(adev->doorbell.ptr + index))) __tmp
= ((v)); *(volatile typeof(*((atomic64_t *)(adev->doorbell
.ptr + index))) *)&(*((atomic64_t *)(adev->doorbell.ptr
+ index))) = __tmp; __tmp; })
;
673 } else {
674 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index)__drm_err("writing beyond doorbell aperture: 0x%08x!\n", index
)
;
675 }
676}
677
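/*
 * Illustrative note (not part of this file): a ring would typically publish
 * its write pointer to the hardware through its doorbell slot, along the
 * lines of
 *
 *	if (ring->use_doorbell)
 *		amdgpu_mm_wdoorbell64(adev, ring->doorbell_index, wptr_value);
 *
 * where wptr_value is whatever encoding the engine expects. Indices beyond
 * adev->doorbell.num_doorbells are rejected by the bounds checks above.
 */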
678/**
679 * amdgpu_device_indirect_rreg - read an indirect register
680 *
681 * @adev: amdgpu_device pointer
682 * @pcie_index: mmio register offset
683 * @pcie_data: mmio register offset
684 * @reg_addr: indirect register address to read from
685 *
686 * Returns the value of indirect register @reg_addr
687 */
688u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
689 u32 pcie_index, u32 pcie_data,
690 u32 reg_addr)
691{
692 unsigned long flags;
693 u32 r;
694 void __iomem *pcie_index_offset;
695 void __iomem *pcie_data_offset;
696
697 spin_lock_irqsave(&adev->pcie_idx_lock, flags)do { flags = 0; mtx_enter(&adev->pcie_idx_lock); } while
(0)
;
698 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
699 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
700
701 writel(reg_addr, pcie_index_offset)iowrite32(reg_addr, pcie_index_offset);
702 readl(pcie_index_offset)ioread32(pcie_index_offset);
703 r = readl(pcie_data_offset)ioread32(pcie_data_offset);
704 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags)do { (void)(flags); mtx_leave(&adev->pcie_idx_lock); }
while (0)
;
705
706 return r;
707}
708
709/**
710 * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
711 *
712 * @adev: amdgpu_device pointer
713 * @pcie_index: mmio register offset
714 * @pcie_data: mmio register offset
715 * @reg_addr: indirect register address to read from
716 *
717 * Returns the value of indirect register @reg_addr
718 */
719u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
720 u32 pcie_index, u32 pcie_data,
721 u32 reg_addr)
722{
723 unsigned long flags;
724 u64 r;
725 void __iomem *pcie_index_offset;
726 void __iomem *pcie_data_offset;
727
728 spin_lock_irqsave(&adev->pcie_idx_lock, flags)do { flags = 0; mtx_enter(&adev->pcie_idx_lock); } while
(0)
;
729 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
730 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
731
732 /* read low 32 bits */
733 writel(reg_addr, pcie_index_offset)iowrite32(reg_addr, pcie_index_offset);
734 readl(pcie_index_offset)ioread32(pcie_index_offset);
735 r = readl(pcie_data_offset)ioread32(pcie_data_offset);
736 /* read high 32 bits */
737 writel(reg_addr + 4, pcie_index_offset)iowrite32(reg_addr + 4, pcie_index_offset);
738 readl(pcie_index_offset)ioread32(pcie_index_offset);
739 r |= ((u64)readl(pcie_data_offset)ioread32(pcie_data_offset) << 32);
740 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags)do { (void)(flags); mtx_leave(&adev->pcie_idx_lock); }
while (0)
;
741
742 return r;
743}
744
745/**
746 * amdgpu_device_indirect_wreg - write an indirect register address
747 *
748 * @adev: amdgpu_device pointer
749 * @pcie_index: mmio register offset
750 * @pcie_data: mmio register offset
751 * @reg_addr: indirect register offset
752 * @reg_data: indirect register data
753 *
754 */
755void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
756 u32 pcie_index, u32 pcie_data,
757 u32 reg_addr, u32 reg_data)
758{
759 unsigned long flags;
760 void __iomem *pcie_index_offset;
761 void __iomem *pcie_data_offset;
762
763 spin_lock_irqsave(&adev->pcie_idx_lock, flags)do { flags = 0; mtx_enter(&adev->pcie_idx_lock); } while
(0)
;
764 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
765 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
766
767 writel(reg_addr, pcie_index_offset)iowrite32(reg_addr, pcie_index_offset);
768 readl(pcie_index_offset)ioread32(pcie_index_offset);
769 writel(reg_data, pcie_data_offset)iowrite32(reg_data, pcie_data_offset);
770 readl(pcie_data_offset)ioread32(pcie_data_offset);
771 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags)do { (void)(flags); mtx_leave(&adev->pcie_idx_lock); }
while (0)
;
772}
773
774/**
775 * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
776 *
777 * @adev: amdgpu_device pointer
778 * @pcie_index: mmio register offset
779 * @pcie_data: mmio register offset
780 * @reg_addr: indirect register offset
781 * @reg_data: indirect register data
782 *
783 */
784void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
785 u32 pcie_index, u32 pcie_data,
786 u32 reg_addr, u64 reg_data)
787{
788 unsigned long flags;
789 void __iomem *pcie_index_offset;
790 void __iomem *pcie_data_offset;
791
792 spin_lock_irqsave(&adev->pcie_idx_lock, flags)do { flags = 0; mtx_enter(&adev->pcie_idx_lock); } while
(0)
;
793 pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
794 pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
795
796 /* write low 32 bits */
797 writel(reg_addr, pcie_index_offset)iowrite32(reg_addr, pcie_index_offset);
798 readl(pcie_index_offset)ioread32(pcie_index_offset);
799 writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset)iowrite32((u32)(reg_data & 0xffffffffULL), pcie_data_offset
)
;
800 readl(pcie_data_offset)ioread32(pcie_data_offset);
801 /* write high 32 bits */
802 writel(reg_addr + 4, pcie_index_offset)iowrite32(reg_addr + 4, pcie_index_offset);
803 readl(pcie_index_offset)ioread32(pcie_index_offset);
804 writel((u32)(reg_data >> 32), pcie_data_offset)iowrite32((u32)(reg_data >> 32), pcie_data_offset);
805 readl(pcie_data_offset)ioread32(pcie_data_offset);
806 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags)do { (void)(flags); mtx_leave(&adev->pcie_idx_lock); }
while (0)
;
807}
808
809/**
810 * amdgpu_invalid_rreg - dummy reg read function
811 *
812 * @adev: amdgpu_device pointer
813 * @reg: offset of register
814 *
815 * Dummy register read function. Used for register blocks
816 * that certain asics don't have (all asics).
817 * Returns the value in the register.
818 */
819static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
820{
821 DRM_ERROR("Invalid callback to read register 0x%04X\n", reg)__drm_err("Invalid callback to read register 0x%04X\n", reg);
822 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 822); } while (0)
;
823 return 0;
824}
825
826/**
827 * amdgpu_invalid_wreg - dummy reg write function
828 *
829 * @adev: amdgpu_device pointer
830 * @reg: offset of register
831 * @v: value to write to the register
832 *
833 * Dummy register write function. Used for register blocks
834 * that certain asics don't have (all asics).
835 */
836static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
837{
838 DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",__drm_err("Invalid callback to write register 0x%04X with 0x%08X\n"
, reg, v)
839 reg, v)__drm_err("Invalid callback to write register 0x%04X with 0x%08X\n"
, reg, v)
;
840 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 840); } while (0)
;
841}
842
843/**
844 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
845 *
846 * @adev: amdgpu_device pointer
847 * @reg: offset of register
848 *
849 * Dummy register read function. Used for register blocks
850 * that certain asics don't have (all asics).
851 * Returns the value in the register.
852 */
853static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
854{
855 DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg)__drm_err("Invalid callback to read 64 bit register 0x%04X\n"
, reg)
;
856 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 856); } while (0)
;
857 return 0;
858}
859
860/**
861 * amdgpu_invalid_wreg64 - dummy reg write function
862 *
863 * @adev: amdgpu_device pointer
864 * @reg: offset of register
865 * @v: value to write to the register
866 *
867 * Dummy register write function. Used for register blocks
868 * that certain asics don't have (all asics).
869 */
870static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
871{
872 DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",__drm_err("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n"
, reg, v)
873 reg, v)__drm_err("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n"
, reg, v)
;
874 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 874); } while (0)
;
875}
876
877/**
878 * amdgpu_block_invalid_rreg - dummy reg read function
879 *
880 * @adev: amdgpu_device pointer
881 * @block: offset of instance
882 * @reg: offset of register
883 *
884 * Dummy register read function. Used for register blocks
885 * that certain asics don't have (all asics).
886 * Returns the value in the register.
887 */
888static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
889 uint32_t block, uint32_t reg)
890{
891 DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",__drm_err("Invalid callback to read register 0x%04X in block 0x%04X\n"
, reg, block)
892 reg, block)__drm_err("Invalid callback to read register 0x%04X in block 0x%04X\n"
, reg, block)
;
893 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 893); } while (0)
;
894 return 0;
895}
896
897/**
898 * amdgpu_block_invalid_wreg - dummy reg write function
899 *
900 * @adev: amdgpu_device pointer
901 * @block: offset of instance
902 * @reg: offset of register
903 * @v: value to write to the register
904 *
905 * Dummy register write function. Used for register blocks
906 * that certain asics don't have (all asics).
907 */
908static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
909 uint32_t block,
910 uint32_t reg, uint32_t v)
911{
912 DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",__drm_err("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n"
, reg, block, v)
913 reg, block, v)__drm_err("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n"
, reg, block, v)
;
914 BUG()do { panic("BUG at %s:%d", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 914); } while (0)
;
915}
916
917/**
918 * amdgpu_device_asic_init - Wrapper for atom asic_init
919 *
920 * @adev: amdgpu_device pointer
921 *
922 * Does any asic specific work and then calls atom asic init.
923 */
924static int amdgpu_device_asic_init(struct amdgpu_device *adev)
925{
926 amdgpu_asic_pre_asic_init(adev)(adev)->asic_funcs->pre_asic_init((adev));
927
928 if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)(((11) << 16) | ((0) << 8) | (0)))
929 return amdgpu_atomfirmware_asic_init(adev, true1);
930 else
931 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
932}
933
934/**
935 * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
936 *
937 * @adev: amdgpu_device pointer
938 *
939 * Allocates a scratch page of VRAM for use by various things in the
940 * driver.
941 */
942static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
943{
944 return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE4096,
945 PAGE_SIZE(1 << 12), AMDGPU_GEM_DOMAIN_VRAM0x4,
946 &adev->vram_scratch.robj,
947 &adev->vram_scratch.gpu_addr,
948 (void **)&adev->vram_scratch.ptr);
949}
950
951/**
952 * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
953 *
954 * @adev: amdgpu_device pointer
955 *
956 * Frees the VRAM scratch page.
957 */
958static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
959{
960 amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL((void *)0), NULL((void *)0));
961}
962
963/**
964 * amdgpu_device_program_register_sequence - program an array of registers.
965 *
966 * @adev: amdgpu_device pointer
967 * @registers: pointer to the register array
968 * @array_size: size of the register array
969 *
970 * Programs an array of registers with AND and OR masks.
971 * This is a helper for setting golden registers.
972 */
973void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
974 const u32 *registers,
975 const u32 array_size)
976{
977 u32 tmp, reg, and_mask, or_mask;
978 int i;
979
980 if (array_size % 3)
981 return;
982
983 for (i = 0; i < array_size; i +=3) {
984 reg = registers[i + 0];
985 and_mask = registers[i + 1];
986 or_mask = registers[i + 2];
987
988 if (and_mask == 0xffffffff) {
989 tmp = or_mask;
990 } else {
991 tmp = RREG32(reg)amdgpu_device_rreg(adev, (reg), 0);
992 tmp &= ~and_mask;
993 if (adev->family >= AMDGPU_FAMILY_AI141)
994 tmp |= (or_mask & and_mask);
995 else
996 tmp |= or_mask;
997 }
998 WREG32(reg, tmp)amdgpu_device_wreg(adev, (reg), (tmp), 0);
999 }
1000}
1001
1002/**
1003 * amdgpu_device_pci_config_reset - reset the GPU
1004 *
1005 * @adev: amdgpu_device pointer
1006 *
1007 * Resets the GPU using the pci config reset sequence.
1008 * Only applicable to asics prior to vega10.
1009 */
1010void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1011{
1012 pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA0x39d5e86b);
1013}
1014
1015/**
1016 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1017 *
1018 * @adev: amdgpu_device pointer
1019 *
1020 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1021 */
1022int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1023{
1024 STUB()do { printf("%s: stub\n", __func__); } while(0);
1025 return -ENOSYS78;
1026#ifdef notyet
1027 return pci_reset_function(adev->pdev);
1028#endif
1029}
1030
1031/*
1032 * GPU doorbell aperture helpers function.
1033 */
1034/**
1035 * amdgpu_device_doorbell_init - Init doorbell driver information.
1036 *
1037 * @adev: amdgpu_device pointer
1038 *
1039 * Init doorbell driver information (CIK)
1040 * Returns 0 on success, error on failure.
1041 */
1042static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1043{
1044
1045 /* No doorbell on SI hardware generation */
1046 if (adev->asic_type < CHIP_BONAIRE) {
1047 adev->doorbell.base = 0;
1048 adev->doorbell.size = 0;
1049 adev->doorbell.num_doorbells = 0;
1050 adev->doorbell.ptr = NULL((void *)0);
1051 return 0;
1052 }
1053
1054#ifdef __linux__
1055 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1056 return -EINVAL22;
1057#endif
1058
1059 amdgpu_asic_init_doorbell_index(adev)(adev)->asic_funcs->init_doorbell_index((adev));
1060
1061 /* doorbell bar mapping */
1062#ifdef __linux__
1063 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1064 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1065#endif
1066
1067 if (adev->enable_mes) {
1068 adev->doorbell.num_doorbells =
1069 adev->doorbell.size / sizeof(u32);
1070 } else {
1071 adev->doorbell.num_doorbells =
1072 min_t(u32, adev->doorbell.size / sizeof(u32),({ u32 __min_a = (adev->doorbell.size / sizeof(u32)); u32 __min_b
= (adev->doorbell_index.max_assignment+1); __min_a < __min_b
? __min_a : __min_b; })
1073 adev->doorbell_index.max_assignment+1)({ u32 __min_a = (adev->doorbell.size / sizeof(u32)); u32 __min_b
= (adev->doorbell_index.max_assignment+1); __min_a < __min_b
? __min_a : __min_b; })
;
1074 if (adev->doorbell.num_doorbells == 0)
1075 return -EINVAL22;
1076
1077 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1078 * paging queue doorbell use the second page. The
1079 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1080 * doorbells are in the first page. So with paging queue enabled,
1081 * the max num_doorbells should + 1 page (0x400 in dword)
1082 */
1083 if (adev->asic_type >= CHIP_VEGA10)
1084 adev->doorbell.num_doorbells += 0x400;
1085 }
1086
1087#ifdef __linux__
1088 adev->doorbell.ptr = ioremap(adev->doorbell.base,
1089 adev->doorbell.num_doorbells *
1090 sizeof(u32));
1091 if (adev->doorbell.ptr == NULL((void *)0))
1092 return -ENOMEM12;
1093#endif
1094
1095 return 0;
1096}
1097
1098/**
1099 * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1100 *
1101 * @adev: amdgpu_device pointer
1102 *
1103 * Tear down doorbell driver information (CIK)
1104 */
1105static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1106{
1107#ifdef __linux__
1108 iounmap(adev->doorbell.ptr);
1109#else
1110 if (adev->doorbell.size > 0)
1111 bus_space_unmap(adev->doorbell.bst, adev->doorbell.bsh,
1112 adev->doorbell.size);
1113#endif
1114 adev->doorbell.ptr = NULL((void *)0);
1115}
1116
1117
1118
1119/*
1120 * amdgpu_device_wb_*()
1121 * Writeback is the method by which the GPU updates special pages in memory
1122 * with the status of certain GPU events (fences, ring pointers, etc.).
1123 */
1124
1125/**
1126 * amdgpu_device_wb_fini - Disable Writeback and free memory
1127 *
1128 * @adev: amdgpu_device pointer
1129 *
1130 * Disables Writeback and frees the Writeback memory (all asics).
1131 * Used at driver shutdown.
1132 */
1133static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1134{
1135 if (adev->wb.wb_obj) {
1136 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1137 &adev->wb.gpu_addr,
1138 (void **)&adev->wb.wb);
1139 adev->wb.wb_obj = NULL((void *)0);
1140 }
1141}
1142
1143/**
1144 * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1145 *
1146 * @adev: amdgpu_device pointer
1147 *
1148 * Initializes writeback and allocates writeback memory (all asics).
1149 * Used at driver startup.
1150 * Returns 0 on success or an -error on failure.
1151 */
1152static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1153{
1154 int r;
1155
1156 if (adev->wb.wb_obj == NULL((void *)0)) {
1157 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1158 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB256 * sizeof(uint32_t) * 8,
1159 PAGE_SIZE(1 << 12), AMDGPU_GEM_DOMAIN_GTT0x2,
1160 &adev->wb.wb_obj, &adev->wb.gpu_addr,
1161 (void **)&adev->wb.wb);
1162 if (r) {
1163 dev_warn(adev->dev, "(%d) create WB bo failed\n", r)printf("drm:pid%d:%s *WARNING* " "(%d) create WB bo failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r)
;
1164 return r;
1165 }
1166
1167 adev->wb.num_wb = AMDGPU_MAX_WB256;
1168 memset(&adev->wb.used, 0, sizeof(adev->wb.used))__builtin_memset((&adev->wb.used), (0), (sizeof(adev->
wb.used)))
;
1169
1170 /* clear wb memory */
1171 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8)__builtin_memset(((char *)adev->wb.wb), (0), (256 * sizeof
(uint32_t) * 8))
;
1172 }
1173
1174 return 0;
1175}
1176
1177/**
1178 * amdgpu_device_wb_get - Allocate a wb entry
1179 *
1180 * @adev: amdgpu_device pointer
1181 * @wb: wb index
1182 *
1183 * Allocate a wb slot for use by the driver (all asics).
1184 * Returns 0 on success or -EINVAL on failure.
1185 */
1186int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1187{
1188 unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1189
1190 if (offset < adev->wb.num_wb) {
1191 __set_bit(offset, adev->wb.used);
1192 *wb = offset << 3; /* convert to dw offset */
1193 return 0;
1194 } else {
1195 return -EINVAL22;
1196 }
1197}
1198
1199/**
1200 * amdgpu_device_wb_free - Free a wb entry
1201 *
1202 * @adev: amdgpu_device pointer
1203 * @wb: wb index
1204 *
1205 * Free a wb slot allocated for use by the driver (all asics)
1206 */
1207void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1208{
1209 wb >>= 3;
1210 if (wb < adev->wb.num_wb)
1211 __clear_bit(wb, adev->wb.used);
1212}
1213
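/*
 * Minimal usage sketch (illustrative only, not part of this file): reserve a
 * writeback slot, use it, then release it. amdgpu_device_wb_get() hands back
 * a dword offset into the writeback page, so the GPU-visible address of the
 * slot is adev->wb.gpu_addr + offset * 4 and the CPU reads the same slot
 * through adev->wb.wb[offset]. The helper name is hypothetical.
 */
static int example_use_wb_slot(struct amdgpu_device *adev)
{
	u32 offset;
	int r;

	r = amdgpu_device_wb_get(adev, &offset);
	if (r)
		return r;

	/* a fence value or ring pointer written by the GPU would land here */
	(void)adev->wb.wb[offset];

	amdgpu_device_wb_free(adev, offset);
	return 0;
}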
1214/**
1215 * amdgpu_device_resize_fb_bar - try to resize FB BAR
1216 *
1217 * @adev: amdgpu_device pointer
1218 *
1219 * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1220 * to fail, but if any of the BARs is not accessible after the size we abort
1221 * driver loading by returning -ENODEV.
1222 */
1223int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1224{
1225#ifdef __linux__
1226 int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1227 struct pci_bus *root;
1228 struct resource *res;
1229 unsigned i;
1230 u16 cmd;
1231 int r;
1232
1233 if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)0)
1234 return 0;
1235
1236 /* Bypass for VF */
1237 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
1238 return 0;
1239
1240 /* skip if the bios has already enabled large BAR */
1241 if (adev->gmc.real_vram_size &&
1242 (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1243 return 0;
1244
1245 /* Check if the root BUS has 64bit memory resources */
1246 root = adev->pdev->bus;
1247 while (root->parent)
1248 root = root->parent;
1249
1250 pci_bus_for_each_resource(root, res, i) {
1251 if (res && res->flags & (IORESOURCE_MEM0x0001 | IORESOURCE_MEM_64) &&
1252 res->start > 0x100000000ull)
1253 break;
1254 }
1255
1256 /* Trying to resize is pointless without a root hub window above 4GB */
1257 if (!res)
1258 return 0;
1259
1260 /* Limit the BAR size to what is available */
1261 rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,(((fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1)<
(rbar_size))?(fls(pci_rebar_get_possible_sizes(adev->pdev,
0)) - 1):(rbar_size))
1262 rbar_size)(((fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1)<
(rbar_size))?(fls(pci_rebar_get_possible_sizes(adev->pdev,
0)) - 1):(rbar_size))
;
1263
1264 /* Disable memory decoding while we change the BAR addresses and size */
1265 pci_read_config_word(adev->pdev, PCI_COMMAND0x04, &cmd);
1266 pci_write_config_word(adev->pdev, PCI_COMMAND0x04,
1267 cmd & ~PCI_COMMAND_MEMORY0x00000002);
1268
1269 /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1270 amdgpu_device_doorbell_fini(adev);
1271 if (adev->asic_type >= CHIP_BONAIRE)
1272 pci_release_resource(adev->pdev, 2);
1273
1274 pci_release_resource(adev->pdev, 0);
1275
1276 r = pci_resize_resource(adev->pdev, 0, rbar_size);
1277 if (r == -ENOSPC28)
1278 DRM_INFO("Not enough PCI address space for a large BAR.")printk("\0016" "[" "drm" "] " "Not enough PCI address space for a large BAR."
)
;
1279 else if (r && r != -ENOTSUPP91)
1280 DRM_ERROR("Problem resizing BAR0 (%d).", r)__drm_err("Problem resizing BAR0 (%d).", r);
1281
1282 pci_assign_unassigned_bus_resources(adev->pdev->bus);
1283
1284 /* When the doorbell or fb BAR isn't available we have no chance of
1285 * using the device.
1286 */
1287 r = amdgpu_device_doorbell_init(adev);
1288 if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1289 return -ENODEV19;
1290
1291 pci_write_config_word(adev->pdev, PCI_COMMAND0x04, cmd);
1292#endif /* __linux__ */
1293
1294 return 0;
1295}
1296
1297/*
1298 * GPU helpers function.
1299 */
1300/**
1301 * amdgpu_device_need_post - check if the hw need post or not
1302 *
1303 * @adev: amdgpu_device pointer
1304 *
1305 * Check if the asic has been initialized (all asics) at driver startup
1306 * or post is needed if hw reset is performed.
1307 * Returns true if post is needed or false if not.
1308 */
1309bool_Bool amdgpu_device_need_post(struct amdgpu_device *adev)
1310{
1311 uint32_t reg;
1312
1313 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
1314 return false0;
1315
1316 if (amdgpu_passthrough(adev)((adev)->virt.caps & (1 << 3))) {
1317 /* for FIJI: In the whole-GPU pass-through virtualization case, after a VM reboot
1318 * some old SMC firmware still needs the driver to do a vPost, otherwise the GPU hangs.
1319 * SMC firmware versions above 22.15 don't have this flaw, so we force
1320 * vPost to be executed for SMC versions below 22.15
1321 */
1322 if (adev->asic_type == CHIP_FIJI) {
1323 int err;
1324 uint32_t fw_ver;
1325 err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1326 /* force vPost if an error occurred */
1327 if (err)
1328 return true1;
1329
1330 fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1331 if (fw_ver < 0x00160e00)
1332 return true1;
1333 }
1334 }
1335
1336 /* Don't post if we need to reset whole hive on init */
1337 if (adev->gmc.xgmi.pending_reset)
1338 return false0;
1339
1340 if (adev->has_hw_reset) {
1341 adev->has_hw_reset = false0;
1342 return true1;
1343 }
1344
1345 /* bios scratch used on CIK+ */
1346 if (adev->asic_type >= CHIP_BONAIRE)
1347 return amdgpu_atombios_scratch_need_asic_init(adev);
1348
1349 /* check MEM_SIZE for older asics */
1350 reg = amdgpu_asic_get_config_memsize(adev)(adev)->asic_funcs->get_config_memsize((adev));
1351
1352 if ((reg != 0) && (reg != 0xffffffff))
1353 return false0;
1354
1355 return true1;
1356}
1357
1358/*
1359 * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
1360 * speed switching. Until we have confirmation from Intel that a specific host
1361 * supports it, it's safer that we keep it disabled for all.
1362 *
1363 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1364 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1365 */
1366bool_Bool amdgpu_device_pcie_dynamic_switching_supported(void)
1367{
1368#if IS_ENABLED(CONFIG_X86)1
1369#ifdef __linux__
1370 struct cpuinfo_x86 *c = &cpu_data(0);
1371
1372 if (c->x86_vendor == X86_VENDOR_INTEL)
1373#else
1374 if (strcmp(cpu_vendor, "GenuineIntel") == 0)
1375#endif
1376 return false0;
1377#endif
1378 return true1;
1379}
1380
1381/**
1382 * amdgpu_device_should_use_aspm - check if the device should program ASPM
1383 *
1384 * @adev: amdgpu_device pointer
1385 *
1386 * Confirm whether the module parameter and pcie bridge agree that ASPM should
1387 * be set for this device.
1388 *
1389 * Returns true if it should be used or false if not.
1390 */
1391bool_Bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1392{
1393 switch (amdgpu_aspm) {
1394 case -1:
1395 break;
1396 case 0:
1397 return false0;
1398 case 1:
1399 return true1;
1400 default:
1401 return false0;
1402 }
1403 return pcie_aspm_enabled(adev->pdev);
1404}
1405
1406bool_Bool amdgpu_device_aspm_support_quirk(void)
1407{
1408#if IS_ENABLED(CONFIG_X86)1
1409 struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
1410
1411 return !(ci->ci_family == 6 && ci->ci_model == 0x97);
1412#else
1413 return true1;
1414#endif
1415}
1416
1417/* if we get transitioned to only one device, take VGA back */
1418/**
1419 * amdgpu_device_vga_set_decode - enable/disable vga decode
1420 *
1421 * @pdev: PCI device pointer
1422 * @state: enable/disable vga decode
1423 *
1424 * Enable/disable vga decode (all asics).
1425 * Returns VGA resource flags.
1426 */
1427#ifdef notyet
1428static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1429 bool_Bool state)
1430{
1431 struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1432 amdgpu_asic_set_vga_state(adev, state)(adev)->asic_funcs->set_vga_state((adev), (state));
1433 if (state)
1434 return VGA_RSRC_LEGACY_IO0x01 | VGA_RSRC_LEGACY_MEM |
1435 VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1436 else
1437 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1438}
1439#endif
1440
1441/**
1442 * amdgpu_device_check_block_size - validate the vm block size
1443 *
1444 * @adev: amdgpu_device pointer
1445 *
1446 * Validates the vm block size specified via module parameter.
1447 * The vm block size defines number of bits in page table versus page directory,
1448 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1449 * page table and the remaining bits are in the page directory.
1450 */
1451static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1452{
1453 /* defines number of bits in page table versus page directory,
1454 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1455 * page table and the remaining bits are in the page directory */
1456 if (amdgpu_vm_block_size == -1)
1457 return;
1458
1459 if (amdgpu_vm_block_size < 9) {
1460 dev_warn(adev->dev, "VM page table size (%d) too small\n",printf("drm:pid%d:%s *WARNING* " "VM page table size (%d) too small\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_vm_block_size
)
1461 amdgpu_vm_block_size)printf("drm:pid%d:%s *WARNING* " "VM page table size (%d) too small\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_vm_block_size
)
;
1462 amdgpu_vm_block_size = -1;
1463 }
1464}
1465
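/*
 * Worked example (illustrative, not part of this file): with 4 KB pages there
 * are 12 offset bits, so a vm block size of 9 means each page table maps
 * 2^(12 + 9) bytes = 2 MB of GPU virtual address space; the remaining address
 * bits are resolved through the page directory. Larger block sizes move bits
 * from the directory into each page table.
 */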
1466/**
1467 * amdgpu_device_check_vm_size - validate the vm size
1468 *
1469 * @adev: amdgpu_device pointer
1470 *
1471 * Validates the vm size in GB specified via module parameter.
1472 * The VM size is the size of the GPU virtual memory space in GB.
1473 */
1474static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1475{
1476 /* no need to check the default value */
1477 if (amdgpu_vm_size == -1)
1478 return;
1479
1480 if (amdgpu_vm_size < 1) {
1481 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",printf("drm:pid%d:%s *WARNING* " "VM size (%d) too small, min is 1GB\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_vm_size
)
1482 amdgpu_vm_size)printf("drm:pid%d:%s *WARNING* " "VM size (%d) too small, min is 1GB\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_vm_size
)
;
1483 amdgpu_vm_size = -1;
1484 }
1485}
1486
1487static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1488{
1489#ifdef __linux__
1490 struct sysinfo si;
1491#endif
1492 bool_Bool is_os_64 = (sizeof(void *) == 8);
1493 uint64_t total_memory;
1494 uint64_t dram_size_seven_GB = 0x1B8000000;
1495 uint64_t dram_size_three_GB = 0xB8000000;
1496
1497 if (amdgpu_smu_memory_pool_size == 0)
1498 return;
1499
1500 if (!is_os_64) {
1501 DRM_WARN("Not 64-bit OS, feature not supported\n")printk("\0014" "[" "drm" "] " "Not 64-bit OS, feature not supported\n"
)
;
1502 goto def_value;
1503 }
1504#ifdef __linux__
1505 si_meminfo(&si);
1506 total_memory = (uint64_t)si.totalram * si.mem_unit;
1507#else
1508 total_memory = ptoa(physmem)((paddr_t)(physmem) << 12);
1509#endif
1510
1511 if ((amdgpu_smu_memory_pool_size == 1) ||
1512 (amdgpu_smu_memory_pool_size == 2)) {
1513 if (total_memory < dram_size_three_GB)
1514 goto def_value1;
1515 } else if ((amdgpu_smu_memory_pool_size == 4) ||
1516 (amdgpu_smu_memory_pool_size == 8)) {
1517 if (total_memory < dram_size_seven_GB)
1518 goto def_value1;
1519 } else {
1520 DRM_WARN("Smu memory pool size not supported\n")printk("\0014" "[" "drm" "] " "Smu memory pool size not supported\n"
)
;
1521 goto def_value;
1522 }
1523 adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1524
1525 return;
1526
1527def_value1:
1528 DRM_WARN("No enough system memory\n")printk("\0014" "[" "drm" "] " "No enough system memory\n");
1529def_value:
1530 adev->pm.smu_prv_buffer_size = 0;
1531}
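/*
 * Editor-added illustrative sketch (not part of amdgpu_device.c): the
 * amdgpu_smu_memory_pool_size parameter is given in 256MB units (1, 2, 4
 * or 8), which is why the function above shifts it left by 28 to get bytes.
 * Pools of 1-2 units require roughly 3GB of system DRAM, 4-8 units roughly
 * 7GB, matching dram_size_three_GB/dram_size_seven_GB.  Hypothetical helper.
 */
static inline uint64_t example_smu_pool_bytes(unsigned int pool_units)
{
	/* e.g. pool_units == 4 -> 4 * 256MB == 1GB reserved for the SMU */
	return (uint64_t)pool_units << 28;
}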
1532
1533static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1534{
1535 if (!(adev->flags & AMD_IS_APU) ||
1536 adev->asic_type < CHIP_RAVEN)
1537 return 0;
1538
1539 switch (adev->asic_type) {
1540 case CHIP_RAVEN:
1541 if (adev->pdev->device == 0x15dd)
1542 adev->apu_flags |= AMD_APU_IS_RAVEN;
1543 if (adev->pdev->device == 0x15d8)
1544 adev->apu_flags |= AMD_APU_IS_PICASSO;
1545 break;
1546 case CHIP_RENOIR:
1547 if ((adev->pdev->device == 0x1636) ||
1548 (adev->pdev->device == 0x164c))
1549 adev->apu_flags |= AMD_APU_IS_RENOIR;
1550 else
1551 adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1552 break;
1553 case CHIP_VANGOGH:
1554 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1555 break;
1556 case CHIP_YELLOW_CARP:
1557 break;
1558 case CHIP_CYAN_SKILLFISH:
1559 if ((adev->pdev->device == 0x13FE) ||
1560 (adev->pdev->device == 0x143F))
1561 adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1562 break;
1563 default:
1564 break;
1565 }
1566
1567 return 0;
1568}
1569
1570/**
1571 * amdgpu_device_check_arguments - validate module params
1572 *
1573 * @adev: amdgpu_device pointer
1574 *
1575 * Validates certain module parameters and updates
1576 * the associated values used by the driver (all asics).
1577 */
1578static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1579{
1580 if (amdgpu_sched_jobs < 4) {
1581 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",printf("drm:pid%d:%s *WARNING* " "sched jobs (%d) must be at least 4\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_jobs
)
1582 amdgpu_sched_jobs)printf("drm:pid%d:%s *WARNING* " "sched jobs (%d) must be at least 4\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_jobs
)
;
1583 amdgpu_sched_jobs = 4;
1584 } else if (!is_power_of_2(amdgpu_sched_jobs)(((amdgpu_sched_jobs) != 0) && (((amdgpu_sched_jobs) -
1) & (amdgpu_sched_jobs)) == 0)
){
1585 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",printf("drm:pid%d:%s *WARNING* " "sched jobs (%d) must be a power of 2\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_jobs
)
1586 amdgpu_sched_jobs)printf("drm:pid%d:%s *WARNING* " "sched jobs (%d) must be a power of 2\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_jobs
)
;
1587 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1588 }
1589
1590 if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1591 /* gart size must be greater or equal to 32M */
1592 dev_warn(adev->dev, "gart size (%d) too small\n",printf("drm:pid%d:%s *WARNING* " "gart size (%d) too small\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_gart_size
)
1593 amdgpu_gart_size)printf("drm:pid%d:%s *WARNING* " "gart size (%d) too small\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_gart_size
)
;
1594 amdgpu_gart_size = -1;
1595 }
1596
1597 if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1598 /* gtt size must be greater or equal to 32M */
1599 dev_warn(adev->dev, "gtt size (%d) too small\n",printf("drm:pid%d:%s *WARNING* " "gtt size (%d) too small\n",
({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_gtt_size
)
1600 amdgpu_gtt_size)printf("drm:pid%d:%s *WARNING* " "gtt size (%d) too small\n",
({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_gtt_size
)
;
1601 amdgpu_gtt_size = -1;
1602 }
1603
1604 /* valid range is between 4 and 9 inclusive */
1605 if (amdgpu_vm_fragment_size != -1 &&
1606 (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1607 dev_warn(adev->dev, "valid range is between 4 and 9\n")printf("drm:pid%d:%s *WARNING* " "valid range is between 4 and 9\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
1608 amdgpu_vm_fragment_size = -1;
1609 }
1610
1611 if (amdgpu_sched_hw_submission < 2) {
1612 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",printf("drm:pid%d:%s *WARNING* " "sched hw submission jobs (%d) must be at least 2\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_hw_submission
)
1613 amdgpu_sched_hw_submission)printf("drm:pid%d:%s *WARNING* " "sched hw submission jobs (%d) must be at least 2\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_hw_submission
)
;
1614 amdgpu_sched_hw_submission = 2;
1615 } else if (!is_power_of_2(amdgpu_sched_hw_submission)(((amdgpu_sched_hw_submission) != 0) && (((amdgpu_sched_hw_submission
) - 1) & (amdgpu_sched_hw_submission)) == 0)
) {
1616 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",printf("drm:pid%d:%s *WARNING* " "sched hw submission jobs (%d) must be a power of 2\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_hw_submission
)
1617 amdgpu_sched_hw_submission)printf("drm:pid%d:%s *WARNING* " "sched hw submission jobs (%d) must be a power of 2\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_sched_hw_submission
)
;
1618 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1619 }
1620
1621 if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1622 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n")printf("drm:pid%d:%s *WARNING* " "invalid option for reset method, reverting to default\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
1623 amdgpu_reset_method = -1;
1624 }
1625
1626 amdgpu_device_check_smu_prv_buffer_size(adev);
1627
1628 amdgpu_device_check_vm_size(adev);
1629
1630 amdgpu_device_check_block_size(adev);
1631
1632 adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1633
1634 return 0;
1635}
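/*
 * Editor-added illustrative sketch (not part of amdgpu_device.c): the
 * power-of-two handling applied above to amdgpu_sched_jobs and
 * amdgpu_sched_hw_submission.  A non-zero v is a power of two iff
 * (v & (v - 1)) == 0; otherwise it is rounded up, e.g. 5 -> 8, 12 -> 16.
 * Hypothetical stand-in for the kernel's roundup_pow_of_two().
 */
static inline unsigned int example_roundup_pow_of_two(unsigned int v)
{
	unsigned int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}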
1636
1637#ifdef __linux__
1638/**
1639 * amdgpu_switcheroo_set_state - set switcheroo state
1640 *
1641 * @pdev: pci dev pointer
1642 * @state: vga_switcheroo state
1643 *
1644 * Callback for the switcheroo driver. Suspends or resumes the
1645 * asics before or after they are powered up using ACPI methods.
1646 */
1647static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1648 enum vga_switcheroo_state state)
1649{
1650 struct drm_device *dev = pci_get_drvdata(pdev);
1651 int r;
1652
1653 if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1654 return;
1655
1656 if (state == VGA_SWITCHEROO_ON) {
1657 pr_info("switched on\n")do { } while(0);
1658 /* don't suspend or resume card normally */
1659 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1660
1661 pci_set_power_state(pdev, PCI_D0);
1662 amdgpu_device_load_pci_state(pdev);
1663 r = pci_enable_device(pdev);
1664 if (r)
1665 DRM_WARN("pci_enable_device failed (%d)\n", r)printk("\0014" "[" "drm" "] " "pci_enable_device failed (%d)\n"
, r)
;
1666 amdgpu_device_resume(dev, true1);
1667
1668 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1669 } else {
1670 pr_info("switched off\n")do { } while(0);
1671 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1672 amdgpu_device_suspend(dev, true1);
1673 amdgpu_device_cache_pci_state(pdev);
1674 /* Shut down the device */
1675 pci_disable_device(pdev);
1676 pci_set_power_state(pdev, PCI_D3cold);
1677 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1678 }
1679}
1680
1681/**
1682 * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1683 *
1684 * @pdev: pci dev pointer
1685 *
1686 * Callback for the switcheroo driver. Checks if the switcheroo
1687 * state can be changed.
1688 * Returns true if the state can be changed, false if not.
1689 */
1690static bool_Bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1691{
1692 struct drm_device *dev = pci_get_drvdata(pdev);
1693
1694 /*
1695 * FIXME: open_count is protected by drm_global_mutex but that would lead to
1696 * locking inversion with the driver load path. And the access here is
1697 * completely racy anyway. So don't bother with locking for now.
1698 */
1699 return atomic_read(&dev->open_count)({ typeof(*(&dev->open_count)) __tmp = *(volatile typeof
(*(&dev->open_count)) *)&(*(&dev->open_count
)); membar_datadep_consumer(); __tmp; })
== 0;
1700}
1701#endif /* __linux__ */
1702
1703static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1704#ifdef notyet
1705 .set_gpu_state = amdgpu_switcheroo_set_state,
1706 .reprobe = NULL((void *)0),
1707 .can_switch = amdgpu_switcheroo_can_switch,
1708#endif
1709};
1710
1711/**
1712 * amdgpu_device_ip_set_clockgating_state - set the CG state
1713 *
1714 * @dev: amdgpu_device pointer
1715 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1716 * @state: clockgating state (gate or ungate)
1717 *
1718 * Sets the requested clockgating state for all instances of
1719 * the hardware IP specified.
1720 * Returns the error code from the last instance.
1721 */
1722int amdgpu_device_ip_set_clockgating_state(void *dev,
1723 enum amd_ip_block_type block_type,
1724 enum amd_clockgating_state state)
1725{
1726 struct amdgpu_device *adev = dev;
1727 int i, r = 0;
1728
1729 for (i = 0; i < adev->num_ip_blocks; i++) {
1730 if (!adev->ip_blocks[i].status.valid)
1731 continue;
1732 if (adev->ip_blocks[i].version->type != block_type)
1733 continue;
1734 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1735 continue;
1736 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1737 (void *)adev, state);
1738 if (r)
1739 DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",__drm_err("set_clockgating_state of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
1740 adev->ip_blocks[i].version->funcs->name, r)__drm_err("set_clockgating_state of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
1741 }
1742 return r;
1743}
1744
1745/**
1746 * amdgpu_device_ip_set_powergating_state - set the PG state
1747 *
1748 * @dev: amdgpu_device pointer
1749 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1750 * @state: powergating state (gate or ungate)
1751 *
1752 * Sets the requested powergating state for all instances of
1753 * the hardware IP specified.
1754 * Returns the error code from the last instance.
1755 */
1756int amdgpu_device_ip_set_powergating_state(void *dev,
1757 enum amd_ip_block_type block_type,
1758 enum amd_powergating_state state)
1759{
1760 struct amdgpu_device *adev = dev;
1761 int i, r = 0;
1762
1763 for (i = 0; i < adev->num_ip_blocks; i++) {
1764 if (!adev->ip_blocks[i].status.valid)
1765 continue;
1766 if (adev->ip_blocks[i].version->type != block_type)
1767 continue;
1768 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1769 continue;
1770 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1771 (void *)adev, state);
1772 if (r)
1773 DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",__drm_err("set_powergating_state of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
1774 adev->ip_blocks[i].version->funcs->name, r)__drm_err("set_powergating_state of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
1775 }
1776 return r;
1777}
1778
1779/**
1780 * amdgpu_device_ip_get_clockgating_state - get the CG state
1781 *
1782 * @adev: amdgpu_device pointer
1783 * @flags: clockgating feature flags
1784 *
1785 * Walks the list of IPs on the device and updates the clockgating
1786 * flags for each IP.
1787 * Updates @flags with the feature flags for each hardware IP where
1788 * clockgating is enabled.
1789 */
1790void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1791 u64 *flags)
1792{
1793 int i;
1794
1795 for (i = 0; i < adev->num_ip_blocks; i++) {
1796 if (!adev->ip_blocks[i].status.valid)
1797 continue;
1798 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1799 adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1800 }
1801}
1802
1803/**
1804 * amdgpu_device_ip_wait_for_idle - wait for idle
1805 *
1806 * @adev: amdgpu_device pointer
1807 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1808 *
1809 * Waits for the requested hardware IP to be idle.
1810 * Returns 0 for success or a negative error code on failure.
1811 */
1812int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1813 enum amd_ip_block_type block_type)
1814{
1815 int i, r;
1816
1817 for (i = 0; i < adev->num_ip_blocks; i++) {
1818 if (!adev->ip_blocks[i].status.valid)
1819 continue;
1820 if (adev->ip_blocks[i].version->type == block_type) {
1821 r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1822 if (r)
1823 return r;
1824 break;
1825 }
1826 }
1827 return 0;
1828
1829}
1830
1831/**
1832 * amdgpu_device_ip_is_idle - is the hardware IP idle
1833 *
1834 * @adev: amdgpu_device pointer
1835 * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1836 *
1837 * Check if the hardware IP is idle or not.
1838 * Returns true if the IP is idle, false if not.
1839 */
1840bool_Bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1841 enum amd_ip_block_type block_type)
1842{
1843 int i;
1844
1845 for (i = 0; i < adev->num_ip_blocks; i++) {
1846 if (!adev->ip_blocks[i].status.valid)
1847 continue;
1848 if (adev->ip_blocks[i].version->type == block_type)
1849 return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1850 }
1851 return true1;
1852
1853}
1854
1855/**
1856 * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1857 *
1858 * @adev: amdgpu_device pointer
1859 * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1860 *
1861 * Returns a pointer to the hardware IP block structure
1862 * if it exists for the asic, otherwise NULL.
1863 */
1864struct amdgpu_ip_block *
1865amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1866 enum amd_ip_block_type type)
1867{
1868 int i;
1869
1870 for (i = 0; i < adev->num_ip_blocks; i++)
1871 if (adev->ip_blocks[i].version->type == type)
1872 return &adev->ip_blocks[i];
1873
1874 return NULL((void *)0);
1875}
1876
1877/**
1878 * amdgpu_device_ip_block_version_cmp
1879 *
1880 * @adev: amdgpu_device pointer
1881 * @type: enum amd_ip_block_type
1882 * @major: major version
1883 * @minor: minor version
1884 *
1885 * Return 0 if the IP block version is equal to or greater than the requested version,
1886 * 1 if it is smaller or the ip_block doesn't exist.
1887 */
1888int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1889 enum amd_ip_block_type type,
1890 u32 major, u32 minor)
1891{
1892 struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1893
1894 if (ip_block && ((ip_block->version->major > major) ||
1895 ((ip_block->version->major == major) &&
1896 (ip_block->version->minor >= minor))))
1897 return 0;
1898
1899 return 1;
1900}
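/*
 * Editor-added illustrative usage (not part of amdgpu_device.c): callers
 * typically use the comparison helper above to gate features on a minimum
 * IP block version; 0 means "equal or newer", 1 means older or missing.
 * The wrapper name is hypothetical.
 */
static inline bool example_has_gfx_v9_or_newer(struct amdgpu_device *adev)
{
	return amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_GFX,
						  9, 0) == 0;
}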
1901
1902/**
1903 * amdgpu_device_ip_block_add
1904 *
1905 * @adev: amdgpu_device pointer
1906 * @ip_block_version: pointer to the IP to add
1907 *
1908 * Adds the IP block driver information to the collection of IPs
1909 * on the asic.
1910 */
1911int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1912 const struct amdgpu_ip_block_version *ip_block_version)
1913{
1914 if (!ip_block_version)
1915 return -EINVAL22;
1916
1917 switch (ip_block_version->type) {
1918 case AMD_IP_BLOCK_TYPE_VCN:
1919 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1920 return 0;
1921 break;
1922 case AMD_IP_BLOCK_TYPE_JPEG:
1923 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1924 return 0;
1925 break;
1926 default:
1927 break;
1928 }
1929
1930 DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,printk("\0016" "[" "drm" "] " "add ip block number %d <%s>\n"
, adev->num_ip_blocks, ip_block_version->funcs->name
)
1931 ip_block_version->funcs->name)printk("\0016" "[" "drm" "] " "add ip block number %d <%s>\n"
, adev->num_ip_blocks, ip_block_version->funcs->name
)
;
1932
1933 adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1934
1935 return 0;
1936}
1937
1938/**
1939 * amdgpu_device_enable_virtual_display - enable virtual display feature
1940 *
1941 * @adev: amdgpu_device pointer
1942 *
1943 * Enables the virtual display feature if the user has enabled it via
1944 * the module parameter virtual_display. This feature provides a virtual
1945 * display hardware on headless boards or in virtualized environments.
1946 * This function parses and validates the configuration string specified by
1947 * the user and configures the virtual display configuration (number of
1948 * virtual connectors, crtcs, etc.) specified.
1949 */
1950static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1951{
1952 adev->enable_virtual_display = false0;
1953
1954#ifdef notyet
1955 if (amdgpu_virtual_display) {
1956 const char *pci_address_name = pci_name(adev->pdev);
1957 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1958
1959 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL(0x0001 | 0x0004));
1960 pciaddstr_tmp = pciaddstr;
1961 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1962 pciaddname = strsep(&pciaddname_tmp, ",");
1963 if (!strcmp("all", pciaddname)
1964 || !strcmp(pci_address_name, pciaddname)) {
1965 long num_crtc;
1966 int res = -1;
1967
1968 adev->enable_virtual_display = true1;
1969
1970 if (pciaddname_tmp)
1971 res = kstrtol(pciaddname_tmp, 10,
1972 &num_crtc);
1973
1974 if (!res) {
1975 if (num_crtc < 1)
1976 num_crtc = 1;
1977 if (num_crtc > 6)
1978 num_crtc = 6;
1979 adev->mode_info.num_crtc = num_crtc;
1980 } else {
1981 adev->mode_info.num_crtc = 1;
1982 }
1983 break;
1984 }
1985 }
1986
1987 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",printk("\0016" "[" "drm" "] " "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n"
, amdgpu_virtual_display, pci_address_name, adev->enable_virtual_display
, adev->mode_info.num_crtc)
1988 amdgpu_virtual_display, pci_address_name,printk("\0016" "[" "drm" "] " "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n"
, amdgpu_virtual_display, pci_address_name, adev->enable_virtual_display
, adev->mode_info.num_crtc)
1989 adev->enable_virtual_display, adev->mode_info.num_crtc)printk("\0016" "[" "drm" "] " "virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n"
, amdgpu_virtual_display, pci_address_name, adev->enable_virtual_display
, adev->mode_info.num_crtc)
;
1990
1991 kfree(pciaddstr);
1992 }
1993#endif
1994}
1995
1996/**
1997 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1998 *
1999 * @adev: amdgpu_device pointer
2000 *
2001 * Parses the asic configuration parameters specified in the gpu info
2002 * firmware and makes them available to the driver for use in configuring
2003 * the asic.
2004 * Returns 0 on success, -EINVAL on failure.
2005 */
2006static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2007{
2008 const char *chip_name;
2009 char fw_name[40];
2010 int err;
2011 const struct gpu_info_firmware_header_v1_0 *hdr;
2012
2013 adev->firmware.gpu_info_fw = NULL((void *)0);
2014
2015 if (adev->mman.discovery_bin) {
2016 /*
2017 * FIXME: The bounding box is still needed by Navi12, so
2018 * temporarily read it from gpu_info firmware. Should be dropped
2019 * when DAL no longer needs it.
2020 */
2021 if (adev->asic_type != CHIP_NAVI12)
2022 return 0;
2023 }
2024
2025 switch (adev->asic_type) {
2026 default:
2027 return 0;
2028 case CHIP_VEGA10:
2029 chip_name = "vega10";
2030 break;
2031 case CHIP_VEGA12:
2032 chip_name = "vega12";
2033 break;
2034 case CHIP_RAVEN:
2035 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2036 chip_name = "raven2";
2037 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2038 chip_name = "picasso";
2039 else
2040 chip_name = "raven";
2041 break;
2042 case CHIP_ARCTURUS:
2043 chip_name = "arcturus";
2044 break;
2045 case CHIP_NAVI12:
2046 chip_name = "navi12";
2047 break;
2048 }
2049
2050 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2051 err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
2052 if (err) {
2053 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "Failed to load gpu_info firmware \"%s\"\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , fw_name
)
2054 "Failed to load gpu_info firmware \"%s\"\n",printf("drm:pid%d:%s *ERROR* " "Failed to load gpu_info firmware \"%s\"\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , fw_name
)
2055 fw_name)printf("drm:pid%d:%s *ERROR* " "Failed to load gpu_info firmware \"%s\"\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , fw_name
)
;
2056 goto out;
2057 }
2058 err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
2059 if (err) {
2060 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "Failed to validate gpu_info firmware \"%s\"\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , fw_name
)
2061 "Failed to validate gpu_info firmware \"%s\"\n",printf("drm:pid%d:%s *ERROR* " "Failed to validate gpu_info firmware \"%s\"\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , fw_name
)
2062 fw_name)printf("drm:pid%d:%s *ERROR* " "Failed to validate gpu_info firmware \"%s\"\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , fw_name
)
;
2063 goto out;
2064 }
2065
2066 hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2067 amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2068
2069 switch (hdr->version_major) {
2070 case 1:
2071 {
2072 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2073 (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2074 le32_to_cpu(hdr->header.ucode_array_offset_bytes)((__uint32_t)(hdr->header.ucode_array_offset_bytes)));
2075
2076 /*
2077 * Should be dropped when DAL no longer needs it.
2078 */
2079 if (adev->asic_type == CHIP_NAVI12)
2080 goto parse_soc_bounding_box;
2081
2082 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se)((__uint32_t)(gpu_info_fw->gc_num_se));
2083 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh)((__uint32_t)(gpu_info_fw->gc_num_cu_per_sh));
2084 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se)((__uint32_t)(gpu_info_fw->gc_num_sh_per_se));
2085 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se)((__uint32_t)(gpu_info_fw->gc_num_rb_per_se));
2086 adev->gfx.config.max_texture_channel_caches =
2087 le32_to_cpu(gpu_info_fw->gc_num_tccs)((__uint32_t)(gpu_info_fw->gc_num_tccs));
2088 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs)((__uint32_t)(gpu_info_fw->gc_num_gprs));
2089 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds)((__uint32_t)(gpu_info_fw->gc_num_max_gs_thds));
2090 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth)((__uint32_t)(gpu_info_fw->gc_gs_table_depth));
2091 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth)((__uint32_t)(gpu_info_fw->gc_gsprim_buff_depth));
2092 adev->gfx.config.double_offchip_lds_buf =
2093 le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer)((__uint32_t)(gpu_info_fw->gc_double_offchip_lds_buffer));
2094 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size)((__uint32_t)(gpu_info_fw->gc_wave_size));
2095 adev->gfx.cu_info.max_waves_per_simd =
2096 le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd)((__uint32_t)(gpu_info_fw->gc_max_waves_per_simd));
2097 adev->gfx.cu_info.max_scratch_slots_per_cu =
2098 le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu)((__uint32_t)(gpu_info_fw->gc_max_scratch_slots_per_cu));
2099 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size)((__uint32_t)(gpu_info_fw->gc_lds_size));
2100 if (hdr->version_minor >= 1) {
2101 const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2102 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2103 le32_to_cpu(hdr->header.ucode_array_offset_bytes)((__uint32_t)(hdr->header.ucode_array_offset_bytes)));
2104 adev->gfx.config.num_sc_per_sh =
2105 le32_to_cpu(gpu_info_fw->num_sc_per_sh)((__uint32_t)(gpu_info_fw->num_sc_per_sh));
2106 adev->gfx.config.num_packer_per_sc =
2107 le32_to_cpu(gpu_info_fw->num_packer_per_sc)((__uint32_t)(gpu_info_fw->num_packer_per_sc));
2108 }
2109
2110parse_soc_bounding_box:
2111 /*
2112 * soc bounding box info is not integrated in the discovery table,
2113 * so we always need to parse it from the gpu info firmware if needed.
2114 */
2115 if (hdr->version_minor == 2) {
2116 const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2117 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2118 le32_to_cpu(hdr->header.ucode_array_offset_bytes)((__uint32_t)(hdr->header.ucode_array_offset_bytes)));
2119 adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2120 }
2121 break;
2122 }
2123 default:
2124 dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "Unsupported gpu_info table %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , hdr->
header.ucode_version)
2125 "Unsupported gpu_info table %d\n", hdr->header.ucode_version)printf("drm:pid%d:%s *ERROR* " "Unsupported gpu_info table %d\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , hdr->
header.ucode_version)
;
2126 err = -EINVAL22;
2127 goto out;
2128 }
2129out:
2130 return err;
2131}
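/*
 * Editor-added illustrative sketch (not part of amdgpu_device.c): the
 * firmware path built by the snprintf() above follows the
 * "amdgpu/<chip>_gpu_info.bin" pattern.  Hypothetical helper.
 */
static inline void example_gpu_info_fw_name(char *buf, size_t len,
					    const char *chip_name)
{
	/* e.g. chip_name "raven2" -> "amdgpu/raven2_gpu_info.bin" */
	snprintf(buf, len, "amdgpu/%s_gpu_info.bin", chip_name);
}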
2132
2133/**
2134 * amdgpu_device_ip_early_init - run early init for hardware IPs
2135 *
2136 * @adev: amdgpu_device pointer
2137 *
2138 * Early initialization pass for hardware IPs. The hardware IPs that make
2139 * up each asic are discovered and each IP's early_init callback is run. This
2140 * is the first stage in initializing the asic.
2141 * Returns 0 on success, negative error code on failure.
2142 */
2143static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2144{
2145 struct pci_dev *parent;
2146 int i, r;
2147
2148 amdgpu_device_enable_virtual_display(adev);
2149
2150 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
2151 r = amdgpu_virt_request_full_gpu(adev, true1);
2152 if (r)
2153 return r;
2154 }
2155
2156 switch (adev->asic_type) {
2157#ifdef CONFIG_DRM_AMDGPU_SI
2158 case CHIP_VERDE:
2159 case CHIP_TAHITI:
2160 case CHIP_PITCAIRN:
2161 case CHIP_OLAND:
2162 case CHIP_HAINAN:
2163 adev->family = AMDGPU_FAMILY_SI110;
2164 r = si_set_ip_blocks(adev);
2165 if (r)
2166 return r;
2167 break;
2168#endif
2169#ifdef CONFIG_DRM_AMDGPU_CIK
2170 case CHIP_BONAIRE:
2171 case CHIP_HAWAII:
2172 case CHIP_KAVERI:
2173 case CHIP_KABINI:
2174 case CHIP_MULLINS:
2175 if (adev->flags & AMD_IS_APU)
2176 adev->family = AMDGPU_FAMILY_KV125;
2177 else
2178 adev->family = AMDGPU_FAMILY_CI120;
2179
2180 r = cik_set_ip_blocks(adev);
2181 if (r)
2182 return r;
2183 break;
2184#endif
2185 case CHIP_TOPAZ:
2186 case CHIP_TONGA:
2187 case CHIP_FIJI:
2188 case CHIP_POLARIS10:
2189 case CHIP_POLARIS11:
2190 case CHIP_POLARIS12:
2191 case CHIP_VEGAM:
2192 case CHIP_CARRIZO:
2193 case CHIP_STONEY:
2194 if (adev->flags & AMD_IS_APU)
2195 adev->family = AMDGPU_FAMILY_CZ135;
2196 else
2197 adev->family = AMDGPU_FAMILY_VI130;
2198
2199 r = vi_set_ip_blocks(adev);
2200 if (r)
2201 return r;
2202 break;
2203 default:
2204 r = amdgpu_discovery_set_ip_blocks(adev);
2205 if (r)
2206 return r;
2207 break;
2208 }
2209
2210 if (amdgpu_has_atpx() &&
2211 (amdgpu_is_atpx_hybrid() ||
2212 amdgpu_has_atpx_dgpu_power_cntl()) &&
2213 ((adev->flags & AMD_IS_APU) == 0) &&
2214 !dev_is_removable(&adev->pdev->dev)0)
2215 adev->flags |= AMD_IS_PX;
2216
2217 if (!(adev->flags & AMD_IS_APU)) {
2218#ifdef notyet
2219 parent = pcie_find_root_port(adev->pdev);
2220 adev->has_pr3 = parent ? pci_pr3_present(parent) : false0;
2221#else
2222 adev->has_pr3 = false0;
2223#endif
2224 }
2225
2226 amdgpu_amdkfd_device_probe(adev);
2227
2228 adev->pm.pp_feature = amdgpu_pp_feature_mask;
2229 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2230 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2231 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) && adev->asic_type == CHIP_SIENNA_CICHLID)
2232 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2233
2234 for (i = 0; i < adev->num_ip_blocks; i++) {
2235 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2236 DRM_ERROR("disabled ip block: %d <%s>\n",__drm_err("disabled ip block: %d <%s>\n", i, adev->ip_blocks
[i].version->funcs->name)
2237 i, adev->ip_blocks[i].version->funcs->name)__drm_err("disabled ip block: %d <%s>\n", i, adev->ip_blocks
[i].version->funcs->name)
;
2238 adev->ip_blocks[i].status.valid = false0;
2239 } else {
2240 if (adev->ip_blocks[i].version->funcs->early_init) {
2241 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2242 if (r == -ENOENT2) {
2243 adev->ip_blocks[i].status.valid = false0;
2244 } else if (r) {
2245 DRM_ERROR("early_init of IP block <%s> failed %d\n",__drm_err("early_init of IP block <%s> failed %d\n", adev
->ip_blocks[i].version->funcs->name, r)
2246 adev->ip_blocks[i].version->funcs->name, r)__drm_err("early_init of IP block <%s> failed %d\n", adev
->ip_blocks[i].version->funcs->name, r)
;
2247 return r;
2248 } else {
2249 adev->ip_blocks[i].status.valid = true1;
2250 }
2251 } else {
2252 adev->ip_blocks[i].status.valid = true1;
2253 }
2254 }
2255 /* get the vbios after the asic_funcs are set up */
2256 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2257 r = amdgpu_device_parse_gpu_info_fw(adev);
2258 if (r)
2259 return r;
2260
2261 /* Read BIOS */
2262 if (!amdgpu_get_bios(adev))
2263 return -EINVAL22;
2264
2265 r = amdgpu_atombios_init(adev);
2266 if (r) {
2267 dev_err(adev->dev, "amdgpu_atombios_init failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_atombios_init failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
2268 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2269 return r;
2270 }
2271
2272 /* get pf2vf msg info at its earliest time */
2273 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
2274 amdgpu_virt_init_data_exchange(adev);
2275
2276 }
2277 }
2278
2279 adev->cg_flags &= amdgpu_cg_mask;
2280 adev->pg_flags &= amdgpu_pg_mask;
2281
2282 return 0;
2283}
2284
2285static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2286{
2287 int i, r;
2288
2289 for (i = 0; i < adev->num_ip_blocks; i++) {
2290 if (!adev->ip_blocks[i].status.sw)
2291 continue;
2292 if (adev->ip_blocks[i].status.hw)
2293 continue;
2294 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2295 (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2296 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2297 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2298 if (r) {
2299 DRM_ERROR("hw_init of IP block <%s> failed %d\n",__drm_err("hw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
2300 adev->ip_blocks[i].version->funcs->name, r)__drm_err("hw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
2301 return r;
2302 }
2303 adev->ip_blocks[i].status.hw = true1;
2304 }
2305 }
2306
2307 return 0;
2308}
2309
2310static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2311{
2312 int i, r;
2313
2314 for (i = 0; i < adev->num_ip_blocks; i++) {
2315 if (!adev->ip_blocks[i].status.sw)
2316 continue;
2317 if (adev->ip_blocks[i].status.hw)
2318 continue;
2319 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2320 if (r) {
2321 DRM_ERROR("hw_init of IP block <%s> failed %d\n",__drm_err("hw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
2322 adev->ip_blocks[i].version->funcs->name, r)__drm_err("hw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
2323 return r;
2324 }
2325 adev->ip_blocks[i].status.hw = true1;
2326 }
2327
2328 return 0;
2329}
2330
2331static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2332{
2333 int r = 0;
2334 int i;
2335 uint32_t smu_version;
2336
2337 if (adev->asic_type >= CHIP_VEGA10) {
2338 for (i = 0; i < adev->num_ip_blocks; i++) {
2339 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2340 continue;
2341
2342 if (!adev->ip_blocks[i].status.sw)
2343 continue;
2344
2345 /* no need to do the fw loading again if already done*/
2346 if (adev->ip_blocks[i].status.hw == true1)
2347 break;
2348
2349 if (amdgpu_in_reset(adev) || adev->in_suspend) {
2350 r = adev->ip_blocks[i].version->funcs->resume(adev);
2351 if (r) {
2352 DRM_ERROR("resume of IP block <%s> failed %d\n",__drm_err("resume of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
2353 adev->ip_blocks[i].version->funcs->name, r)__drm_err("resume of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
2354 return r;
2355 }
2356 } else {
2357 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2358 if (r) {
2359 DRM_ERROR("hw_init of IP block <%s> failed %d\n",__drm_err("hw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
2360 adev->ip_blocks[i].version->funcs->name, r)__drm_err("hw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
2361 return r;
2362 }
2363 }
2364
2365 adev->ip_blocks[i].status.hw = true1;
2366 break;
2367 }
2368 }
2369
2370 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) || adev->asic_type == CHIP_TONGA)
2371 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2372
2373 return r;
2374}
2375
2376static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2377{
2378 long timeout;
2379 int r, i;
2380
2381 for (i = 0; i < AMDGPU_MAX_RINGS28; ++i) {
2382 struct amdgpu_ring *ring = adev->rings[i];
2383
2384 /* No need to setup the GPU scheduler for rings that don't need it */
2385 if (!ring || ring->no_scheduler)
2386 continue;
2387
2388 switch (ring->funcs->type) {
2389 case AMDGPU_RING_TYPE_GFX:
2390 timeout = adev->gfx_timeout;
2391 break;
2392 case AMDGPU_RING_TYPE_COMPUTE:
2393 timeout = adev->compute_timeout;
2394 break;
2395 case AMDGPU_RING_TYPE_SDMA:
2396 timeout = adev->sdma_timeout;
2397 break;
2398 default:
2399 timeout = adev->video_timeout;
2400 break;
2401 }
2402
2403 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2404 ring->num_hw_submission, amdgpu_job_hang_limit,
2405 timeout, adev->reset_domain->wq,
2406 ring->sched_score, ring->name,
2407 adev->dev);
2408 if (r) {
2409 DRM_ERROR("Failed to create scheduler on ring %s.\n",__drm_err("Failed to create scheduler on ring %s.\n", ring->
name)
2410 ring->name)__drm_err("Failed to create scheduler on ring %s.\n", ring->
name)
;
2411 return r;
2412 }
2413 }
2414
2415 return 0;
2416}
2417
2418
2419/**
2420 * amdgpu_device_ip_init - run init for hardware IPs
2421 *
2422 * @adev: amdgpu_device pointer
2423 *
2424 * Main initialization pass for hardware IPs. The list of all the hardware
2425 * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2426 * are run. sw_init initializes the software state associated with each IP
2427 * and hw_init initializes the hardware associated with each IP.
2428 * Returns 0 on success, negative error code on failure.
2429 */
2430static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2431{
2432 int i, r;
2433
2434 r = amdgpu_ras_init(adev);
2435 if (r)
2436 return r;
2437
2438 for (i = 0; i < adev->num_ip_blocks; i++) {
2439 if (!adev->ip_blocks[i].status.valid)
2440 continue;
2441 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2442 if (r) {
2443 DRM_ERROR("sw_init of IP block <%s> failed %d\n",__drm_err("sw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
2444 adev->ip_blocks[i].version->funcs->name, r)__drm_err("sw_init of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
2445 goto init_failed;
2446 }
2447 adev->ip_blocks[i].status.sw = true1;
2448
2449 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2450 /* need to do common hw init early so everything is set up for gmc */
2451 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2452 if (r) {
2453 DRM_ERROR("hw_init %d failed %d\n", i, r)__drm_err("hw_init %d failed %d\n", i, r);
2454 goto init_failed;
2455 }
2456 adev->ip_blocks[i].status.hw = true1;
2457 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2458 /* need to do gmc hw init early so we can allocate gpu mem */
2459 /* Try to reserve bad pages early */
2460 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
2461 amdgpu_virt_exchange_data(adev);
2462
2463 r = amdgpu_device_vram_scratch_init(adev);
2464 if (r) {
2465 DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r)__drm_err("amdgpu_vram_scratch_init failed %d\n", r);
2466 goto init_failed;
2467 }
2468 r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2469 if (r) {
2470 DRM_ERROR("hw_init %d failed %d\n", i, r)__drm_err("hw_init %d failed %d\n", i, r);
2471 goto init_failed;
2472 }
2473 r = amdgpu_device_wb_init(adev);
2474 if (r) {
2475 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r)__drm_err("amdgpu_device_wb_init failed %d\n", r);
2476 goto init_failed;
2477 }
2478 adev->ip_blocks[i].status.hw = true1;
2479
2480 /* right after GMC hw init, we create CSA */
2481 if (amdgpu_mcbp || amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
2482 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2483 AMDGPU_GEM_DOMAIN_VRAM0x4,
2484 AMDGPU_CSA_SIZE(128 * 1024));
2485 if (r) {
2486 DRM_ERROR("allocate CSA failed %d\n", r)__drm_err("allocate CSA failed %d\n", r);
2487 goto init_failed;
2488 }
2489 }
2490 }
2491 }
2492
2493 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
2494 amdgpu_virt_init_data_exchange(adev);
2495
2496 r = amdgpu_ib_pool_init(adev);
2497 if (r) {
2498 dev_err(adev->dev, "IB initialization failed (%d).\n", r)printf("drm:pid%d:%s *ERROR* " "IB initialization failed (%d).\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r)
;
2499 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2500 goto init_failed;
2501 }
2502
2503 r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2504 if (r)
2505 goto init_failed;
2506
2507 r = amdgpu_device_ip_hw_init_phase1(adev);
2508 if (r)
2509 goto init_failed;
2510
2511 r = amdgpu_device_fw_loading(adev);
2512 if (r)
2513 goto init_failed;
2514
2515 r = amdgpu_device_ip_hw_init_phase2(adev);
2516 if (r)
2517 goto init_failed;
2518
2519 /*
2520 * retired pages will be loaded from eeprom and reserved here,
2521 * it should be called after amdgpu_device_ip_hw_init_phase2 since
2522 * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2523 * for I2C communication, which is only true at this point.
2524 *
2525 * amdgpu_ras_recovery_init may fail, but the upper layers only care about
2526 * failures caused by a bad gpu situation and stop the amdgpu init process
2527 * accordingly. For other failures, it will still release all
2528 * the resources and print an error message, rather than returning a
2529 * negative value to the upper level.
2530 *
2531 * Note: theoretically, this should be called before all vram allocations
2532 * to protect retired pages from being reused
2533 */
2534 r = amdgpu_ras_recovery_init(adev);
2535 if (r)
2536 goto init_failed;
2537
2538 /**
2539 * In case of XGMI grab extra reference for reset domain for this device
2540 */
2541 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2542 if (amdgpu_xgmi_add_device(adev) == 0) {
2543 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
2544 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2545
2546 if (WARN_ON(!hive)({ int __ret = !!(!hive); if (__ret) printf("WARNING %s failed at %s:%d\n"
, "!hive", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 2546); __builtin_expect(!!(__ret), 0); })
) {
2547 r = -ENOENT2;
2548 goto init_failed;
2549 }
2550
2551 if (!hive->reset_domain ||
2552 !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2553 r = -ENOENT2;
2554 amdgpu_put_xgmi_hive(hive);
2555 goto init_failed;
2556 }
2557
2558 /* Drop the early temporary reset domain we created for device */
2559 amdgpu_reset_put_reset_domain(adev->reset_domain);
2560 adev->reset_domain = hive->reset_domain;
2561 amdgpu_put_xgmi_hive(hive);
2562 }
2563 }
2564 }
2565
2566 r = amdgpu_device_init_schedulers(adev);
2567 if (r)
2568 goto init_failed;
2569
2570 /* Don't init kfd if the whole hive needs to be reset during init */
2571 if (!adev->gmc.xgmi.pending_reset)
2572 amdgpu_amdkfd_device_init(adev);
2573
2574 amdgpu_fru_get_product_info(adev);
2575
2576init_failed:
2577
2578 return r;
2579}
2580
2581/**
2582 * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2583 *
2584 * @adev: amdgpu_device pointer
2585 *
2586 * Writes a reset magic value to the gart pointer in VRAM. The driver calls
2587 * this function before a GPU reset. If the value is retained after a
2588 * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
2589 */
2590static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2591{
2592 memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM)__builtin_memcpy((adev->reset_magic), (adev->gart.ptr),
(64))
;
2593}
2594
2595/**
2596 * amdgpu_device_check_vram_lost - check if vram is valid
2597 *
2598 * @adev: amdgpu_device pointer
2599 *
2600 * Checks the reset magic value written to the gart pointer in VRAM.
2601 * The driver calls this after a GPU reset to see if the contents of
2602 * VRAM is lost or not.
2603 * Returns true if vram is lost, false if not.
2604 */
2605static bool_Bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2606{
2607 if (memcmp(adev->gart.ptr, adev->reset_magic,__builtin_memcmp((adev->gart.ptr), (adev->reset_magic),
(64))
2608 AMDGPU_RESET_MAGIC_NUM)__builtin_memcmp((adev->gart.ptr), (adev->reset_magic),
(64))
)
2609 return true1;
2610
2611 if (!amdgpu_in_reset(adev))
2612 return false0;
2613
2614 /*
2615 * For all ASICs with baco/mode1 reset, the VRAM is
2616 * always assumed to be lost.
2617 */
2618 switch (amdgpu_asic_reset_method(adev)(adev)->asic_funcs->reset_method((adev))) {
2619 case AMD_RESET_METHOD_BACO:
2620 case AMD_RESET_METHOD_MODE1:
2621 return true1;
2622 default:
2623 return false0;
2624 }
2625}
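/*
 * Editor-added illustrative sketch (not part of amdgpu_device.c): the
 * reset-magic handshake used by amdgpu_device_fill_reset_magic() and
 * amdgpu_device_check_vram_lost() above.  A 64-byte snapshot of GART-visible
 * VRAM is taken before a reset; if the bytes differ afterwards, or a
 * BACO/mode1 reset was used, VRAM contents are treated as lost.
 * Hypothetical helper; 64 mirrors AMDGPU_RESET_MAGIC_NUM.
 */
static inline bool example_vram_survived_reset(const void *gart_ptr,
					       const uint8_t magic[64])
{
	return memcmp(gart_ptr, magic, 64) == 0;
}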
2626
2627/**
2628 * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2629 *
2630 * @adev: amdgpu_device pointer
2631 * @state: clockgating state (gate or ungate)
2632 *
2633 * The list of all the hardware IPs that make up the asic is walked and the
2634 * set_clockgating_state callbacks are run.
2635 * The late initialization pass enables clockgating for hardware IPs;
2636 * the fini or suspend pass disables clockgating for hardware IPs.
2637 * Returns 0 on success, negative error code on failure.
2638 */
2639
2640int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2641 enum amd_clockgating_state state)
2642{
2643 int i, j, r;
2644
2645 if (amdgpu_emu_mode == 1)
2646 return 0;
2647
2648 for (j = 0; j < adev->num_ip_blocks; j++) {
2649 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2650 if (!adev->ip_blocks[i].status.late_initialized)
2651 continue;
2652 /* skip CG for GFX on S0ix */
2653 if (adev->in_s0ix &&
2654 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2655 continue;
2656 /* skip CG for VCE/UVD, it's handled specially */
2657 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2658 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2659 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2660 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2661 adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2662 /* enable clockgating to save power */
2663 r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2664 state);
2665 if (r) {
2666 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",__drm_err("set_clockgating_state(gate) of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
2667 adev->ip_blocks[i].version->funcs->name, r)__drm_err("set_clockgating_state(gate) of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
2668 return r;
2669 }
2670 }
2671 }
2672
2673 return 0;
2674}
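/*
 * Editor-added illustrative sketch (not part of amdgpu_device.c): the index
 * trick used in amdgpu_device_set_cg_state() above and set_pg_state() below.
 * When gating, IP blocks are walked front to back; when ungating, back to
 * front, so blocks are ungated in the reverse of the order they were gated.
 * Hypothetical helper.
 */
static inline int example_gating_walk_index(int j, int num_ip_blocks, bool gate)
{
	return gate ? j : num_ip_blocks - j - 1;
}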
2675
2676int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2677 enum amd_powergating_state state)
2678{
2679 int i, j, r;
2680
2681 if (amdgpu_emu_mode == 1)
2682 return 0;
2683
2684 for (j = 0; j < adev->num_ip_blocks; j++) {
2685 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2686 if (!adev->ip_blocks[i].status.late_initialized)
2687 continue;
2688 /* skip PG for GFX on S0ix */
2689 if (adev->in_s0ix &&
2690 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
2691 continue;
2692 /* skip PG for VCE/UVD, it's handled specially */
2693 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2694 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2695 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2696 adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2697 adev->ip_blocks[i].version->funcs->set_powergating_state) {
2698 /* enable powergating to save power */
2699 r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2700 state);
2701 if (r) {
2702 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",__drm_err("set_powergating_state(gate) of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
2703 adev->ip_blocks[i].version->funcs->name, r)__drm_err("set_powergating_state(gate) of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
2704 return r;
2705 }
2706 }
2707 }
2708 return 0;
2709}
2710
2711static int amdgpu_device_enable_mgpu_fan_boost(void)
2712{
2713 struct amdgpu_gpu_instance *gpu_ins;
2714 struct amdgpu_device *adev;
2715 int i, ret = 0;
2716
2717 mutex_lock(&mgpu_info.mutex)rw_enter_write(&mgpu_info.mutex);
2718
2719 /*
2720 * MGPU fan boost feature should be enabled
2721 * only when there are two or more dGPUs in
2722 * the system
2723 */
2724 if (mgpu_info.num_dgpu < 2)
2725 goto out;
2726
2727 for (i = 0; i < mgpu_info.num_dgpu; i++) {
2728 gpu_ins = &(mgpu_info.gpu_ins[i]);
2729 adev = gpu_ins->adev;
2730 if (!(adev->flags & AMD_IS_APU) &&
2731 !gpu_ins->mgpu_fan_enabled) {
2732 ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2733 if (ret)
2734 break;
2735
2736 gpu_ins->mgpu_fan_enabled = 1;
2737 }
2738 }
2739
2740out:
2741 mutex_unlock(&mgpu_info.mutex)rw_exit_write(&mgpu_info.mutex);
2742
2743 return ret;
2744}
2745
2746/**
2747 * amdgpu_device_ip_late_init - run late init for hardware IPs
2748 *
2749 * @adev: amdgpu_device pointer
2750 *
2751 * Late initialization pass for hardware IPs. The list of all the hardware
2752 * IPs that make up the asic is walked and the late_init callbacks are run.
2753 * late_init covers any special initialization that an IP requires
2754 * after all of them have been initialized or something that needs to happen
2755 * late in the init process.
2756 * Returns 0 on success, negative error code on failure.
2757 */
2758static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2759{
2760 struct amdgpu_gpu_instance *gpu_instance;
2761 int i = 0, r;
2762
2763 for (i = 0; i < adev->num_ip_blocks; i++) {
2764 if (!adev->ip_blocks[i].status.hw)
2765 continue;
2766 if (adev->ip_blocks[i].version->funcs->late_init) {
2767 r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2768 if (r) {
2769 DRM_ERROR("late_init of IP block <%s> failed %d\n",__drm_err("late_init of IP block <%s> failed %d\n", adev
->ip_blocks[i].version->funcs->name, r)
2770 adev->ip_blocks[i].version->funcs->name, r)__drm_err("late_init of IP block <%s> failed %d\n", adev
->ip_blocks[i].version->funcs->name, r)
;
2771 return r;
2772 }
2773 }
2774 adev->ip_blocks[i].status.late_initialized = true1;
2775 }
2776
2777 r = amdgpu_ras_late_init(adev);
2778 if (r) {
2779 DRM_ERROR("amdgpu_ras_late_init failed %d", r)__drm_err("amdgpu_ras_late_init failed %d", r);
2780 return r;
2781 }
2782
2783 amdgpu_ras_set_error_query_ready(adev, true1);
2784
2785 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2786 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2787
2788 amdgpu_device_fill_reset_magic(adev);
2789
2790 r = amdgpu_device_enable_mgpu_fan_boost();
2791 if (r)
2792 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r)__drm_err("enable mgpu fan boost failed (%d).\n", r);
2793
2794 /* For passthrough configuration on arcturus and aldebaran, enable special handling of SBR */
2795 if (amdgpu_passthrough(adev)((adev)->virt.caps & (1 << 3)) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)||
2796 adev->asic_type == CHIP_ALDEBARAN ))
2797 amdgpu_dpm_handle_passthrough_sbr(adev, true1);
2798
2799 if (adev->gmc.xgmi.num_physical_nodes > 1) {
2800 mutex_lock(&mgpu_info.mutex)rw_enter_write(&mgpu_info.mutex);
2801
2802 /*
2803 * Reset device p-state to low as this was booted with high.
2804 *
2805 * This should be performed only after all devices from the same
2806 * hive get initialized.
2807 *
2808 * However, it's unknown in advance how many devices are in the hive,
2809 * as this is counted one by one during device initialization.
2810 *
2811 * So, we wait for all XGMI interlinked devices to be initialized.
2812 * This may bring some delays as those devices may come from
2813 * different hives. But that should be OK.
2814 */
2815 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2816 for (i = 0; i < mgpu_info.num_gpu; i++) {
2817 gpu_instance = &(mgpu_info.gpu_ins[i]);
2818 if (gpu_instance->adev->flags & AMD_IS_APU)
2819 continue;
2820
2821 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2822 AMDGPU_XGMI_PSTATE_MIN);
2823 if (r) {
2824 DRM_ERROR("pstate setting failed (%d).\n", r)__drm_err("pstate setting failed (%d).\n", r);
2825 break;
2826 }
2827 }
2828 }
2829
2830 mutex_unlock(&mgpu_info.mutex)rw_exit_write(&mgpu_info.mutex);
2831 }
2832
2833 return 0;
2834}
2835
2836/**
2837 * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2838 *
2839 * @adev: amdgpu_device pointer
2840 *
2841 * For ASICs that need to disable the SMC first
2842 */
2843static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2844{
2845 int i, r;
2846
2847 if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)(((9) << 16) | ((0) << 8) | (0)))
2848 return;
2849
2850 for (i = 0; i < adev->num_ip_blocks; i++) {
2851 if (!adev->ip_blocks[i].status.hw)
2852 continue;
2853 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2854 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2855 /* XXX handle errors */
2856 if (r) {
2857 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",___drm_dbg(((void *)0), DRM_UT_CORE, "hw_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
2858 adev->ip_blocks[i].version->funcs->name, r)___drm_dbg(((void *)0), DRM_UT_CORE, "hw_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
2859 }
2860 adev->ip_blocks[i].status.hw = false0;
2861 break;
2862 }
2863 }
2864}
2865
2866static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2867{
2868 int i, r;
2869
2870 for (i = 0; i < adev->num_ip_blocks; i++) {
2871 if (!adev->ip_blocks[i].version->funcs->early_fini)
2872 continue;
2873
2874 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2875 if (r) {
2876 DRM_DEBUG("early_fini of IP block <%s> failed %d\n",___drm_dbg(((void *)0), DRM_UT_CORE, "early_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
2877 adev->ip_blocks[i].version->funcs->name, r)___drm_dbg(((void *)0), DRM_UT_CORE, "early_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
2878 }
2879 }
2880
2881 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2882 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2883
2884 amdgpu_amdkfd_suspend(adev, false0);
2885
2886 /* Workaround for ASICs that need to disable SMC first */
2887 amdgpu_device_smu_fini_early(adev);
2888
2889 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2890 if (!adev->ip_blocks[i].status.hw)
2891 continue;
2892
2893 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2894 /* XXX handle errors */
2895 if (r) {
2896 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",___drm_dbg(((void *)0), DRM_UT_CORE, "hw_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
2897 adev->ip_blocks[i].version->funcs->name, r)___drm_dbg(((void *)0), DRM_UT_CORE, "hw_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
2898 }
2899
2900 adev->ip_blocks[i].status.hw = false0;
2901 }
2902
2903 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
2904 if (amdgpu_virt_release_full_gpu(adev, false0))
2905 DRM_ERROR("failed to release exclusive mode on fini\n")__drm_err("failed to release exclusive mode on fini\n");
2906 }
2907
2908 return 0;
2909}
2910
2911/**
2912 * amdgpu_device_ip_fini - run fini for hardware IPs
2913 *
2914 * @adev: amdgpu_device pointer
2915 *
2916 * Main teardown pass for hardware IPs. The list of all the hardware
2917 * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2918 * are run. hw_fini tears down the hardware associated with each IP
2919 * and sw_fini tears down any software state associated with each IP.
2920 * Returns 0 on success, negative error code on failure.
2921 */
2922static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2923{
2924 int i, r;
2925
2926 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) && adev->virt.ras_init_done)
2927 amdgpu_virt_release_ras_err_handler_data(adev);
2928
2929 if (adev->gmc.xgmi.num_physical_nodes > 1)
2930 amdgpu_xgmi_remove_device(adev);
2931
2932 amdgpu_amdkfd_device_fini_sw(adev);
2933
2934 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2935 if (!adev->ip_blocks[i].status.sw)
2936 continue;
2937
2938 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2939 amdgpu_ucode_free_bo(adev);
2940 amdgpu_free_static_csa(&adev->virt.csa_obj);
2941 amdgpu_device_wb_fini(adev);
2942 amdgpu_device_vram_scratch_fini(adev);
2943 amdgpu_ib_pool_fini(adev);
2944 }
2945
2946 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2947 /* XXX handle errors */
2948 if (r) {
2949 DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",___drm_dbg(((void *)0), DRM_UT_CORE, "sw_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
2950 adev->ip_blocks[i].version->funcs->name, r)___drm_dbg(((void *)0), DRM_UT_CORE, "sw_fini of IP block <%s> failed %d\n"
, adev->ip_blocks[i].version->funcs->name, r)
;
2951 }
2952 adev->ip_blocks[i].status.sw = false0;
2953 adev->ip_blocks[i].status.valid = false0;
2954 }
2955
2956 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2957 if (!adev->ip_blocks[i].status.late_initialized)
2958 continue;
2959 if (adev->ip_blocks[i].version->funcs->late_fini)
2960 adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2961 adev->ip_blocks[i].status.late_initialized = false0;
2962 }
2963
2964 amdgpu_ras_fini(adev);
2965
2966 return 0;
2967}
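/*
 * Editorial sketch, not part of amdgpu_device.c: a minimal, self-contained
 * illustration of the teardown pattern used above, with hypothetical types.
 * sw_fini-style callbacks run from the last block back to the first, so
 * blocks are torn down in the reverse order of their initialization, and a
 * failing callback is only noted; the walk keeps going.
 */
struct ip_funcs_sketch {
	const char *name;
	int (*sw_fini)(void *handle);
};

struct ip_block_sketch {
	const struct ip_funcs_sketch *funcs;
	int sw_initialized;
};

static int ip_blocks_sw_fini_sketch(struct ip_block_sketch *blocks, int num,
    void *handle)
{
	int i, failed = 0;

	for (i = num - 1; i >= 0; i--) {
		if (!blocks[i].sw_initialized)
			continue;
		if (blocks[i].funcs->sw_fini(handle))
			failed++;	/* errors are counted, not fatal */
		blocks[i].sw_initialized = 0;
	}
	return failed;
}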
2968
2969/**
2970 * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2971 *
2972 * @work: work_struct.
2973 */
2974static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2975{
2976 struct amdgpu_device *adev =
2977 container_of(work, struct amdgpu_device, delayed_init_work.work)({ const __typeof( ((struct amdgpu_device *)0)->delayed_init_work
.work ) *__mptr = (work); (struct amdgpu_device *)( (char *)__mptr
- __builtin_offsetof(struct amdgpu_device, delayed_init_work
.work) );})
;
2978 int r;
2979
2980 r = amdgpu_ib_ring_tests(adev);
2981 if (r)
2982 DRM_ERROR("ib ring test failed (%d).\n", r)__drm_err("ib ring test failed (%d).\n", r);
2983}
2984
2985static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2986{
2987 struct amdgpu_device *adev =
2988 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work)({ const __typeof( ((struct amdgpu_device *)0)->gfx.gfx_off_delay_work
.work ) *__mptr = (work); (struct amdgpu_device *)( (char *)__mptr
- __builtin_offsetof(struct amdgpu_device, gfx.gfx_off_delay_work
.work) );})
;
2989
2990 WARN_ON_ONCE(adev->gfx.gfx_off_state)({ static int __warned; int __ret = !!(adev->gfx.gfx_off_state
); if (__ret && !__warned) { printf("WARNING %s failed at %s:%d\n"
, "adev->gfx.gfx_off_state", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 2990); __warned = 1; } __builtin_expect(!!(__ret), 0); })
;
2991 WARN_ON_ONCE(adev->gfx.gfx_off_req_count)({ static int __warned; int __ret = !!(adev->gfx.gfx_off_req_count
); if (__ret && !__warned) { printf("WARNING %s failed at %s:%d\n"
, "adev->gfx.gfx_off_req_count", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 2991); __warned = 1; } __builtin_expect(!!(__ret), 0); })
;
2992
2993 if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true1))
2994 adev->gfx.gfx_off_state = true1;
2995}
2996
2997/**
2998 * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2999 *
3000 * @adev: amdgpu_device pointer
3001 *
3002 * Main suspend function for hardware IPs. The list of all the hardware
3003 * IPs that make up the asic is walked, clockgating is disabled and the
3004 * suspend callbacks are run. suspend puts the hardware and software state
3005 * in each IP into a state suitable for suspend.
3006 * Returns 0 on success, negative error code on failure.
3007 */
3008static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3009{
3010 int i, r;
3011
3012 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3013 amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3014
3015 /*
3016 * Per the PMFW team's suggestion, the driver needs to handle disabling
3017 * the gfxoff and df cstate features for gpu reset (e.g. Mode1Reset)
3018 * scenarios. Add the missing df cstate disablement here.
3019 */
3020 if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3021 dev_warn(adev->dev, "Failed to disallow df cstate")printf("drm:pid%d:%s *WARNING* " "Failed to disallow df cstate"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3022
3023 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3024 if (!adev->ip_blocks[i].status.valid)
3025 continue;
3026
3027 /* displays are handled separately */
3028 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3029 continue;
3030
3031 /* XXX handle errors */
3032 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3033 /* XXX handle errors */
3034 if (r) {
3035 DRM_ERROR("suspend of IP block <%s> failed %d\n",__drm_err("suspend of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
3036 adev->ip_blocks[i].version->funcs->name, r)__drm_err("suspend of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
3037 return r;
3038 }
3039
3040 adev->ip_blocks[i].status.hw = false0;
3041 }
3042
3043 return 0;
3044}
3045
3046/**
3047 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3048 *
3049 * @adev: amdgpu_device pointer
3050 *
3051 * Main suspend function for hardware IPs. The list of all the hardware
3052 * IPs that make up the asic is walked, clockgating is disabled and the
3053 * suspend callbacks are run. suspend puts the hardware and software state
3054 * in each IP into a state suitable for suspend.
3055 * Returns 0 on success, negative error code on failure.
3056 */
3057static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3058{
3059 int i, r;
3060
3061 if (adev->in_s0ix)
3062 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3063
3064 for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3065 if (!adev->ip_blocks[i].status.valid)
3066 continue;
3067 /* displays are handled in phase1 */
3068 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3069 continue;
3070 /* PSP loses its connection when err_event_athub occurs */
3071 if (amdgpu_ras_intr_triggered() &&
3072 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3073 adev->ip_blocks[i].status.hw = false0;
3074 continue;
3075 }
3076
3077 /* skip unnecessary suspend if we have not initialized them yet */
3078 if (adev->gmc.xgmi.pending_reset &&
3079 !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3080 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3081 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3082 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3083 adev->ip_blocks[i].status.hw = false0;
3084 continue;
3085 }
3086
3087 /* skip suspend of gfx/mes and psp for S0ix
3088 * gfx is in gfxoff state, so on resume it will exit gfxoff just
3089 * like at runtime. PSP is also part of the always on hardware
3090 * so no need to suspend it.
3091 */
3092 if (adev->in_s0ix &&
3093 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3094 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3095 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3096 continue;
3097
3098 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3099 if (adev->in_s0ix &&
3100 (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)(((5) << 16) | ((0) << 8) | (0))) &&
3101 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3102 continue;
3103
3104 /* During cold boot, swPSP provides the IMU and RLC FW binaries to TOS once.
3105 * These live in TMR and are expected to be reused by PSP-TOS, which reloads
3106 * from that location; RLC autoload is likewise loaded from there based on
3107 * the PMFW -> PSP message during the re-init sequence.
3108 * Therefore, PSP suspend & resume should be skipped on IMU-enabled APU ASICs
3109 * to avoid destroying the TMR and reloading the FWs again.
3110 */
3111 if (amdgpu_in_reset(adev) &&
3112 (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3113 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3114 continue;
3115
3116 /* XXX handle errors */
3117 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3118 /* XXX handle errors */
3119 if (r) {
3120 DRM_ERROR("suspend of IP block <%s> failed %d\n",__drm_err("suspend of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
3121 adev->ip_blocks[i].version->funcs->name, r)__drm_err("suspend of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
3122 }
3123 adev->ip_blocks[i].status.hw = false0;
3124 /* handle putting the SMC in the appropriate state */
3125 if(!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))){
3126 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3127 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3128 if (r) {
3129 DRM_ERROR("SMC failed to set mp1 state %d, %d\n",__drm_err("SMC failed to set mp1 state %d, %d\n", adev->mp1_state
, r)
3130 adev->mp1_state, r)__drm_err("SMC failed to set mp1 state %d, %d\n", adev->mp1_state
, r)
;
3131 return r;
3132 }
3133 }
3134 }
3135 }
3136
3137 return 0;
3138}
3139
3140/**
3141 * amdgpu_device_ip_suspend - run suspend for hardware IPs
3142 *
3143 * @adev: amdgpu_device pointer
3144 *
3145 * Main suspend function for hardware IPs. The list of all the hardware
3146 * IPs that make up the asic is walked, clockgating is disabled and the
3147 * suspend callbacks are run. suspend puts the hardware and software state
3148 * in each IP into a state suitable for suspend.
3149 * Returns 0 on success, negative error code on failure.
3150 */
3151int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3152{
3153 int r;
3154
3155 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
3156 amdgpu_virt_fini_data_exchange(adev);
3157 amdgpu_virt_request_full_gpu(adev, false0);
3158 }
3159
3160 r = amdgpu_device_ip_suspend_phase1(adev);
3161 if (r)
3162 return r;
3163 r = amdgpu_device_ip_suspend_phase2(adev);
3164
3165 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
3166 amdgpu_virt_release_full_gpu(adev, false0);
3167
3168 return r;
3169}
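/*
 * Editorial sketch with hypothetical types (not the driver's real API): the
 * two-phase suspend split used above, with error handling simplified. Phase 1
 * touches only the display (DCE) blocks; phase 2 handles every other block,
 * again walking the list in reverse, so displays go down first.
 */
struct susp_block_sketch {
	int is_display;
	int valid;
	int hw;
	int (*suspend)(void *handle);
};

static int suspend_walk_sketch(struct susp_block_sketch *blocks, int num,
    void *handle, int displays_only)
{
	int i, r;

	for (i = num - 1; i >= 0; i--) {
		if (!blocks[i].valid)
			continue;
		if (blocks[i].is_display != displays_only)
			continue;
		r = blocks[i].suspend(handle);
		if (r)
			return r;
		blocks[i].hw = 0;
	}
	return 0;
}

/*
 * Usage: suspend_walk_sketch(blocks, num, handle, 1) for phase 1 (displays),
 * then suspend_walk_sketch(blocks, num, handle, 0) for phase 2 (the rest).
 */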
3170
3171static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3172{
3173 int i, r;
3174
3175 static enum amd_ip_block_type ip_order[] = {
3176 AMD_IP_BLOCK_TYPE_COMMON,
3177 AMD_IP_BLOCK_TYPE_GMC,
3178 AMD_IP_BLOCK_TYPE_PSP,
3179 AMD_IP_BLOCK_TYPE_IH,
3180 };
3181
3182 for (i = 0; i < adev->num_ip_blocks; i++) {
3183 int j;
3184 struct amdgpu_ip_block *block;
3185
3186 block = &adev->ip_blocks[i];
3187 block->status.hw = false0;
3188
3189 for (j = 0; j < ARRAY_SIZE(ip_order)(sizeof((ip_order)) / sizeof((ip_order)[0])); j++) {
3190
3191 if (block->version->type != ip_order[j] ||
3192 !block->status.valid)
3193 continue;
3194
3195 r = block->version->funcs->hw_init(adev);
3196 DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded")printk("\0016" "[" "drm" "] " "RE-INIT-early: %s %s\n", block
->version->funcs->name, r?"failed":"succeeded")
;
3197 if (r)
3198 return r;
3199 block->status.hw = true1;
3200 }
3201 }
3202
3203 return 0;
3204}
3205
3206static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3207{
3208 int i, r;
3209
3210 static enum amd_ip_block_type ip_order[] = {
3211 AMD_IP_BLOCK_TYPE_SMC,
3212 AMD_IP_BLOCK_TYPE_DCE,
3213 AMD_IP_BLOCK_TYPE_GFX,
3214 AMD_IP_BLOCK_TYPE_SDMA,
3215 AMD_IP_BLOCK_TYPE_UVD,
3216 AMD_IP_BLOCK_TYPE_VCE,
3217 AMD_IP_BLOCK_TYPE_VCN
3218 };
3219
3220 for (i = 0; i < ARRAY_SIZE(ip_order)(sizeof((ip_order)) / sizeof((ip_order)[0])); i++) {
3221 int j;
3222 struct amdgpu_ip_block *block;
3223
3224 for (j = 0; j < adev->num_ip_blocks; j++) {
3225 block = &adev->ip_blocks[j];
3226
3227 if (block->version->type != ip_order[i] ||
3228 !block->status.valid ||
3229 block->status.hw)
3230 continue;
3231
3232 if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3233 r = block->version->funcs->resume(adev);
3234 else
3235 r = block->version->funcs->hw_init(adev);
3236
3237 DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded")printk("\0016" "[" "drm" "] " "RE-INIT-late: %s %s\n", block->
version->funcs->name, r?"failed":"succeeded")
;
3238 if (r)
3239 return r;
3240 block->status.hw = true1;
3241 }
3242 }
3243
3244 return 0;
3245}
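/*
 * Editorial sketch with hypothetical types: the ordered re-init pattern used
 * by the SR-IOV paths above. The outer loop walks a fixed ip_order[] table
 * and the inner loop finds the matching, not-yet-initialized block, so blocks
 * come back up in a prescribed order rather than in the order they happen to
 * sit in the device's block array.
 */
struct reinit_block_sketch {
	int type;
	int valid;
	int hw;
	int (*hw_init)(void *handle);
};

static int reinit_in_order_sketch(const int *order, int order_len,
    struct reinit_block_sketch *blocks, int num, void *handle)
{
	int i, j, r;

	for (i = 0; i < order_len; i++) {
		for (j = 0; j < num; j++) {
			if (blocks[j].type != order[i] ||
			    !blocks[j].valid || blocks[j].hw)
				continue;
			r = blocks[j].hw_init(handle);
			if (r)
				return r;
			blocks[j].hw = 1;
		}
	}
	return 0;
}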
3246
3247/**
3248 * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3249 *
3250 * @adev: amdgpu_device pointer
3251 *
3252 * First resume function for hardware IPs. The list of all the hardware
3253 * IPs that make up the asic is walked and the resume callbacks are run for
3254 * COMMON, GMC, and IH. resume puts the hardware into a functional state
3255 * after a suspend and updates the software state as necessary. This
3256 * function is also used for restoring the GPU after a GPU reset.
3257 * Returns 0 on success, negative error code on failure.
3258 */
3259static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3260{
3261 int i, r;
3262
3263 for (i = 0; i < adev->num_ip_blocks; i++) {
3264 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3265 continue;
3266 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3267 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3268 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3269 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))) {
3270
3271 r = adev->ip_blocks[i].version->funcs->resume(adev);
3272 if (r) {
3273 DRM_ERROR("resume of IP block <%s> failed %d\n",__drm_err("resume of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
3274 adev->ip_blocks[i].version->funcs->name, r)__drm_err("resume of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
3275 return r;
3276 }
3277 adev->ip_blocks[i].status.hw = true1;
3278 }
3279 }
3280
3281 return 0;
3282}
3283
3284/**
3285 * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3286 *
3287 * @adev: amdgpu_device pointer
3288 *
3289 * Second resume function for hardware IPs. The list of all the hardware
3290 * IPs that make up the asic is walked and the resume callbacks are run for
3291 * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3292 * functional state after a suspend and updates the software state as
3293 * necessary. This function is also used for restoring the GPU after a GPU
3294 * reset.
3295 * Returns 0 on success, negative error code on failure.
3296 */
3297static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3298{
3299 int i, r;
3300
3301 for (i = 0; i < adev->num_ip_blocks; i++) {
3302 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3303 continue;
3304 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3305 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3306 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3307 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3308 continue;
3309 r = adev->ip_blocks[i].version->funcs->resume(adev);
3310 if (r) {
3311 DRM_ERROR("resume of IP block <%s> failed %d\n",__drm_err("resume of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
3312 adev->ip_blocks[i].version->funcs->name, r)__drm_err("resume of IP block <%s> failed %d\n", adev->
ip_blocks[i].version->funcs->name, r)
;
3313 return r;
3314 }
3315 adev->ip_blocks[i].status.hw = true1;
3316
3317 if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3318 /* disable gfxoff for IP resume. The gfxoff will be re-enabled in
3319 * amdgpu_device_resume() after IP resume.
3320 */
3321 amdgpu_gfx_off_ctrl(adev, false0);
3322 DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n")___drm_dbg(((void *)0), DRM_UT_CORE, "will disable gfxoff for re-initializing other blocks\n"
)
;
3323 }
3324
3325 }
3326
3327 return 0;
3328}
3329
3330/**
3331 * amdgpu_device_ip_resume - run resume for hardware IPs
3332 *
3333 * @adev: amdgpu_device pointer
3334 *
3335 * Main resume function for hardware IPs. The hardware IPs
3336 * are split into two resume functions because they are
3337 * also used in recovering from a GPU reset and some additional
3338 * steps need to be taken between them. In this case (S3/S4) they are
3339 * run sequentially.
3340 * Returns 0 on success, negative error code on failure.
3341 */
3342static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3343{
3344 int r;
3345
3346 r = amdgpu_amdkfd_resume_iommu(adev);
3347 if (r)
3348 return r;
3349
3350 r = amdgpu_device_ip_resume_phase1(adev);
3351 if (r)
3352 return r;
3353
3354 r = amdgpu_device_fw_loading(adev);
3355 if (r)
3356 return r;
3357
3358 r = amdgpu_device_ip_resume_phase2(adev);
3359
3360 return r;
3361}
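/*
 * Editorial sketch (placeholder function pointers, not the driver's real
 * API): the resume sequencing above in miniature. Each stage must succeed
 * before the next runs, since phase 2 depends on the memory-controller and
 * interrupt blocks brought up in phase 1 and on the firmware loaded in
 * between.
 */
static int resume_sequence_sketch(int (*phase1)(void *), int (*load_fw)(void *),
    int (*phase2)(void *), void *handle)
{
	int r;

	r = phase1(handle);
	if (r)
		return r;
	r = load_fw(handle);
	if (r)
		return r;
	return phase2(handle);
}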
3362
3363/**
3364 * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3365 *
3366 * @adev: amdgpu_device pointer
3367 *
3368 * Query the VBIOS data tables to determine if the board supports SR-IOV.
3369 */
3370static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3371{
3372 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
3373 if (adev->is_atom_fw) {
3374 if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3375 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS(1 << 0);
3376 } else {
3377 if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3378 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS(1 << 0);
3379 }
3380
3381 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS(1 << 0)))
3382 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3383 }
3384}
3385
3386/**
3387 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3388 *
3389 * @asic_type: AMD asic type
3390 *
3391 * Check if there is DC (new modesetting infrastructure) support for an asic.
3392 * returns true if DC has support, false if not.
3393 */
3394bool_Bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3395{
3396 switch (asic_type) {
3397#ifdef CONFIG_DRM_AMDGPU_SI
3398 case CHIP_HAINAN:
3399#endif
3400 case CHIP_TOPAZ:
3401 /* chips with no display hardware */
3402 return false0;
3403#if defined(CONFIG_DRM_AMD_DC1)
3404 case CHIP_TAHITI:
3405 case CHIP_PITCAIRN:
3406 case CHIP_VERDE:
3407 case CHIP_OLAND:
3408 /*
3409 * We have systems in the wild with these ASICs that require
3410 * LVDS and VGA support which is not supported with DC.
3411 *
3412 * Fall back to the non-DC driver here by default so as not to
3413 * cause regressions.
3414 */
3415#if defined(CONFIG_DRM_AMD_DC_SI)
3416 return amdgpu_dc > 0;
3417#else
3418 return false0;
3419#endif
3420 case CHIP_BONAIRE:
3421 case CHIP_KAVERI:
3422 case CHIP_KABINI:
3423 case CHIP_MULLINS:
3424 /*
3425 * We have systems in the wild with these ASICs that require
3426 * VGA support which is not supported with DC.
3427 *
3428 * Fall back to the non-DC driver here by default so as not to
3429 * cause regressions.
3430 */
3431 return amdgpu_dc > 0;
3432 default:
3433 return amdgpu_dc != 0;
3434#else
3435 default:
3436 if (amdgpu_dc > 0)
3437 DRM_INFO_ONCE("Display Core has been requested via kernel parameter "({ static int __warned; if (!__warned) { printk("\0016" "[" "drm"
"] " "Display Core has been requested via kernel parameter "
"but isn't supported by ASIC, ignoring\n"); __warned = 1; } }
)
3438 "but isn't supported by ASIC, ignoring\n")({ static int __warned; if (!__warned) { printk("\0016" "[" "drm"
"] " "Display Core has been requested via kernel parameter "
"but isn't supported by ASIC, ignoring\n"); __warned = 1; } }
)
;
3439 return false0;
3440#endif
3441 }
3442}
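/*
 * Editorial note: for the legacy families above, the amdgpu_dc module
 * parameter (-1 = auto by default) must be explicitly positive to opt in to
 * DC; both the default and 0 keep the non-DC path. A hypothetical one-line
 * restatement of that check:
 */
static _Bool legacy_asic_wants_dc_sketch(int amdgpu_dc_param)
{
	return amdgpu_dc_param > 0;
}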
3443
3444/**
3445 * amdgpu_device_has_dc_support - check if dc is supported
3446 *
3447 * @adev: amdgpu_device pointer
3448 *
3449 * Returns true for supported, false for not supported
3450 */
3451bool_Bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3452{
3453 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) ||
3454 adev->enable_virtual_display ||
3455 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3456 return false0;
3457
3458 return amdgpu_device_asic_has_dc_support(adev->asic_type);
3459}
3460
3461static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3462{
3463 struct amdgpu_device *adev =
3464 container_of(__work, struct amdgpu_device, xgmi_reset_work)({ const __typeof( ((struct amdgpu_device *)0)->xgmi_reset_work
) *__mptr = (__work); (struct amdgpu_device *)( (char *)__mptr
- __builtin_offsetof(struct amdgpu_device, xgmi_reset_work) )
;})
;
3465 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3466
3467 /* It's a bug to not have a hive within this function */
3468 if (WARN_ON(!hive)({ int __ret = !!(!hive); if (__ret) printf("WARNING %s failed at %s:%d\n"
, "!hive", "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c"
, 3468); __builtin_expect(!!(__ret), 0); })
)
3469 return;
3470
3471 /*
3472 * Use task barrier to synchronize all xgmi reset works across the
3473 * hive. task_barrier_enter and task_barrier_exit will block
3474 * until all the threads running the xgmi reset works reach
3475 * those points. task_barrier_full will do both blocks.
3476 */
3477 if (amdgpu_asic_reset_method(adev)(adev)->asic_funcs->reset_method((adev)) == AMD_RESET_METHOD_BACO) {
3478
3479 task_barrier_enter(&hive->tb);
3480 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3481
3482 if (adev->asic_reset_res)
3483 goto fail;
3484
3485 task_barrier_exit(&hive->tb);
3486 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3487
3488 if (adev->asic_reset_res)
3489 goto fail;
3490
3491 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3492 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3493 adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3494 } else {
3495
3496 task_barrier_full(&hive->tb);
3497 adev->asic_reset_res = amdgpu_asic_reset(adev)(adev)->asic_funcs->reset((adev));
3498 }
3499
3500fail:
3501 if (adev->asic_reset_res)
3502 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",printk("\0014" "[" "drm" "] " "ASIC reset failed with error, %d for drm dev, %s"
, adev->asic_reset_res, adev_to_drm(adev)->unique)
3503 adev->asic_reset_res, adev_to_drm(adev)->unique)printk("\0014" "[" "drm" "] " "ASIC reset failed with error, %d for drm dev, %s"
, adev->asic_reset_res, adev_to_drm(adev)->unique)
;
3504 amdgpu_put_xgmi_hive(hive);
3505}
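/*
 * Editorial sketch: the two synchronization points above, restated with a
 * POSIX barrier so the rendezvous is easier to see. Every node in the hive
 * waits once before entering BACO and once more before exiting it, which is
 * what task_barrier_enter()/task_barrier_exit() provide; per the comment
 * above, task_barrier_full() performs both blocks back to back. The function
 * pointers here are placeholders.
 */
#include <pthread.h>

static int baco_reset_one_node_sketch(pthread_barrier_t *hive_barrier,
    int (*baco_enter)(void *), int (*baco_exit)(void *), void *dev)
{
	int r;

	pthread_barrier_wait(hive_barrier);	/* "enter": wait for all nodes */
	r = baco_enter(dev);
	if (r)
		return r;

	pthread_barrier_wait(hive_barrier);	/* "exit": all nodes have entered */
	return baco_exit(dev);
}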
3506
3507static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3508{
3509 char *input = amdgpu_lockup_timeout;
3510 char *timeout_setting = NULL((void *)0);
3511 int index = 0;
3512 long timeout;
3513 int ret = 0;
3514
3515 /*
3516 * By default, the timeout for non-compute jobs is 10000 ms
3517 * and 60000 ms for compute jobs.
3518 * In SR-IOV or passthrough mode, the timeout for compute
3519 * jobs is 60000 ms by default.
3520 */
3521 adev->gfx_timeout = msecs_to_jiffies(10000)(((uint64_t)(10000)) * hz / 1000);
3522 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3523 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
3524 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev)((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) ?
3525 msecs_to_jiffies(60000)(((uint64_t)(60000)) * hz / 1000) : msecs_to_jiffies(10000)(((uint64_t)(10000)) * hz / 1000);
3526 else
3527 adev->compute_timeout = msecs_to_jiffies(60000)(((uint64_t)(60000)) * hz / 1000);
3528
3529#ifdef notyet
3530 if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH256)) {
3531 while ((timeout_setting = strsep(&input, ",")) &&
3532 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH256)) {
3533 ret = kstrtol(timeout_setting, 0, &timeout);
3534 if (ret)
3535 return ret;
3536
3537 if (timeout == 0) {
3538 index++;
3539 continue;
3540 } else if (timeout < 0) {
3541 timeout = MAX_SCHEDULE_TIMEOUT(0x7fffffff);
3542 dev_warn(adev->dev, "lockup timeout disabled")printf("drm:pid%d:%s *WARNING* " "lockup timeout disabled", (
{struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3543 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3544 } else {
3545 timeout = msecs_to_jiffies(timeout)(((uint64_t)(timeout)) * hz / 1000);
3546 }
3547
3548 switch (index++) {
3549 case 0:
3550 adev->gfx_timeout = timeout;
3551 break;
3552 case 1:
3553 adev->compute_timeout = timeout;
3554 break;
3555 case 2:
3556 adev->sdma_timeout = timeout;
3557 break;
3558 case 3:
3559 adev->video_timeout = timeout;
3560 break;
3561 default:
3562 break;
3563 }
3564 }
3565 /*
3566 * There is only one value specified and
3567 * it should apply to all non-compute jobs.
3568 */
3569 if (index == 1) {
3570 adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3571 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) || amdgpu_passthrough(adev)((adev)->virt.caps & (1 << 3)))
3572 adev->compute_timeout = adev->gfx_timeout;
3573 }
3574 }
3575#endif
3576
3577 return ret;
3578}
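/*
 * Editorial sketch: a standalone illustration of how the comma-separated
 * amdgpu.lockup_timeout string maps onto the four timeout slots in the
 * (currently "#ifdef notyet") parser above: position 0 is gfx, 1 compute,
 * 2 sdma, 3 video; a value of 0 keeps the default and a negative value
 * disables the timeout.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char input[] = "10000,60000,10000,10000";
	const char *slot[] = { "gfx", "compute", "sdma", "video" };
	char *p = input, *tok;
	int index = 0;

	while ((tok = strsep(&p, ",")) != NULL && index < 4) {
		long timeout = strtol(tok, NULL, 0);

		printf("%s timeout: %ld ms\n", slot[index], timeout);
		index++;
	}
	return 0;
}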
3579
3580/**
3581 * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3582 *
3583 * @adev: amdgpu_device pointer
3584 *
3585 * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode
3586 */
3587static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3588{
3589#ifdef notyet
3590 struct iommu_domain *domain;
3591
3592 domain = iommu_get_domain_for_dev(adev->dev);
3593 if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3594#endif
3595 adev->ram_is_direct_mapped = true1;
3596}
3597
3598static const struct attribute *amdgpu_dev_attributes[] = {
3599 &dev_attr_product_name.attr,
3600 &dev_attr_product_number.attr,
3601 &dev_attr_serial_number.attr,
3602 &dev_attr_pcie_replay_count.attr,
3603 NULL((void *)0)
3604};
3605
3606/**
3607 * amdgpu_device_init - initialize the driver
3608 *
3609 * @adev: amdgpu_device pointer
3610 * @flags: driver flags
3611 *
3612 * Initializes the driver info and hw (all asics).
3613 * Returns 0 for success or an error on failure.
3614 * Called at driver startup.
3615 */
3616int amdgpu_device_init(struct amdgpu_device *adev,
3617 uint32_t flags)
3618{
3619 struct drm_device *ddev = adev_to_drm(adev);
3620 struct pci_dev *pdev = adev->pdev;
3621 int r, i;
3622 bool_Bool px = false0;
3623 u32 max_MBps;
3624 int tmp;
3625
3626 adev->shutdown = false0;
3627 adev->flags = flags;
3628
3629 if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3630 adev->asic_type = amdgpu_force_asic_type;
3631 else
3632 adev->asic_type = flags & AMD_ASIC_MASK;
3633
3634 adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT100000;
3635 if (amdgpu_emu_mode == 1)
3636 adev->usec_timeout *= 10;
3637 adev->gmc.gart_size = 512 * 1024 * 1024;
3638 adev->accel_working = false0;
3639 adev->num_rings = 0;
3640 RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub())do { (adev->gang_submit) = (dma_fence_get_stub()); } while
(0)
;
3641 adev->mman.buffer_funcs = NULL((void *)0);
3642 adev->mman.buffer_funcs_ring = NULL((void *)0);
3643 adev->vm_manager.vm_pte_funcs = NULL((void *)0);
3644 adev->vm_manager.vm_pte_num_scheds = 0;
3645 adev->gmc.gmc_funcs = NULL((void *)0);
3646 adev->harvest_ip_mask = 0x0;
3647 adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS28);
3648 bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES128);
3649
3650 adev->smc_rreg = &amdgpu_invalid_rreg;
3651 adev->smc_wreg = &amdgpu_invalid_wreg;
3652 adev->pcie_rreg = &amdgpu_invalid_rreg;
3653 adev->pcie_wreg = &amdgpu_invalid_wreg;
3654 adev->pciep_rreg = &amdgpu_invalid_rreg;
3655 adev->pciep_wreg = &amdgpu_invalid_wreg;
3656 adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3657 adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3658 adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3659 adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3660 adev->didt_rreg = &amdgpu_invalid_rreg;
3661 adev->didt_wreg = &amdgpu_invalid_wreg;
3662 adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3663 adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3664 adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3665 adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3666
3667 DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",printk("\0016" "[" "drm" "] " "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n"
, amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev
->device, pdev->subsystem_vendor, pdev->subsystem_device
, pdev->revision)
3668 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,printk("\0016" "[" "drm" "] " "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n"
, amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev
->device, pdev->subsystem_vendor, pdev->subsystem_device
, pdev->revision)
3669 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision)printk("\0016" "[" "drm" "] " "initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n"
, amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev
->device, pdev->subsystem_vendor, pdev->subsystem_device
, pdev->revision)
;
3670
3671 /* mutex initializations are all done here so we
3672 * can call functions without running into locking issues */
3673 rw_init(&adev->firmware.mutex, "agfw")_rw_init_flags(&adev->firmware.mutex, "agfw", 0, ((void
*)0))
;
3674 rw_init(&adev->pm.mutex, "agpm")_rw_init_flags(&adev->pm.mutex, "agpm", 0, ((void *)0)
)
;
3675 rw_init(&adev->gfx.gpu_clock_mutex, "gfxclk")_rw_init_flags(&adev->gfx.gpu_clock_mutex, "gfxclk", 0
, ((void *)0))
;
3676 rw_init(&adev->srbm_mutex, "srbm")_rw_init_flags(&adev->srbm_mutex, "srbm", 0, ((void *)
0))
;
3677 rw_init(&adev->gfx.pipe_reserve_mutex, "pipers")_rw_init_flags(&adev->gfx.pipe_reserve_mutex, "pipers"
, 0, ((void *)0))
;
3678 rw_init(&adev->gfx.gfx_off_mutex, "gfxoff")_rw_init_flags(&adev->gfx.gfx_off_mutex, "gfxoff", 0, (
(void *)0))
;
3679 rw_init(&adev->grbm_idx_mutex, "grbmidx")_rw_init_flags(&adev->grbm_idx_mutex, "grbmidx", 0, ((
void *)0))
;
3680 rw_init(&adev->mn_lock, "agpumn")_rw_init_flags(&adev->mn_lock, "agpumn", 0, ((void *)0
))
;
3681 rw_init(&adev->virt.vf_errors.lock, "vferr")_rw_init_flags(&adev->virt.vf_errors.lock, "vferr", 0,
((void *)0))
;
3682 hash_init(adev->mn_hash)__hash_init(adev->mn_hash, (sizeof((adev->mn_hash)) / sizeof
((adev->mn_hash)[0])))
;
3683 rw_init(&adev->psp.mutex, "agpsp")_rw_init_flags(&adev->psp.mutex, "agpsp", 0, ((void *)
0))
;
3684 rw_init(&adev->notifier_lock, "agnf")_rw_init_flags(&adev->notifier_lock, "agnf", 0, ((void
*)0))
;
3685 rw_init(&adev->pm.stable_pstate_ctx_lock, "agps")_rw_init_flags(&adev->pm.stable_pstate_ctx_lock, "agps"
, 0, ((void *)0))
;
3686 rw_init(&adev->benchmark_mutex, "agbm")_rw_init_flags(&adev->benchmark_mutex, "agbm", 0, ((void
*)0))
;
3687
3688 amdgpu_device_init_apu_flags(adev);
3689
3690 r = amdgpu_device_check_arguments(adev);
3691 if (r)
3692 return r;
3693
3694 mtx_init(&adev->mmio_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
mmio_idx_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
3695 mtx_init(&adev->smc_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
smc_idx_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
3696 mtx_init(&adev->pcie_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
pcie_idx_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
3697 mtx_init(&adev->uvd_ctx_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
uvd_ctx_idx_lock), ((((0x9)) > 0x0 && ((0x9)) <
0x9) ? 0x9 : ((0x9)))); } while (0)
;
3698 mtx_init(&adev->didt_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
didt_idx_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
3699 mtx_init(&adev->gc_cac_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
gc_cac_idx_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
3700 mtx_init(&adev->se_cac_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
se_cac_idx_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
3701 mtx_init(&adev->audio_endpt_idx_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
audio_endpt_idx_lock), ((((0x9)) > 0x0 && ((0x9)) <
0x9) ? 0x9 : ((0x9)))); } while (0)
;
3702 mtx_init(&adev->mm_stats.lock, IPL_NONE)do { (void)(((void *)0)); (void)(0); __mtx_init((&adev->
mm_stats.lock), ((((0x0)) > 0x0 && ((0x0)) < 0x9
) ? 0x9 : ((0x0)))); } while (0)
;
3703
3704 INIT_LIST_HEAD(&adev->shadow_list);
3705 rw_init(&adev->shadow_list_lock, "sdwlst")_rw_init_flags(&adev->shadow_list_lock, "sdwlst", 0, (
(void *)0))
;
3706
3707 INIT_LIST_HEAD(&adev->reset_list);
3708
3709 INIT_LIST_HEAD(&adev->ras_list);
3710
3711 INIT_DELAYED_WORK(&adev->delayed_init_work,
3712 amdgpu_device_delayed_init_work_handler);
3713 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3714 amdgpu_device_delay_enable_gfx_off);
3715
3716 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3717
3718 adev->gfx.gfx_off_req_count = 1;
3719 adev->gfx.gfx_off_residency = 0;
3720 adev->gfx.gfx_off_entrycount = 0;
3721 adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3722
3723 atomic_set(&adev->throttling_logging_enabled, 1)({ typeof(*(&adev->throttling_logging_enabled)) __tmp =
((1)); *(volatile typeof(*(&adev->throttling_logging_enabled
)) *)&(*(&adev->throttling_logging_enabled)) = __tmp
; __tmp; })
;
3724 /*
3725 * If throttling continues, logging will be performed every minute
3726 * to avoid log flooding. "-1" is subtracted since the thermal
3727 * throttling interrupt comes every second. Thus, the total logging
3728 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting
3729 * for throttling interrupt) = 60 seconds.
3730 */
3731 ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZhz, 1);
3732 ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE(1 << 0));
3733
3734#ifdef __linux__
3735 /* Registers mapping */
3736 /* TODO: block userspace mapping of io register */
3737 if (adev->asic_type >= CHIP_BONAIRE) {
3738 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3739 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3740 } else {
3741 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3742 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3743 }
3744
3745 for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3746 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN)({ typeof(*(&adev->pm.pwr_state[i])) __tmp = ((POWER_STATE_UNKNOWN
)); *(volatile typeof(*(&adev->pm.pwr_state[i])) *)&
(*(&adev->pm.pwr_state[i])) = __tmp; __tmp; })
;
3747
3748 adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3749 if (adev->rmmio == NULL((void *)0)) {
3750 return -ENOMEM12;
3751 }
3752#endif
3753 DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base)printk("\0016" "[" "drm" "] " "register mmio base: 0x%08X\n",
(uint32_t)adev->rmmio_base)
;
3754 DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size)printk("\0016" "[" "drm" "] " "register mmio size: %u\n", (unsigned
)adev->rmmio_size)
;
3755
3756 amdgpu_device_get_pcie_info(adev);
3757
3758 if (amdgpu_mcbp)
3759 DRM_INFO("MCBP is enabled\n")printk("\0016" "[" "drm" "] " "MCBP is enabled\n");
3760
3761 /*
3762 * The reset domain needs to be present early, before the XGMI hive is
3763 * discovered (if any) and initialized, so the reset sem and in_gpu reset
3764 * flag can be used early on during init and before calling RREG32.
3765 */
3766 adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3767 if (!adev->reset_domain)
3768 return -ENOMEM12;
3769
3770 /* detect hw virtualization here */
3771 amdgpu_detect_virtualization(adev);
3772
3773 r = amdgpu_device_get_job_timeout_settings(adev);
3774 if (r) {
3775 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n")printf("drm:pid%d:%s *ERROR* " "invalid lockup_timeout parameter syntax\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3776 return r;
3777 }
3778
3779 /* early init functions */
3780 r = amdgpu_device_ip_early_init(adev);
3781 if (r)
3782 return r;
3783
3784 /* Get rid of things like offb */
3785 r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3786 if (r)
3787 return r;
3788
3789 /* Enable TMZ based on IP_VERSION */
3790 amdgpu_gmc_tmz_set(adev);
3791
3792 amdgpu_gmc_noretry_set(adev);
3793 /* Need to get xgmi info early to decide the reset behavior*/
3794 if (adev->gmc.xgmi.supported) {
3795 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3796 if (r)
3797 return r;
3798 }
3799
3800 /* enable PCIE atomic ops */
3801#ifdef notyet
3802 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
3803 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3804 adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3805 (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3806 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
3807 * internal path natively supports atomics, so set have_atomics_support to true.
3808 */
3809 else if ((adev->flags & AMD_IS_APU) &&
3810 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)(((9) << 16) | ((0) << 8) | (0))))
3811 adev->have_atomics_support = true1;
3812 else
3813 adev->have_atomics_support =
3814 !pci_enable_atomic_ops_to_root(adev->pdev,
3815 PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3816 PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3817 if (!adev->have_atomics_support)
3818 dev_info(adev->dev, "PCIE atomic ops is not supported\n")do { } while(0);
3819#else
3820 /* APUs with gfx9 onwards don't rely on PCIe atomics; rather, an
3821 * internal path natively supports atomics, so set have_atomics_support to true.
3822 */
3823 if ((adev->flags & AMD_IS_APU) &&
3824 (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)(((9) << 16) | ((0) << 8) | (0))))
3825 adev->have_atomics_support = true1;
3826 else
3827 adev->have_atomics_support = false0;
3828#endif
3829
3830 /* doorbell bar mapping and doorbell index init*/
3831 amdgpu_device_doorbell_init(adev);
3832
3833 if (amdgpu_emu_mode == 1) {
3834 /* post the asic on emulation mode */
3835 emu_soc_asic_init(adev);
3836 goto fence_driver_init;
3837 }
3838
3839 amdgpu_reset_init(adev);
3840
3841 /* detect whether we are running with an SR-IOV vbios */
3842 amdgpu_device_detect_sriov_bios(adev);
3843
3844 /* check if we need to reset the asic
3845 * E.g., driver was not cleanly unloaded previously, etc.
3846 */
3847 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) && amdgpu_asic_need_reset_on_init(adev)(adev)->asic_funcs->need_reset_on_init((adev))) {
3848 if (adev->gmc.xgmi.num_physical_nodes) {
3849 dev_info(adev->dev, "Pending hive reset.\n")do { } while(0);
3850 adev->gmc.xgmi.pending_reset = true1;
3851 /* Only need to init the blocks necessary for the SMU to handle the reset */
3852 for (i = 0; i < adev->num_ip_blocks; i++) {
3853 if (!adev->ip_blocks[i].status.valid)
3854 continue;
3855 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3856 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3857 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3858 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3859 DRM_DEBUG("IP %s disabled for hw_init.\n",___drm_dbg(((void *)0), DRM_UT_CORE, "IP %s disabled for hw_init.\n"
, adev->ip_blocks[i].version->funcs->name)
3860 adev->ip_blocks[i].version->funcs->name)___drm_dbg(((void *)0), DRM_UT_CORE, "IP %s disabled for hw_init.\n"
, adev->ip_blocks[i].version->funcs->name)
;
3861 adev->ip_blocks[i].status.hw = true1;
3862 }
3863 }
3864 } else {
3865 tmp = amdgpu_reset_method;
3866 /* It should do a default reset when loading or reloading the driver,
3867 * regardless of the module parameter reset_method.
3868 */
3869 amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3870 r = amdgpu_asic_reset(adev)(adev)->asic_funcs->reset((adev));
3871 amdgpu_reset_method = tmp;
3872 if (r) {
3873 dev_err(adev->dev, "asic reset on init failed\n")printf("drm:pid%d:%s *ERROR* " "asic reset on init failed\n",
({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3874 goto failed;
3875 }
3876 }
3877 }
3878
3879 pci_enable_pcie_error_reporting(adev->pdev);
3880
3881 /* Post card if necessary */
3882 if (amdgpu_device_need_post(adev)) {
3883 if (!adev->bios) {
3884 dev_err(adev->dev, "no vBIOS found\n")printf("drm:pid%d:%s *ERROR* " "no vBIOS found\n", ({struct cpu_info
*__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof
(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->
ps_pid, __func__)
;
3885 r = -EINVAL22;
3886 goto failed;
3887 }
3888 DRM_INFO("GPU posting now...\n")printk("\0016" "[" "drm" "] " "GPU posting now...\n");
3889 r = amdgpu_device_asic_init(adev);
3890 if (r) {
3891 dev_err(adev->dev, "gpu post error!\n")printf("drm:pid%d:%s *ERROR* " "gpu post error!\n", ({struct cpu_info
*__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof
(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->
ps_pid, __func__)
;
3892 goto failed;
3893 }
3894 }
3895
3896 if (adev->is_atom_fw) {
3897 /* Initialize clocks */
3898 r = amdgpu_atomfirmware_get_clock_info(adev);
3899 if (r) {
3900 dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_atomfirmware_get_clock_info failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3901 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3902 goto failed;
3903 }
3904 } else {
3905 /* Initialize clocks */
3906 r = amdgpu_atombios_get_clock_info(adev);
3907 if (r) {
3908 dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_atombios_get_clock_info failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3909 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3910 goto failed;
3911 }
3912 /* init i2c buses */
3913 if (!amdgpu_device_has_dc_support(adev))
3914 amdgpu_atombios_i2c_init(adev);
3915 }
3916
3917fence_driver_init:
3918 /* Fence driver */
3919 r = amdgpu_fence_driver_sw_init(adev);
3920 if (r) {
3921 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_fence_driver_sw_init failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3922 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3923 goto failed;
3924 }
3925
3926 /* init the mode config */
3927 drm_mode_config_init(adev_to_drm(adev));
3928
3929 r = amdgpu_device_ip_init(adev);
3930 if (r) {
3931 dev_err(adev->dev, "amdgpu_device_ip_init failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_device_ip_init failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
3932 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3933 goto release_ras_con;
3934 }
3935
3936 amdgpu_fence_driver_hw_init(adev);
3937
3938 dev_info(adev->dev,do { } while(0)
3939 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",do { } while(0)
3940 adev->gfx.config.max_shader_engines,do { } while(0)
3941 adev->gfx.config.max_sh_per_se,do { } while(0)
3942 adev->gfx.config.max_cu_per_sh,do { } while(0)
3943 adev->gfx.cu_info.number)do { } while(0);
3944
3945#ifdef __OpenBSD__1
3946{
3947 const char *chip_name;
3948 uint32_t version = adev->ip_versions[GC_HWIP][0];
3949 int maj, min, rev;
3950
3951 switch (adev->asic_type) {
3952 case CHIP_RAVEN:
3953 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
3954 chip_name = "RAVEN2";
3955 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
3956 chip_name = "PICASSO";
3957 else
3958 chip_name = "RAVEN";
3959 break;
3960 case CHIP_RENOIR:
3961 if (adev->apu_flags & AMD_APU_IS_RENOIR)
3962 chip_name = "RENOIR";
3963 else
3964 chip_name = "GREEN_SARDINE";
3965 break;
3966 default:
3967 chip_name = amdgpu_asic_name[adev->asic_type];
3968 }
3969
3970 printf("%s: %s", adev->self.dv_xname, chip_name);
3971 /* show graphics/compute ip block version, not set on < GFX9 */
3972 if (version) {
3973 maj = IP_VERSION_MAJ(version)((version) >> 16);
3974 min = IP_VERSION_MIN(version)(((version) >> 8) & 0xFF);
3975 rev = IP_VERSION_REV(version)((version) & 0xFF);
3976 printf(" GC %d.%d.%d", maj, min, rev);
3977 }
3978 printf(" %d CU rev 0x%02x\n", adev->gfx.cu_info.number, adev->rev_id);
3979}
3980#endif
3981
3982 adev->accel_working = true1;
3983
3984 amdgpu_vm_check_compute_bug(adev);
3985
3986 /* Initialize the buffer migration limit. */
3987 if (amdgpu_moverate >= 0)
3988 max_MBps = amdgpu_moverate;
3989 else
3990 max_MBps = 8; /* Allow 8 MB/s. */
3991 /* Get a log2 for easy divisions. */
3992 adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps))((sizeof((((1u)>(max_MBps))?(1u):(max_MBps))) <= 4) ? (
fls((((1u)>(max_MBps))?(1u):(max_MBps))) - 1) : (flsl((((1u
)>(max_MBps))?(1u):(max_MBps))) - 1))
;
3993
3994 r = amdgpu_pm_sysfs_init(adev);
3995 if (r) {
3996 adev->pm_sysfs_en = false0;
3997 DRM_ERROR("registering pm debugfs failed (%d).\n", r)__drm_err("registering pm debugfs failed (%d).\n", r);
3998 } else
3999 adev->pm_sysfs_en = true1;
4000
4001 r = amdgpu_ucode_sysfs_init(adev);
4002 if (r) {
4003 adev->ucode_sysfs_en = false0;
4004 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r)__drm_err("Creating firmware sysfs failed (%d).\n", r);
4005 } else
4006 adev->ucode_sysfs_en = true1;
4007
4008 r = amdgpu_psp_sysfs_init(adev);
4009 if (r) {
4010 adev->psp_sysfs_en = false0;
4011 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
4012 DRM_ERROR("Creating psp sysfs failed\n")__drm_err("Creating psp sysfs failed\n");
4013 } else
4014 adev->psp_sysfs_en = true1;
4015
4016 /*
4017 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4018 * Otherwise the mgpu fan boost feature will be skipped due to the
4019 * gpu instance is counted less.
4020 */
4021 amdgpu_register_gpu_instance(adev);
4022
4023 /* enable clockgating, etc. after ib tests, etc. since some blocks require
4024 * explicit gating rather than handling it automatically.
4025 */
4026 if (!adev->gmc.xgmi.pending_reset) {
4027 r = amdgpu_device_ip_late_init(adev);
4028 if (r) {
4029 dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_device_ip_late_init failed\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4030 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4031 goto release_ras_con;
4032 }
4033 /* must succeed. */
4034 amdgpu_ras_resume(adev);
4035 queue_delayed_work(system_wq, &adev->delayed_init_work,
4036 msecs_to_jiffies(AMDGPU_RESUME_MS)(((uint64_t)(2000)) * hz / 1000));
4037 }
4038
4039 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4040 amdgpu_virt_release_full_gpu(adev, true1);
4041 flush_delayed_work(&adev->delayed_init_work);
4042 }
4043
4044 r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes)0;
4045 if (r)
4046 dev_err(adev->dev, "Could not create amdgpu device attr\n")printf("drm:pid%d:%s *ERROR* " "Could not create amdgpu device attr\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4047
4048 if (IS_ENABLED(CONFIG_PERF_EVENTS)0)
4049 r = amdgpu_pmu_init(adev);
4050 if (r)
4051 dev_err(adev->dev, "amdgpu_pmu_init failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu_pmu_init failed\n", ({
struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4052
4053 /* Keep the stored PCI config space at hand for restore after a sudden PCI error */
4054 if (amdgpu_device_cache_pci_state(adev->pdev))
4055 pci_restore_state(pdev);
4056
4057 /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4058 /* this will fail for cards that aren't VGA class devices, just
4059 * ignore it */
4060#ifdef notyet
4061 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA((0x03 << 8) | 0x00))
4062 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4063#endif
4064
4065 px = amdgpu_device_supports_px(ddev);
4066
4067 if (px || (!dev_is_removable(&adev->pdev->dev)0 &&
4068 apple_gmux_detect(NULL((void *)0), NULL((void *)0))))
4069 vga_switcheroo_register_client(adev->pdev,
4070 &amdgpu_switcheroo_ops, px);
4071
4072 if (px)
4073 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4074
4075 if (adev->gmc.xgmi.pending_reset)
4076 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4077 msecs_to_jiffies(AMDGPU_RESUME_MS)(((uint64_t)(2000)) * hz / 1000));
4078
4079 amdgpu_device_check_iommu_direct_map(adev);
4080
4081 return 0;
4082
4083release_ras_con:
4084 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
4085 amdgpu_virt_release_full_gpu(adev, true1);
4086
4087 /* failed in exclusive mode due to timeout */
4088 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) &&
4089 !amdgpu_sriov_runtime(adev)((adev)->virt.caps & (1 << 4)) &&
4090 amdgpu_virt_mmio_blocked(adev) &&
4091 !amdgpu_virt_wait_reset(adev)) {
4092 dev_err(adev->dev, "VF exclusive mode timeout\n")printf("drm:pid%d:%s *ERROR* " "VF exclusive mode timeout\n",
({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4093 /* Don't send request since VF is inactive. */
4094 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME(1 << 4);
4095 adev->virt.ops = NULL((void *)0);
4096 r = -EAGAIN35;
4097 }
4098 amdgpu_release_ras_context(adev);
4099
4100failed:
4101 amdgpu_vf_error_trans_all(adev);
4102
4103 return r;
4104}
4105
4106static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4107{
4108 STUB()do { printf("%s: stub\n", __func__); } while(0);
4109#ifdef notyet
4110 /* Clear all CPU mappings pointing to this device */
4111 unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4112#endif
4113
4114 /* Unmap all mapped bars - Doorbell, registers and VRAM */
4115 amdgpu_device_doorbell_fini(adev);
4116
4117#ifdef __linux__
4118 iounmap(adev->rmmio);
4119 adev->rmmio = NULL((void *)0);
4120 if (adev->mman.aper_base_kaddr)
4121 iounmap(adev->mman.aper_base_kaddr);
4122 adev->mman.aper_base_kaddr = NULL((void *)0);
4123#else
4124 if (adev->rmmio_size > 0)
4125 bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh,
4126 adev->rmmio_size);
4127 adev->rmmio_size = 0;
4128 adev->rmmio = NULL((void *)0);
4129 if (adev->mman.aper_base_kaddr)
4130 bus_space_unmap(adev->memt, adev->mman.aper_bsh,
4131 adev->gmc.visible_vram_size);
4132 adev->mman.aper_base_kaddr = NULL((void *)0);
4133#endif
4134
4135 /* Memory manager related */
4136 if (!adev->gmc.xgmi.connected_to_cpu) {
4137#ifdef __linux__
4138 arch_phys_wc_del(adev->gmc.vram_mtrr);
4139 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4140#else
4141 drm_mtrr_del(0, adev->gmc.aper_base, adev->gmc.aper_size, DRM_MTRR_WC(1<<1));
4142#endif
4143 }
4144}
4145
4146/**
4147 * amdgpu_device_fini_hw - tear down the driver
4148 *
4149 * @adev: amdgpu_device pointer
4150 *
4151 * Tear down the driver info (all asics).
4152 * Called at driver shutdown.
4153 */
4154void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4155{
4156 dev_info(adev->dev, "amdgpu: finishing device.\n")do { } while(0);
4157 flush_delayed_work(&adev->delayed_init_work);
4158 adev->shutdown = true1;
4159
4160 /* make sure the IB test has finished before entering exclusive mode
4161 * to avoid preemption during the IB test
4162 */
4163 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4164 amdgpu_virt_request_full_gpu(adev, false0);
4165 amdgpu_virt_fini_data_exchange(adev);
4166 }
4167
4168 /* disable all interrupts */
4169 amdgpu_irq_disable_all(adev);
4170 if (adev->mode_info.mode_config_initialized){
4171 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4172 drm_helper_force_disable_all(adev_to_drm(adev));
4173 else
4174 drm_atomic_helper_shutdown(adev_to_drm(adev));
4175 }
4176 amdgpu_fence_driver_hw_fini(adev);
4177
4178 if (adev->mman.initialized) {
4179 flush_delayed_work(&adev->mman.bdev.wq);
4180 ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
4181 }
4182
4183 if (adev->pm_sysfs_en)
4184 amdgpu_pm_sysfs_fini(adev);
4185 if (adev->ucode_sysfs_en)
4186 amdgpu_ucode_sysfs_fini(adev);
4187 if (adev->psp_sysfs_en)
4188 amdgpu_psp_sysfs_fini(adev);
4189 sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4190
4191 /* ras features must be disabled before hw fini */
4192 amdgpu_ras_pre_fini(adev);
4193
4194 amdgpu_device_ip_fini_early(adev);
4195
4196 amdgpu_irq_fini_hw(adev);
4197
4198 if (adev->mman.initialized)
4199 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4200
4201 amdgpu_gart_dummy_page_fini(adev);
4202
4203 if (drm_dev_is_unplugged(adev_to_drm(adev)))
4204 amdgpu_device_unmap_mmio(adev);
4205
4206}
4207
4208void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4209{
4210 int idx;
4211 bool_Bool px;
4212
4213 amdgpu_fence_driver_sw_fini(adev);
4214 amdgpu_device_ip_fini(adev);
4215 release_firmware(adev->firmware.gpu_info_fw);
4216 adev->firmware.gpu_info_fw = NULL((void *)0);
4217 adev->accel_working = false0;
4218 dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)(adev->gang_submit));
4219
4220 amdgpu_reset_fini(adev);
4221
4222 /* free i2c buses */
4223 if (!amdgpu_device_has_dc_support(adev))
4224 amdgpu_i2c_fini(adev);
4225
4226 if (amdgpu_emu_mode != 1)
4227 amdgpu_atombios_fini(adev);
4228
4229 kfree(adev->bios);
4230 adev->bios = NULL((void *)0);
4231
4232 px = amdgpu_device_supports_px(adev_to_drm(adev));
4233
4234 if (px || (!dev_is_removable(&adev->pdev->dev)0 &&
4235 apple_gmux_detect(NULL((void *)0), NULL((void *)0))))
4236 vga_switcheroo_unregister_client(adev->pdev);
4237
4238 if (px)
4239 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4240
4241 if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA((0x03 << 8) | 0x00))
4242 vga_client_unregister(adev->pdev);
4243
4244 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4245#ifdef __linux__
4246 iounmap(adev->rmmio);
4247 adev->rmmio = NULL((void *)0);
4248#else
4249 if (adev->rmmio_size > 0)
4250 bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh,
4251 adev->rmmio_size);
4252 adev->rmmio_size = 0;
4253 adev->rmmio = NULL((void *)0);
4254#endif
4255 amdgpu_device_doorbell_fini(adev);
4256 drm_dev_exit(idx);
4257 }
4258
4259 if (IS_ENABLED(CONFIG_PERF_EVENTS)0)
4260 amdgpu_pmu_fini(adev);
4261 if (adev->mman.discovery_bin)
4262 amdgpu_discovery_fini(adev);
4263
4264 amdgpu_reset_put_reset_domain(adev->reset_domain);
4265 adev->reset_domain = NULL((void *)0);
4266
4267 kfree(adev->pci_state);
4268
4269}
4270
4271/**
4272 * amdgpu_device_evict_resources - evict device resources
4273 * @adev: amdgpu device object
4274 *
4275 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
4276 * of the vram memory type. Mainly used for evicting device resources
4277 * at suspend time.
4278 *
4279 */
4280static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4281{
4282 int ret;
4283
4284 /* No need to evict vram on APUs for suspend to ram or s2idle */
4285 if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4286 return 0;
4287
4288 ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM2);
4289 if (ret)
4290 DRM_WARN("evicting device resources failed\n")printk("\0014" "[" "drm" "] " "evicting device resources failed\n"
)
;
4291 return ret;
4292}
4293
4294/*
4295 * Suspend & resume.
4296 */
4297/**
4298 * amdgpu_device_suspend - initiate device suspend
4299 *
4300 * @dev: drm dev pointer
4301 * @fbcon : notify the fbdev of suspend
4302 *
4303 * Puts the hw in the suspend state (all asics).
4304 * Returns 0 for success or an error on failure.
4305 * Called at driver suspend.
4306 */
4307int amdgpu_device_suspend(struct drm_device *dev, bool_Bool fbcon)
4308{
4309 struct amdgpu_device *adev = drm_to_adev(dev);
4310 int r = 0;
4311
4312 if (adev->shutdown)
4313 return 0;
4314
4315#ifdef notyet
4316 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4317 return 0;
4318#endif
4319
4320 adev->in_suspend = true1;
4321
4322 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4323 amdgpu_virt_fini_data_exchange(adev);
4324 r = amdgpu_virt_request_full_gpu(adev, false0);
4325 if (r)
4326 return r;
4327 }
4328
4329 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4330 DRM_WARN("smart shift update failed\n")printk("\0014" "[" "drm" "] " "smart shift update failed\n");
4331
4332 drm_kms_helper_poll_disable(dev);
4333
4334 if (fbcon)
4335 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true1);
4336
4337 cancel_delayed_work_sync(&adev->delayed_init_work);
4338 flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4339
4340 amdgpu_ras_suspend(adev);
4341
4342 amdgpu_device_ip_suspend_phase1(adev);
4343
4344 if (!adev->in_s0ix)
4345 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4346
4347 r = amdgpu_device_evict_resources(adev);
4348 if (r)
4349 return r;
4350
4351 amdgpu_fence_driver_hw_fini(adev);
4352
4353 amdgpu_device_ip_suspend_phase2(adev);
4354
4355 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
4356 amdgpu_virt_release_full_gpu(adev, false0);
4357
4358 return 0;
4359}
4360
4361/**
4362 * amdgpu_device_resume - initiate device resume
4363 *
4364 * @dev: drm dev pointer
4365 * @fbcon: notify the fbdev of resume
4366 *
4367 * Bring the hw back to operating state (all asics).
4368 * Returns 0 for success or an error on failure.
4369 * Called at driver resume.
4370 */
4371int amdgpu_device_resume(struct drm_device *dev, bool_Bool fbcon)
4372{
4373 struct amdgpu_device *adev = drm_to_adev(dev);
4374 int r = 0;
4375
4376 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4377 r = amdgpu_virt_request_full_gpu(adev, true1);
4378 if (r)
4379 return r;
4380 }
4381
4382#ifdef notyet
4383 if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4384 return 0;
4385#endif
4386
4387 if (adev->in_s0ix)
4388 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4389
4390 /* post card */
4391 if (amdgpu_device_need_post(adev)) {
4392 r = amdgpu_device_asic_init(adev);
4393 if (r)
4394 dev_err(adev->dev, "amdgpu asic init failed\n")printf("drm:pid%d:%s *ERROR* " "amdgpu asic init failed\n", (
{struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4395 }
4396
4397 r = amdgpu_device_ip_resume(adev);
4398
4399 /* no matter what r is, always need to properly release full GPU */
4400 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4401 amdgpu_virt_init_data_exchange(adev);
4402 amdgpu_virt_release_full_gpu(adev, true1);
4403 }
4404
4405 if (r) {
4406 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r)printf("drm:pid%d:%s *ERROR* " "amdgpu_device_ip_resume failed (%d).\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r)
;
4407 return r;
4408 }
4409 amdgpu_fence_driver_hw_init(adev);
4410
4411 r = amdgpu_device_ip_late_init(adev);
4412 if (r)
4413 return r;
4414
4415 queue_delayed_work(system_wq, &adev->delayed_init_work,
4416 msecs_to_jiffies(AMDGPU_RESUME_MS)(((uint64_t)(2000)) * hz / 1000));
4417
4418 if (!adev->in_s0ix) {
4419 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4420 if (r)
4421 return r;
4422 }
4423
4424 /* Make sure IB tests flushed */
4425 flush_delayed_work(&adev->delayed_init_work);
4426
4427 if (adev->in_s0ix) {
4428 /* re-enable gfxoff after IP resume. This re-enables gfxoff after
4429 * it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
4430 */
4431 amdgpu_gfx_off_ctrl(adev, true1);
4432 DRM_DEBUG("will enable gfxoff for the mission mode\n")___drm_dbg(((void *)0), DRM_UT_CORE, "will enable gfxoff for the mission mode\n"
)
;
4433 }
4434 if (fbcon)
4435 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false0);
4436
4437 drm_kms_helper_poll_enable(dev);
4438
4439 amdgpu_ras_resume(adev);
4440
4441 /*
4442 * Most of the connector probing functions try to acquire runtime pm
4443 * refs to ensure that the GPU is powered on when connector polling is
4444 * performed. Since we're calling this from a runtime PM callback,
4445 * trying to acquire rpm refs will cause us to deadlock.
4446 *
4447 * Since we're guaranteed to be holding the rpm lock, it's safe to
4448 * temporarily disable the rpm helpers so this doesn't deadlock us.
4449 */
4450#if defined(CONFIG_PM) && defined(__linux__)
4451 dev->dev->power.disable_depth++;
4452#endif
4453 if (!amdgpu_device_has_dc_support(adev))
4454 drm_helper_hpd_irq_event(dev);
4455 else
4456 drm_kms_helper_hotplug_event(dev);
4457#if defined(CONFIG_PM) && defined(__linux__)
4458 dev->dev->power.disable_depth--;
4459#endif
4460 adev->in_suspend = false0;
4461
4462 if (adev->enable_mes)
4463 amdgpu_mes_self_test(adev);
4464
4465 if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4466 DRM_WARN("smart shift update failed\n")printk("\0014" "[" "drm" "] " "smart shift update failed\n");
4467
4468 return 0;
4469}
4470
4471/**
4472 * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4473 *
4474 * @adev: amdgpu_device pointer
4475 *
4476 * The list of all the hardware IPs that make up the asic is walked and
4477 * the check_soft_reset callbacks are run. check_soft_reset determines
4478 * if the asic is still hung or not.
4479 * Returns true if any of the IPs are still in a hung state, false if not.
4480 */
4481static bool_Bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4482{
4483 int i;
4484 bool_Bool asic_hang = false0;
4485
4486 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
4487 return true1;
4488
4489 if (amdgpu_asic_need_full_reset(adev)(adev)->asic_funcs->need_full_reset((adev)))
4490 return true1;
4491
4492 for (i = 0; i < adev->num_ip_blocks; i++) {
4493 if (!adev->ip_blocks[i].status.valid)
4494 continue;
4495 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4496 adev->ip_blocks[i].status.hang =
4497 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4498 if (adev->ip_blocks[i].status.hang) {
4499 dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name)do { } while(0);
4500 asic_hang = true1;
4501 }
4502 }
4503 return asic_hang;
4504}
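
The soft-reset helpers here (check_soft_reset, pre_soft_reset, soft_reset, post_soft_reset) all follow the same shape: walk adev->ip_blocks, skip blocks that are not valid, and invoke a callback only if the IP version actually provides one. A minimal user-space sketch of that shape, with purely illustrative names (struct block, check_hang; not the amdgpu IP-block API), might look like this:

/*
 * Illustrative sketch only: an array of blocks with optional callbacks,
 * walked the way the soft-reset helpers walk adev->ip_blocks.
 */
#include <stdbool.h>
#include <stdio.h>

struct block {
    const char *name;
    bool valid;
    bool hang;
    bool (*check_hang)(void);   /* optional, may be NULL */
};

static bool always_hung(void) { return true; }

int main(void)
{
    struct block blocks[] = {
        { "gfx", true,  false, always_hung },
        { "smc", false, false, NULL },      /* skipped: invalid */
        { "dce", true,  false, NULL },      /* skipped: no callback */
    };
    bool any_hang = false;

    for (size_t i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
        if (!blocks[i].valid)
            continue;
        if (blocks[i].check_hang)
            blocks[i].hang = blocks[i].check_hang();
        if (blocks[i].hang) {
            printf("block %s is hung\n", blocks[i].name);
            any_hang = true;
        }
    }
    return any_hang ? 1 : 0;
}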
4505
4506/**
4507 * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4508 *
4509 * @adev: amdgpu_device pointer
4510 *
4511 * The list of all the hardware IPs that make up the asic is walked and the
4512 * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
4513 * handles any IP specific hardware or software state changes that are
4514 * necessary for a soft reset to succeed.
4515 * Returns 0 on success, negative error code on failure.
4516 */
4517static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4518{
4519 int i, r = 0;
4520
4521 for (i = 0; i < adev->num_ip_blocks; i++) {
4522 if (!adev->ip_blocks[i].status.valid)
4523 continue;
4524 if (adev->ip_blocks[i].status.hang &&
4525 adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4526 r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4527 if (r)
4528 return r;
4529 }
4530 }
4531
4532 return 0;
4533}
4534
4535/**
4536 * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4537 *
4538 * @adev: amdgpu_device pointer
4539 *
4540 * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
4541 * reset is necessary to recover.
4542 * Returns true if a full asic reset is required, false if not.
4543 */
4544static bool_Bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4545{
4546 int i;
4547
4548 if (amdgpu_asic_need_full_reset(adev)(adev)->asic_funcs->need_full_reset((adev)))
4549 return true1;
4550
4551 for (i = 0; i < adev->num_ip_blocks; i++) {
4552 if (!adev->ip_blocks[i].status.valid)
4553 continue;
4554 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4555 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4556 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4557 (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4558 adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4559 if (adev->ip_blocks[i].status.hang) {
4560 dev_info(adev->dev, "Some block need full reset!\n")do { } while(0);
4561 return true1;
4562 }
4563 }
4564 }
4565 return false0;
4566}
4567
4568/**
4569 * amdgpu_device_ip_soft_reset - do a soft reset
4570 *
4571 * @adev: amdgpu_device pointer
4572 *
4573 * The list of all the hardware IPs that make up the asic is walked and the
4574 * soft_reset callbacks are run if the block is hung. soft_reset handles any
4575 * IP specific hardware or software state changes that are necessary to soft
4576 * reset the IP.
4577 * Returns 0 on success, negative error code on failure.
4578 */
4579static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4580{
4581 int i, r = 0;
4582
4583 for (i = 0; i < adev->num_ip_blocks; i++) {
4584 if (!adev->ip_blocks[i].status.valid)
4585 continue;
4586 if (adev->ip_blocks[i].status.hang &&
4587 adev->ip_blocks[i].version->funcs->soft_reset) {
4588 r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4589 if (r)
4590 return r;
4591 }
4592 }
4593
4594 return 0;
4595}
4596
4597/**
4598 * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4599 *
4600 * @adev: amdgpu_device pointer
4601 *
4602 * The list of all the hardware IPs that make up the asic is walked and the
4603 * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
4604 * handles any IP specific hardware or software state changes that are
4605 * necessary after the IP has been soft reset.
4606 * Returns 0 on success, negative error code on failure.
4607 */
4608static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4609{
4610 int i, r = 0;
4611
4612 for (i = 0; i < adev->num_ip_blocks; i++) {
4613 if (!adev->ip_blocks[i].status.valid)
4614 continue;
4615 if (adev->ip_blocks[i].status.hang &&
4616 adev->ip_blocks[i].version->funcs->post_soft_reset)
4617 r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4618 if (r)
4619 return r;
4620 }
4621
4622 return 0;
4623}
4624
4625/**
4626 * amdgpu_device_recover_vram - Recover some VRAM contents
4627 *
4628 * @adev: amdgpu_device pointer
4629 *
4630 * Restores the contents of VRAM buffers from the shadows in GTT. Used to
4631 * restore things like GPUVM page tables after a GPU reset where
4632 * the contents of VRAM might be lost.
4633 *
4634 * Returns:
4635 * 0 on success, negative error code on failure.
4636 */
4637static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4638{
4639 struct dma_fence *fence = NULL((void *)0), *next = NULL((void *)0);
4640 struct amdgpu_bo *shadow;
4641 struct amdgpu_bo_vm *vmbo;
4642 long r = 1, tmo;
4643
4644 if (amdgpu_sriov_runtime(adev)((adev)->virt.caps & (1 << 4)))
4645 tmo = msecs_to_jiffies(8000)(((uint64_t)(8000)) * hz / 1000);
4646 else
4647 tmo = msecs_to_jiffies(100)(((uint64_t)(100)) * hz / 1000);
4648
4649 dev_info(adev->dev, "recover vram bo from shadow start\n")do { } while(0);
4650 mutex_lock(&adev->shadow_list_lock)rw_enter_write(&adev->shadow_list_lock);
4651 list_for_each_entry(vmbo, &adev->shadow_list, shadow_list)for (vmbo = ({ const __typeof( ((__typeof(*vmbo) *)0)->shadow_list
) *__mptr = ((&adev->shadow_list)->next); (__typeof
(*vmbo) *)( (char *)__mptr - __builtin_offsetof(__typeof(*vmbo
), shadow_list) );}); &vmbo->shadow_list != (&adev
->shadow_list); vmbo = ({ const __typeof( ((__typeof(*vmbo
) *)0)->shadow_list ) *__mptr = (vmbo->shadow_list.next
); (__typeof(*vmbo) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*vmbo), shadow_list) );}))
{
4652 /* If the vm is a compute context or adev is an APU, shadow will be NULL */
4653 if (!vmbo->shadow)
4654 continue;
4655 shadow = vmbo->shadow;
4656
4657 /* No need to recover an evicted BO */
4658 if (shadow->tbo.resource->mem_type != TTM_PL_TT1 ||
4659 shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET0x7fffffffffffffffL ||
4660 shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM2)
4661 continue;
4662
4663 r = amdgpu_bo_restore_shadow(shadow, &next);
4664 if (r)
4665 break;
4666
4667 if (fence) {
4668 tmo = dma_fence_wait_timeout(fence, false0, tmo);
4669 dma_fence_put(fence);
4670 fence = next;
4671 if (tmo == 0) {
4672 r = -ETIMEDOUT60;
4673 break;
4674 } else if (tmo < 0) {
4675 r = tmo;
4676 break;
4677 }
4678 } else {
4679 fence = next;
4680 }
4681 }
4682 mutex_unlock(&adev->shadow_list_lock)rw_exit_write(&adev->shadow_list_lock);
4683
4684 if (fence)
4685 tmo = dma_fence_wait_timeout(fence, false0, tmo);
4686 dma_fence_put(fence);
4687
4688 if (r < 0 || tmo <= 0) {
4689 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo)printf("drm:pid%d:%s *ERROR* " "recover vram bo from shadow failed, r is %ld, tmo is %ld\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r, tmo
)
;
4690 return -EIO5;
4691 }
4692
4693 dev_info(adev->dev, "recover vram bo from shadow done\n")do { } while(0);
4694 return 0;
4695}
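
amdgpu_device_recover_vram() carries one timeout budget (tmo) across a chain of fence waits: each dma_fence_wait_timeout() consumes part of the budget and returns what is left, and the whole recovery fails once the budget reaches zero. A minimal sketch of that shared-budget pattern, assuming a stand-in wait_step() in place of dma_fence_wait_timeout() and made-up per-step costs:

/*
 * Illustrative sketch only: a single time budget carried across a chain
 * of waits; 0 left means the chain timed out.
 */
#include <stdio.h>

/* pretend each step needs 'cost' units and returns the budget left */
static long wait_step(long budget, long cost)
{
    return budget > cost ? budget - cost : 0;
}

int main(void)
{
    long tmo = 100;                 /* overall budget for the chain */
    long costs[] = { 30, 40, 50 };  /* simulated per-step wait durations */

    for (int i = 0; i < 3; i++) {
        tmo = wait_step(tmo, costs[i]);
        if (tmo == 0) {
            fprintf(stderr, "step %d timed out\n", i);
            return 1;
        }
    }
    printf("all steps done, %ld left\n", tmo);
    return 0;
}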
4696
4697
4698/**
4699 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4700 *
4701 * @adev: amdgpu_device pointer
4702 * @from_hypervisor: request from hypervisor
4703 *
4704 * Do a VF FLR and reinitialize the ASIC.
4705 * Returns 0 if it succeeded, otherwise an error code.
4706 */
4707static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4708 bool_Bool from_hypervisor)
4709{
4710 int r;
4711 struct amdgpu_hive_info *hive = NULL((void *)0);
4712 int retry_limit = 0;
4713
4714retry:
4715 amdgpu_amdkfd_pre_reset(adev);
4716
4717 if (from_hypervisor)
4718 r = amdgpu_virt_request_full_gpu(adev, true1);
4719 else
4720 r = amdgpu_virt_reset_gpu(adev);
4721 if (r)
4722 return r;
4723
4724 /* Resume IP prior to SMC */
4725 r = amdgpu_device_ip_reinit_early_sriov(adev);
4726 if (r)
4727 goto error;
4728
4729 amdgpu_virt_init_data_exchange(adev);
4730
4731 r = amdgpu_device_fw_loading(adev);
4732 if (r)
4733 return r;
4734
4735 /* now we are okay to resume SMC/CP/SDMA */
4736 r = amdgpu_device_ip_reinit_late_sriov(adev);
4737 if (r)
4738 goto error;
4739
4740 hive = amdgpu_get_xgmi_hive(adev);
4741 /* Update PSP FW topology after reset */
4742 if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4743 r = amdgpu_xgmi_update_topology(hive, adev);
4744
4745 if (hive)
4746 amdgpu_put_xgmi_hive(hive);
4747
4748 if (!r) {
4749 amdgpu_irq_gpu_reset_resume_helper(adev);
4750 r = amdgpu_ib_ring_tests(adev);
4751
4752 amdgpu_amdkfd_post_reset(adev);
4753 }
4754
4755error:
4756 if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4757 amdgpu_inc_vram_lost(adev)__sync_fetch_and_add(&((adev)->vram_lost_counter), 1);;
4758 r = amdgpu_device_recover_vram(adev);
4759 }
4760 amdgpu_virt_release_full_gpu(adev, true1);
4761
4762 if (AMDGPU_RETRY_SRIOV_RESET(r)((r) == -16 || (r) == -60 || (r) == -22)) {
4763 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT2) {
4764 retry_limit++;
4765 goto retry;
4766 } else
4767 DRM_ERROR("GPU reset retry is beyond the retry limit\n")__drm_err("GPU reset retry is beyond the retry limit\n");
4768 }
4769
4770 return r;
4771}
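
The retry tail of amdgpu_device_reset_sriov() only loops on error codes that AMDGPU_RETRY_SRIOV_RESET treats as transient (-16, -60, -22 per the expansion above, i.e. EBUSY/ETIMEDOUT/EINVAL) and gives up after AMDGPU_MAX_RETRY_LIMIT attempts. A hedged user-space sketch of that bounded-retry policy; do_reset(), retryable() and MAX_RETRY are illustrative names, not driver API:

/* Illustrative sketch only: retry on transient errors, up to a limit. */
#include <errno.h>
#include <stdio.h>

#define MAX_RETRY 2

static int attempts;

/* stand-in for the reset sequence; fails twice, then succeeds */
static int do_reset(void)
{
    return ++attempts < 3 ? -EBUSY : 0;
}

static int retryable(int r)
{
    return r == -EBUSY || r == -ETIMEDOUT || r == -EINVAL;
}

int main(void)
{
    int r, retry = 0;

    do {
        r = do_reset();
    } while (r && retryable(r) && retry++ < MAX_RETRY);

    if (r)
        fprintf(stderr, "reset failed after retries: %d\n", r);
    else
        printf("reset succeeded after %d attempt(s)\n", attempts);
    return r ? 1 : 0;
}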
4772
4773/**
4774 * amdgpu_device_has_job_running - check if there is any job in mirror list
4775 *
4776 * @adev: amdgpu_device pointer
4777 *
4778 * check if there is any job in mirror list
4779 */
4780bool_Bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4781{
4782 int i;
4783 struct drm_sched_job *job;
4784
4785 for (i = 0; i < AMDGPU_MAX_RINGS28; ++i) {
4786 struct amdgpu_ring *ring = adev->rings[i];
4787
4788 if (!ring || !ring->sched.thread)
4789 continue;
4790
4791 spin_lock(&ring->sched.job_list_lock)mtx_enter(&ring->sched.job_list_lock);
4792 job = list_first_entry_or_null(&ring->sched.pending_list,(list_empty(&ring->sched.pending_list) ? ((void *)0) :
({ const __typeof( ((struct drm_sched_job *)0)->list ) *__mptr
= ((&ring->sched.pending_list)->next); (struct drm_sched_job
*)( (char *)__mptr - __builtin_offsetof(struct drm_sched_job
, list) );}))
4793 struct drm_sched_job, list)(list_empty(&ring->sched.pending_list) ? ((void *)0) :
({ const __typeof( ((struct drm_sched_job *)0)->list ) *__mptr
= ((&ring->sched.pending_list)->next); (struct drm_sched_job
*)( (char *)__mptr - __builtin_offsetof(struct drm_sched_job
, list) );}))
;
4794 spin_unlock(&ring->sched.job_list_lock)mtx_leave(&ring->sched.job_list_lock);
4795 if (job)
4796 return true1;
4797 }
4798 return false0;
4799}
4800
4801/**
4802 * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4803 *
4804 * @adev: amdgpu_device pointer
4805 *
4806 * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4807 * a hung GPU.
4808 */
4809bool_Bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4810{
4811
4812 if (amdgpu_gpu_recovery == 0)
4813 goto disabled;
4814
4815 if (!amdgpu_device_ip_check_soft_reset(adev)) {
4816 dev_info(adev->dev,"Timeout, but no hardware hang detected.\n")do { } while(0);
4817 return false0;
4818 }
4819
4820 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
4821 return true1;
4822
4823 if (amdgpu_gpu_recovery == -1) {
4824 switch (adev->asic_type) {
4825#ifdef CONFIG_DRM_AMDGPU_SI
4826 case CHIP_VERDE:
4827 case CHIP_TAHITI:
4828 case CHIP_PITCAIRN:
4829 case CHIP_OLAND:
4830 case CHIP_HAINAN:
4831#endif
4832#ifdef CONFIG_DRM_AMDGPU_CIK
4833 case CHIP_KAVERI:
4834 case CHIP_KABINI:
4835 case CHIP_MULLINS:
4836#endif
4837 case CHIP_CARRIZO:
4838 case CHIP_STONEY:
4839 case CHIP_CYAN_SKILLFISH:
4840 goto disabled;
4841 default:
4842 break;
4843 }
4844 }
4845
4846 return true1;
4847
4848disabled:
4849 dev_info(adev->dev, "GPU recovery disabled.\n")do { } while(0);
4850 return false0;
4851}
4852
4853int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4854{
4855 u32 i;
4856 int ret = 0;
4857
4858 amdgpu_atombios_scratch_regs_engine_hung(adev, true1);
4859
4860 dev_info(adev->dev, "GPU mode1 reset\n")do { } while(0);
4861
4862 /* disable BM */
4863 pci_clear_master(adev->pdev);
4864
4865 amdgpu_device_cache_pci_state(adev->pdev);
4866
4867 if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4868 dev_info(adev->dev, "GPU smu mode1 reset\n")do { } while(0);
4869 ret = amdgpu_dpm_mode1_reset(adev);
4870 } else {
4871 dev_info(adev->dev, "GPU psp mode1 reset\n")do { } while(0);
4872 ret = psp_gpu_reset(adev);
4873 }
4874
4875 if (ret)
4876 dev_err(adev->dev, "GPU mode1 reset failed\n")printf("drm:pid%d:%s *ERROR* " "GPU mode1 reset failed\n", ({
struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4877
4878 amdgpu_device_load_pci_state(adev->pdev);
4879
4880 /* wait for asic to come out of reset */
4881 for (i = 0; i < adev->usec_timeout; i++) {
4882 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4883
4884 if (memsize != 0xffffffff)
4885 break;
4886 udelay(1);
4887 }
4888
4889 amdgpu_atombios_scratch_regs_engine_hung(adev, false0);
4890 return ret;
4891}
4892
4893int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4894 struct amdgpu_reset_context *reset_context)
4895{
4896 int i, r = 0;
4897 struct amdgpu_job *job = NULL((void *)0);
4898 bool_Bool need_full_reset =
4899 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4900
4901 if (reset_context->reset_req_dev == adev)
4902 job = reset_context->job;
4903
4904 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4905 /* stop the data exchange thread */
4906 amdgpu_virt_fini_data_exchange(adev);
4907 }
4908
4909 amdgpu_fence_driver_isr_toggle(adev, true1);
4910
4911 /* block all schedulers and reset given job's ring */
4912 for (i = 0; i < AMDGPU_MAX_RINGS28; ++i) {
4913 struct amdgpu_ring *ring = adev->rings[i];
4914
4915 if (!ring || !ring->sched.thread)
4916 continue;
4917
4918 /* Clear job fences from the fence driver to avoid force_completion;
4919 * leave NULL and vm flush fences in the fence driver. */
4920 amdgpu_fence_driver_clear_job_fences(ring);
4921
4922 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4923 amdgpu_fence_driver_force_completion(ring);
4924 }
4925
4926 amdgpu_fence_driver_isr_toggle(adev, false0);
4927
4928 if (job && job->vm)
4929 drm_sched_increase_karma(&job->base);
4930
4931 r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4932 /* If reset handler not implemented, continue; otherwise return */
4933 if (r == -ENOSYS78)
4934 r = 0;
4935 else
4936 return r;
4937
4938 /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4939 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
4940
4941 if (!need_full_reset)
4942 need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4943
4944 if (!need_full_reset && amdgpu_gpu_recovery) {
4945 amdgpu_device_ip_pre_soft_reset(adev);
4946 r = amdgpu_device_ip_soft_reset(adev);
4947 amdgpu_device_ip_post_soft_reset(adev);
4948 if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4949 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n")do { } while(0);
4950 need_full_reset = true1;
4951 }
4952 }
4953
4954 if (need_full_reset)
4955 r = amdgpu_device_ip_suspend(adev);
4956 if (need_full_reset)
4957 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4958 else
4959 clear_bit(AMDGPU_NEED_FULL_RESET,
4960 &reset_context->flags);
4961 }
4962
4963 return r;
4964}
4965
4966static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4967{
4968 int i;
4969
4970 lockdep_assert_held(&adev->reset_domain->sem)do { (void)(&adev->reset_domain->sem); } while(0);
4971
4972 for (i = 0; i < adev->num_regs; i++) {
4973 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i])amdgpu_device_rreg(adev, (adev->reset_dump_reg_list[i]), 0
)
;
4974 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4975 adev->reset_dump_reg_value[i]);
4976 }
4977
4978 return 0;
4979}
4980
4981#ifdef CONFIG_DEV_COREDUMP
4982static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4983 size_t count, void *data, size_t datalen)
4984{
4985 struct drm_printer p;
4986 struct amdgpu_device *adev = data;
4987 struct drm_print_iterator iter;
4988 int i;
4989
4990 iter.data = buffer;
4991 iter.offset = 0;
4992 iter.start = offset;
4993 iter.remain = count;
4994
4995 p = drm_coredump_printer(&iter);
4996
4997 drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4998 drm_printf(&p, "kernel: " UTS_RELEASE"" "\n");
4999 drm_printf(&p, "module: " KBUILD_MODNAME "\n");
5000 drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
5001 if (adev->reset_task_info.pid)
5002 drm_printf(&p, "process_name: %s PID: %d\n",
5003 adev->reset_task_info.process_name,
5004 adev->reset_task_info.pid);
5005
5006 if (adev->reset_vram_lost)
5007 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
5008 if (adev->num_regs) {
5009 drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
5010
5011 for (i = 0; i < adev->num_regs; i++)
5012 drm_printf(&p, "0x%08x: 0x%08x\n",
5013 adev->reset_dump_reg_list[i],
5014 adev->reset_dump_reg_value[i]);
5015 }
5016
5017 return count - iter.remain;
5018}
5019
5020static void amdgpu_devcoredump_free(void *data)
5021{
5022}
5023
5024static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
5025{
5026 struct drm_device *dev = adev_to_drm(adev);
5027
5028 ktime_get_ts64(&adev->reset_time);
5029 dev_coredumpm(dev->dev, THIS_MODULE((void *)0), adev, 0, GFP_KERNEL(0x0001 | 0x0004),
5030 amdgpu_devcoredump_read, amdgpu_devcoredump_free);
5031}
5032#endif
5033
5034int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5035 struct amdgpu_reset_context *reset_context)
5036{
5037 struct amdgpu_device *tmp_adev = NULL((void *)0);
5038 bool_Bool need_full_reset, skip_hw_reset, vram_lost = false0;
5039 int r = 0;
5040 bool_Bool gpu_reset_for_dev_remove = 0;
5041
5042 /* Try reset handler method first */
5043 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,({ const __typeof( ((struct amdgpu_device *)0)->reset_list
) *__mptr = ((device_list_handle)->next); (struct amdgpu_device
*)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device
, reset_list) );})
5044 reset_list)({ const __typeof( ((struct amdgpu_device *)0)->reset_list
) *__mptr = ((device_list_handle)->next); (struct amdgpu_device
*)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device
, reset_list) );})
;
5045 amdgpu_reset_reg_dumps(tmp_adev);
5046
5047 reset_context->reset_device_list = device_list_handle;
5048 r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
5049 /* If reset handler not implemented, continue; otherwise return */
5050 if (r == -ENOSYS78)
5051 r = 0;
5052 else
5053 return r;
5054
5055 /* Reset handler not implemented, use the default method */
5056 need_full_reset =
5057 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5058 skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5059
5060 gpu_reset_for_dev_remove =
5061 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5062 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5063
5064 /*
5065 * ASIC reset has to be done on all XGMI hive nodes ASAP
5066 * to allow proper links negotiation in FW (within 1 sec)
5067 */
5068 if (!skip_hw_reset && need_full_reset) {
5069 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5070 /* For XGMI run all resets in parallel to speed up the process */
5071 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5072 tmp_adev->gmc.xgmi.pending_reset = false0;
5073 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
5074 r = -EALREADY37;
5075 } else
5076 r = amdgpu_asic_reset(tmp_adev)(tmp_adev)->asic_funcs->reset((tmp_adev));
5077
5078 if (r) {
5079 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",printf("drm:pid%d:%s *ERROR* " "ASIC reset failed with error, %d for drm dev, %s"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r, adev_to_drm
(tmp_adev)->unique)
5080 r, adev_to_drm(tmp_adev)->unique)printf("drm:pid%d:%s *ERROR* " "ASIC reset failed with error, %d for drm dev, %s"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r, adev_to_drm
(tmp_adev)->unique)
;
5081 break;
5082 }
5083 }
5084
5085 /* For XGMI wait for all resets to complete before proceed */
5086 if (!r) {
5087 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5088 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
5089 flush_work(&tmp_adev->xgmi_reset_work);
5090 r = tmp_adev->asic_reset_res;
5091 if (r)
5092 break;
5093 }
5094 }
5095 }
5096 }
5097
5098 if (!r && amdgpu_ras_intr_triggered()) {
5099 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5100 if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
5101 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
5102 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
5103 }
5104
5105 amdgpu_ras_intr_cleared();
5106 }
5107
5108 /* Since the mode1 reset affects base ip blocks, the
5109 * phase1 ip blocks need to be resumed. Otherwise there
5110 * will be a BIOS signature error and the psp bootloader
5111 * can't load kdb on the next amdgpu install.
5112 */
5113 if (gpu_reset_for_dev_remove) {
5114 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
5115 amdgpu_device_ip_resume_phase1(tmp_adev);
5116
5117 goto end;
5118 }
5119
5120 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5121 if (need_full_reset) {
5122 /* post card */
5123 r = amdgpu_device_asic_init(tmp_adev);
5124 if (r) {
5125 dev_warn(tmp_adev->dev, "asic atom init failed!")printf("drm:pid%d:%s *WARNING* " "asic atom init failed!", ({
struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
5126 } else {
5127 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n")do { } while(0);
5128 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
5129 if (r)
5130 goto out;
5131
5132 r = amdgpu_device_ip_resume_phase1(tmp_adev);
5133 if (r)
5134 goto out;
5135
5136 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
5137#ifdef CONFIG_DEV_COREDUMP
5138 tmp_adev->reset_vram_lost = vram_lost;
5139 memset(&tmp_adev->reset_task_info, 0,__builtin_memset((&tmp_adev->reset_task_info), (0), (sizeof
(tmp_adev->reset_task_info)))
5140 sizeof(tmp_adev->reset_task_info))__builtin_memset((&tmp_adev->reset_task_info), (0), (sizeof
(tmp_adev->reset_task_info)))
;
5141 if (reset_context->job && reset_context->job->vm)
5142 tmp_adev->reset_task_info =
5143 reset_context->job->vm->task_info;
5144 amdgpu_reset_capture_coredumpm(tmp_adev);
5145#endif
5146 if (vram_lost) {
5147 DRM_INFO("VRAM is lost due to GPU reset!\n")printk("\0016" "[" "drm" "] " "VRAM is lost due to GPU reset!\n"
)
;
5148 amdgpu_inc_vram_lost(tmp_adev)__sync_fetch_and_add(&((tmp_adev)->vram_lost_counter),
1);
;
5149 }
5150
5151 r = amdgpu_device_fw_loading(tmp_adev);
5152 if (r)
5153 return r;
5154
5155 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5156 if (r)
5157 goto out;
5158
5159 if (vram_lost)
5160 amdgpu_device_fill_reset_magic(tmp_adev);
5161
5162 /*
5163 * Add this ASIC as tracked as reset was already
5164 * complete successfully.
5165 */
5166 amdgpu_register_gpu_instance(tmp_adev);
5167
5168 if (!reset_context->hive &&
5169 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5170 amdgpu_xgmi_add_device(tmp_adev);
5171
5172 r = amdgpu_device_ip_late_init(tmp_adev);
5173 if (r)
5174 goto out;
5175
5176 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false0);
5177
5178 /*
5179 * The GPU enters a bad state once the number of
5180 * faulty pages detected by ECC reaches the
5181 * threshold, and RAS recovery is scheduled next.
5182 * So add a check here to break recovery if the
5183 * bad page threshold has indeed been exceeded,
5184 * and remind the user to either retire this GPU
5185 * or set a bigger bad_page_threshold the next
5186 * time the driver is probed.
5187 */
5188 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5189 /* must succeed. */
5190 amdgpu_ras_resume(tmp_adev);
5191 } else {
5192 r = -EINVAL22;
5193 goto out;
5194 }
5195
5196 /* Update PSP FW topology after reset */
5197 if (reset_context->hive &&
5198 tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5199 r = amdgpu_xgmi_update_topology(
5200 reset_context->hive, tmp_adev);
5201 }
5202 }
5203
5204out:
5205 if (!r) {
5206 amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5207 r = amdgpu_ib_ring_tests(tmp_adev);
5208 if (r) {
5209 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r)printf("drm:pid%d:%s *ERROR* " "ib ring test failed (%d).\n",
({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r)
;
5210 need_full_reset = true1;
5211 r = -EAGAIN35;
5212 goto end;
5213 }
5214 }
5215
5216 if (!r)
5217 r = amdgpu_device_recover_vram(tmp_adev);
5218 else
5219 tmp_adev->asic_reset_res = r;
5220 }
5221
5222end:
5223 if (need_full_reset)
5224 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5225 else
5226 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5227 return r;
5228}
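
For XGMI hives, amdgpu_do_asic_reset() queues one reset work item per node so the resets run in parallel, then flushes them all and folds each node's asic_reset_res into a single return code. The sketch below mirrors that fan-out/fan-in idea with plain pthreads; it is an analogy only, not the kernel workqueue API, and node_reset()/NODES are illustrative.

/* Illustrative sketch only: start all node resets, then wait for all. */
#include <pthread.h>
#include <stdio.h>

#define NODES 4

static void *node_reset(void *arg)
{
    int idx = *(int *)arg;
    /* real code would reset node 'idx' here; pretend node 2 fails */
    *(int *)arg = (idx == 2) ? -1 : 0;
    return NULL;
}

int main(void)
{
    pthread_t th[NODES];
    int res[NODES], r = 0;

    for (int i = 0; i < NODES; i++) {   /* fan out: start all resets */
        res[i] = i;
        pthread_create(&th[i], NULL, node_reset, &res[i]);
    }
    for (int i = 0; i < NODES; i++) {   /* fan in: wait and combine results */
        pthread_join(th[i], NULL);
        if (res[i])
            r = res[i];
    }
    printf("combined reset result: %d\n", r);
    return r ? 1 : 0;
}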
5229
5230static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5231{
5232
5233 switch (amdgpu_asic_reset_method(adev)(adev)->asic_funcs->reset_method((adev))) {
5234 case AMD_RESET_METHOD_MODE1:
5235 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5236 break;
5237 case AMD_RESET_METHOD_MODE2:
5238 adev->mp1_state = PP_MP1_STATE_RESET;
5239 break;
5240 default:
5241 adev->mp1_state = PP_MP1_STATE_NONE;
5242 break;
5243 }
5244
5245 pci_dev_put(p);
5246}
5247
5248static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5249{
5250 amdgpu_vf_error_trans_all(adev);
5251 adev->mp1_state = PP_MP1_STATE_NONE;
5252}
5253
5254static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5255{
5256 STUB()do { printf("%s: stub\n", __func__); } while(0);
5257#ifdef notyet
5258 struct pci_dev *p = NULL((void *)0);
5259
5260 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5261 adev->pdev->bus->number, 1);
5262 if (p) {
5263 pm_runtime_enable(&(p->dev));
5264 pm_runtime_resume(&(p->dev));
5265 }
5266#endif
5267}
5268
5269static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5270{
5271 enum amd_reset_method reset_method;
5272 struct pci_dev *p = NULL((void *)0);
5273 u64 expires;
5274
5275 /*
5276 * For now, only BACO and mode1 reset are confirmed
5277 * to suffer the audio issue if not properly suspended.
5278 */
5279 reset_method = amdgpu_asic_reset_method(adev)(adev)->asic_funcs->reset_method((adev));
5280 if ((reset_method != AMD_RESET_METHOD_BACO) &&
5281 (reset_method != AMD_RESET_METHOD_MODE1))
5282 return -EINVAL22;
5283
5284 STUB()do { printf("%s: stub\n", __func__); } while(0);
5285 return -ENOSYS78;
5286#ifdef notyet
5287
5288 p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5289 adev->pdev->bus->number, 1);
5290 if (!p)
5291 return -ENODEV19;
5292
5293 expires = pm_runtime_autosuspend_expiration(&(p->dev));
5294 if (!expires)
5295 /*
5296 * If we cannot get the audio device's autosuspend delay,
5297 * a fixed 4S interval is used. Since 3S is the audio
5298 * controller's default autosuspend delay setting, the 4S
5299 * used here is guaranteed to cover it.
5300 */
5301 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC1000000000L * 4ULL;
5302
5303 while (!pm_runtime_status_suspended(&(p->dev))) {
5304 if (!pm_runtime_suspend(&(p->dev)))
5305 break;
5306
5307 if (expires < ktime_get_mono_fast_ns()) {
5308 dev_warn(adev->dev, "failed to suspend display audio\n")printf("drm:pid%d:%s *WARNING* " "failed to suspend display audio\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
5309 pci_dev_put(p);
5310 /* TODO: abort the succeeding gpu reset? */
5311 return -ETIMEDOUT60;
5312 }
5313 }
5314
5315 pm_runtime_disable(&(p->dev));
5316
5317 pci_dev_put(p);
5318 return 0;
5319#endif
5320}
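
amdgpu_device_suspend_display_audio() (in the notyet branch above) polls the audio function until it reports suspended, using either the device's own autosuspend expiration or a fixed now-plus-4-seconds fallback as the deadline. A small user-space sketch of that deadline pattern, with is_suspended() standing in for pm_runtime_status_suspended(); all names here are illustrative:

/* Illustrative sketch only: poll a condition until a fallback deadline. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static int polls;

static bool is_suspended(void)
{
    return ++polls > 3;     /* pretend it suspends after a few polls */
}

int main(void)
{
    long long expires = 0;  /* no expiration reported by the device */

    if (!expires)
        expires = now_ns() + 4LL * 1000000000LL;    /* fixed 4 s fallback */

    while (!is_suspended()) {
        if (now_ns() > expires) {
            fprintf(stderr, "timed out waiting for suspend\n");
            return 1;
        }
    }
    printf("suspended after %d polls\n", polls);
    return 0;
}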
5321
5322static void amdgpu_device_recheck_guilty_jobs(
5323 struct amdgpu_device *adev, struct list_head *device_list_handle,
5324 struct amdgpu_reset_context *reset_context)
5325{
5326 int i, r = 0;
5327
5328 for (i = 0; i < AMDGPU_MAX_RINGS28; ++i) {
5329 struct amdgpu_ring *ring = adev->rings[i];
5330 int ret = 0;
5331 struct drm_sched_job *s_job;
5332
5333 if (!ring || !ring->sched.thread)
5334 continue;
5335
5336 s_job = list_first_entry_or_null(&ring->sched.pending_list,(list_empty(&ring->sched.pending_list) ? ((void *)0) :
({ const __typeof( ((struct drm_sched_job *)0)->list ) *__mptr
= ((&ring->sched.pending_list)->next); (struct drm_sched_job
*)( (char *)__mptr - __builtin_offsetof(struct drm_sched_job
, list) );}))
5337 struct drm_sched_job, list)(list_empty(&ring->sched.pending_list) ? ((void *)0) :
({ const __typeof( ((struct drm_sched_job *)0)->list ) *__mptr
= ((&ring->sched.pending_list)->next); (struct drm_sched_job
*)( (char *)__mptr - __builtin_offsetof(struct drm_sched_job
, list) );}))
;
5338 if (s_job == NULL((void *)0))
5339 continue;
5340
5341 /* clear the job's guilty status and depend on the following step to decide the real one */
5342 drm_sched_reset_karma(s_job);
5343 drm_sched_resubmit_jobs_ext(&ring->sched, 1);
5344
5345 if (!s_job->s_fence->parent) {
5346 DRM_WARN("Failed to get a HW fence for job!")printk("\0014" "[" "drm" "] " "Failed to get a HW fence for job!"
)
;
5347 continue;
5348 }
5349
5350 ret = dma_fence_wait_timeout(s_job->s_fence->parent, false0, ring->sched.timeout);
5351 if (ret == 0) { /* timeout */
5352 DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",__drm_err("Found the real bad job! ring:%s, job_id:%llx\n", ring
->sched.name, s_job->id)
5353 ring->sched.name, s_job->id)__drm_err("Found the real bad job! ring:%s, job_id:%llx\n", ring
->sched.name, s_job->id)
;
5354
5355
5356 amdgpu_fence_driver_isr_toggle(adev, true1);
5357
5358 /* Clear this failed job from fence array */
5359 amdgpu_fence_driver_clear_job_fences(ring);
5360
5361 amdgpu_fence_driver_isr_toggle(adev, false0);
5362
5363 /* Since the job won't signal and we go for
5364 * another resubmit drop this parent pointer
5365 */
5366 dma_fence_put(s_job->s_fence->parent);
5367 s_job->s_fence->parent = NULL((void *)0);
5368
5369 /* set guilty */
5370 drm_sched_increase_karma(s_job);
5371 amdgpu_reset_prepare_hwcontext(adev, reset_context);
5372retry:
5373 /* do hw reset */
5374 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
5375 amdgpu_virt_fini_data_exchange(adev);
5376 r = amdgpu_device_reset_sriov(adev, false0);
5377 if (r)
5378 adev->asic_reset_res = r;
5379 } else {
5380 clear_bit(AMDGPU_SKIP_HW_RESET,
5381 &reset_context->flags);
5382 r = amdgpu_do_asic_reset(device_list_handle,
5383 reset_context);
5384 if (r && r == -EAGAIN35)
5385 goto retry;
5386 }
5387
5388 /*
5389 * add reset counter so that the following
5390 * resubmitted job could flush vmid
5391 */
5392 atomic_inc(&adev->gpu_reset_counter)__sync_fetch_and_add(&adev->gpu_reset_counter, 1);
5393 continue;
5394 }
5395
5396 /* got the hw fence, signal finished fence */
5397 atomic_dec(ring->sched.score)__sync_fetch_and_sub(ring->sched.score, 1);
5398 dma_fence_get(&s_job->s_fence->finished);
5399 dma_fence_signal(&s_job->s_fence->finished);
5400 dma_fence_put(&s_job->s_fence->finished);
5401
5402 /* remove node from list and free the job */
5403 spin_lock(&ring->sched.job_list_lock)mtx_enter(&ring->sched.job_list_lock);
5404 list_del_init(&s_job->list);
5405 spin_unlock(&ring->sched.job_list_lock)mtx_leave(&ring->sched.job_list_lock);
5406 ring->sched.ops->free_job(s_job);
5407 }
5408}
5409
5410static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5411{
5412 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5413
5414#if defined(CONFIG_DEBUG_FS)
5415 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
5416 cancel_work(&adev->reset_work);
5417#endif
5418
5419 if (adev->kfd.dev)
5420 cancel_work(&adev->kfd.reset_work);
5421
5422 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
5423 cancel_work(&adev->virt.flr_work);
5424
5425 if (con && adev->ras_enabled)
5426 cancel_work(&con->recovery_work);
5427
5428}
5429
5430
5431/**
5432 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5433 *
5434 * @adev: amdgpu_device pointer
5435 * @job: which job triggered the hang
5436 *
5437 * Attempt to reset the GPU if it has hung (all asics).
5438 * Attempt to do a soft reset or full reset and reinitialize the ASIC.
5439 * Returns 0 for success or an error on failure.
5440 */
5441
5442int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5443 struct amdgpu_job *job,
5444 struct amdgpu_reset_context *reset_context)
5445{
5446 struct list_head device_list, *device_list_handle = NULL((void *)0);
5447 bool_Bool job_signaled = false0;
5448 struct amdgpu_hive_info *hive = NULL((void *)0);
5449 struct amdgpu_device *tmp_adev = NULL((void *)0);
5450 int i, r = 0;
5451 bool_Bool need_emergency_restart = false0;
5452 bool_Bool audio_suspended = false0;
5453 int tmp_vram_lost_counter;
5454 bool_Bool gpu_reset_for_dev_remove = false0;
5455
5456 gpu_reset_for_dev_remove =
5457 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5458 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5459
5460 /*
5461 * Special case: RAS triggered and full reset isn't supported
5462 */
5463 need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5464
5465 /*
5466 * Flush RAM to disk so that after reboot
5467 * the user can read the log and see why the system rebooted.
5468 */
5469 if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
1
Assuming 'need_emergency_restart' is false
5470 amdgpu_ras_get_context(adev)->reboot) {
5471 DRM_WARN("Emergency reboot.")printk("\0014" "[" "drm" "] " "Emergency reboot.");
5472
5473#ifdef notyet
5474 ksys_sync_helper();
5475 emergency_restart();
5476#else
5477 panic("emergency_restart");
5478#endif
5479 }
5480
5481 dev_info(adev->dev, "GPU %s begin!\n",do { } while(0)
2
Loop condition is false. Exiting loop
5482 need_emergency_restart ? "jobs stop":"reset")do { } while(0);
5483
5484 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)))
3
Assuming the condition is true
4
Taking true branch
5485 hive = amdgpu_get_xgmi_hive(adev);
5
Value assigned to 'hive'
5486 if (hive)
6
Assuming 'hive' is null
7
Taking false branch
5487 mutex_lock(&hive->hive_lock)rw_enter_write(&hive->hive_lock);
5488
5489 reset_context->job = job;
5490 reset_context->hive = hive;
5491 /*
5492 * Build list of devices to reset.
5493 * In case we are in XGMI hive mode, resort the device list
5494 * to put adev in the 1st position.
5495 */
5496 INIT_LIST_HEAD(&device_list);
5497 if (!amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
8
Assuming the condition is true
9
Assuming field 'num_physical_nodes' is > 1
10
Taking true branch
5498 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->gmc.xgmi.head ) *__mptr = ((&hive->device_list)->
next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), gmc.xgmi.head) );}); &tmp_adev->
gmc.xgmi.head != (&hive->device_list); tmp_adev = ({ const
__typeof( ((__typeof(*tmp_adev) *)0)->gmc.xgmi.head ) *__mptr
= (tmp_adev->gmc.xgmi.head.next); (__typeof(*tmp_adev) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*tmp_adev), gmc
.xgmi.head) );}))
{
11
Access to field 'next' results in a dereference of a null pointer
5499 list_add_tail(&tmp_adev->reset_list, &device_list);
5500 if (gpu_reset_for_dev_remove && adev->shutdown)
5501 tmp_adev->shutdown = true1;
5502 }
5503 if (!list_is_first(&adev->reset_list, &device_list))
5504 list_rotate_to_front(&adev->reset_list, &device_list);
5505 device_list_handle = &device_list;
5506 } else {
5507 list_add_tail(&adev->reset_list, &device_list);
5508 device_list_handle = &device_list;
5509 }
5510
5511 /* We need to lock reset domain only once both for XGMI and single device */
5512 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,({ const __typeof( ((struct amdgpu_device *)0)->reset_list
) *__mptr = ((device_list_handle)->next); (struct amdgpu_device
*)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device
, reset_list) );})
5513 reset_list)({ const __typeof( ((struct amdgpu_device *)0)->reset_list
) *__mptr = ((device_list_handle)->next); (struct amdgpu_device
*)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device
, reset_list) );})
;
5514 amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5515
5516 /* block all schedulers and reset given job's ring */
5517 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5518
5519 amdgpu_device_set_mp1_state(tmp_adev);
5520
5521 /*
5522 * Try to put the audio codec into suspend state
5523 * before gpu reset started.
5524 *
5525 * Due to the power domain of the graphics device
5526 * is shared with AZ power domain. Without this,
5527 * we may change the audio hardware from behind
5528 * the audio driver's back. That will trigger
5529 * some audio codec errors.
5530 */
5531 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5532 audio_suspended = true1;
5533
5534 amdgpu_ras_set_error_query_ready(tmp_adev, false0);
5535
5536 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5537
5538 if (!amdgpu_sriov_vf(tmp_adev)((tmp_adev)->virt.caps & (1 << 2)))
5539 amdgpu_amdkfd_pre_reset(tmp_adev);
5540
5541 /*
5542 * Mark these ASICs to be reset as untracked first,
5543 * and add them back after the reset has completed.
5544 */
5545 amdgpu_unregister_gpu_instance(tmp_adev);
5546
5547 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true1);
5548
5549 /* disable ras on ALL IPs */
5550 if (!need_emergency_restart &&
5551 amdgpu_device_ip_need_full_reset(tmp_adev))
5552 amdgpu_ras_suspend(tmp_adev);
5553
5554 for (i = 0; i < AMDGPU_MAX_RINGS28; ++i) {
5555 struct amdgpu_ring *ring = tmp_adev->rings[i];
5556
5557 if (!ring || !ring->sched.thread)
5558 continue;
5559
5560 drm_sched_stop(&ring->sched, job ? &job->base : NULL((void *)0));
5561
5562 if (need_emergency_restart)
5563 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5564 }
5565 atomic_inc(&tmp_adev->gpu_reset_counter)__sync_fetch_and_add(&tmp_adev->gpu_reset_counter, 1);
5566 }
5567
5568 if (need_emergency_restart)
5569 goto skip_sched_resume;
5570
5571 /*
5572 * Must check guilty signal here since after this point all old
5573 * HW fences are force signaled.
5574 *
5575 * job->base holds a reference to parent fence
5576 */
5577 if (job && dma_fence_is_signaled(&job->hw_fence)) {
5578 job_signaled = true1;
5579 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset")do { } while(0);
5580 goto skip_hw_reset;
5581 }
5582
5583retry: /* Rest of adevs pre asic reset from XGMI hive. */
5584 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5585 if (gpu_reset_for_dev_remove) {
5586 /* Workaround for ASICs that need to disable SMC first */
5587 amdgpu_device_smu_fini_early(tmp_adev);
5588 }
5589 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5590 /*TODO Should we stop ?*/
5591 if (r) {
5592 dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",printf("drm:pid%d:%s *ERROR* " "GPU pre asic reset failed with err, %d for drm dev, %s "
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r, adev_to_drm
(tmp_adev)->unique)
5593 r, adev_to_drm(tmp_adev)->unique)printf("drm:pid%d:%s *ERROR* " "GPU pre asic reset failed with err, %d for drm dev, %s "
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , r, adev_to_drm
(tmp_adev)->unique)
;
5594 tmp_adev->asic_reset_res = r;
5595 }
5596
5597 /*
5598 * Drop all pending non-scheduler resets. Scheduler resets
5599 * were already dropped during drm_sched_stop.
5600 */
5601 amdgpu_device_stop_pending_resets(tmp_adev);
5602 }
5603
5604 tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter))({ typeof(*(&((adev)->vram_lost_counter))) __tmp = *(volatile
typeof(*(&((adev)->vram_lost_counter))) *)&(*(&
((adev)->vram_lost_counter))); membar_datadep_consumer(); __tmp
; })
;
5605 /* Actual ASIC resets if needed.*/
5606 /* Host driver will handle XGMI hive reset for SRIOV */
5607 if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) {
5608 r = amdgpu_device_reset_sriov(adev, job ? false0 : true1);
5609 if (r)
5610 adev->asic_reset_res = r;
5611
5612 /* Aldebaran supports ras in SRIOV, so need resume ras during reset */
5613 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)(((9) << 16) | ((4) << 8) | (2)))
5614 amdgpu_ras_resume(adev);
5615 } else {
5616 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5617 if (r && r == -EAGAIN35)
5618 goto retry;
5619
5620 if (!r && gpu_reset_for_dev_remove)
5621 goto recover_end;
5622 }
5623
5624skip_hw_reset:
5625
5626 /* Post ASIC reset for all devs. */
5627 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5628
5629 /*
5630 * Sometimes a later bad compute job can block a good gfx job, as the
5631 * gfx and compute rings share internal GC hardware. We add an additional
5632 * guilty-job recheck step to find the real guilty job: it synchronously
5633 * resubmits and waits for the first job to be signaled. If that wait
5634 * times out, we identify it as the real guilty job.
5635 */
5636 if (amdgpu_gpu_recovery == 2 &&
5637 !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)({ typeof(*(&adev->vram_lost_counter)) __tmp = *(volatile
typeof(*(&adev->vram_lost_counter)) *)&(*(&adev
->vram_lost_counter)); membar_datadep_consumer(); __tmp; }
)
))
5638 amdgpu_device_recheck_guilty_jobs(
5639 tmp_adev, device_list_handle, reset_context);
5640
5641 for (i = 0; i < AMDGPU_MAX_RINGS28; ++i) {
5642 struct amdgpu_ring *ring = tmp_adev->rings[i];
5643
5644 if (!ring || !ring->sched.thread)
5645 continue;
5646
5647 /* No point in resubmitting jobs if we didn't HW reset */
5648 if (!tmp_adev->asic_reset_res && !job_signaled)
5649 drm_sched_resubmit_jobs(&ring->sched);
5650
5651 drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
5652 }
5653
5654 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)(((11) << 16) | ((0) << 8) | (3)))
5655 amdgpu_mes_self_test(tmp_adev);
5656
5657 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5658 drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5659 }
5660
5661 if (tmp_adev->asic_reset_res)
5662 r = tmp_adev->asic_reset_res;
5663
5664 tmp_adev->asic_reset_res = 0;
5665
5666 if (r) {
5667 /* bad news, how to tell it to userspace ? */
5668 dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter))do { } while(0);
5669 amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5670 } else {
5671 dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter))do { } while(0);
5672 if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5673 DRM_WARN("smart shift update failed\n")printk("\0014" "[" "drm" "] " "smart shift update failed\n");
5674 }
5675 }
5676
5677skip_sched_resume:
5678 list_for_each_entry(tmp_adev, device_list_handle, reset_list)for (tmp_adev = ({ const __typeof( ((__typeof(*tmp_adev) *)0)
->reset_list ) *__mptr = ((device_list_handle)->next); (
__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tmp_adev), reset_list) );}); &tmp_adev->reset_list !=
(device_list_handle); tmp_adev = ({ const __typeof( ((__typeof
(*tmp_adev) *)0)->reset_list ) *__mptr = (tmp_adev->reset_list
.next); (__typeof(*tmp_adev) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*tmp_adev), reset_list) );}))
{
5679 /* unlock kfd: SRIOV would do it separately */
5680 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)((tmp_adev)->virt.caps & (1 << 2)))
5681 amdgpu_amdkfd_post_reset(tmp_adev);
5682
5683 /* kfd_post_reset will do nothing if the kfd device is not initialized;
5684 * bring up kfd here if it was not initialized before.
5685 */
5686 if (!adev->kfd.init_complete)
5687 amdgpu_amdkfd_device_init(adev);
5688
5689 if (audio_suspended)
5690 amdgpu_device_resume_display_audio(tmp_adev);
5691
5692 amdgpu_device_unset_mp1_state(tmp_adev);
5693 }
5694
5695recover_end:
5696 tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,({ const __typeof( ((struct amdgpu_device *)0)->reset_list
) *__mptr = ((device_list_handle)->next); (struct amdgpu_device
*)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device
, reset_list) );})
5697 reset_list)({ const __typeof( ((struct amdgpu_device *)0)->reset_list
) *__mptr = ((device_list_handle)->next); (struct amdgpu_device
*)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device
, reset_list) );})
;
5698 amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5699
5700 if (hive) {
5701 mutex_unlock(&hive->hive_lock)rw_exit_write(&hive->hive_lock);
5702 amdgpu_put_xgmi_hive(hive);
5703 }
5704
5705 if (r)
5706 dev_info(adev->dev, "GPU reset end with ret = %d\n", r)do { } while(0);
5707
5708 atomic_set(&adev->reset_domain->reset_res, r)({ typeof(*(&adev->reset_domain->reset_res)) __tmp =
((r)); *(volatile typeof(*(&adev->reset_domain->reset_res
)) *)&(*(&adev->reset_domain->reset_res)) = __tmp
; __tmp; })
;
5709 return r;
5710}
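
The path the analyzer reports ends in this function: amdgpu_get_xgmi_hive() may return NULL (step 6), yet when num_physical_nodes > 1 the XGMI branch iterates &hive->device_list (line 5498), which is the null dereference flagged at step 11. The sketch below is a minimal user-space illustration of the guarded form, assuming a hand-rolled list rather than the kernel's list_for_each_entry(); get_hive(), struct hive and iterate_hive() are stand-ins, and whether the real code should bail out or fall back to the single-device path is a driver decision this report does not settle.

/*
 * Illustrative sketch only: a hive pointer that may be NULL must be
 * checked before its device list is walked.
 */
#include <stdio.h>

struct node { struct node *next; const char *name; };
struct hive { struct node *device_list; };

/* stand-in for amdgpu_get_xgmi_hive(): may legitimately return NULL */
static struct hive *get_hive(int present)
{
    static struct node n2 = { NULL, "gpu1" };
    static struct node n1 = { &n2, "gpu0" };
    static struct hive h = { &n1 };

    return present ? &h : NULL;
}

static int iterate_hive(struct hive *hive)
{
    if (!hive)          /* the guard the flagged path is missing */
        return -1;
    for (struct node *n = hive->device_list; n; n = n->next)
        printf("reset %s\n", n->name);
    return 0;
}

int main(void)
{
    iterate_hive(get_hive(1));      /* walks both nodes */
    if (iterate_hive(get_hive(0)))  /* NULL hive handled, no crash */
        fprintf(stderr, "no hive, skipping XGMI list\n");
    return 0;
}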
5711
5712/**
5713 * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5714 *
5715 * @adev: amdgpu_device pointer
5716 *
5717 * Fetches and stores in the driver the PCIE capabilities (gen speed
5718 * and lanes) of the slot the device is in. Handles APUs and
5719 * virtualized environments where PCIE config space may not be available.
5720 */
5721static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5722{
5723 struct pci_dev *pdev;
5724 enum pci_bus_speed speed_cap, platform_speed_cap;
5725 enum pcie_link_width platform_link_width;
5726
5727 if (amdgpu_pcie_gen_cap)
5728 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5729
5730 if (amdgpu_pcie_lane_cap)
5731 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5732
5733 /* covers APUs as well */
5734 if (pci_is_root_bus(adev->pdev->bus)) {
5735 if (adev->pm.pcie_gen_mask == 0)
5736 adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5737 if (adev->pm.pcie_mlw_mask == 0)
5738 adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5739 return;
5740 }
5741
5742 if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5743 return;
5744
5745 pcie_bandwidth_available(adev->pdev, NULL,
5746 &platform_speed_cap, &platform_link_width);
5747
5748 if (adev->pm.pcie_gen_mask == 0) {
5749 /* asic caps */
5750 pdev = adev->pdev;
5751 speed_cap = pcie_get_speed_cap(pdev);
5752 if (speed_cap == PCI_SPEED_UNKNOWN) {
5753 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5754 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5755 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5756 } else {
5757 if (speed_cap == PCIE_SPEED_32_0GT)
5758 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5759 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5760 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5761 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5762 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5763 else if (speed_cap == PCIE_SPEED_16_0GT)
5764 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5765 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5766 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5767 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5768 else if (speed_cap == PCIE_SPEED_8_0GT)
5769 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5770 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5771 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5772 else if (speed_cap == PCIE_SPEED_5_0GT)
5773 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5774 CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5775 else
5776 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5777 }
5778 /* platform caps */
5779 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5780 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5781 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5782 } else {
5783 if (platform_speed_cap == PCIE_SPEED_32_0GT)
5784 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5785 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5786 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5787 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5788 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5789 else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5790 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5791 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5792 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5793 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5794 else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5795 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5796 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5797 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5798 else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5799 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5800 CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5801 else
5802 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5803
5804 }
5805 }
5806 if (adev->pm.pcie_mlw_mask == 0) {
5807 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5808 adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5809 } else {
5810 switch (platform_link_width) {
5811 case PCIE_LNK_X32:
5812 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5813 CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5814 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5815 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5816 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5817 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5818 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5819 break;
5820 case PCIE_LNK_X16:
5821 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5822 CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5823 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5824 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5825 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5826 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5827 break;
5828 case PCIE_LNK_X12:
5829 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5830 CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5831 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5832 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5833 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5834 break;
5835 case PCIE_LNK_X8:
5836 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5837 CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5838 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5839 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5840 break;
5841 case PCIE_LNK_X4:
5842 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5843 CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5844 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5845 break;
5846 case PCIE_LNK_X2:
5847 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5848 CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5849 break;
5850 case PCIE_LNK_X1:
5851 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5852 break;
5853 default:
5854 break;
5855 }
5856 }
5857 }
5858}
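
A minimal sketch of how the cumulative gen mask above is built, assuming made-up demo_* names; the bit values mirror the CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GENn constants whose expansions appear in the listing. A link capable of gen N also supports every lower gen, hence the accumulation.

#include <stdint.h>
#include <stdio.h>

enum demo_speed { DEMO_GEN1 = 1, DEMO_GEN2, DEMO_GEN3, DEMO_GEN4, DEMO_GEN5 };

#define DEMO_ASIC_GEN1 0x00000001u
#define DEMO_ASIC_GEN2 0x00000002u
#define DEMO_ASIC_GEN3 0x00000004u
#define DEMO_ASIC_GEN4 0x00000008u
#define DEMO_ASIC_GEN5 0x00000010u

/* Accumulate all bits up to and including the reported capability. */
static uint32_t demo_asic_gen_mask(enum demo_speed cap)
{
	uint32_t mask = DEMO_ASIC_GEN1;

	if (cap >= DEMO_GEN2) mask |= DEMO_ASIC_GEN2;
	if (cap >= DEMO_GEN3) mask |= DEMO_ASIC_GEN3;
	if (cap >= DEMO_GEN4) mask |= DEMO_ASIC_GEN4;
	if (cap >= DEMO_GEN5) mask |= DEMO_ASIC_GEN5;
	return mask;
}

int main(void)
{
	printf("gen4-capable asic mask: 0x%08x\n", demo_asic_gen_mask(DEMO_GEN4));
	return 0;
}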
5859
5860/**
5861 * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5862 *
5863 * @adev: amdgpu_device pointer
5864 * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5865 *
5866 * Return true if @peer_adev can access (DMA) @adev through the PCIe
5867 * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5868 * @peer_adev.
5869 */
5870bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5871 struct amdgpu_device *peer_adev)
5872{
5873#ifdef CONFIG_HSA_AMD_P2P
5874 uint64_t address_mask = peer_adev->dev->dma_mask ?
5875 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5876 resource_size_t aper_limit =
5877 adev->gmc.aper_base + adev->gmc.aper_size - 1;
5878 bool p2p_access =
5879 !adev->gmc.xgmi.connected_to_cpu &&
5880 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5881
5882 return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5883 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5884 !(adev->gmc.aper_base & address_mask ||
5885 aper_limit & address_mask));
5886#else
5887 return false;
5888#endif
5889}
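
The core of the check above is pure address arithmetic: the peer can reach the BAR only if both its base and its last byte fall under the peer's DMA mask. Below is a self-contained sketch of that arithmetic with a hypothetical demo_bar_within_dma_mask() helper working on plain integers instead of struct amdgpu_device.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool demo_bar_within_dma_mask(uint64_t aper_base, uint64_t aper_size,
    uint64_t dma_mask)
{
	uint64_t address_mask = ~dma_mask;	/* bits the peer cannot address */
	uint64_t aper_limit = aper_base + aper_size - 1;

	return aper_size != 0 &&
	    !(aper_base & address_mask) && !(aper_limit & address_mask);
}

int main(void)
{
	/* 256 MiB BAR placed above 4 GiB vs. a 32-bit and a 44-bit DMA mask */
	uint64_t base = 0x800000000ull, size = 256ull << 20;

	printf("32-bit peer: %d\n",
	    demo_bar_within_dma_mask(base, size, (1ull << 32) - 1));
	printf("44-bit peer: %d\n",
	    demo_bar_within_dma_mask(base, size, (1ull << 44) - 1));
	return 0;
}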
5890
5891int amdgpu_device_baco_enter(struct drm_device *dev)
5892{
5893 struct amdgpu_device *adev = drm_to_adev(dev);
5894 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5895
5896 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5897 return -ENOTSUPP;
5898
5899 if (ras && adev->ras_enabled &&
5900 adev->nbio.funcs->enable_doorbell_interrupt)
5901 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5902
5903 return amdgpu_dpm_baco_enter(adev);
5904}
5905
5906int amdgpu_device_baco_exit(struct drm_device *dev)
5907{
5908 struct amdgpu_device *adev = drm_to_adev(dev);
5909 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5910 int ret = 0;
5911
5912 if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
5913 return -ENOTSUPP;
5914
5915 ret = amdgpu_dpm_baco_exit(adev);
5916 if (ret)
5917 return ret;
5918
5919 if (ras && adev->ras_enabled &&
5920 adev->nbio.funcs->enable_doorbell_interrupt)
5921 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5922
5923 if (amdgpu_passthrough(adev) &&
5924 adev->nbio.funcs->clear_doorbell_interrupt)
5925 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5926
5927 return 0;
5928}
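
A small sketch of the pairing discipline these two entry points imply for callers (for example runtime PM): only a successful enter is ever matched with an exit. The demo_* names are hypothetical and this is not the driver's runtime-PM code.

#include <stdbool.h>
#include <stdio.h>

static int demo_baco_enter(void) { puts("enter BACO"); return 0; }
static int demo_baco_exit(void)  { puts("exit BACO");  return 0; }

struct demo_gpu {
	bool in_baco;	/* records whether enter succeeded, keeping exit balanced */
};

static int demo_runtime_suspend(struct demo_gpu *gpu)
{
	int r = demo_baco_enter();

	if (r == 0)
		gpu->in_baco = true;
	return r;
}

static int demo_runtime_resume(struct demo_gpu *gpu)
{
	if (!gpu->in_baco)
		return 0;	/* nothing to undo */
	gpu->in_baco = false;
	return demo_baco_exit();
}

int main(void)
{
	struct demo_gpu gpu = { false };

	if (demo_runtime_suspend(&gpu) == 0)
		demo_runtime_resume(&gpu);
	return 0;
}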
5929
5930/**
5931 * amdgpu_pci_error_detected - Called when a PCI error is detected.
5932 * @pdev: PCI device struct
5933 * @state: PCI channel state
5934 *
5935 * Description: Called when a PCI error is detected.
5936 *
5937 * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5938 */
5939pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5940{
5941 STUB();
5942 return 0;
5943#ifdef notyet
5944 struct drm_device *dev = pci_get_drvdata(pdev);
5945 struct amdgpu_device *adev = drm_to_adev(dev);
5946 int i;
5947
5948 DRM_INFO("PCI error: detected callback, state(%d)!!\n", state)printk("\0016" "[" "drm" "] " "PCI error: detected callback, state(%d)!!\n"
, state)
;
5949
5950 if (adev->gmc.xgmi.num_physical_nodes > 1) {
5951 DRM_WARN("No support for XGMI hive yet...")printk("\0014" "[" "drm" "] " "No support for XGMI hive yet..."
)
;
5952 return PCI_ERS_RESULT_DISCONNECT0;
5953 }
5954
5955 adev->pci_channel_state = state;
5956
5957 switch (state) {
5958 case pci_channel_io_normal:
5959 return PCI_ERS_RESULT_CAN_RECOVER;
5960 /* Fatal error, prepare for slot reset */
5961 case pci_channel_io_frozen:
5962 /*
5963 * Locking adev->reset_domain->sem will prevent any external access
5964 * to GPU during PCI error recovery
5965 */
5966 amdgpu_device_lock_reset_domain(adev->reset_domain);
5967 amdgpu_device_set_mp1_state(adev);
5968
5969 /*
5970 * Block any work scheduling as we do for regular GPU reset
5971 * for the duration of the recovery
5972 */
5973 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5974 struct amdgpu_ring *ring = adev->rings[i];
5975
5976 if (!ring || !ring->sched.thread)
5977 continue;
5978
5979 drm_sched_stop(&ring->sched, NULL);
5980 }
5981 atomic_inc(&adev->gpu_reset_counter);
5982 return PCI_ERS_RESULT_NEED_RESET;
5983 case pci_channel_io_perm_failure:
5984 /* Permanent error, prepare for device removal */
5985 return PCI_ERS_RESULT_DISCONNECT;
5986 }
5987
5988 return PCI_ERS_RESULT_NEED_RESET;
5989#endif
5990}
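
The disabled body above is essentially a three-way dispatch on the channel state. Below is a stand-alone sketch with hypothetical demo_* enums standing in for pci_channel_state_t/pci_ers_result_t, shown only to make the decision explicit.

#include <stdio.h>

enum demo_channel_state {
	DEMO_IO_NORMAL,		/* transient error, device still reachable */
	DEMO_IO_FROZEN,		/* fatal error, slot reset required */
	DEMO_IO_PERM_FAILURE,	/* device is gone */
};

enum demo_ers_result {
	DEMO_ERS_CAN_RECOVER,
	DEMO_ERS_NEED_RESET,
	DEMO_ERS_DISCONNECT,
};

static enum demo_ers_result demo_error_detected(enum demo_channel_state state)
{
	switch (state) {
	case DEMO_IO_NORMAL:
		return DEMO_ERS_CAN_RECOVER;
	case DEMO_IO_FROZEN:
		/* the real code locks the reset domain and stops the schedulers here */
		return DEMO_ERS_NEED_RESET;
	case DEMO_IO_PERM_FAILURE:
		return DEMO_ERS_DISCONNECT;
	}
	return DEMO_ERS_NEED_RESET;
}

int main(void)
{
	printf("frozen -> %d\n", demo_error_detected(DEMO_IO_FROZEN));
	return 0;
}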
5991
5992/**
5993 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5994 * @pdev: pointer to PCI device
5995 */
5996pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5997{
5998
5999 DRM_INFO("PCI error: mmio enabled callback!!\n")printk("\0016" "[" "drm" "] " "PCI error: mmio enabled callback!!\n"
)
;
6000
6001 /* TODO - dump whatever for debugging purposes */
6002
6003 /* This is called only if amdgpu_pci_error_detected returns
6004 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6005 * works, no need to reset slot.
6006 */
6007
6008 return PCI_ERS_RESULT_RECOVERED;
6009}
6010
6011/**
6012 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6013 * @pdev: PCI device struct
6014 *
6015 * Description: This routine is called by the pci error recovery
6016 * code after the PCI slot has been reset, just before we
6017 * should resume normal operations.
6018 */
6019pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6020{
6021 STUB();
6022 return PCI_ERS_RESULT_RECOVERED;
6023#ifdef notyet
6024 struct drm_device *dev = pci_get_drvdata(pdev);
6025 struct amdgpu_device *adev = drm_to_adev(dev);
6026 int r, i;
6027 struct amdgpu_reset_context reset_context;
6028 u32 memsize;
6029 struct list_head device_list;
6030
6031 DRM_INFO("PCI error: slot reset callback!!\n")printk("\0016" "[" "drm" "] " "PCI error: slot reset callback!!\n"
)
;
6032
6033 memset(&reset_context, 0, sizeof(reset_context))__builtin_memset((&reset_context), (0), (sizeof(reset_context
)))
;
6034
6035 INIT_LIST_HEAD(&device_list);
6036 list_add_tail(&adev->reset_list, &device_list);
6037
6038 /* wait for asic to come out of reset */
6039 drm_msleep(500);
6040
6041 /* Restore PCI confspace */
6042 amdgpu_device_load_pci_state(pdev);
6043
6044 /* confirm ASIC came out of reset */
6045 for (i = 0; i < adev->usec_timeout; i++) {
6046 memsize = amdgpu_asic_get_config_memsize(adev);
6047
6048 if (memsize != 0xffffffff)
6049 break;
6050 udelay(1);
6051 }
6052 if (memsize == 0xffffffff) {
6053 r = -ETIME;
6054 goto out;
6055 }
6056
6057 reset_context.method = AMD_RESET_METHOD_NONE;
6058 reset_context.reset_req_dev = adev;
6059 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
6060 set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
6061
6062 adev->no_hw_access = true;
6063 r = amdgpu_device_pre_asic_reset(adev, &reset_context);
6064 adev->no_hw_access = false;
6065 if (r)
6066 goto out;
6067
6068 r = amdgpu_do_asic_reset(&device_list, &reset_context);
6069
6070out:
6071 if (!r) {
6072 if (amdgpu_device_cache_pci_state(adev->pdev))
6073 pci_restore_state(adev->pdev);
6074
6075 DRM_INFO("PCIe error recovery succeeded\n")printk("\0016" "[" "drm" "] " "PCIe error recovery succeeded\n"
)
;
6076 } else {
6077 DRM_ERROR("PCIe error recovery failed, err:%d", r)__drm_err("PCIe error recovery failed, err:%d", r);
6078 amdgpu_device_unset_mp1_state(adev);
6079 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6080 }
6081
6082 return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6083#endif
6084}
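
The disabled body above waits for the ASIC to come back by polling a config-space-backed value until it stops reading as all ones, bounded by a timeout. A self-contained sketch of that poll loop, with hypothetical demo_* helpers in place of the register read and udelay:

#include <stdint.h>
#include <stdio.h>

static int demo_attempts;

static uint32_t demo_read_memsize(void)
{
	/* pretend the device answers again after a few polls */
	return (++demo_attempts < 5) ? 0xffffffffu : 0x4000u;
}

static void demo_udelay(unsigned int usec) { (void)usec; }

static int demo_wait_for_asic(unsigned int timeout_us)
{
	unsigned int i;
	uint32_t memsize = 0xffffffffu;

	for (i = 0; i < timeout_us; i++) {
		memsize = demo_read_memsize();
		if (memsize != 0xffffffffu)
			break;
		demo_udelay(1);
	}
	return (memsize == 0xffffffffu) ? -1 : 0;	/* -1 plays the role of -ETIME */
}

int main(void)
{
	printf("asic back: %s\n", demo_wait_for_asic(100) == 0 ? "yes" : "timeout");
	return 0;
}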
6085
6086/**
6087 * amdgpu_pci_resume() - resume normal ops after PCI reset
6088 * @pdev: pointer to PCI device
6089 *
6090 * Called when the error recovery driver tells us that it's
6091 * OK to resume normal operation.
6092 */
6093void amdgpu_pci_resume(struct pci_dev *pdev)
6094{
6095 STUB();
6096#ifdef notyet
6097 struct drm_device *dev = pci_get_drvdata(pdev);
6098 struct amdgpu_device *adev = drm_to_adev(dev);
6099 int i;
6100
6101
6102 DRM_INFO("PCI error: resume callback!!\n")printk("\0016" "[" "drm" "] " "PCI error: resume callback!!\n"
)
;
6103
6104 /* Only continue execution for the case of pci_channel_io_frozen */
6105 if (adev->pci_channel_state != pci_channel_io_frozen)
6106 return;
6107
6108 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
6109 struct amdgpu_ring *ring = adev->rings[i];
6110
6111 if (!ring || !ring->sched.thread)
6112 continue;
6113
6114
6115 drm_sched_resubmit_jobs(&ring->sched);
6116 drm_sched_start(&ring->sched, true);
6117 }
6118
6119 amdgpu_device_unset_mp1_state(adev);
6120 amdgpu_device_unlock_reset_domain(adev->reset_domain);
6121#endif
6122}
6123
6124bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6125{
6126 return false;
6127#ifdef notyet
6128 struct drm_device *dev = pci_get_drvdata(pdev);
6129 struct amdgpu_device *adev = drm_to_adev(dev);
6130 int r;
6131
6132 r = pci_save_state(pdev);
6133 if (!r) {
6134 kfree(adev->pci_state);
6135
6136 adev->pci_state = pci_store_saved_state(pdev);
6137
6138 if (!adev->pci_state) {
6139 DRM_ERROR("Failed to store PCI saved state")__drm_err("Failed to store PCI saved state");
6140 return false0;
6141 }
6142 } else {
6143 DRM_WARN("Failed to save PCI state, err:%d\n", r)printk("\0014" "[" "drm" "] " "Failed to save PCI state, err:%d\n"
, r)
;
6144 return false0;
6145 }
6146
6147 return true;
6148#endif
6149}
6150
6151bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6152{
6153 STUB();
6154 return false;
6155#ifdef notyet
6156 struct drm_device *dev = pci_get_drvdata(pdev);
6157 struct amdgpu_device *adev = drm_to_adev(dev);
6158 int r;
6159
6160 if (!adev->pci_state)
6161 return false;
6162
6163 r = pci_load_saved_state(pdev, adev->pci_state);
6164
6165 if (!r) {
6166 pci_restore_state(pdev);
6167 } else {
6168 DRM_WARN("Failed to load PCI state, err:%d\n", r)printk("\0014" "[" "drm" "] " "Failed to load PCI state, err:%d\n"
, r)
;
6169 return false0;
6170 }
6171
6172 return true;
6173#endif
6174}
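
On the Linux path these two helpers form a save/restore pair: cache a snapshot of config space in a driver-owned buffer, and only attempt a restore when that snapshot exists. A sketch of that pairing with a hypothetical demo_dev holding an in-memory "config space":

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEMO_CFG_WORDS 16

struct demo_dev {
	unsigned int cfg[DEMO_CFG_WORDS];	/* live "config space" */
	unsigned int *saved;			/* cached snapshot, or NULL */
};

static bool demo_cache_state(struct demo_dev *d)
{
	free(d->saved);
	d->saved = malloc(sizeof(d->cfg));
	if (!d->saved)
		return false;
	memcpy(d->saved, d->cfg, sizeof(d->cfg));
	return true;
}

static bool demo_load_state(struct demo_dev *d)
{
	if (!d->saved)		/* mirrors the !adev->pci_state early-out */
		return false;
	memcpy(d->cfg, d->saved, sizeof(d->cfg));
	return true;
}

int main(void)
{
	struct demo_dev d = { .cfg = { 0xabcd }, .saved = NULL };

	if (demo_cache_state(&d)) {
		d.cfg[0] = 0;				/* a "reset" clobbers it */
		printf("restored: %d\n", demo_load_state(&d));
	}
	free(d.saved);
	return 0;
}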
6175
6176void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6177 struct amdgpu_ring *ring)
6178{
6179#ifdef CONFIG_X86_64
6180 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6181 return;
6182#endif
6183 if (adev->gmc.xgmi.connected_to_cpu)
6184 return;
6185
6186 if (ring && ring->funcs->emit_hdp_flush)
6187 amdgpu_ring_emit_hdp_flush(ring);
6188 else
6189 amdgpu_asic_flush_hdp(adev, ring);
6190}
6191
6192void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6193 struct amdgpu_ring *ring)
6194{
6195#ifdef CONFIG_X86_64
6196 if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6197 return;
6198#endif
6199 if (adev->gmc.xgmi.connected_to_cpu)
6200 return;
6201
6202 amdgpu_asic_invalidate_hdp(adev, ring);
6203}
6204
6205int amdgpu_in_reset(struct amdgpu_device *adev)
6206{
6207 return atomic_read(&adev->reset_domain->in_gpu_reset);
6208 }
6209
6210/**
6211 * amdgpu_device_halt() - bring hardware to some kind of halt state
6212 *
6213 * @adev: amdgpu_device pointer
6214 *
6215 * Bring hardware to some kind of halt state so that no one can touch it
6216 * any more. It helps to maintain error context when an error occurs.
6217 * Compared to a simple hang, the system will stay stable, at least for SSH
6218 * access. Then it should be trivial to inspect the hardware state and
6219 * see what's going on. Implemented as follows:
6220 *
6221 * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.),
6222 * clears all CPU mappings to the device, disallows remappings through page faults
6223 * 2. amdgpu_irq_disable_all() disables all interrupts
6224 * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6225 * 4. set adev->no_hw_access to avoid potential crashes after step 5
6226 * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6227 * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6228 * flush any in flight DMA operations
6229 */
6230void amdgpu_device_halt(struct amdgpu_device *adev)
6231{
6232 struct pci_dev *pdev = adev->pdev;
6233 struct drm_device *ddev = adev_to_drm(adev);
6234
6235 drm_dev_unplug(ddev);
6236
6237 amdgpu_irq_disable_all(adev);
6238
6239 amdgpu_fence_driver_hw_fini(adev);
6240
6241 adev->no_hw_access = true;
6242
6243 amdgpu_device_unmap_mmio(adev);
6244
6245 pci_disable_device(pdev);
6246 pci_wait_for_pending_transaction(pdev);
6247}
6248
6249u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
6250 u32 reg)
6251{
6252 unsigned long flags, address, data;
6253 u32 r;
6254
6255 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6256 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6257
6258 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6259 WREG32(address, reg * 4);
6260 (void)RREG32(address);
6261 r = RREG32(data);
6262 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6263 return r;
6264}
6265
6266void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
6267 u32 reg, u32 v)
6268{
6269 unsigned long flags, address, data;
6270
6271 address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
6272 data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
6273
6274 spin_lock_irqsave(&adev->pcie_idx_lock, flags);
6275 WREG32(address, reg * 4);
6276 (void)RREG32(address);
6277 WREG32(data, v);
6278 (void)RREG32(data);
6279 spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
6280}
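
Both port helpers above use the classic index/data register pattern: select an offset through the index register, read it back to post the write, then go through the data register, all under one lock. A self-contained sketch with in-memory "registers" and hypothetical demo_* names (the locking is omitted):

#include <stdint.h>
#include <stdio.h>

static uint32_t demo_index;		/* stands in for the index register */
static uint32_t demo_regs[256];		/* backing store behind the data register */

static void demo_wreg(int which, uint32_t v)
{
	if (which == 0)
		demo_index = v;
	else
		demo_regs[demo_index % 256] = v;
}

static uint32_t demo_rreg(int which)
{
	return (which == 0) ? demo_index : demo_regs[demo_index % 256];
}

static uint32_t demo_port_read(uint32_t reg)
{
	demo_wreg(0, reg * 4);		/* select the register */
	(void)demo_rreg(0);		/* read back to flush the index write */
	return demo_rreg(1);		/* fetch through the data window */
}

static void demo_port_write(uint32_t reg, uint32_t v)
{
	demo_wreg(0, reg * 4);
	(void)demo_rreg(0);
	demo_wreg(1, v);
	(void)demo_rreg(1);		/* read back to post the data write */
}

int main(void)
{
	demo_port_write(3, 0xdeadbeef);
	printf("reg 3 = 0x%08x\n", demo_port_read(3));
	return 0;
}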
6281
6282/**
6283 * amdgpu_device_switch_gang - switch to a new gang
6284 * @adev: amdgpu_device pointer
6285 * @gang: the gang to switch to
6286 *
6287 * Try to switch to a new gang.
6288 * Returns: NULL if we switched to the new gang or a reference to the current
6289 * gang leader.
6290 */
6291struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6292 struct dma_fence *gang)
6293{
6294 struct dma_fence *old = NULL;
6295
6296 do {
6297 dma_fence_put(old);
6298 rcu_read_lock();
6299 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6300 rcu_read_unlock();
6301
6302 if (old == gang)
6303 break;
6304
6305 if (!dma_fence_is_signaled(old))
6306 return old;
6307
6308 } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6309 old, gang) != old);
6310
6311 dma_fence_put(old);
6312 return NULL;
6313}
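
The loop above is a compare-and-swap retry: keep the current gang leader if it is still running, otherwise try to install the new fence and retry if somebody else won the race. A sketch of the same shape using C11 atomics on a plain pointer (hypothetical demo_* names, no RCU or fence refcounting):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_fence {
	bool signaled;
};

static _Atomic(struct demo_fence *) demo_gang;

/* Returns NULL on success, or the still-running current leader. */
static struct demo_fence *demo_switch(struct demo_fence *gang)
{
	struct demo_fence *old;

	do {
		old = atomic_load(&demo_gang);
		if (old == gang)
			return NULL;		/* already installed */
		if (old && !old->signaled)
			return old;		/* caller must wait on this one */
		/* retry if someone else swapped the pointer in the meantime */
	} while (!atomic_compare_exchange_weak(&demo_gang, &old, gang));

	return NULL;
}

int main(void)
{
	struct demo_fence done = { .signaled = true };
	struct demo_fence next = { .signaled = false };

	atomic_store(&demo_gang, &done);
	printf("switch blocked by: %p\n", (void *)demo_switch(&next));
	return 0;
}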
6314
6315bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6316{
6317 switch (adev->asic_type) {
6318#ifdef CONFIG_DRM_AMDGPU_SI
6319 case CHIP_HAINAN:
6320#endif
6321 case CHIP_TOPAZ:
6322 /* chips with no display hardware */
6323 return false;
6324#ifdef CONFIG_DRM_AMDGPU_SI
6325 case CHIP_TAHITI:
6326 case CHIP_PITCAIRN:
6327 case CHIP_VERDE:
6328 case CHIP_OLAND:
6329#endif
6330#ifdef CONFIG_DRM_AMDGPU_CIK
6331 case CHIP_BONAIRE:
6332 case CHIP_HAWAII:
6333 case CHIP_KAVERI:
6334 case CHIP_KABINI:
6335 case CHIP_MULLINS:
6336#endif
6337 case CHIP_TONGA:
6338 case CHIP_FIJI:
6339 case CHIP_POLARIS10:
6340 case CHIP_POLARIS11:
6341 case CHIP_POLARIS12:
6342 case CHIP_VEGAM:
6343 case CHIP_CARRIZO:
6344 case CHIP_STONEY:
6345 /* chips with display hardware */
6346 return true;
6347 default:
6348 /* IP discovery */
6349 if (!adev->ip_versions[DCE_HWIP][0] ||
6350 (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6351 return false;
6352 return true;
6353 }
6354}