File: dev/pci/drm/amd/amdgpu/amdgpu_gfx.c
Warning: line 1008, column 12: Assigned value is garbage or undefined
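The warning points at the assignment on line 1008 (`info->fw = ucode_fw;`). `ucode_fw` and `fw_size` are declared on lines 880-881 without an initial value, and the `default:` case of the switch at line 1001 just breaks out without setting either of them, so an unhandled `ucode_id` reaches line 1008 with `ucode_fw` still uninitialized. One way to silence the report, shown here only as a sketch and not as the fix applied in this tree, is to bail out of the function from the `default:` case:

        default:
                /* hypothetical early return for an unhandled ucode_id */
                dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
                return;
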
1 | /*
2 |  * Copyright 2014 Advanced Micro Devices, Inc.
3 |  * Copyright 2008 Red Hat Inc.
4 |  * Copyright 2009 Jerome Glisse.
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a
7 |  * copy of this software and associated documentation files (the "Software"),
8 |  * to deal in the Software without restriction, including without limitation
9 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 |  * and/or sell copies of the Software, and to permit persons to whom the
11 |  * Software is furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in
14 |  * all copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 |  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 |  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 |  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 |  * OTHER DEALINGS IN THE SOFTWARE.
23 |  *
24 |  */
25 |
26 | #include <linux/firmware.h>
27 | #include "amdgpu.h"
28 | #include "amdgpu_gfx.h"
29 | #include "amdgpu_rlc.h"
30 | #include "amdgpu_ras.h"
31 |
32 | /* delay 0.1 second to enable gfx off feature */
33 | #define GFX_OFF_DELAY_ENABLE		msecs_to_jiffies(100)
34 |
35 | #define GFX_OFF_NO_DELAY		0
36 |
37 | /*
38 |  * GPU GFX IP block helper functions.
39 |  */
40 |
41 | int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
42 |                                 int pipe, int queue)
43 | {
44 |         int bit = 0;
45 |
46 |         bit += mec * adev->gfx.mec.num_pipe_per_mec
47 |                 * adev->gfx.mec.num_queue_per_pipe;
48 |         bit += pipe * adev->gfx.mec.num_queue_per_pipe;
49 |         bit += queue;
50 |
51 |         return bit;
52 | }
53 |
54 | void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
55 |                                         int *mec, int *pipe, int *queue)
56 | {
57 |         *queue = bit % adev->gfx.mec.num_queue_per_pipe;
58 |         *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
59 |                 % adev->gfx.mec.num_pipe_per_mec;
60 |         *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
61 |                 / adev->gfx.mec.num_pipe_per_mec;
62 |
63 | }
64 |
65 | bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
66 |                                      int mec, int pipe, int queue)
67 | {
68 |         return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
69 |                         adev->gfx.mec.queue_bitmap);
70 | }
71 |
72 | int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
73 |                                int me, int pipe, int queue)
74 | {
75 |         int bit = 0;
76 |
77 |         bit += me * adev->gfx.me.num_pipe_per_me
78 |                 * adev->gfx.me.num_queue_per_pipe;
79 |         bit += pipe * adev->gfx.me.num_queue_per_pipe;
80 |         bit += queue;
81 |
82 |         return bit;
83 | }
84 |
85 | void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
86 |                                 int *me, int *pipe, int *queue)
87 | {
88 |         *queue = bit % adev->gfx.me.num_queue_per_pipe;
89 |         *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
90 |                 % adev->gfx.me.num_pipe_per_me;
91 |         *me = (bit / adev->gfx.me.num_queue_per_pipe)
92 |                 / adev->gfx.me.num_pipe_per_me;
93 | }
94 |
95 | bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
96 |                                     int me, int pipe, int queue)
97 | {
98 |         return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
99 |                         adev->gfx.me.queue_bitmap);
100 | }
101 |
102 | /**
103 |  * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
104 |  *
105 |  * @mask: array in which the per-shader array disable masks will be stored
106 |  * @max_se: number of SEs
107 |  * @max_sh: number of SHs
108 |  *
109 |  * The bitmask of CUs to be disabled in the shader array determined by se and
110 |  * sh is stored in mask[se * max_sh + sh].
111 |  */
112 | void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
113 | {
114 |         unsigned se, sh, cu;
115 |         const char *p;
116 |
117 |         memset(mask, 0, sizeof(*mask) * max_se * max_sh);
118 |
119 |         if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
120 |                 return;
121 |
122 | #ifdef notyet
123 |         p = amdgpu_disable_cu;
124 |         for (;;) {
125 |                 char *next;
126 |                 int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
127 |                 if (ret < 3) {
128 |                         DRM_ERROR("amdgpu: could not parse disable_cu\n");
129 |                         return;
130 |                 }
131 |
132 |                 if (se < max_se && sh < max_sh && cu < 16) {
133 |                         DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
134 |                         mask[se * max_sh + sh] |= 1u << cu;
135 |                 } else {
136 |                         DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
137 |                                   se, sh, cu);
138 |                 }
139 |
140 |                 next = strchr(p, ',');
141 |                 if (!next)
142 |                         break;
143 |                 p = next + 1;
144 |         }
145 | #endif
146 | }
147 |
148 | static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
149 | {
150 |         return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
151 | }
152 |
153 | static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
154 | {
155 |         if (amdgpu_compute_multipipe != -1) {
156 |                 DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
157 |                          amdgpu_compute_multipipe);
158 |                 return amdgpu_compute_multipipe == 1;
159 |         }
160 |
161 |         if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
162 |                 return true;
163 |
164 |         /* FIXME: spreading the queues across pipes causes perf regressions
165 |          * on POLARIS11 compute workloads */
166 |         if (adev->asic_type == CHIP_POLARIS11)
167 |                 return false;
168 |
169 |         return adev->gfx.mec.num_mec > 1;
170 | }
171 |
172 | bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
173 |                                                 struct amdgpu_ring *ring)
174 | {
175 |         int queue = ring->queue;
176 |         int pipe = ring->pipe;
177 |
178 |         /* Policy: use pipe1 queue0 as high priority graphics queue if we
179 |          * have more than one gfx pipe.
180 |          */
181 |         if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
182 |             adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
183 |                 int me = ring->me;
184 |                 int bit;
185 |
186 |                 bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
187 |                 if (ring == &adev->gfx.gfx_ring[bit])
188 |                         return true;
189 |         }
190 |
191 |         return false;
192 | }
193 |
194 | bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
195 |                                                struct amdgpu_ring *ring)
196 | {
197 |         /* Policy: use 1st queue as high priority compute queue if we
198 |          * have more than one compute queue.
199 |          */
200 |         if (adev->gfx.num_compute_rings > 1 &&
201 |             ring == &adev->gfx.compute_ring[0])
202 |                 return true;
203 |
204 |         return false;
205 | }
206 |
207 | void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
208 | {
209 |         int i, queue, pipe;
210 |         bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
211 |         int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
212 |                                      adev->gfx.mec.num_queue_per_pipe,
213 |                                      adev->gfx.num_compute_rings);
214 |
215 |         if (multipipe_policy) {
216 |                 /* policy: make queues evenly cross all pipes on MEC1 only */
217 |                 for (i = 0; i < max_queues_per_mec; i++) {
218 |                         pipe = i % adev->gfx.mec.num_pipe_per_mec;
219 |                         queue = (i / adev->gfx.mec.num_pipe_per_mec) %
220 |                                 adev->gfx.mec.num_queue_per_pipe;
221 |
222 |                         set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
223 |                                 adev->gfx.mec.queue_bitmap);
224 |                 }
225 |         } else {
226 |                 /* policy: amdgpu owns all queues in the given pipe */
227 |                 for (i = 0; i < max_queues_per_mec; ++i)
228 |                         set_bit(i, adev->gfx.mec.queue_bitmap);
229 |         }
230 |
231 |         dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
232 | }
233 |
234 | void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
235 | {
236 |         int i, queue, pipe;
237 |         bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
238 |         int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
239 |                                 adev->gfx.me.num_queue_per_pipe;
240 |
241 |         if (multipipe_policy) {
242 |                 /* policy: amdgpu owns the first queue per pipe at this stage;
243 |                  * will extend to multiple queues per pipe later */
244 |                 for (i = 0; i < max_queues_per_me; i++) {
245 |                         pipe = i % adev->gfx.me.num_pipe_per_me;
246 |                         queue = (i / adev->gfx.me.num_pipe_per_me) %
247 |                                 adev->gfx.me.num_queue_per_pipe;
248 |
249 |                         set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
250 |                                 adev->gfx.me.queue_bitmap);
251 |                 }
252 |         } else {
253 |                 for (i = 0; i < max_queues_per_me; ++i)
254 |                         set_bit(i, adev->gfx.me.queue_bitmap);
255 |         }
256 |
257 |         /* update the number of active graphics rings */
258 |         adev->gfx.num_gfx_rings =
259 |                 bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
260 | }
261 |
262 | static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
263 |                                   struct amdgpu_ring *ring)
264 | {
265 |         int queue_bit;
266 |         int mec, pipe, queue;
267 |
268 |         queue_bit = adev->gfx.mec.num_mec
269 |                     * adev->gfx.mec.num_pipe_per_mec
270 |                     * adev->gfx.mec.num_queue_per_pipe;
271 |
272 |         while (--queue_bit >= 0) {
273 |                 if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
274 |                         continue;
275 |
276 |                 amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
277 |
278 |                 /*
279 |                  * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
280 |                  * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
281 |                  *    can only be issued on queue 0.
282 |                  */
283 |                 if ((mec == 1 && pipe > 1) || queue != 0)
284 |                         continue;
285 |
286 |                 ring->me = mec + 1;
287 |                 ring->pipe = pipe;
288 |                 ring->queue = queue;
289 |
290 |                 return 0;
291 |         }
292 |
293 |         dev_err(adev->dev, "Failed to find a queue for KIQ\n");
294 |         return -EINVAL;
295 | }
296 |
297 | int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
298 |                              struct amdgpu_ring *ring,
299 |                              struct amdgpu_irq_src *irq)
300 | {
301 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
302 |         int r = 0;
303 |
304 |         mtx_init(&kiq->ring_lock, IPL_TTY);
305 |
306 |         ring->adev = NULL;
307 |         ring->ring_obj = NULL;
308 |         ring->use_doorbell = true;
309 |         ring->doorbell_index = adev->doorbell_index.kiq;
310 |
311 |         r = amdgpu_gfx_kiq_acquire(adev, ring);
312 |         if (r)
313 |                 return r;
314 |
315 |         ring->eop_gpu_addr = kiq->eop_gpu_addr;
316 |         ring->no_scheduler = true;
317 |         snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
318 |         r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER,
319 |                              AMDGPU_RING_PRIO_DEFAULT, NULL);
320 |         if (r)
321 |                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
322 |
323 |         return r;
324 | }
325 |
326 | void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
327 | {
328 |         amdgpu_ring_fini(ring);
329 | }
330 |
331 | void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev)
332 | {
333 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
334 |
335 |         amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
336 | }
337 |
338 | int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
339 |                         unsigned hpd_size)
340 | {
341 |         int r;
342 |         u32 *hpd;
343 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
344 |
345 |         r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
346 |                                     AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
347 |                                     &kiq->eop_gpu_addr, (void **)&hpd);
348 |         if (r) {
349 |                 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
350 |                 return r;
351 |         }
352 |
353 |         memset(hpd, 0, hpd_size);
354 |
355 |         r = amdgpu_bo_reserve(kiq->eop_obj, true);
356 |         if (unlikely(r != 0))
357 |                 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
358 |         amdgpu_bo_kunmap(kiq->eop_obj);
359 |         amdgpu_bo_unreserve(kiq->eop_obj);
360 |
361 |         return 0;
362 | }
363 |
364 | /* create MQD for each compute/gfx queue */
365 | int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
366 |                            unsigned mqd_size)
367 | {
368 |         struct amdgpu_ring *ring = NULL;
369 |         int r, i;
370 |
371 |         /* create MQD for KIQ */
372 |         ring = &adev->gfx.kiq.ring;
373 |         if (!adev->enable_mes_kiq && !ring->mqd_obj) {
374 |                 /* originally the KIQ MQD was put in the GTT domain, but for SRIOV the VRAM domain is a must,
375 |                  * otherwise the hypervisor triggers a SAVE_VF failure after the driver is unloaded, which means the MQD is
376 |                  * deallocated and gart_unbind is called; to avoid that divergence we decide to use the VRAM domain for
377 |                  * the KIQ MQD no matter SRIOV or bare-metal
378 |                  */
379 |                 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
380 |                                             AMDGPU_GEM_DOMAIN_VRAM, &ring->mqd_obj,
381 |                                             &ring->mqd_gpu_addr, &ring->mqd_ptr);
382 |                 if (r) {
383 |                         dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
384 |                         return r;
385 |                 }
386 |
387 |                 /* prepare MQD backup */
388 |                 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL);
389 |                 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS])
390 |                         dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
391 |         }
392 |
393 |         if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
394 |                 /* create MQD for each KGQ */
395 |                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
396 |                         ring = &adev->gfx.gfx_ring[i];
397 |                         if (!ring->mqd_obj) {
398 |                                 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
399 |                                                             AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
400 |                                                             &ring->mqd_gpu_addr, &ring->mqd_ptr);
401 |                                 if (r) {
402 |                                         dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
403 |                                         return r;
404 |                                 }
405 |
406 |                                 /* prepare MQD backup */
407 |                                 adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
408 |                                 if (!adev->gfx.me.mqd_backup[i])
409 |                                         dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
410 |                         }
411 |                 }
412 |         }
413 |
414 |         /* create MQD for each KCQ */
415 |         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
416 |                 ring = &adev->gfx.compute_ring[i];
417 |                 if (!ring->mqd_obj) {
418 |                         r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
419 |                                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
420 |                                                     &ring->mqd_gpu_addr, &ring->mqd_ptr);
421 |                         if (r) {
422 |                                 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
423 |                                 return r;
424 |                         }
425 |
426 |                         /* prepare MQD backup */
427 |                         adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
428 |                         if (!adev->gfx.mec.mqd_backup[i])
429 |                                 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
430 |                 }
431 |         }
432 |
433 |         return 0;
434 | }
435 |
436 | void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
437 | {
438 |         struct amdgpu_ring *ring = NULL;
439 |         int i;
440 |
441 |         if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
442 |                 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
443 |                         ring = &adev->gfx.gfx_ring[i];
444 |                         kfree(adev->gfx.me.mqd_backup[i]);
445 |                         amdgpu_bo_free_kernel(&ring->mqd_obj,
446 |                                               &ring->mqd_gpu_addr,
447 |                                               &ring->mqd_ptr);
448 |                 }
449 |         }
450 |
451 |         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
452 |                 ring = &adev->gfx.compute_ring[i];
453 |                 kfree(adev->gfx.mec.mqd_backup[i]);
454 |                 amdgpu_bo_free_kernel(&ring->mqd_obj,
455 |                                       &ring->mqd_gpu_addr,
456 |                                       &ring->mqd_ptr);
457 |         }
458 |
459 |         ring = &adev->gfx.kiq.ring;
460 |         kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
461 |         amdgpu_bo_free_kernel(&ring->mqd_obj,
462 |                               &ring->mqd_gpu_addr,
463 |                               &ring->mqd_ptr);
464 | }
465 |
466 | int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
467 | {
468 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
469 |         struct amdgpu_ring *kiq_ring = &kiq->ring;
470 |         int i, r = 0;
471 |
472 |         if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
473 |                 return -EINVAL;
474 |
475 |         spin_lock(&adev->gfx.kiq.ring_lock);
476 |         if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
477 |                               adev->gfx.num_compute_rings)) {
478 |                 spin_unlock(&adev->gfx.kiq.ring_lock);
479 |                 return -ENOMEM;
480 |         }
481 |
482 |         for (i = 0; i < adev->gfx.num_compute_rings; i++)
483 |                 kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
484 |                                            RESET_QUEUES, 0, 0);
485 |
486 |         if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
487 |                 r = amdgpu_ring_test_helper(kiq_ring);
488 |         spin_unlock(&adev->gfx.kiq.ring_lock);
489 |
490 |         return r;
491 | }
492 |
493 | int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
494 |                                               int queue_bit)
495 | {
496 |         int mec, pipe, queue;
497 |         int set_resource_bit = 0;
498 |
499 |         amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
500 |
501 |         set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
502 |
503 |         return set_resource_bit;
504 | }
505 |
506 | int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
507 | {
508 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
509 |         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
510 |         uint64_t queue_mask = 0;
511 |         int r, i;
512 |
513 |         if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
514 |                 return -EINVAL;
515 |
516 |         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
517 |                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
518 |                         continue;
519 |
520 |                 /* This situation may be hit in the future if a new HW
521 |                  * generation exposes more than 64 queues. If so, the
522 |                  * definition of queue_mask needs updating */
523 |                 if (WARN_ON(i > (sizeof(queue_mask)*8))) {
524 |                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
525 |                         break;
526 |                 }
527 |
528 |                 queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
529 |         }
530 |
531 |         DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
532 |                  kiq_ring->queue);
533 |         spin_lock(&adev->gfx.kiq.ring_lock);
534 |         r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
535 |                               adev->gfx.num_compute_rings +
536 |                               kiq->pmf->set_resources_size);
537 |         if (r) {
538 |                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
539 |                 spin_unlock(&adev->gfx.kiq.ring_lock);
540 |                 return r;
541 |         }
542 |
543 |         if (adev->enable_mes)
544 |                 queue_mask = ~0ULL;
545 |
546 |         kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
547 |         for (i = 0; i < adev->gfx.num_compute_rings; i++)
548 |                 kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
549 |
550 |         r = amdgpu_ring_test_helper(kiq_ring);
551 |         spin_unlock(&adev->gfx.kiq.ring_lock);
552 |         if (r)
553 |                 DRM_ERROR("KCQ enable failed\n");
554 |
555 |         return r;
556 | }
557 |
558 | /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
559 |  *
560 |  * @adev: amdgpu_device pointer
561 |  * @enable: true: enable gfx off feature, false: disable gfx off feature
562 |  *
563 |  * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
564 |  * 2. other clients can send requests to disable the gfx off feature; those requests should be honored.
565 |  * 3. other clients can cancel their requests to disable the gfx off feature.
566 |  * 4. other clients should not send requests to enable the gfx off feature before disabling it.
567 |  */
568 |
569 | void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
570 | {
571 |         unsigned long delay = GFX_OFF_DELAY_ENABLE;
572 |
573 |         if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
574 |                 return;
575 |
576 |         mutex_lock(&adev->gfx.gfx_off_mutex);
577 |
578 |         if (enable) {
579 |                 /* If the count is already 0, it means there's an imbalance bug somewhere.
580 |                  * Note that the bug may be in a different caller than the one which triggers the
581 |                  * WARN_ON_ONCE.
582 |                  */
583 |                 if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
584 |                         goto unlock;
585 |
586 |                 adev->gfx.gfx_off_req_count--;
587 |
588 |                 if (adev->gfx.gfx_off_req_count == 0 &&
589 |                     !adev->gfx.gfx_off_state) {
590 |                         schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
591 |                                               delay);
592 |                 }
593 |         } else {
594 |                 if (adev->gfx.gfx_off_req_count == 0) {
595 |                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
596 |
597 |                         if (adev->gfx.gfx_off_state &&
598 |                             !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
599 |                                 adev->gfx.gfx_off_state = false;
600 |
601 |                                 if (adev->gfx.funcs->init_spm_golden) {
602 |                                         dev_dbg(adev->dev,
603 |                                                 "GFXOFF is disabled, re-init SPM golden settings\n");
604 |                                         amdgpu_gfx_init_spm_golden(adev);
605 |                                 }
606 |                         }
607 |                 }
608 |
609 |                 adev->gfx.gfx_off_req_count++;
610 |         }
611 |
612 | unlock:
613 |         mutex_unlock(&adev->gfx.gfx_off_mutex);
614 | }
615 |
616 | int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
617 | {
618 |         int r = 0;
619 |
620 |         mutex_lock(&adev->gfx.gfx_off_mutex);
621 |
622 |         r = amdgpu_dpm_set_residency_gfxoff(adev, value);
623 |
624 |         mutex_unlock(&adev->gfx.gfx_off_mutex);
625 |
626 |         return r;
627 | }
628 |
629 | int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
630 | {
631 |         int r = 0;
632 |
633 |         mutex_lock(&adev->gfx.gfx_off_mutex);
634 |
635 |         r = amdgpu_dpm_get_residency_gfxoff(adev, value);
636 |
637 |         mutex_unlock(&adev->gfx.gfx_off_mutex);
638 |
639 |         return r;
640 | }
641 |
642 | int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
643 | {
644 |         int r = 0;
645 |
646 |         mutex_lock(&adev->gfx.gfx_off_mutex);
647 |
648 |         r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
649 |
650 |         mutex_unlock(&adev->gfx.gfx_off_mutex);
651 |
652 |         return r;
653 | }
654 |
655 | int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
656 | {
657 |
658 |         int r = 0;
659 |
660 |         mutex_lock(&adev->gfx.gfx_off_mutex);
661 |
662 |         r = amdgpu_dpm_get_status_gfxoff(adev, value);
663 |
664 |         mutex_unlock(&adev->gfx.gfx_off_mutex);
665 |
666 |         return r;
667 | }
668 |
669 | int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
670 | {
671 |         int r;
672 |
673 |         if (amdgpu_ras_is_supported(adev, ras_block->block)) {
674 |                 if (!amdgpu_persistent_edc_harvesting_supported(adev))
675 |                         amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
676 |
677 |                 r = amdgpu_ras_block_late_init(adev, ras_block);
678 |                 if (r)
679 |                         return r;
680 |
681 |                 if (adev->gfx.cp_ecc_error_irq.funcs) {
682 |                         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
683 |                         if (r)
684 |                                 goto late_fini;
685 |                 }
686 |         } else {
687 |                 amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
688 |         }
689 |
690 |         return 0;
691 | late_fini:
692 |         amdgpu_ras_block_late_fini(adev, ras_block);
693 |         return r;
694 | }
695 |
696 | int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
697 |                                    void *err_data,
698 |                                    struct amdgpu_iv_entry *entry)
699 | {
700 |         /* TODO ue will trigger an interrupt.
701 |          *
702 |          * When "Full RAS" is enabled, the per-IP interrupt sources should
703 |          * be disabled and the driver should only look for the aggregated
704 |          * interrupt via sync flood
705 |          */
706 |         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
707 |                 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
708 |                 if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
709 |                     adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
710 |                         adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
711 |                 amdgpu_ras_reset_gpu(adev);
712 |         }
713 |         return AMDGPU_RAS_SUCCESS;
714 | }
715 |
716 | int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
717 |                                 struct amdgpu_irq_src *source,
718 |                                 struct amdgpu_iv_entry *entry)
719 | {
720 |         struct ras_common_if *ras_if = adev->gfx.ras_if;
721 |         struct ras_dispatch_if ih_data = {
722 |                 .entry = entry,
723 |         };
724 |
725 |         if (!ras_if)
726 |                 return 0;
727 |
728 |         ih_data.head = *ras_if;
729 |
730 |         DRM_ERROR("CP ECC ERROR IRQ\n");
731 |         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
732 |         return 0;
733 | }
734 |
735 | uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
736 | {
737 |         signed long r, cnt = 0;
738 |         unsigned long flags;
739 |         uint32_t seq, reg_val_offs = 0, value = 0;
740 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
741 |         struct amdgpu_ring *ring = &kiq->ring;
742 |
743 |         if (amdgpu_device_skip_hw_access(adev))
744 |                 return 0;
745 |
746 |         if (adev->mes.ring.sched.ready)
747 |                 return amdgpu_mes_rreg(adev, reg);
748 |
749 |         BUG_ON(!ring->funcs->emit_rreg);
750 |
751 |         spin_lock_irqsave(&kiq->ring_lock, flags);
752 |         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
753 |                 pr_err("critical bug! too many kiq readers\n");
754 |                 goto failed_unlock;
755 |         }
756 |         amdgpu_ring_alloc(ring, 32);
757 |         amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
758 |         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
759 |         if (r)
760 |                 goto failed_undo;
761 |
762 |         amdgpu_ring_commit(ring);
763 |         spin_unlock_irqrestore(&kiq->ring_lock, flags);
764 |
765 |         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
766 |
767 |         /* don't wait anymore for the gpu reset case because this way may
768 |          * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
769 |          * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
770 |          * never return if we keep waiting in virt_kiq_rreg, which causes
771 |          * gpu_recover() to hang there.
772 |          *
773 |          * also don't wait anymore for IRQ context
774 |          * */
775 |         if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
776 |                 goto failed_kiq_read;
777 |
778 |         might_sleep();
779 |         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
780 |                 drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
781 |                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
782 |         }
783 |
784 |         if (cnt > MAX_KIQ_REG_TRY)
785 |                 goto failed_kiq_read;
786 |
787 |         mb();
788 |         value = adev->wb.wb[reg_val_offs];
789 |         amdgpu_device_wb_free(adev, reg_val_offs);
790 |         return value;
791 |
792 | failed_undo:
793 |         amdgpu_ring_undo(ring);
794 | failed_unlock:
795 |         spin_unlock_irqrestore(&kiq->ring_lock, flags);
796 | failed_kiq_read:
797 |         if (reg_val_offs)
798 |                 amdgpu_device_wb_free(adev, reg_val_offs);
799 |         dev_err(adev->dev, "failed to read reg:%x\n", reg);
800 |         return ~0;
801 | }
802 |
803 | void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
804 | {
805 |         signed long r, cnt = 0;
806 |         unsigned long flags;
807 |         uint32_t seq;
808 |         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
809 |         struct amdgpu_ring *ring = &kiq->ring;
810 |
811 |         BUG_ON(!ring->funcs->emit_wreg);
812 |
813 |         if (amdgpu_device_skip_hw_access(adev))
814 |                 return;
815 |
816 |         if (adev->mes.ring.sched.ready) {
817 |                 amdgpu_mes_wreg(adev, reg, v);
818 |                 return;
819 |         }
820 |
821 |         spin_lock_irqsave(&kiq->ring_lock, flags);
822 |         amdgpu_ring_alloc(ring, 32);
823 |         amdgpu_ring_emit_wreg(ring, reg, v);
824 |         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
825 |         if (r)
826 |                 goto failed_undo;
827 |
828 |         amdgpu_ring_commit(ring);
829 |         spin_unlock_irqrestore(&kiq->ring_lock, flags);
830 |
831 |         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
832 |
833 |         /* don't wait anymore for the gpu reset case because this way may
834 |          * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
835 |          * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
836 |          * never return if we keep waiting in virt_kiq_rreg, which causes
837 |          * gpu_recover() to hang there.
838 |          *
839 |          * also don't wait anymore for IRQ context
840 |          * */
841 |         if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
842 |                 goto failed_kiq_write;
843 |
844 |         might_sleep();
845 |         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
846 |
847 |                 drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
848 |                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
849 |         }
850 |
851 |         if (cnt > MAX_KIQ_REG_TRY)
852 |                 goto failed_kiq_write;
853 |
854 |         return;
855 |
856 | failed_undo:
857 |         amdgpu_ring_undo(ring);
858 |         spin_unlock_irqrestore(&kiq->ring_lock, flags);
859 | failed_kiq_write:
860 |         dev_err(adev->dev, "failed to write reg:%x\n", reg);
861 | }
862 |
863 | int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
864 | {
865 |         if (amdgpu_num_kcq == -1) {
866 |                 return 8;
867 |         } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
868 |                 dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
869 |                 return 8;
870 |         }
871 |         return amdgpu_num_kcq;
872 | }
873 |
874 | void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
875 |                                   uint32_t ucode_id)
876 | {
877 |         const struct gfx_firmware_header_v1_0 *cp_hdr;
878 |         const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
879 |         struct amdgpu_firmware_info *info = NULL;
880 |         const struct firmware *ucode_fw;
881 |         unsigned int fw_size;
882 |
883 |         switch (ucode_id) {
884 |         case AMDGPU_UCODE_ID_CP_PFP:
885 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
886 |                         adev->gfx.pfp_fw->data;
887 |                 adev->gfx.pfp_fw_version =
888 |                         le32_to_cpu(cp_hdr->header.ucode_version);
889 |                 adev->gfx.pfp_feature_version =
890 |                         le32_to_cpu(cp_hdr->ucode_feature_version);
891 |                 ucode_fw = adev->gfx.pfp_fw;
892 |                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
893 |                 break;
894 |         case AMDGPU_UCODE_ID_CP_RS64_PFP:
895 |                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
896 |                         adev->gfx.pfp_fw->data;
897 |                 adev->gfx.pfp_fw_version =
898 |                         le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
899 |                 adev->gfx.pfp_feature_version =
900 |                         le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
901 |                 ucode_fw = adev->gfx.pfp_fw;
902 |                 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
903 |                 break;
904 |         case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
905 |         case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
906 |                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
907 |                         adev->gfx.pfp_fw->data;
908 |                 ucode_fw = adev->gfx.pfp_fw;
909 |                 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
910 |                 break;
911 |         case AMDGPU_UCODE_ID_CP_ME:
912 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
913 |                         adev->gfx.me_fw->data;
914 |                 adev->gfx.me_fw_version =
915 |                         le32_to_cpu(cp_hdr->header.ucode_version);
916 |                 adev->gfx.me_feature_version =
917 |                         le32_to_cpu(cp_hdr->ucode_feature_version);
918 |                 ucode_fw = adev->gfx.me_fw;
919 |                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
920 |                 break;
921 |         case AMDGPU_UCODE_ID_CP_RS64_ME:
922 |                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
923 |                         adev->gfx.me_fw->data;
924 |                 adev->gfx.me_fw_version =
925 |                         le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
926 |                 adev->gfx.me_feature_version =
927 |                         le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
928 |                 ucode_fw = adev->gfx.me_fw;
929 |                 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
930 |                 break;
931 |         case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
932 |         case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
933 |                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
934 |                         adev->gfx.me_fw->data;
935 |                 ucode_fw = adev->gfx.me_fw;
936 |                 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
937 |                 break;
938 |         case AMDGPU_UCODE_ID_CP_CE:
939 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
940 |                         adev->gfx.ce_fw->data;
941 |                 adev->gfx.ce_fw_version =
942 |                         le32_to_cpu(cp_hdr->header.ucode_version);
943 |                 adev->gfx.ce_feature_version =
944 |                         le32_to_cpu(cp_hdr->ucode_feature_version);
945 |                 ucode_fw = adev->gfx.ce_fw;
946 |                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
947 |                 break;
948 |         case AMDGPU_UCODE_ID_CP_MEC1:
949 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
950 |                         adev->gfx.mec_fw->data;
951 |                 adev->gfx.mec_fw_version =
952 |                         le32_to_cpu(cp_hdr->header.ucode_version);
953 |                 adev->gfx.mec_feature_version =
954 |                         le32_to_cpu(cp_hdr->ucode_feature_version);
955 |                 ucode_fw = adev->gfx.mec_fw;
956 |                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
957 |                           le32_to_cpu(cp_hdr->jt_size) * 4;
958 |                 break;
959 |         case AMDGPU_UCODE_ID_CP_MEC1_JT:
960 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
961 |                         adev->gfx.mec_fw->data;
962 |                 ucode_fw = adev->gfx.mec_fw;
963 |                 fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
964 |                 break;
965 |         case AMDGPU_UCODE_ID_CP_MEC2:
966 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
967 |                         adev->gfx.mec2_fw->data;
968 |                 adev->gfx.mec2_fw_version =
969 |                         le32_to_cpu(cp_hdr->header.ucode_version);
970 |                 adev->gfx.mec2_feature_version =
971 |                         le32_to_cpu(cp_hdr->ucode_feature_version);
972 |                 ucode_fw = adev->gfx.mec2_fw;
973 |                 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
974 |                           le32_to_cpu(cp_hdr->jt_size) * 4;
975 |                 break;
976 |         case AMDGPU_UCODE_ID_CP_MEC2_JT:
977 |                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
978 |                         adev->gfx.mec2_fw->data;
979 |                 ucode_fw = adev->gfx.mec2_fw;
980 |                 fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
981 |                 break;
982 |         case AMDGPU_UCODE_ID_CP_RS64_MEC:
983 |                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
984 |                         adev->gfx.mec_fw->data;
985 |                 adev->gfx.mec_fw_version =
986 |                         le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
987 |                 adev->gfx.mec_feature_version =
988 |                         le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
989 |                 ucode_fw = adev->gfx.mec_fw;
990 |                 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
991 |                 break;
992 |         case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
993 |         case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
994 |         case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
995 |         case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
996 |                 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
997 |                         adev->gfx.mec_fw->data;
998 |                 ucode_fw = adev->gfx.mec_fw;
999 |                 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1000 |                 break;
1001 |         default:
1002 |                 break;
1003 |         }
1004 |
1005 |         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1006 |                 info = &adev->firmware.ucode[ucode_id];
1007 |                 info->ucode_id = ucode_id;
1008 |                 info->fw = ucode_fw;
1009 |                 adev->firmware.fw_size += roundup2(fw_size, PAGE_SIZE);
1010 |         }
1011 | }
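
A side note on the gfxoff interface listed above (lines 558-614): amdgpu_gfx_off_ctrl() is reference-counted, so a caller that needs the GFX block powered up brackets its register access with a disable/enable pair, and the enable path only re-arms GFXOFF (after the 100 ms GFX_OFF_DELAY_ENABLE delay) once the request count drops back to zero. A minimal usage sketch, assuming a caller that touches GFX registers directly (the surrounding caller is illustrative, not taken from this file):

        /* keep GFX out of gfxoff while touching its registers */
        amdgpu_gfx_off_ctrl(adev, false);   /* increments gfx_off_req_count */
        /* ... read/write GFX registers ... */
        amdgpu_gfx_off_ctrl(adev, true);    /* decrements it; gfxoff re-arms ~100 ms after it reaches zero */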