File: | dev/pci/drm/amd/amdgpu/amdgpu_mes.c |
Warning: | line 1101, column 2: Value stored to 'r' is never read |
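Note: | The flagged store is in amdgpu_mes_ctx_alloc_meta_data(): 'r' receives the status of amdgpu_bo_create_kernel(), but the error path below it tests ctx_data->meta_data_obj instead of 'r', so the value is never read. See the inline marker at line 1101. |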
1 | /* |
2 | * Copyright 2019 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | */ |
23 | |
24 | #include <linux/firmware.h> |
25 | |
26 | #include "amdgpu_mes.h" |
27 | #include "amdgpu.h" |
28 | #include "soc15_common.h" |
29 | #include "amdgpu_mes_ctx.h" |
30 | |
31 | #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 |
32 | #define AMDGPU_ONE_DOORBELL_SIZE 8 |
33 | |
34 | int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) |
35 | { |
36 | return roundup(AMDGPU_ONE_DOORBELL_SIZE * |
37 | AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, |
38 | PAGE_SIZE); |
39 | } |
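Note: a worked example of the slice arithmetic, using the macro values above and the 4 KiB PAGE_SIZE visible in the expansions: roundup(8 * 1024, 4096) = 8192, so each process owns an 8 KiB (two-page) doorbell slice. |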
40 | |
41 | int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev, |
42 | unsigned int *doorbell_index) |
43 | { |
44 | int r = ida_simple_get(&adev->mes.doorbell_ida, 2, |
45 | adev->mes.max_doorbell_slices, |
46 | GFP_KERNEL); |
47 | if (r > 0) |
48 | *doorbell_index = r; |
49 | |
50 | return r; |
51 | } |
52 | |
53 | void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev, |
54 | unsigned int doorbell_index) |
55 | { |
56 | if (doorbell_index) |
57 | ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index); |
58 | } |
59 | |
60 | unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( |
61 | struct amdgpu_device *adev, |
62 | uint32_t doorbell_index, |
63 | unsigned int doorbell_id) |
64 | { |
65 | return ((doorbell_index * |
66 | amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) + |
67 | doorbell_id * 2); |
68 | } |
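Note: the returned value is a dword offset into the doorbell BAR: doorbell_index selects the process slice (bytes converted to dwords by the sizeof(u32) division), and each queue doorbell is 8 bytes wide per AMDGPU_ONE_DOORBELL_SIZE, hence the stride of 2 dwords per doorbell_id. |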
69 | |
70 | static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev, |
71 | struct amdgpu_mes_process *process, |
72 | int ip_type, uint64_t *doorbell_index) |
73 | { |
74 | unsigned int offset, found; |
75 | |
76 | if (ip_type == AMDGPU_RING_TYPE_SDMA) { |
77 | offset = adev->doorbell_index.sdma_engine[0]; |
78 | found = find_next_zero_bit(process->doorbell_bitmap, |
79 | AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, |
80 | offset); |
81 | } else { |
82 | found = find_first_zero_bit(process->doorbell_bitmap, |
83 | AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS); |
84 | } |
85 | |
86 | if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) { |
87 | DRM_WARN("No doorbell available\n"); |
88 | return -ENOSPC; |
89 | } |
90 | |
91 | set_bit(found, process->doorbell_bitmap); |
92 | |
93 | *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev, |
94 | process->doorbell_index, found); |
95 | |
96 | return 0; |
97 | } |
98 | |
99 | static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev, |
100 | struct amdgpu_mes_process *process, |
101 | uint32_t doorbell_index) |
102 | { |
103 | unsigned int old, doorbell_id; |
104 | |
105 | doorbell_id = doorbell_index - |
106 | (process->doorbell_index * |
107 | amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32); |
108 | doorbell_id /= 2; |
109 | |
110 | old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap); |
111 | WARN_ON(!old); |
112 | } |
113 | |
114 | static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) |
115 | { |
116 | size_t doorbell_start_offset; |
117 | size_t doorbell_aperture_size; |
118 | size_t doorbell_process_limit; |
119 | size_t aggregated_doorbell_start; |
120 | int i; |
121 | |
122 | aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); |
123 | aggregated_doorbell_start = |
124 | roundup(aggregated_doorbell_start, PAGE_SIZE); |
125 | |
126 | doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE; |
127 | doorbell_start_offset = |
128 | roundup(doorbell_start_offset, |
129 | amdgpu_mes_doorbell_process_slice(adev)); |
130 | |
131 | doorbell_aperture_size = adev->doorbell.size; |
132 | doorbell_aperture_size = |
133 | rounddown(doorbell_aperture_size, |
134 | amdgpu_mes_doorbell_process_slice(adev)); |
135 | |
136 | if (doorbell_aperture_size > doorbell_start_offset) |
137 | doorbell_process_limit = |
138 | (doorbell_aperture_size - doorbell_start_offset) / |
139 | amdgpu_mes_doorbell_process_slice(adev); |
140 | else |
141 | return -ENOSPC; |
142 | |
143 | adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); |
144 | adev->mes.max_doorbell_slices = doorbell_process_limit; |
145 | |
146 | /* allocate Qword range for aggregated doorbell */ |
147 | for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) |
148 | adev->mes.aggregated_doorbells[i] = |
149 | aggregated_doorbell_start / sizeof(u32) + i * 2; |
150 | |
151 | DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit)printk("\0016" "[" "drm" "] " "max_doorbell_slices=%zu\n", doorbell_process_limit ); |
152 | return 0; |
153 | } |
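Note: after this function the doorbell BAR is laid out as: the statically assigned doorbells (up to max_assignment), one page of aggregated doorbells (one qword per priority level), then max_doorbell_slices per-process slices beginning at doorbell_id_offset. |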
154 | |
155 | int amdgpu_mes_init(struct amdgpu_device *adev) |
156 | { |
157 | int i, r; |
158 | |
159 | adev->mes.adev = adev; |
160 | |
161 | idr_init(&adev->mes.pasid_idr); |
162 | idr_init(&adev->mes.gang_id_idr); |
163 | idr_init(&adev->mes.queue_id_idr); |
164 | ida_init(&adev->mes.doorbell_ida); |
165 | mtx_init(&adev->mes.queue_id_lock, IPL_TTY); |
166 | mtx_init(&adev->mes.ring_lock, IPL_TTY); |
167 | rw_init(&adev->mes.mutex_hidden, "agmes"); |
168 | |
169 | adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; |
170 | adev->mes.vmid_mask_mmhub = 0xffffff00; |
171 | adev->mes.vmid_mask_gfxhub = 0xffffff00; |
172 | |
173 | for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { |
174 | /* use only 1st MEC pipes */ |
175 | if (i >= 4) |
176 | continue; |
177 | adev->mes.compute_hqd_mask[i] = 0xc; |
178 | } |
179 | |
180 | for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) |
181 | adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; |
182 | |
183 | for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { |
184 | if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) |
185 | adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; |
186 | /* zero sdma_hqd_mask for non-existent engine */ |
187 | else if (adev->sdma.num_instances == 1) |
188 | adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc; |
189 | else |
190 | adev->mes.sdma_hqd_mask[i] = 0xfc; |
191 | } |
192 | |
193 | r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); |
194 | if (r) { |
195 | dev_err(adev->dev, |
196 | "(%d) ring trail_fence_offs wb alloc failed\n", r); |
197 | goto error_ids; |
198 | } |
199 | adev->mes.sch_ctx_gpu_addr = |
200 | adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); |
201 | adev->mes.sch_ctx_ptr = |
202 | (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; |
203 | |
204 | r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); |
205 | if (r) { |
206 | amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); |
207 | dev_err(adev->dev, |
208 | "(%d) query_status_fence_offs wb alloc failed\n", r); |
209 | goto error_ids; |
210 | } |
211 | adev->mes.query_status_fence_gpu_addr = |
212 | adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); |
213 | adev->mes.query_status_fence_ptr = |
214 | (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; |
215 | |
216 | r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); |
217 | if (r) { |
218 | amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); |
219 | amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); |
220 | dev_err(adev->dev, |
221 | "(%d) read_val_offs alloc failed\n", r); |
222 | goto error_ids; |
223 | } |
224 | adev->mes.read_val_gpu_addr = |
225 | adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); |
226 | adev->mes.read_val_ptr = |
227 | (uint32_t *)&adev->wb.wb[adev->mes.read_val_offs]; |
228 | |
229 | r = amdgpu_mes_doorbell_init(adev); |
230 | if (r) |
231 | goto error; |
232 | |
233 | return 0; |
234 | |
235 | error: |
236 | amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); |
237 | amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); |
238 | amdgpu_device_wb_free(adev, adev->mes.read_val_offs); |
239 | error_ids: |
240 | idr_destroy(&adev->mes.pasid_idr); |
241 | idr_destroy(&adev->mes.gang_id_idr); |
242 | idr_destroy(&adev->mes.queue_id_idr); |
243 | ida_destroy(&adev->mes.doorbell_ida); |
244 | mutex_destroy(&adev->mes.mutex_hidden); |
245 | return r; |
246 | } |
247 | |
248 | void amdgpu_mes_fini(struct amdgpu_device *adev) |
249 | { |
250 | amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); |
251 | amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); |
252 | amdgpu_device_wb_free(adev, adev->mes.read_val_offs); |
253 | |
254 | idr_destroy(&adev->mes.pasid_idr); |
255 | idr_destroy(&adev->mes.gang_id_idr); |
256 | idr_destroy(&adev->mes.queue_id_idr); |
257 | ida_destroy(&adev->mes.doorbell_ida); |
258 | mutex_destroy(&adev->mes.mutex_hidden); |
259 | } |
260 | |
261 | static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q) |
262 | { |
263 | amdgpu_bo_free_kernel(&q->mqd_obj, |
264 | &q->mqd_gpu_addr, |
265 | &q->mqd_cpu_ptr); |
266 | } |
267 | |
268 | int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, |
269 | struct amdgpu_vm *vm) |
270 | { |
271 | struct amdgpu_mes_process *process; |
272 | int r; |
273 | |
274 | /* allocate the mes process buffer */ |
275 | process = kzalloc(sizeof(struct amdgpu_mes_process), GFP_KERNEL); |
276 | if (!process) { |
277 | DRM_ERROR("no more memory to create mes process\n")__drm_err("no more memory to create mes process\n"); |
278 | return -ENOMEM12; |
279 | } |
280 | |
281 | process->doorbell_bitmap = |
282 | kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, |
283 | BITS_PER_BYTE), GFP_KERNEL); |
284 | if (!process->doorbell_bitmap) { |
285 | DRM_ERROR("failed to allocate doorbell bitmap\n")__drm_err("failed to allocate doorbell bitmap\n"); |
286 | kfree(process); |
287 | return -ENOMEM12; |
288 | } |
289 | |
290 | /* allocate the process context bo and map it */ |
291 | r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, |
292 | AMDGPU_GEM_DOMAIN_GTT, |
293 | &process->proc_ctx_bo, |
294 | &process->proc_ctx_gpu_addr, |
295 | &process->proc_ctx_cpu_ptr); |
296 | if (r) { |
297 | DRM_ERROR("failed to allocate process context bo\n")__drm_err("failed to allocate process context bo\n"); |
298 | goto clean_up_memory; |
299 | } |
300 | memset(process->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); |
301 | |
302 | /* |
303 | * Avoid taking any other locks under MES lock to avoid circular |
304 | * lock dependencies. |
305 | */ |
306 | amdgpu_mes_lock(&adev->mes); |
307 | |
308 | /* add the mes process to idr list */ |
309 | r = idr_alloc(&adev->mes.pasid_idr, process, pasid, pasid + 1, |
310 | GFP_KERNEL); |
311 | if (r < 0) { |
312 | DRM_ERROR("failed to lock pasid=%d\n", pasid)__drm_err("failed to lock pasid=%d\n", pasid); |
313 | goto clean_up_ctx; |
314 | } |
315 | |
316 | /* allocate the starting doorbell index of the process */ |
317 | r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); |
318 | if (r < 0) { |
319 | DRM_ERROR("failed to allocate doorbell for process\n")__drm_err("failed to allocate doorbell for process\n"); |
320 | goto clean_up_pasid; |
321 | } |
322 | |
323 | DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index)___drm_dbg(((void *)0), DRM_UT_CORE, "process doorbell index = %d\n" , process->doorbell_index); |
324 | |
325 | INIT_LIST_HEAD(&process->gang_list); |
326 | process->vm = vm; |
327 | process->pasid = pasid; |
328 | process->process_quantum = adev->mes.default_process_quantum; |
329 | process->pd_gpu_addr = amdgpu_bo_gpu_offset(vm->root.bo); |
330 | |
331 | amdgpu_mes_unlock(&adev->mes); |
332 | return 0; |
333 | |
334 | clean_up_pasid: |
335 | idr_remove(&adev->mes.pasid_idr, pasid); |
336 | amdgpu_mes_unlock(&adev->mes); |
337 | clean_up_ctx: |
338 | amdgpu_bo_free_kernel(&process->proc_ctx_bo, |
339 | &process->proc_ctx_gpu_addr, |
340 | &process->proc_ctx_cpu_ptr); |
341 | clean_up_memory: |
342 | kfree(process->doorbell_bitmap); |
343 | kfree(process); |
344 | return r; |
345 | } |
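Note: if idr_alloc() fails above, the goto to clean_up_ctx skips amdgpu_mes_unlock(); only the clean_up_pasid path releases the MES lock, so that error path appears to leave the lock held. |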
346 | |
347 | void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) |
348 | { |
349 | struct amdgpu_mes_process *process; |
350 | struct amdgpu_mes_gang *gang, *tmp1; |
351 | struct amdgpu_mes_queue *queue, *tmp2; |
352 | struct mes_remove_queue_input queue_input; |
353 | unsigned long flags; |
354 | int r; |
355 | |
356 | /* |
357 | * Avoid taking any other locks under MES lock to avoid circular |
358 | * lock dependencies. |
359 | */ |
360 | amdgpu_mes_lock(&adev->mes); |
361 | |
362 | process = idr_find(&adev->mes.pasid_idr, pasid); |
363 | if (!process) { |
364 | DRM_WARN("pasid %d doesn't exist\n", pasid)printk("\0014" "[" "drm" "] " "pasid %d doesn't exist\n", pasid ); |
365 | amdgpu_mes_unlock(&adev->mes); |
366 | return; |
367 | } |
368 | |
369 | /* Remove all queues from hardware */ |
370 | list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { |
371 | list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { |
372 | spin_lock_irqsave(&adev->mes.queue_id_lock, flags); |
373 | idr_remove(&adev->mes.queue_id_idr, queue->queue_id); |
374 | spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); |
375 | |
376 | queue_input.doorbell_offset = queue->doorbell_off; |
377 | queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; |
378 | |
379 | r = adev->mes.funcs->remove_hw_queue(&adev->mes, |
380 | &queue_input); |
381 | if (r) |
382 | DRM_WARN("failed to remove hardware queue\n")printk("\0014" "[" "drm" "] " "failed to remove hardware queue\n" ); |
383 | } |
384 | |
385 | idr_remove(&adev->mes.gang_id_idr, gang->gang_id); |
386 | } |
387 | |
388 | amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); |
389 | idr_remove(&adev->mes.pasid_idr, pasid); |
390 | amdgpu_mes_unlock(&adev->mes); |
391 | |
392 | /* free all memory allocated by the process */ |
393 | list_for_each_entry_safe(gang, tmp1, &process->gang_list, list) { |
394 | /* free all queues in the gang */ |
395 | list_for_each_entry_safe(queue, tmp2, &gang->queue_list, list) { |
396 | amdgpu_mes_queue_free_mqd(queue); |
397 | list_del(&queue->list); |
398 | kfree(queue); |
399 | } |
400 | amdgpu_bo_free_kernel(&gang->gang_ctx_bo, |
401 | &gang->gang_ctx_gpu_addr, |
402 | &gang->gang_ctx_cpu_ptr); |
403 | list_del(&gang->list); |
404 | kfree(gang); |
405 | |
406 | } |
407 | amdgpu_bo_free_kernel(&process->proc_ctx_bo, |
408 | &process->proc_ctx_gpu_addr, |
409 | &process->proc_ctx_cpu_ptr); |
410 | kfree(process->doorbell_bitmap); |
411 | kfree(process); |
412 | } |
413 | |
414 | int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid, |
415 | struct amdgpu_mes_gang_properties *gprops, |
416 | int *gang_id) |
417 | { |
418 | struct amdgpu_mes_process *process; |
419 | struct amdgpu_mes_gang *gang; |
420 | int r; |
421 | |
422 | /* allocate the mes gang buffer */ |
423 | gang = kzalloc(sizeof(struct amdgpu_mes_gang), GFP_KERNEL); |
424 | if (!gang) { |
425 | return -ENOMEM; |
426 | } |
427 | |
428 | /* allocate the gang context bo and map it to cpu space */ |
429 | r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_GANG_CTX_SIZE, PAGE_SIZE, |
430 | AMDGPU_GEM_DOMAIN_GTT, |
431 | &gang->gang_ctx_bo, |
432 | &gang->gang_ctx_gpu_addr, |
433 | &gang->gang_ctx_cpu_ptr); |
434 | if (r) { |
435 | DRM_ERROR("failed to allocate process context bo\n")__drm_err("failed to allocate process context bo\n"); |
436 | goto clean_up_mem; |
437 | } |
438 | memset(gang->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE); |
439 | |
440 | /* |
441 | * Avoid taking any other locks under MES lock to avoid circular |
442 | * lock dependencies. |
443 | */ |
444 | amdgpu_mes_lock(&adev->mes); |
445 | |
446 | process = idr_find(&adev->mes.pasid_idr, pasid); |
447 | if (!process) { |
448 | DRM_ERROR("pasid %d doesn't exist\n", pasid)__drm_err("pasid %d doesn't exist\n", pasid); |
449 | r = -EINVAL22; |
450 | goto clean_up_ctx; |
451 | } |
452 | |
453 | /* add the mes gang to idr list */ |
454 | r = idr_alloc(&adev->mes.gang_id_idr, gang, 1, 0, |
455 | GFP_KERNEL); |
456 | if (r < 0) { |
457 | DRM_ERROR("failed to allocate idr for gang\n")__drm_err("failed to allocate idr for gang\n"); |
458 | goto clean_up_ctx; |
459 | } |
460 | |
461 | gang->gang_id = r; |
462 | *gang_id = r; |
463 | |
464 | INIT_LIST_HEAD(&gang->queue_list); |
465 | gang->process = process; |
466 | gang->priority = gprops->priority; |
467 | gang->gang_quantum = gprops->gang_quantum ? |
468 | gprops->gang_quantum : adev->mes.default_gang_quantum; |
469 | gang->global_priority_level = gprops->global_priority_level; |
470 | gang->inprocess_gang_priority = gprops->inprocess_gang_priority; |
471 | list_add_tail(&gang->list, &process->gang_list); |
472 | |
473 | amdgpu_mes_unlock(&adev->mes); |
474 | return 0; |
475 | |
476 | clean_up_ctx: |
477 | amdgpu_mes_unlock(&adev->mes); |
478 | amdgpu_bo_free_kernel(&gang->gang_ctx_bo, |
479 | &gang->gang_ctx_gpu_addr, |
480 | &gang->gang_ctx_cpu_ptr); |
481 | clean_up_mem: |
482 | kfree(gang); |
483 | return r; |
484 | } |
485 | |
486 | int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id) |
487 | { |
488 | struct amdgpu_mes_gang *gang; |
489 | |
490 | /* |
491 | * Avoid taking any other locks under MES lock to avoid circular |
492 | * lock dependencies. |
493 | */ |
494 | amdgpu_mes_lock(&adev->mes); |
495 | |
496 | gang = idr_find(&adev->mes.gang_id_idr, gang_id); |
497 | if (!gang) { |
498 | DRM_ERROR("gang id %d doesn't exist\n", gang_id)__drm_err("gang id %d doesn't exist\n", gang_id); |
499 | amdgpu_mes_unlock(&adev->mes); |
500 | return -EINVAL; |
501 | } |
502 | |
503 | if (!list_empty(&gang->queue_list)) { |
504 | DRM_ERROR("queue list is not empty\n")__drm_err("queue list is not empty\n"); |
505 | amdgpu_mes_unlock(&adev->mes); |
506 | return -EBUSY; |
507 | } |
508 | |
509 | idr_remove(&adev->mes.gang_id_idr, gang->gang_id); |
510 | list_del(&gang->list); |
511 | amdgpu_mes_unlock(&adev->mes); |
512 | |
513 | amdgpu_bo_free_kernel(&gang->gang_ctx_bo, |
514 | &gang->gang_ctx_gpu_addr, |
515 | &gang->gang_ctx_cpu_ptr); |
516 | |
517 | kfree(gang); |
518 | |
519 | return 0; |
520 | } |
521 | |
522 | int amdgpu_mes_suspend(struct amdgpu_device *adev) |
523 | { |
524 | struct idr *idp; |
525 | struct amdgpu_mes_process *process; |
526 | struct amdgpu_mes_gang *gang; |
527 | struct mes_suspend_gang_input input; |
528 | int r, pasid; |
529 | |
530 | /* |
531 | * Avoid taking any other locks under MES lock to avoid circular |
532 | * lock dependencies. |
533 | */ |
534 | amdgpu_mes_lock(&adev->mes); |
535 | |
536 | idp = &adev->mes.pasid_idr; |
537 | |
538 | idr_for_each_entry(idp, process, pasid) { |
539 | list_for_each_entry(gang, &process->gang_list, list) { |
540 | r = adev->mes.funcs->suspend_gang(&adev->mes, &input); |
541 | if (r) |
542 | DRM_ERROR("failed to suspend pasid %d gangid %d",__drm_err("failed to suspend pasid %d gangid %d", pasid, gang ->gang_id) |
543 | pasid, gang->gang_id)__drm_err("failed to suspend pasid %d gangid %d", pasid, gang ->gang_id); |
544 | } |
545 | } |
546 | |
547 | amdgpu_mes_unlock(&adev->mes); |
548 | return 0; |
549 | } |
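Note: 'input' is handed to suspend_gang() without being filled in here; amdgpu_mes_resume() below shows the same pattern with mes_resume_gang_input. |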
550 | |
551 | int amdgpu_mes_resume(struct amdgpu_device *adev) |
552 | { |
553 | struct idr *idp; |
554 | struct amdgpu_mes_process *process; |
555 | struct amdgpu_mes_gang *gang; |
556 | struct mes_resume_gang_input input; |
557 | int r, pasid; |
558 | |
559 | /* |
560 | * Avoid taking any other locks under MES lock to avoid circular |
561 | * lock dependencies. |
562 | */ |
563 | amdgpu_mes_lock(&adev->mes); |
564 | |
565 | idp = &adev->mes.pasid_idr; |
566 | |
567 | idr_for_each_entry(idp, process, pasid) { |
568 | list_for_each_entry(gang, &process->gang_list, list) { |
569 | r = adev->mes.funcs->resume_gang(&adev->mes, &input); |
570 | if (r) |
571 | DRM_ERROR("failed to resume pasid %d gangid %d",__drm_err("failed to resume pasid %d gangid %d", pasid, gang-> gang_id) |
572 | pasid, gang->gang_id)__drm_err("failed to resume pasid %d gangid %d", pasid, gang-> gang_id); |
573 | } |
574 | } |
575 | |
576 | amdgpu_mes_unlock(&adev->mes); |
577 | return 0; |
578 | } |
579 | |
580 | static int amdgpu_mes_queue_alloc_mqd(struct amdgpu_device *adev, |
581 | struct amdgpu_mes_queue *q, |
582 | struct amdgpu_mes_queue_properties *p) |
583 | { |
584 | struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; |
585 | u32 mqd_size = mqd_mgr->mqd_size; |
586 | int r; |
587 | |
588 | r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, |
589 | AMDGPU_GEM_DOMAIN_GTT, |
590 | &q->mqd_obj, |
591 | &q->mqd_gpu_addr, &q->mqd_cpu_ptr); |
592 | if (r) { |
593 | dev_warn(adev->dev, "failed to create queue mqd bo (%d)", r)printf("drm:pid%d:%s *WARNING* " "failed to create queue mqd bo (%d)" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); |
594 | return r; |
595 | } |
596 | memset(q->mqd_cpu_ptr, 0, mqd_size); |
597 | |
598 | r = amdgpu_bo_reserve(q->mqd_obj, false); |
599 | if (unlikely(r != 0)) |
600 | goto clean_up; |
601 | |
602 | return 0; |
603 | |
604 | clean_up: |
605 | amdgpu_bo_free_kernel(&q->mqd_obj, |
606 | &q->mqd_gpu_addr, |
607 | &q->mqd_cpu_ptr); |
608 | return r; |
609 | } |
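Note: the MQD (memory queue descriptor) is the GTT buffer holding the hardware queue state; on success it is returned still reserved so amdgpu_mes_queue_init_mqd() below can populate it before unreserving. |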
610 | |
611 | static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, |
612 | struct amdgpu_mes_queue *q, |
613 | struct amdgpu_mes_queue_properties *p) |
614 | { |
615 | struct amdgpu_mqd *mqd_mgr = &adev->mqds[p->queue_type]; |
616 | struct amdgpu_mqd_prop mqd_prop = {0}; |
617 | |
618 | mqd_prop.mqd_gpu_addr = q->mqd_gpu_addr; |
619 | mqd_prop.hqd_base_gpu_addr = p->hqd_base_gpu_addr; |
620 | mqd_prop.rptr_gpu_addr = p->rptr_gpu_addr; |
621 | mqd_prop.wptr_gpu_addr = p->wptr_gpu_addr; |
622 | mqd_prop.queue_size = p->queue_size; |
623 | mqd_prop.use_doorbell = true; |
624 | mqd_prop.doorbell_index = p->doorbell_off; |
625 | mqd_prop.eop_gpu_addr = p->eop_gpu_addr; |
626 | mqd_prop.hqd_pipe_priority = p->hqd_pipe_priority; |
627 | mqd_prop.hqd_queue_priority = p->hqd_queue_priority; |
628 | mqd_prop.hqd_active = false; |
629 | |
630 | mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); |
631 | |
632 | amdgpu_bo_unreserve(q->mqd_obj); |
633 | } |
634 | |
635 | int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, |
636 | struct amdgpu_mes_queue_properties *qprops, |
637 | int *queue_id) |
638 | { |
639 | struct amdgpu_mes_queue *queue; |
640 | struct amdgpu_mes_gang *gang; |
641 | struct mes_add_queue_input queue_input; |
642 | unsigned long flags; |
643 | int r; |
644 | |
645 | /* allocate the mes queue buffer */ |
646 | queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); |
647 | if (!queue) { |
648 | DRM_ERROR("Failed to allocate memory for queue\n")__drm_err("Failed to allocate memory for queue\n"); |
649 | return -ENOMEM12; |
650 | } |
651 | |
652 | /* Allocate the queue mqd */ |
653 | r = amdgpu_mes_queue_alloc_mqd(adev, queue, qprops); |
654 | if (r) |
655 | goto clean_up_memory; |
656 | |
657 | /* |
658 | * Avoid taking any other locks under MES lock to avoid circular |
659 | * lock dependencies. |
660 | */ |
661 | amdgpu_mes_lock(&adev->mes); |
662 | |
663 | gang = idr_find(&adev->mes.gang_id_idr, gang_id); |
664 | if (!gang) { |
665 | DRM_ERROR("gang id %d doesn't exist\n", gang_id)__drm_err("gang id %d doesn't exist\n", gang_id); |
666 | r = -EINVAL22; |
667 | goto clean_up_mqd; |
668 | } |
669 | |
670 | /* add the mes gang to idr list */ |
671 | spin_lock_irqsave(&adev->mes.queue_id_lock, flags); |
672 | r = idr_alloc(&adev->mes.queue_id_idr, queue, 1, 0, |
673 | GFP_ATOMIC); |
674 | if (r < 0) { |
675 | spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); |
676 | goto clean_up_mqd; |
677 | } |
678 | spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); |
679 | *queue_id = queue->queue_id = r; |
680 | |
681 | /* allocate a doorbell index for the queue */ |
682 | r = amdgpu_mes_queue_doorbell_get(adev, gang->process, |
683 | qprops->queue_type, |
684 | &qprops->doorbell_off); |
685 | if (r) |
686 | goto clean_up_queue_id; |
687 | |
688 | /* initialize the queue mqd */ |
689 | amdgpu_mes_queue_init_mqd(adev, queue, qprops); |
690 | |
691 | /* add hw queue to mes */ |
692 | queue_input.process_id = gang->process->pasid; |
693 | |
694 | queue_input.page_table_base_addr = |
695 | adev->vm_manager.vram_base_offset + gang->process->pd_gpu_addr - |
696 | adev->gmc.vram_start; |
697 | |
698 | queue_input.process_va_start = 0; |
699 | queue_input.process_va_end = |
700 | (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; |
701 | queue_input.process_quantum = gang->process->process_quantum; |
702 | queue_input.process_context_addr = gang->process->proc_ctx_gpu_addr; |
703 | queue_input.gang_quantum = gang->gang_quantum; |
704 | queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; |
705 | queue_input.inprocess_gang_priority = gang->inprocess_gang_priority; |
706 | queue_input.gang_global_priority_level = gang->global_priority_level; |
707 | queue_input.doorbell_offset = qprops->doorbell_off; |
708 | queue_input.mqd_addr = queue->mqd_gpu_addr; |
709 | queue_input.wptr_addr = qprops->wptr_gpu_addr; |
710 | queue_input.wptr_mc_addr = qprops->wptr_mc_addr; |
711 | queue_input.queue_type = qprops->queue_type; |
712 | queue_input.paging = qprops->paging; |
713 | queue_input.is_kfd_process = 0; |
714 | |
715 | r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); |
716 | if (r) { |
717 | DRM_ERROR("failed to add hardware queue to MES, doorbell=0x%llx\n",__drm_err("failed to add hardware queue to MES, doorbell=0x%llx\n" , qprops->doorbell_off) |
718 | qprops->doorbell_off)__drm_err("failed to add hardware queue to MES, doorbell=0x%llx\n" , qprops->doorbell_off); |
719 | goto clean_up_doorbell; |
720 | } |
721 | |
722 | DRM_DEBUG("MES hw queue was added, pasid=%d, gang id=%d, "___drm_dbg(((void *)0), DRM_UT_CORE, "MES hw queue was added, pasid=%d, gang id=%d, " "queue type=%d, doorbell=0x%llx\n", gang->process->pasid , gang_id, qprops->queue_type, qprops->doorbell_off) |
723 | "queue type=%d, doorbell=0x%llx\n",___drm_dbg(((void *)0), DRM_UT_CORE, "MES hw queue was added, pasid=%d, gang id=%d, " "queue type=%d, doorbell=0x%llx\n", gang->process->pasid , gang_id, qprops->queue_type, qprops->doorbell_off) |
724 | gang->process->pasid, gang_id, qprops->queue_type,___drm_dbg(((void *)0), DRM_UT_CORE, "MES hw queue was added, pasid=%d, gang id=%d, " "queue type=%d, doorbell=0x%llx\n", gang->process->pasid , gang_id, qprops->queue_type, qprops->doorbell_off) |
725 | qprops->doorbell_off)___drm_dbg(((void *)0), DRM_UT_CORE, "MES hw queue was added, pasid=%d, gang id=%d, " "queue type=%d, doorbell=0x%llx\n", gang->process->pasid , gang_id, qprops->queue_type, qprops->doorbell_off); |
726 | |
727 | queue->ring = qprops->ring; |
728 | queue->doorbell_off = qprops->doorbell_off; |
729 | queue->wptr_gpu_addr = qprops->wptr_gpu_addr; |
730 | queue->queue_type = qprops->queue_type; |
731 | queue->paging = qprops->paging; |
732 | queue->gang = gang; |
733 | queue->ring->mqd_ptr = queue->mqd_cpu_ptr; |
734 | list_add_tail(&queue->list, &gang->queue_list); |
735 | |
736 | amdgpu_mes_unlock(&adev->mes); |
737 | return 0; |
738 | |
739 | clean_up_doorbell: |
740 | amdgpu_mes_queue_doorbell_free(adev, gang->process, |
741 | qprops->doorbell_off); |
742 | clean_up_queue_id: |
743 | spin_lock_irqsave(&adev->mes.queue_id_lock, flags); |
744 | idr_remove(&adev->mes.queue_id_idr, queue->queue_id); |
745 | spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); |
746 | clean_up_mqd: |
747 | amdgpu_mes_unlock(&adev->mes); |
748 | amdgpu_mes_queue_free_mqd(queue); |
749 | clean_up_memory: |
750 | kfree(queue); |
751 | return r; |
752 | } |
753 | |
754 | int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) |
755 | { |
756 | unsigned long flags; |
757 | struct amdgpu_mes_queue *queue; |
758 | struct amdgpu_mes_gang *gang; |
759 | struct mes_remove_queue_input queue_input; |
760 | int r; |
761 | |
762 | /* |
763 | * Avoid taking any other locks under MES lock to avoid circular |
764 | * lock dependencies. |
765 | */ |
766 | amdgpu_mes_lock(&adev->mes); |
767 | |
768 | /* remove the mes gang from idr list */ |
769 | spin_lock_irqsave(&adev->mes.queue_id_lock, flags); |
770 | |
771 | queue = idr_find(&adev->mes.queue_id_idr, queue_id); |
772 | if (!queue) { |
773 | spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); |
774 | amdgpu_mes_unlock(&adev->mes); |
775 | DRM_ERROR("queue id %d doesn't exist\n", queue_id)__drm_err("queue id %d doesn't exist\n", queue_id); |
776 | return -EINVAL22; |
777 | } |
778 | |
779 | idr_remove(&adev->mes.queue_id_idr, queue_id); |
780 | spin_unlock_irqrestore(&adev->mes.queue_id_lock, flags); |
781 | |
782 | DRM_DEBUG("try to remove queue, doorbell off = 0x%llx\n",___drm_dbg(((void *)0), DRM_UT_CORE, "try to remove queue, doorbell off = 0x%llx\n" , queue->doorbell_off) |
783 | queue->doorbell_off)___drm_dbg(((void *)0), DRM_UT_CORE, "try to remove queue, doorbell off = 0x%llx\n" , queue->doorbell_off); |
784 | |
785 | gang = queue->gang; |
786 | queue_input.doorbell_offset = queue->doorbell_off; |
787 | queue_input.gang_context_addr = gang->gang_ctx_gpu_addr; |
788 | |
789 | r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); |
790 | if (r) |
791 | DRM_ERROR("failed to remove hardware queue, queue id = %d\n",__drm_err("failed to remove hardware queue, queue id = %d\n", queue_id) |
792 | queue_id)__drm_err("failed to remove hardware queue, queue id = %d\n", queue_id); |
793 | |
794 | list_del(&queue->list); |
795 | amdgpu_mes_queue_doorbell_free(adev, gang->process, |
796 | queue->doorbell_off); |
797 | amdgpu_mes_unlock(&adev->mes); |
798 | |
799 | amdgpu_mes_queue_free_mqd(queue); |
800 | kfree(queue); |
801 | return 0; |
802 | } |
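Note: teardown mirrors amdgpu_mes_add_hw_queue() in reverse: the queue id is unpublished from the idr first, the MES firmware is asked to remove the hardware queue, and only then are the doorbell and MQD released. |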
803 | |
804 | int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, |
805 | struct amdgpu_ring *ring, |
806 | enum amdgpu_unmap_queues_action action, |
807 | u64 gpu_addr, u64 seq) |
808 | { |
809 | struct mes_unmap_legacy_queue_input queue_input; |
810 | int r; |
811 | |
812 | queue_input.action = action; |
813 | queue_input.queue_type = ring->funcs->type; |
814 | queue_input.doorbell_offset = ring->doorbell_index; |
815 | queue_input.pipe_id = ring->pipe; |
816 | queue_input.queue_id = ring->queue; |
817 | queue_input.trail_fence_addr = gpu_addr; |
818 | queue_input.trail_fence_data = seq; |
819 | |
820 | r = adev->mes.funcs->unmap_legacy_queue(&adev->mes, &queue_input); |
821 | if (r) |
822 | DRM_ERROR("failed to unmap legacy queue\n")__drm_err("failed to unmap legacy queue\n"); |
823 | |
824 | return r; |
825 | } |
826 | |
827 | uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) |
828 | { |
829 | struct mes_misc_op_input op_input; |
830 | int r, val = 0; |
831 | |
832 | op_input.op = MES_MISC_OP_READ_REG; |
833 | op_input.read_reg.reg_offset = reg; |
834 | op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr; |
835 | |
836 | if (!adev->mes.funcs->misc_op) { |
837 | DRM_ERROR("mes rreg is not supported!\n")__drm_err("mes rreg is not supported!\n"); |
838 | goto error; |
839 | } |
840 | |
841 | r = adev->mes.funcs->misc_op(&adev->mes, &op_input); |
842 | if (r) |
843 | DRM_ERROR("failed to read reg (0x%x)\n", reg)__drm_err("failed to read reg (0x%x)\n", reg); |
844 | else |
845 | val = *(adev->mes.read_val_ptr); |
846 | |
847 | error: |
848 | return val; |
849 | } |
850 | |
851 | int amdgpu_mes_wreg(struct amdgpu_device *adev, |
852 | uint32_t reg, uint32_t val) |
853 | { |
854 | struct mes_misc_op_input op_input; |
855 | int r; |
856 | |
857 | op_input.op = MES_MISC_OP_WRITE_REG; |
858 | op_input.write_reg.reg_offset = reg; |
859 | op_input.write_reg.reg_value = val; |
860 | |
861 | if (!adev->mes.funcs->misc_op) { |
862 | DRM_ERROR("mes wreg is not supported!\n")__drm_err("mes wreg is not supported!\n"); |
863 | r = -EINVAL22; |
864 | goto error; |
865 | } |
866 | |
867 | r = adev->mes.funcs->misc_op(&adev->mes, &op_input); |
868 | if (r) |
869 | DRM_ERROR("failed to write reg (0x%x)\n", reg)__drm_err("failed to write reg (0x%x)\n", reg); |
870 | |
871 | error: |
872 | return r; |
873 | } |
874 | |
875 | int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev, |
876 | uint32_t reg0, uint32_t reg1, |
877 | uint32_t ref, uint32_t mask) |
878 | { |
879 | struct mes_misc_op_input op_input; |
880 | int r; |
881 | |
882 | op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT; |
883 | op_input.wrm_reg.reg0 = reg0; |
884 | op_input.wrm_reg.reg1 = reg1; |
885 | op_input.wrm_reg.ref = ref; |
886 | op_input.wrm_reg.mask = mask; |
887 | |
888 | if (!adev->mes.funcs->misc_op) { |
889 | DRM_ERROR("mes reg_write_reg_wait is not supported!\n")__drm_err("mes reg_write_reg_wait is not supported!\n"); |
890 | r = -EINVAL22; |
891 | goto error; |
892 | } |
893 | |
894 | r = adev->mes.funcs->misc_op(&adev->mes, &op_input); |
895 | if (r) |
896 | DRM_ERROR("failed to reg_write_reg_wait\n")__drm_err("failed to reg_write_reg_wait\n"); |
897 | |
898 | error: |
899 | return r; |
900 | } |
901 | |
902 | int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg, |
903 | uint32_t val, uint32_t mask) |
904 | { |
905 | struct mes_misc_op_input op_input; |
906 | int r; |
907 | |
908 | op_input.op = MES_MISC_OP_WRM_REG_WAIT; |
909 | op_input.wrm_reg.reg0 = reg; |
910 | op_input.wrm_reg.ref = val; |
911 | op_input.wrm_reg.mask = mask; |
912 | |
913 | if (!adev->mes.funcs->misc_op) { |
914 | DRM_ERROR("mes reg wait is not supported!\n")__drm_err("mes reg wait is not supported!\n"); |
915 | r = -EINVAL22; |
916 | goto error; |
917 | } |
918 | |
919 | r = adev->mes.funcs->misc_op(&adev->mes, &op_input); |
920 | if (r) |
921 | DRM_ERROR("failed to reg_write_reg_wait\n")__drm_err("failed to reg_write_reg_wait\n"); |
922 | |
923 | error: |
924 | return r; |
925 | } |
926 | |
927 | static void |
928 | amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, |
929 | struct amdgpu_ring *ring, |
930 | struct amdgpu_mes_queue_properties *props) |
931 | { |
932 | props->queue_type = ring->funcs->type; |
933 | props->hqd_base_gpu_addr = ring->gpu_addr; |
934 | props->rptr_gpu_addr = ring->rptr_gpu_addr; |
935 | props->wptr_gpu_addr = ring->wptr_gpu_addr; |
936 | props->wptr_mc_addr = |
937 | ring->mes_ctx->meta_data_mc_addr + ring->wptr_offs; |
938 | props->queue_size = ring->ring_size; |
939 | props->eop_gpu_addr = ring->eop_gpu_addr; |
940 | props->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_NORMAL; |
941 | props->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM; |
942 | props->paging = false; |
943 | props->ring = ring; |
944 | } |
945 | |
946 | #define DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(_eng) \ |
947 | do { \ |
948 | if (id_offs < AMDGPU_MES_CTX_MAX_OFFS) \ |
949 | return offsetof(struct amdgpu_mes_ctx_meta_data, \ |
950 | _eng[ring->idx].slots[id_offs]); \ |
951 | else if (id_offs == AMDGPU_MES_CTX_RING_OFFS) \ |
952 | return offsetof(struct amdgpu_mes_ctx_meta_data, \ |
953 | _eng[ring->idx].ring); \ |
954 | else if (id_offs == AMDGPU_MES_CTX_IB_OFFS) \ |
955 | return offsetof(struct amdgpu_mes_ctx_meta_data, \ |
956 | _eng[ring->idx].ib); \ |
957 | else if (id_offs == AMDGPU_MES_CTX_PADDING_OFFS) \ |
958 | return offsetof(struct amdgpu_mes_ctx_meta_data, \ |
959 | _eng[ring->idx].padding); \ |
960 | } while(0) |
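Note: this macro expands inside amdgpu_mes_ctx_get_offs() below; for the given engine array it returns the byte offset of the requested context slot, ring buffer, IB, or padding region within struct amdgpu_mes_ctx_meta_data. |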
961 | |
962 | int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs) |
963 | { |
964 | switch (ring->funcs->type) { |
965 | case AMDGPU_RING_TYPE_GFX: |
966 | DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(gfx); |
967 | break; |
968 | case AMDGPU_RING_TYPE_COMPUTE: |
969 | DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(compute); |
970 | break; |
971 | case AMDGPU_RING_TYPE_SDMA: |
972 | DEFINE_AMDGPU_MES_CTX_GET_OFFS_ENG(sdma); |
973 | break; |
974 | default: |
975 | break; |
976 | } |
977 | |
978 | WARN_ON(1); |
979 | return -EINVAL; |
980 | } |
981 | |
982 | int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, |
983 | int queue_type, int idx, |
984 | struct amdgpu_mes_ctx_data *ctx_data, |
985 | struct amdgpu_ring **out) |
986 | { |
987 | struct amdgpu_ring *ring; |
988 | struct amdgpu_mes_gang *gang; |
989 | struct amdgpu_mes_queue_properties qprops = {0}; |
990 | int r, queue_id, pasid; |
991 | |
992 | /* |
993 | * Avoid taking any other locks under MES lock to avoid circular |
994 | * lock dependencies. |
995 | */ |
996 | amdgpu_mes_lock(&adev->mes); |
997 | gang = idr_find(&adev->mes.gang_id_idr, gang_id); |
998 | if (!gang) { |
999 | DRM_ERROR("gang id %d doesn't exist\n", gang_id)__drm_err("gang id %d doesn't exist\n", gang_id); |
1000 | amdgpu_mes_unlock(&adev->mes); |
1001 | return -EINVAL; |
1002 | } |
1003 | pasid = gang->process->pasid; |
1004 | |
1005 | ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL); |
1006 | if (!ring) { |
1007 | amdgpu_mes_unlock(&adev->mes); |
1008 | return -ENOMEM; |
1009 | } |
1010 | |
1011 | ring->ring_obj = NULL; |
1012 | ring->use_doorbell = true; |
1013 | ring->is_mes_queue = true; |
1014 | ring->mes_ctx = ctx_data; |
1015 | ring->idx = idx; |
1016 | ring->no_scheduler = true; |
1017 | |
1018 | if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { |
1019 | int offset = offsetof(struct amdgpu_mes_ctx_meta_data, |
1020 | compute[ring->idx].mec_hpd); |
1021 | ring->eop_gpu_addr = |
1022 | amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); |
1023 | } |
1024 | |
1025 | switch (queue_type) { |
1026 | case AMDGPU_RING_TYPE_GFX: |
1027 | ring->funcs = adev->gfx.gfx_ring[0].funcs; |
1028 | break; |
1029 | case AMDGPU_RING_TYPE_COMPUTE: |
1030 | ring->funcs = adev->gfx.compute_ring[0].funcs; |
1031 | break; |
1032 | case AMDGPU_RING_TYPE_SDMA: |
1033 | ring->funcs = adev->sdma.instance[0].ring.funcs; |
1034 | break; |
1035 | default: |
1036 | BUG(); |
1037 | } |
1038 | |
1039 | r = amdgpu_ring_init(adev, ring, 1024, NULL, 0, |
1040 | AMDGPU_RING_PRIO_DEFAULT, NULL); |
1041 | if (r) |
1042 | goto clean_up_memory; |
1043 | |
1044 | amdgpu_mes_ring_to_queue_props(adev, ring, &qprops); |
1045 | |
1046 | dma_fence_wait(gang->process->vm->last_update, false); |
1047 | dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false); |
1048 | amdgpu_mes_unlock(&adev->mes); |
1049 | |
1050 | r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id); |
1051 | if (r) |
1052 | goto clean_up_ring; |
1053 | |
1054 | ring->hw_queue_id = queue_id; |
1055 | ring->doorbell_index = qprops.doorbell_off; |
1056 | |
1057 | if (queue_type == AMDGPU_RING_TYPE_GFX) |
1058 | snprintf(ring->name, sizeof(ring->name), "gfx_%d.%d.%d", pasid, gang_id, queue_id); |
1059 | else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) |
1060 | snprintf(ring->name, sizeof(ring->name), "compute_%d.%d.%d", pasid, gang_id, |
1061 | queue_id); |
1062 | else if (queue_type == AMDGPU_RING_TYPE_SDMA) |
1063 | snprintf(ring->name, sizeof(ring->name), "sdma_%d.%d.%d", pasid, gang_id, |
1064 | queue_id); |
1065 | else |
1066 | BUG(); |
1067 | |
1068 | *out = ring; |
1069 | return 0; |
1070 | |
1071 | clean_up_ring: |
1072 | amdgpu_ring_fini(ring); |
1073 | clean_up_memory: |
1074 | kfree(ring); |
1075 | amdgpu_mes_unlock(&adev->mes); |
1076 | return r; |
1077 | } |
1078 | |
1079 | void amdgpu_mes_remove_ring(struct amdgpu_device *adev, |
1080 | struct amdgpu_ring *ring) |
1081 | { |
1082 | if (!ring) |
1083 | return; |
1084 | |
1085 | amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); |
1086 | amdgpu_ring_fini(ring); |
1087 | kfree(ring); |
1088 | } |
1089 | |
1090 | uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev, |
1091 | enum amdgpu_mes_priority_level prio) |
1092 | { |
1093 | return adev->mes.aggregated_doorbells[prio]; |
1094 | } |
1095 | |
1096 | int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev, |
1097 | struct amdgpu_mes_ctx_data *ctx_data) |
1098 | { |
1099 | int r; |
1100 | |
1101 | r = amdgpu_bo_create_kernel(adev, |
Value stored to 'r' is never read | |
1102 | sizeof(struct amdgpu_mes_ctx_meta_data), |
1103 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, |
1104 | &ctx_data->meta_data_obj, |
1105 | &ctx_data->meta_data_mc_addr, |
1106 | &ctx_data->meta_data_ptr); |
1107 | if (!ctx_data->meta_data_obj) |
1108 | return -ENOMEM; |
1109 | |
1110 | memset(ctx_data->meta_data_ptr, 0, |
1111 | sizeof(struct amdgpu_mes_ctx_meta_data)); |
1112 | |
1113 | return 0; |
1114 | } |
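Note: this function contains the dead store from the report header. A minimal fix (an untested sketch, not a committed change) would be to test the status instead of the pointer, e.g. "if (r) return r;" in place of the meta_data_obj check, or to drop 'r' entirely and keep the existing NULL check. |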
1115 | |
1116 | void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data) |
1117 | { |
1118 | if (ctx_data->meta_data_obj) |
1119 | amdgpu_bo_free_kernel(&ctx_data->meta_data_obj, |
1120 | &ctx_data->meta_data_mc_addr, |
1121 | &ctx_data->meta_data_ptr); |
1122 | } |
1123 | |
1124 | int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, |
1125 | struct amdgpu_vm *vm, |
1126 | struct amdgpu_mes_ctx_data *ctx_data) |
1127 | { |
1128 | struct amdgpu_bo_va *bo_va; |
1129 | struct ww_acquire_ctx ticket; |
1130 | struct list_head list; |
1131 | struct amdgpu_bo_list_entry pd; |
1132 | struct ttm_validate_buffer csa_tv; |
1133 | struct amdgpu_sync sync; |
1134 | int r; |
1135 | |
1136 | amdgpu_sync_create(&sync); |
1137 | INIT_LIST_HEAD(&list); |
1138 | INIT_LIST_HEAD(&csa_tv.head); |
1139 | |
1140 | csa_tv.bo = &ctx_data->meta_data_obj->tbo; |
1141 | csa_tv.num_shared = 1; |
1142 | |
1143 | list_add(&csa_tv.head, &list); |
1144 | amdgpu_vm_get_pd_bo(vm, &list, &pd); |
1145 | |
1146 | r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); |
1147 | if (r) { |
1148 | DRM_ERROR("failed to reserve meta data BO: err=%d\n", r)__drm_err("failed to reserve meta data BO: err=%d\n", r); |
1149 | return r; |
1150 | } |
1151 | |
1152 | bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); |
1153 | if (!bo_va) { |
1154 | ttm_eu_backoff_reservation(&ticket, &list); |
1155 | DRM_ERROR("failed to create bo_va for meta data BO\n")__drm_err("failed to create bo_va for meta data BO\n"); |
1156 | return -ENOMEM12; |
1157 | } |
1158 | |
1159 | r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, |
1160 | sizeof(struct amdgpu_mes_ctx_meta_data), |
1161 | AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | |
1162 | AMDGPU_PTE_EXECUTABLE); |
1163 | |
1164 | if (r) { |
1165 | DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r)__drm_err("failed to do bo_map on meta data, err=%d\n", r); |
1166 | goto error; |
1167 | } |
1168 | |
1169 | r = amdgpu_vm_bo_update(adev, bo_va, false); |
1170 | if (r) { |
1171 | DRM_ERROR("failed to do vm_bo_update on meta data\n")__drm_err("failed to do vm_bo_update on meta data\n"); |
1172 | goto error; |
1173 | } |
1174 | amdgpu_sync_fence(&sync, bo_va->last_pt_update); |
1175 | |
1176 | r = amdgpu_vm_update_pdes(adev, vm, false); |
1177 | if (r) { |
1178 | DRM_ERROR("failed to update pdes on meta data\n")__drm_err("failed to update pdes on meta data\n"); |
1179 | goto error; |
1180 | } |
1181 | amdgpu_sync_fence(&sync, vm->last_update); |
1182 | |
1183 | amdgpu_sync_wait(&sync, false); |
1184 | ttm_eu_backoff_reservation(&ticket, &list); |
1185 | |
1186 | amdgpu_sync_free(&sync); |
1187 | ctx_data->meta_data_va = bo_va; |
1188 | return 0; |
1189 | |
1190 | error: |
1191 | amdgpu_vm_bo_del(adev, bo_va); |
1192 | ttm_eu_backoff_reservation(&ticket, &list); |
1193 | amdgpu_sync_free(&sync); |
1194 | return r; |
1195 | } |
1196 | |
1197 | int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, |
1198 | struct amdgpu_mes_ctx_data *ctx_data) |
1199 | { |
1200 | struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; |
1201 | struct amdgpu_bo *bo = ctx_data->meta_data_obj; |
1202 | struct amdgpu_vm *vm = bo_va->base.vm; |
1203 | struct amdgpu_bo_list_entry vm_pd; |
1204 | struct list_head list, duplicates; |
1205 | struct dma_fence *fence = NULL; |
1206 | struct ttm_validate_buffer tv; |
1207 | struct ww_acquire_ctx ticket; |
1208 | long r = 0; |
1209 | |
1210 | INIT_LIST_HEAD(&list); |
1211 | INIT_LIST_HEAD(&duplicates); |
1212 | |
1213 | tv.bo = &bo->tbo; |
1214 | tv.num_shared = 2; |
1215 | list_add(&tv.head, &list); |
1216 | |
1217 | amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); |
1218 | |
1219 | r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); |
1220 | if (r) { |
1221 | dev_err(adev->dev, "leaking bo va because "printf("drm:pid%d:%s *ERROR* " "leaking bo va because " "we fail to reserve bo (%ld)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r) |
1222 | "we fail to reserve bo (%ld)\n", r)printf("drm:pid%d:%s *ERROR* " "leaking bo va because " "we fail to reserve bo (%ld)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); |
1223 | return r; |
1224 | } |
1225 | |
1226 | amdgpu_vm_bo_del(adev, bo_va); |
1227 | if (!amdgpu_vm_ready(vm)) |
1228 | goto out_unlock; |
1229 | |
1230 | r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence); |
1231 | if (r) |
1232 | goto out_unlock; |
1233 | if (fence) { |
1234 | amdgpu_bo_fence(bo, fence, true1); |
1235 | fence = NULL((void *)0); |
1236 | } |
1237 | |
1238 | r = amdgpu_vm_clear_freed(adev, vm, &fence); |
1239 | if (r || !fence) |
1240 | goto out_unlock; |
1241 | |
1242 | dma_fence_wait(fence, false0); |
1243 | amdgpu_bo_fence(bo, fence, true1); |
1244 | dma_fence_put(fence); |
1245 | |
1246 | out_unlock: |
1247 | if (unlikely(r < 0)__builtin_expect(!!(r < 0), 0)) |
1248 | dev_err(adev->dev, "failed to clear page tables (%ld)\n", r)printf("drm:pid%d:%s *ERROR* " "failed to clear page tables (%ld)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); |
1249 | ttm_eu_backoff_reservation(&ticket, &list); |
1250 | |
1251 | return r; |
1252 | } |
1253 | |
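
/*
 * Self-test helper: create one gang for the process and populate it with
 * num_queue rings of the given queue type.  Rings that were added
 * successfully are recorded in added_rings so the caller can still test
 * and tear them down if a later ring fails to be added.
 */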
static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
					  int pasid, int *gang_id,
					  int queue_type, int num_queue,
					  struct amdgpu_ring **added_rings,
					  struct amdgpu_mes_ctx_data *ctx_data)
{
	struct amdgpu_ring *ring;
	struct amdgpu_mes_gang_properties gprops = {0};
	int r, j;

	/* create a gang for the process */
	gprops.priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	gprops.gang_quantum = adev->mes.default_gang_quantum;
	gprops.inprocess_gang_priority = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	gprops.priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	gprops.global_priority_level = AMDGPU_MES_PRIORITY_LEVEL_NORMAL;

	r = amdgpu_mes_add_gang(adev, pasid, &gprops, gang_id);
	if (r) {
		DRM_ERROR("failed to add gang\n");
		return r;
	}

	/* create queues for the gang */
	for (j = 0; j < num_queue; j++) {
		r = amdgpu_mes_add_ring(adev, *gang_id, queue_type, j,
					ctx_data, &ring);
		if (r) {
			DRM_ERROR("failed to add ring\n");
			break;
		}

		DRM_INFO("ring %s was added\n", ring->name);
		added_rings[j] = ring;
	}

	return 0;
}
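
/*
 * Run the ring test and the IB test on every ring the self-test managed
 * to add, stopping at the first failure.
 */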
static int amdgpu_mes_test_queues(struct amdgpu_ring **added_rings)
{
	struct amdgpu_ring *ring;
	int i, r;

	for (i = 0; i < AMDGPU_MES_CTX_MAX_RINGS; i++) {
		ring = added_rings[i];
		if (!ring)
			continue;

		r = amdgpu_ring_test_ring(ring);
		if (r) {
			DRM_DEV_ERROR(ring->adev->dev,
				      "ring %s test failed (%d)\n",
				      ring->name, r);
			return r;
		} else
			DRM_INFO("ring %s test pass\n", ring->name);

		r = amdgpu_ring_test_ib(ring, 1000 * 10);
		if (r) {
			DRM_DEV_ERROR(ring->adev->dev,
				      "ring %s ib test failed (%d)\n",
				      ring->name, r);
			return r;
		} else
			DRM_INFO("ring %s ib test pass\n", ring->name);
	}

	return 0;
}
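
/*
 * MES self-test: allocate a PASID and a scratch VM, map the context
 * meta-data BO, create a test process with one gang per queue type
 * (GFX, compute, SDMA), run ring/IB tests on the resulting rings, and
 * tear everything down again.  Always returns 0; failures are reported
 * only through the kernel log.
 */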
int amdgpu_mes_self_test(struct amdgpu_device *adev)
{
	struct amdgpu_vm *vm = NULL;
	struct amdgpu_mes_ctx_data ctx_data = {0};
	struct amdgpu_ring *added_rings[AMDGPU_MES_CTX_MAX_RINGS] = { NULL };
	int gang_ids[3] = {0};
	int queue_types[][2] = { { AMDGPU_RING_TYPE_GFX, 1 },
				 { AMDGPU_RING_TYPE_COMPUTE, 1 },
				 { AMDGPU_RING_TYPE_SDMA, 1} };
	int i, r, pasid, k = 0;

	pasid = amdgpu_pasid_alloc(16);
	if (pasid < 0) {
		dev_warn(adev->dev, "No more PASIDs available!");
		pasid = 0;
	}

	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
	if (!vm) {
		r = -ENOMEM;
		goto error_pasid;
	}

	r = amdgpu_vm_init(adev, vm);
	if (r) {
		DRM_ERROR("failed to initialize vm\n");
		goto error_pasid;
	}

	r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
	if (r) {
		DRM_ERROR("failed to alloc ctx meta data\n");
		goto error_fini;
	}

	ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
	r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data);
	if (r) {
		DRM_ERROR("failed to map ctx meta data\n");
		goto error_vm;
	}

	r = amdgpu_mes_create_process(adev, pasid, vm);
	if (r) {
		DRM_ERROR("failed to create MES process\n");
		goto error_vm;
	}

	for (i = 0; i < ARRAY_SIZE(queue_types); i++) {
		/* On GFX v10.3, the firmware cannot map SDMA queues yet. */
		if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) &&
		    adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) &&
		    queue_types[i][0] == AMDGPU_RING_TYPE_SDMA)
			continue;

		r = amdgpu_mes_test_create_gang_and_queues(adev, pasid,
							   &gang_ids[i],
							   queue_types[i][0],
							   queue_types[i][1],
							   &added_rings[k],
							   &ctx_data);
		if (r)
			goto error_queues;

		k += queue_types[i][1];
	}

	/* start ring test and ib test for MES queues */
	amdgpu_mes_test_queues(added_rings);

error_queues:
	/* remove all queues */
	for (i = 0; i < ARRAY_SIZE(added_rings); i++) {
		if (!added_rings[i])
			continue;
		amdgpu_mes_remove_ring(adev, added_rings[i]);
	}

	for (i = 0; i < ARRAY_SIZE(gang_ids); i++) {
		if (!gang_ids[i])
			continue;
		amdgpu_mes_remove_gang(adev, gang_ids[i]);
	}

	amdgpu_mes_destroy_process(adev, pasid);

error_vm:
	amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);

error_fini:
	amdgpu_vm_fini(adev, vm);

error_pasid:
	if (pasid)
		amdgpu_pasid_free(pasid);

	amdgpu_mes_ctx_free_meta_data(&ctx_data);
	kfree(vm);
	return 0;
}
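
/*
 * Request the MES firmware image for the given pipe and record the ucode
 * and data start addresses from its header.  On GC 11+ the scheduler pipe
 * uses the "<prefix>_mes_2.bin" image and the other pipe "<prefix>_mes1.bin";
 * if "_mes_2" is missing, the scheduler pipe falls back to the legacy
 * "<prefix>_mes.bin" name.  With PSP front-door loading, the ucode and data
 * segments are also registered in the PSP ucode list.
 */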
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
{
	const struct mes_firmware_header_v1_0 *mes_hdr;
	struct amdgpu_firmware_info *info;
	char ucode_prefix[30];
	char fw_name[40];
	bool need_retry = false;
	int r;

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
				       sizeof(ucode_prefix));
	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
			 ucode_prefix,
			 pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");
		need_retry = true;
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
			 ucode_prefix,
			 pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1");
	}

	r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], fw_name);
	if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes.bin",
			 ucode_prefix);
		DRM_INFO("try to fall back to %s\n", fw_name);
		r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe],
					 fw_name);
	}

	if (r)
		goto out;

	mes_hdr = (const struct mes_firmware_header_v1_0 *)
		adev->mes.fw[pipe]->data;
	adev->mes.uc_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
	adev->mes.data_start_addr[pipe] =
		le32_to_cpu(mes_hdr->mes_data_start_addr_lo) |
		((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		int ucode, ucode_data;

		if (pipe == AMDGPU_MES_SCHED_PIPE) {
			ucode = AMDGPU_UCODE_ID_CP_MES;
			ucode_data = AMDGPU_UCODE_ID_CP_MES_DATA;
		} else {
			ucode = AMDGPU_UCODE_ID_CP_MES1;
			ucode_data = AMDGPU_UCODE_ID_CP_MES1_DATA;
		}

		info = &adev->firmware.ucode[ucode];
		info->ucode_id = ucode;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			roundup2(le32_to_cpu(mes_hdr->mes_ucode_size_bytes),
				 PAGE_SIZE);

		info = &adev->firmware.ucode[ucode_data];
		info->ucode_id = ucode_data;
		info->fw = adev->mes.fw[pipe];
		adev->firmware.fw_size +=
			roundup2(le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes),
				 PAGE_SIZE);
	}

	return 0;
out:
	amdgpu_ucode_release(&adev->mes.fw[pipe]);
	return r;
}