/*
 * File: dev/pci/drm/amd/amdgpu/amdgpu_amdkfd.c
 *
 * Static-analyzer finding: line 585, column 27 — 2nd function call
 * argument is an uninitialized value (see
 * amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(): `peer_adev` is only
 * assigned when `src` is non-NULL).
 */
1 | // SPDX-License-Identifier: MIT | |||
2 | /* | |||
3 | * Copyright 2014 Advanced Micro Devices, Inc. | |||
4 | * | |||
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
6 | * copy of this software and associated documentation files (the "Software"), | |||
7 | * to deal in the Software without restriction, including without limitation | |||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
9 | * and/or sell copies of the Software, and to permit persons to whom the | |||
10 | * Software is furnished to do so, subject to the following conditions: | |||
11 | * | |||
12 | * The above copyright notice and this permission notice shall be included in | |||
13 | * all copies or substantial portions of the Software. | |||
14 | * | |||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |||
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |||
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |||
21 | * OTHER DEALINGS IN THE SOFTWARE. | |||
22 | */ | |||
23 | ||||
24 | #include "amdgpu_amdkfd.h" | |||
25 | #include "amd_pcie.h" | |||
26 | #include "amd_shared.h" | |||
27 | ||||
28 | #include "amdgpu.h" | |||
29 | #include "amdgpu_gfx.h" | |||
30 | #include "amdgpu_dma_buf.h" | |||
31 | #include <linux/module.h> | |||
32 | #include <linux/dma-buf.h> | |||
33 | #include "amdgpu_xgmi.h" | |||
34 | #include <uapi/linux/kfd_ioctl.h> | |||
35 | #include "amdgpu_ras.h" | |||
36 | #include "amdgpu_umc.h" | |||
37 | #include "amdgpu_reset.h" | |||
38 | ||||
39 | /* Total memory size in system memory and all GPU VRAM. Used to | |||
40 | * estimate worst case amount of memory to reserve for page tables | |||
41 | */ | |||
42 | uint64_t amdgpu_amdkfd_total_mem_size; | |||
43 | ||||
44 | static bool_Bool kfd_initialized; | |||
45 | ||||
46 | int amdgpu_amdkfd_init(void) | |||
47 | { | |||
48 | #ifdef __linux__ | |||
49 | struct sysinfo si; | |||
50 | int ret; | |||
51 | ||||
52 | si_meminfo(&si); | |||
53 | amdgpu_amdkfd_total_mem_size = si.freeram - si.freehigh; | |||
54 | amdgpu_amdkfd_total_mem_size *= si.mem_unit; | |||
55 | #else | |||
56 | int ret; | |||
57 | ||||
58 | amdgpu_amdkfd_total_mem_size = ptoa(physmem)((paddr_t)(physmem) << 12); | |||
59 | #endif | |||
60 | ret = kgd2kfd_init(); | |||
61 | amdgpu_amdkfd_gpuvm_init_mem_limits(); | |||
62 | kfd_initialized = !ret; | |||
63 | ||||
64 | return ret; | |||
65 | } | |||
66 | ||||
67 | void amdgpu_amdkfd_fini(void) | |||
68 | { | |||
69 | if (kfd_initialized) { | |||
70 | kgd2kfd_exit(); | |||
71 | kfd_initialized = false0; | |||
72 | } | |||
73 | } | |||
74 | ||||
75 | void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) | |||
76 | { | |||
77 | bool_Bool vf = amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2)); | |||
78 | ||||
79 | if (!kfd_initialized) | |||
80 | return; | |||
81 | ||||
82 | adev->kfd.dev = kgd2kfd_probe(adev, vf); | |||
83 | } | |||
84 | ||||
85 | /** | |||
86 | * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to | |||
87 | * setup amdkfd | |||
88 | * | |||
89 | * @adev: amdgpu_device pointer | |||
90 | * @aperture_base: output returning doorbell aperture base physical address | |||
91 | * @aperture_size: output returning doorbell aperture size in bytes | |||
92 | * @start_offset: output returning # of doorbell bytes reserved for amdgpu. | |||
93 | * | |||
94 | * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, | |||
95 | * takes doorbells required for its own rings and reports the setup to amdkfd. | |||
96 | * amdgpu reserved doorbells are at the start of the doorbell aperture. | |||
97 | */ | |||
98 | static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, | |||
99 | phys_addr_t *aperture_base, | |||
100 | size_t *aperture_size, | |||
101 | size_t *start_offset) | |||
102 | { | |||
103 | /* | |||
104 | * The first num_doorbells are used by amdgpu. | |||
105 | * amdkfd takes whatever's left in the aperture. | |||
106 | */ | |||
107 | if (adev->enable_mes) { | |||
108 | /* | |||
109 | * With MES enabled, we only need to initialize | |||
110 | * the base address. The size and offset are | |||
111 | * not initialized as AMDGPU manages the whole | |||
112 | * doorbell space. | |||
113 | */ | |||
114 | *aperture_base = adev->doorbell.base; | |||
115 | *aperture_size = 0; | |||
116 | *start_offset = 0; | |||
117 | } else if (adev->doorbell.size > adev->doorbell.num_doorbells * | |||
118 | sizeof(u32)) { | |||
119 | *aperture_base = adev->doorbell.base; | |||
120 | *aperture_size = adev->doorbell.size; | |||
121 | *start_offset = adev->doorbell.num_doorbells * sizeof(u32); | |||
122 | } else { | |||
123 | *aperture_base = 0; | |||
124 | *aperture_size = 0; | |||
125 | *start_offset = 0; | |||
126 | } | |||
127 | } | |||
128 | ||||
129 | ||||
130 | static void amdgpu_amdkfd_reset_work(struct work_struct *work) | |||
131 | { | |||
132 | struct amdgpu_device *adev = container_of(work, struct amdgpu_device,({ const __typeof( ((struct amdgpu_device *)0)->kfd.reset_work ) *__mptr = (work); (struct amdgpu_device *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device, kfd.reset_work) ) ;}) | |||
133 | kfd.reset_work)({ const __typeof( ((struct amdgpu_device *)0)->kfd.reset_work ) *__mptr = (work); (struct amdgpu_device *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_device, kfd.reset_work) ) ;}); | |||
134 | ||||
135 | struct amdgpu_reset_context reset_context; | |||
136 | ||||
137 | memset(&reset_context, 0, sizeof(reset_context))__builtin_memset((&reset_context), (0), (sizeof(reset_context ))); | |||
138 | ||||
139 | reset_context.method = AMD_RESET_METHOD_NONE; | |||
140 | reset_context.reset_req_dev = adev; | |||
141 | clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); | |||
142 | ||||
143 | amdgpu_device_gpu_recover(adev, NULL((void *)0), &reset_context); | |||
144 | } | |||
145 | ||||
146 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) | |||
147 | { | |||
148 | int i; | |||
149 | int last_valid_bit; | |||
150 | ||||
151 | if (adev->kfd.dev) { | |||
152 | struct kgd2kfd_shared_resources gpu_resources = { | |||
153 | .compute_vmid_bitmap = | |||
154 | ((1 << AMDGPU_NUM_VMID16) - 1) - | |||
155 | ((1 << adev->vm_manager.first_kfd_vmid) - 1), | |||
156 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, | |||
157 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, | |||
158 | .gpuvm_size = min(adev->vm_manager.max_pfn(((adev->vm_manager.max_pfn << 12)<(0x0000800000000000ULL ))?(adev->vm_manager.max_pfn << 12):(0x0000800000000000ULL )) | |||
159 | << AMDGPU_GPU_PAGE_SHIFT,(((adev->vm_manager.max_pfn << 12)<(0x0000800000000000ULL ))?(adev->vm_manager.max_pfn << 12):(0x0000800000000000ULL )) | |||
160 | AMDGPU_GMC_HOLE_START)(((adev->vm_manager.max_pfn << 12)<(0x0000800000000000ULL ))?(adev->vm_manager.max_pfn << 12):(0x0000800000000000ULL )), | |||
161 | .drm_render_minor = adev_to_drm(adev)->render->index, | |||
162 | .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, | |||
163 | .enable_mes = adev->enable_mes, | |||
164 | }; | |||
165 | ||||
166 | /* this is going to have a few of the MSBs set that we need to | |||
167 | * clear | |||
168 | */ | |||
169 | bitmap_complement(gpu_resources.cp_queue_bitmap, | |||
170 | adev->gfx.mec.queue_bitmap, | |||
171 | KGD_MAX_QUEUES128); | |||
172 | ||||
173 | /* According to linux/bitmap.h we shouldn't use bitmap_clear if | |||
174 | * nbits is not compile time constant | |||
175 | */ | |||
176 | last_valid_bit = 1 /* only first MEC can have compute queues */ | |||
177 | * adev->gfx.mec.num_pipe_per_mec | |||
178 | * adev->gfx.mec.num_queue_per_pipe; | |||
179 | for (i = last_valid_bit; i < KGD_MAX_QUEUES128; ++i) | |||
180 | clear_bit(i, gpu_resources.cp_queue_bitmap); | |||
181 | ||||
182 | amdgpu_doorbell_get_kfd_info(adev, | |||
183 | &gpu_resources.doorbell_physical_address, | |||
184 | &gpu_resources.doorbell_aperture_size, | |||
185 | &gpu_resources.doorbell_start_offset); | |||
186 | ||||
187 | /* Since SOC15, BIF starts to statically use the | |||
188 | * lower 12 bits of doorbell addresses for routing | |||
189 | * based on settings in registers like | |||
190 | * SDMA0_DOORBELL_RANGE etc.. | |||
191 | * In order to route a doorbell to CP engine, the lower | |||
192 | * 12 bits of its address has to be outside the range | |||
193 | * set for SDMA, VCN, and IH blocks. | |||
194 | */ | |||
195 | if (adev->asic_type >= CHIP_VEGA10) { | |||
196 | gpu_resources.non_cp_doorbells_start = | |||
197 | adev->doorbell_index.first_non_cp; | |||
198 | gpu_resources.non_cp_doorbells_end = | |||
199 | adev->doorbell_index.last_non_cp; | |||
200 | } | |||
201 | ||||
202 | adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, | |||
203 | adev_to_drm(adev), &gpu_resources); | |||
204 | ||||
205 | amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; | |||
206 | ||||
207 | INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work); | |||
208 | } | |||
209 | } | |||
210 | ||||
211 | void amdgpu_amdkfd_device_fini_sw(struct amdgpu_device *adev) | |||
212 | { | |||
213 | if (adev->kfd.dev) { | |||
214 | kgd2kfd_device_exit(adev->kfd.dev); | |||
215 | adev->kfd.dev = NULL((void *)0); | |||
216 | amdgpu_amdkfd_total_mem_size -= adev->gmc.real_vram_size; | |||
217 | } | |||
218 | } | |||
219 | ||||
220 | void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, | |||
221 | const void *ih_ring_entry) | |||
222 | { | |||
223 | if (adev->kfd.dev) | |||
224 | kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); | |||
225 | } | |||
226 | ||||
227 | void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool_Bool run_pm) | |||
228 | { | |||
229 | if (adev->kfd.dev) | |||
230 | kgd2kfd_suspend(adev->kfd.dev, run_pm); | |||
231 | } | |||
232 | ||||
233 | int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) | |||
234 | { | |||
235 | int r = 0; | |||
236 | ||||
237 | if (adev->kfd.dev) | |||
238 | r = kgd2kfd_resume_iommu(adev->kfd.dev); | |||
239 | ||||
240 | return r; | |||
241 | } | |||
242 | ||||
243 | int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool_Bool run_pm) | |||
244 | { | |||
245 | int r = 0; | |||
246 | ||||
247 | if (adev->kfd.dev) | |||
248 | r = kgd2kfd_resume(adev->kfd.dev, run_pm); | |||
249 | ||||
250 | return r; | |||
251 | } | |||
252 | ||||
253 | int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) | |||
254 | { | |||
255 | int r = 0; | |||
256 | ||||
257 | if (adev->kfd.dev) | |||
258 | r = kgd2kfd_pre_reset(adev->kfd.dev); | |||
259 | ||||
260 | return r; | |||
261 | } | |||
262 | ||||
263 | int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) | |||
264 | { | |||
265 | int r = 0; | |||
266 | ||||
267 | if (adev->kfd.dev) | |||
268 | r = kgd2kfd_post_reset(adev->kfd.dev); | |||
269 | ||||
270 | return r; | |||
271 | } | |||
272 | ||||
273 | void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev) | |||
274 | { | |||
275 | if (amdgpu_device_should_recover_gpu(adev)) | |||
276 | amdgpu_reset_domain_schedule(adev->reset_domain, | |||
277 | &adev->kfd.reset_work); | |||
278 | } | |||
279 | ||||
280 | int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, | |||
281 | void **mem_obj, uint64_t *gpu_addr, | |||
282 | void **cpu_ptr, bool_Bool cp_mqd_gfx9) | |||
283 | { | |||
284 | struct amdgpu_bo *bo = NULL((void *)0); | |||
285 | struct amdgpu_bo_param bp; | |||
286 | int r; | |||
287 | void *cpu_ptr_tmp = NULL((void *)0); | |||
288 | ||||
289 | memset(&bp, 0, sizeof(bp))__builtin_memset((&bp), (0), (sizeof(bp))); | |||
290 | bp.size = size; | |||
291 | bp.byte_align = PAGE_SIZE(1 << 12); | |||
292 | bp.domain = AMDGPU_GEM_DOMAIN_GTT0x2; | |||
293 | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC(1 << 2); | |||
294 | bp.type = ttm_bo_type_kernel; | |||
295 | bp.resv = NULL((void *)0); | |||
296 | bp.bo_ptr_size = sizeof(struct amdgpu_bo); | |||
297 | ||||
298 | if (cp_mqd_gfx9) | |||
299 | bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9(1 << 8); | |||
300 | ||||
301 | r = amdgpu_bo_create(adev, &bp, &bo); | |||
302 | if (r) { | |||
303 | dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "failed to allocate BO for amdkfd (%d)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r) | |||
304 | "failed to allocate BO for amdkfd (%d)\n", r)printf("drm:pid%d:%s *ERROR* " "failed to allocate BO for amdkfd (%d)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
305 | return r; | |||
306 | } | |||
307 | ||||
308 | /* map the buffer */ | |||
309 | r = amdgpu_bo_reserve(bo, true1); | |||
310 | if (r) { | |||
311 | dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r)printf("drm:pid%d:%s *ERROR* " "(%d) failed to reserve bo for amdkfd\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
312 | goto allocate_mem_reserve_bo_failed; | |||
313 | } | |||
314 | ||||
315 | r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT0x2); | |||
316 | if (r) { | |||
317 | dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r)printf("drm:pid%d:%s *ERROR* " "(%d) failed to pin bo for amdkfd\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
318 | goto allocate_mem_pin_bo_failed; | |||
319 | } | |||
320 | ||||
321 | r = amdgpu_ttm_alloc_gart(&bo->tbo); | |||
322 | if (r) { | |||
323 | dev_err(adev->dev, "%p bind failed\n", bo)printf("drm:pid%d:%s *ERROR* " "%p bind failed\n", ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p-> ps_pid, __func__ , bo); | |||
324 | goto allocate_mem_kmap_bo_failed; | |||
325 | } | |||
326 | ||||
327 | r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); | |||
328 | if (r) { | |||
329 | dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "(%d) failed to map bo to kernel for amdkfd\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r) | |||
330 | "(%d) failed to map bo to kernel for amdkfd\n", r)printf("drm:pid%d:%s *ERROR* " "(%d) failed to map bo to kernel for amdkfd\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
331 | goto allocate_mem_kmap_bo_failed; | |||
332 | } | |||
333 | ||||
334 | *mem_obj = bo; | |||
335 | *gpu_addr = amdgpu_bo_gpu_offset(bo); | |||
336 | *cpu_ptr = cpu_ptr_tmp; | |||
337 | ||||
338 | amdgpu_bo_unreserve(bo); | |||
339 | ||||
340 | return 0; | |||
341 | ||||
342 | allocate_mem_kmap_bo_failed: | |||
343 | amdgpu_bo_unpin(bo); | |||
344 | allocate_mem_pin_bo_failed: | |||
345 | amdgpu_bo_unreserve(bo); | |||
346 | allocate_mem_reserve_bo_failed: | |||
347 | amdgpu_bo_unref(&bo); | |||
348 | ||||
349 | return r; | |||
350 | } | |||
351 | ||||
/*
 * Free a buffer allocated with amdgpu_amdkfd_alloc_gtt_mem():
 * unmap, unpin and drop the reference.
 *
 * Fix: tolerate a NULL mem_obj so error paths can free
 * unconditionally; previously a NULL handle was dereferenced by
 * amdgpu_bo_reserve().
 */
void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;

	if (!bo)
		return;

	amdgpu_bo_reserve(bo, true);
	amdgpu_bo_kunmap(bo);
	amdgpu_bo_unpin(bo);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&(bo));
}
362 | ||||
363 | int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size, | |||
364 | void **mem_obj) | |||
365 | { | |||
366 | struct amdgpu_bo *bo = NULL((void *)0); | |||
367 | struct amdgpu_bo_user *ubo; | |||
368 | struct amdgpu_bo_param bp; | |||
369 | int r; | |||
370 | ||||
371 | memset(&bp, 0, sizeof(bp))__builtin_memset((&bp), (0), (sizeof(bp))); | |||
372 | bp.size = size; | |||
373 | bp.byte_align = 1; | |||
374 | bp.domain = AMDGPU_GEM_DOMAIN_GWS0x10; | |||
375 | bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS(1 << 1); | |||
376 | bp.type = ttm_bo_type_device; | |||
377 | bp.resv = NULL((void *)0); | |||
378 | bp.bo_ptr_size = sizeof(struct amdgpu_bo); | |||
379 | ||||
380 | r = amdgpu_bo_create_user(adev, &bp, &ubo); | |||
381 | if (r) { | |||
382 | dev_err(adev->dev,printf("drm:pid%d:%s *ERROR* " "failed to allocate gws BO for amdkfd (%d)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r) | |||
383 | "failed to allocate gws BO for amdkfd (%d)\n", r)printf("drm:pid%d:%s *ERROR* " "failed to allocate gws BO for amdkfd (%d)\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , r); | |||
384 | return r; | |||
385 | } | |||
386 | ||||
387 | bo = &ubo->bo; | |||
388 | *mem_obj = bo; | |||
389 | return 0; | |||
390 | } | |||
391 | ||||
/* Release a GWS BO obtained from amdgpu_amdkfd_alloc_gws(). */
void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj)
{
	struct amdgpu_bo *bo = mem_obj;

	amdgpu_bo_unref(&bo);
}
398 | ||||
399 | uint32_t amdgpu_amdkfd_get_fw_version(struct amdgpu_device *adev, | |||
400 | enum kgd_engine_type type) | |||
401 | { | |||
402 | switch (type) { | |||
403 | case KGD_ENGINE_PFP: | |||
404 | return adev->gfx.pfp_fw_version; | |||
405 | ||||
406 | case KGD_ENGINE_ME: | |||
407 | return adev->gfx.me_fw_version; | |||
408 | ||||
409 | case KGD_ENGINE_CE: | |||
410 | return adev->gfx.ce_fw_version; | |||
411 | ||||
412 | case KGD_ENGINE_MEC1: | |||
413 | return adev->gfx.mec_fw_version; | |||
414 | ||||
415 | case KGD_ENGINE_MEC2: | |||
416 | return adev->gfx.mec2_fw_version; | |||
417 | ||||
418 | case KGD_ENGINE_RLC: | |||
419 | return adev->gfx.rlc_fw_version; | |||
420 | ||||
421 | case KGD_ENGINE_SDMA1: | |||
422 | return adev->sdma.instance[0].fw_version; | |||
423 | ||||
424 | case KGD_ENGINE_SDMA2: | |||
425 | return adev->sdma.instance[1].fw_version; | |||
426 | ||||
427 | default: | |||
428 | return 0; | |||
429 | } | |||
430 | ||||
431 | return 0; | |||
432 | } | |||
433 | ||||
434 | void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, | |||
435 | struct kfd_local_mem_info *mem_info) | |||
436 | { | |||
437 | memset(mem_info, 0, sizeof(*mem_info))__builtin_memset((mem_info), (0), (sizeof(*mem_info))); | |||
438 | ||||
439 | mem_info->local_mem_size_public = adev->gmc.visible_vram_size; | |||
440 | mem_info->local_mem_size_private = adev->gmc.real_vram_size - | |||
441 | adev->gmc.visible_vram_size; | |||
442 | ||||
443 | mem_info->vram_width = adev->gmc.vram_width; | |||
444 | ||||
445 | pr_debug("Address base: %pap public 0x%llx private 0x%llx\n",do { } while(0) | |||
446 | &adev->gmc.aper_base,do { } while(0) | |||
447 | mem_info->local_mem_size_public,do { } while(0) | |||
448 | mem_info->local_mem_size_private)do { } while(0); | |||
449 | ||||
450 | if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) | |||
451 | mem_info->mem_clk_max = adev->clock.default_mclk / 100; | |||
452 | else if (adev->pm.dpm_enabled) { | |||
453 | if (amdgpu_emu_mode == 1) | |||
454 | mem_info->mem_clk_max = 0; | |||
455 | else | |||
456 | mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false0) / 100; | |||
457 | } else | |||
458 | mem_info->mem_clk_max = 100; | |||
459 | } | |||
460 | ||||
461 | uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev) | |||
462 | { | |||
463 | if (adev->gfx.funcs->get_gpu_clock_counter) | |||
464 | return adev->gfx.funcs->get_gpu_clock_counter(adev); | |||
465 | return 0; | |||
466 | } | |||
467 | ||||
468 | uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) | |||
469 | { | |||
470 | /* the sclk is in quantas of 10kHz */ | |||
471 | if (amdgpu_sriov_vf(adev)((adev)->virt.caps & (1 << 2))) | |||
472 | return adev->clock.default_sclk / 100; | |||
473 | else if (adev->pm.dpm_enabled) | |||
474 | return amdgpu_dpm_get_sclk(adev, false0) / 100; | |||
475 | else | |||
476 | return 100; | |||
477 | } | |||
478 | ||||
479 | void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info) | |||
480 | { | |||
481 | struct amdgpu_cu_info acu_info = adev->gfx.cu_info; | |||
482 | ||||
483 | memset(cu_info, 0, sizeof(*cu_info))__builtin_memset((cu_info), (0), (sizeof(*cu_info))); | |||
484 | if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) | |||
485 | return; | |||
486 | ||||
487 | cu_info->cu_active_number = acu_info.number; | |||
488 | cu_info->cu_ao_mask = acu_info.ao_cu_mask; | |||
489 | memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],__builtin_memcpy((&cu_info->cu_bitmap[0]), (&acu_info .bitmap[0]), (sizeof(acu_info.bitmap))) | |||
490 | sizeof(acu_info.bitmap))__builtin_memcpy((&cu_info->cu_bitmap[0]), (&acu_info .bitmap[0]), (sizeof(acu_info.bitmap))); | |||
491 | cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; | |||
492 | cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; | |||
493 | cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; | |||
494 | cu_info->simd_per_cu = acu_info.simd_per_cu; | |||
495 | cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; | |||
496 | cu_info->wave_front_size = acu_info.wave_front_size; | |||
497 | cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; | |||
498 | cu_info->lds_size = acu_info.lds_size; | |||
499 | } | |||
500 | ||||
501 | int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, | |||
502 | struct amdgpu_device **dmabuf_adev, | |||
503 | uint64_t *bo_size, void *metadata_buffer, | |||
504 | size_t buffer_size, uint32_t *metadata_size, | |||
505 | uint32_t *flags) | |||
506 | { | |||
507 | struct dma_buf *dma_buf; | |||
508 | struct drm_gem_object *obj; | |||
509 | struct amdgpu_bo *bo; | |||
510 | uint64_t metadata_flags; | |||
511 | int r = -EINVAL22; | |||
512 | ||||
513 | dma_buf = dma_buf_get(dma_buf_fd); | |||
514 | if (IS_ERR(dma_buf)) | |||
515 | return PTR_ERR(dma_buf); | |||
516 | ||||
517 | if (dma_buf->ops != &amdgpu_dmabuf_ops) | |||
518 | /* Can't handle non-graphics buffers */ | |||
519 | goto out_put; | |||
520 | ||||
521 | obj = dma_buf->priv; | |||
522 | if (obj->dev->driver != adev_to_drm(adev)->driver) | |||
523 | /* Can't handle buffers from different drivers */ | |||
524 | goto out_put; | |||
525 | ||||
526 | adev = drm_to_adev(obj->dev); | |||
527 | bo = gem_to_amdgpu_bo(obj)({ const __typeof( ((struct amdgpu_bo *)0)->tbo.base ) *__mptr = ((obj)); (struct amdgpu_bo *)( (char *)__mptr - __builtin_offsetof (struct amdgpu_bo, tbo.base) );}); | |||
528 | if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM0x4 | | |||
529 | AMDGPU_GEM_DOMAIN_GTT0x2))) | |||
530 | /* Only VRAM and GTT BOs are supported */ | |||
531 | goto out_put; | |||
532 | ||||
533 | r = 0; | |||
534 | if (dmabuf_adev) | |||
535 | *dmabuf_adev = adev; | |||
536 | if (bo_size) | |||
537 | *bo_size = amdgpu_bo_size(bo); | |||
538 | if (metadata_buffer) | |||
539 | r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, | |||
540 | metadata_size, &metadata_flags); | |||
541 | if (flags) { | |||
542 | *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM0x4) ? | |||
543 | KFD_IOC_ALLOC_MEM_FLAGS_VRAM(1 << 0) | |||
544 | : KFD_IOC_ALLOC_MEM_FLAGS_GTT(1 << 1); | |||
545 | ||||
546 | if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED(1 << 0)) | |||
547 | *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC(1 << 29); | |||
548 | } | |||
549 | ||||
550 | out_put: | |||
551 | dma_buf_put(dma_buf); | |||
552 | return r; | |||
553 | } | |||
554 | ||||
555 | uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, | |||
556 | struct amdgpu_device *src) | |||
557 | { | |||
558 | struct amdgpu_device *peer_adev = src; | |||
559 | struct amdgpu_device *adev = dst; | |||
560 | int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); | |||
561 | ||||
562 | if (ret < 0) { | |||
563 | DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",__drm_err("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n" , adev->gmc.xgmi.physical_node_id, peer_adev->gmc.xgmi. physical_node_id, ret) | |||
564 | adev->gmc.xgmi.physical_node_id,__drm_err("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n" , adev->gmc.xgmi.physical_node_id, peer_adev->gmc.xgmi. physical_node_id, ret) | |||
565 | peer_adev->gmc.xgmi.physical_node_id, ret)__drm_err("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n" , adev->gmc.xgmi.physical_node_id, peer_adev->gmc.xgmi. physical_node_id, ret); | |||
566 | ret = 0; | |||
567 | } | |||
568 | return (uint8_t)ret; | |||
569 | } | |||
570 | ||||
571 | int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, | |||
572 | struct amdgpu_device *src, | |||
573 | bool_Bool is_min) | |||
574 | { | |||
575 | struct amdgpu_device *adev = dst, *peer_adev; | |||
| ||||
576 | int num_links; | |||
577 | ||||
578 | if (adev->asic_type != CHIP_ALDEBARAN) | |||
579 | return 0; | |||
580 | ||||
581 | if (src) | |||
582 | peer_adev = src; | |||
583 | ||||
584 | /* num links returns 0 for indirect peers since indirect route is unknown. */ | |||
585 | num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); | |||
| ||||
586 | if (num_links < 0) { | |||
587 | DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n",__drm_err("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n" , adev->gmc.xgmi.physical_node_id, peer_adev->gmc.xgmi. physical_node_id, num_links) | |||
588 | adev->gmc.xgmi.physical_node_id,__drm_err("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n" , adev->gmc.xgmi.physical_node_id, peer_adev->gmc.xgmi. physical_node_id, num_links) | |||
589 | peer_adev->gmc.xgmi.physical_node_id, num_links)__drm_err("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n" , adev->gmc.xgmi.physical_node_id, peer_adev->gmc.xgmi. physical_node_id, num_links); | |||
590 | num_links = 0; | |||
591 | } | |||
592 | ||||
593 | /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. */ | |||
594 | return (num_links * 16 * 25000)/BITS_PER_BYTE8; | |||
595 | } | |||
596 | ||||
597 | int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool_Bool is_min) | |||
598 | { | |||
599 | int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : | |||
600 | fls(adev->pm.pcie_mlw_mask)) - 1; | |||
601 | int gen_speed_shift = (is_min ? ffs(adev->pm.pcie_gen_mask & | |||
602 | CAIL_PCIE_LINK_SPEED_SUPPORT_MASK0xFFFF0000) : | |||
603 | fls(adev->pm.pcie_gen_mask & | |||
604 | CAIL_PCIE_LINK_SPEED_SUPPORT_MASK0xFFFF0000)) - 1; | |||
605 | uint32_t num_lanes_mask = 1 << num_lanes_shift; | |||
606 | uint32_t gen_speed_mask = 1 << gen_speed_shift; | |||
607 | int num_lanes_factor = 0, gen_speed_mbits_factor = 0; | |||
608 | ||||
609 | switch (num_lanes_mask) { | |||
610 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X10x00010000: | |||
611 | num_lanes_factor = 1; | |||
612 | break; | |||
613 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X20x00020000: | |||
614 | num_lanes_factor = 2; | |||
615 | break; | |||
616 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X40x00040000: | |||
617 | num_lanes_factor = 4; | |||
618 | break; | |||
619 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X80x00080000: | |||
620 | num_lanes_factor = 8; | |||
621 | break; | |||
622 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X120x00100000: | |||
623 | num_lanes_factor = 12; | |||
624 | break; | |||
625 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X160x00200000: | |||
626 | num_lanes_factor = 16; | |||
627 | break; | |||
628 | case CAIL_PCIE_LINK_WIDTH_SUPPORT_X320x00400000: | |||
629 | num_lanes_factor = 32; | |||
630 | break; | |||
631 | } | |||
632 | ||||
633 | switch (gen_speed_mask) { | |||
634 | case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN10x00010000: | |||
635 | gen_speed_mbits_factor = 2500; | |||
636 | break; | |||
637 | case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN20x00020000: | |||
638 | gen_speed_mbits_factor = 5000; | |||
639 | break; | |||
640 | case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN30x00040000: | |||
641 | gen_speed_mbits_factor = 8000; | |||
642 | break; | |||
643 | case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN40x00080000: | |||
644 | gen_speed_mbits_factor = 16000; | |||
645 | break; | |||
646 | case CAIL_PCIE_LINK_SPEED_SUPPORT_GEN50x00100000: | |||
647 | gen_speed_mbits_factor = 32000; | |||
648 | break; | |||
649 | } | |||
650 | ||||
651 | return (num_lanes_factor * gen_speed_mbits_factor)/BITS_PER_BYTE8; | |||
652 | } | |||
653 | ||||
654 | int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, | |||
655 | enum kgd_engine_type engine, | |||
656 | uint32_t vmid, uint64_t gpu_addr, | |||
657 | uint32_t *ib_cmd, uint32_t ib_len) | |||
658 | { | |||
659 | struct amdgpu_job *job; | |||
660 | struct amdgpu_ib *ib; | |||
661 | struct amdgpu_ring *ring; | |||
662 | struct dma_fence *f = NULL((void *)0); | |||
663 | int ret; | |||
664 | ||||
665 | switch (engine) { | |||
666 | case KGD_ENGINE_MEC1: | |||
667 | ring = &adev->gfx.compute_ring[0]; | |||
668 | break; | |||
669 | case KGD_ENGINE_SDMA1: | |||
670 | ring = &adev->sdma.instance[0].ring; | |||
671 | break; | |||
672 | case KGD_ENGINE_SDMA2: | |||
673 | ring = &adev->sdma.instance[1].ring; | |||
674 | break; | |||
675 | default: | |||
676 | pr_err("Invalid engine in IB submission: %d\n", engine)printk("\0013" "amdgpu: " "Invalid engine in IB submission: %d\n" , engine); | |||
677 | ret = -EINVAL22; | |||
678 | goto err; | |||
679 | } | |||
680 | ||||
681 | ret = amdgpu_job_alloc(adev, 1, &job, NULL((void *)0)); | |||
682 | if (ret) | |||
683 | goto err; | |||
684 | ||||
685 | ib = &job->ibs[0]; | |||
686 | memset(ib, 0, sizeof(struct amdgpu_ib))__builtin_memset((ib), (0), (sizeof(struct amdgpu_ib))); | |||
687 | ||||
688 | ib->gpu_addr = gpu_addr; | |||
689 | ib->ptr = ib_cmd; | |||
690 | ib->length_dw = ib_len; | |||
691 | /* This works for NO_HWS. TODO: need to handle without knowing VMID */ | |||
692 | job->vmid = vmid; | |||
693 | job->num_ibs = 1; | |||
694 | ||||
695 | ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); | |||
696 | ||||
697 | if (ret) { | |||
698 | DRM_ERROR("amdgpu: failed to schedule IB.\n")__drm_err("amdgpu: failed to schedule IB.\n"); | |||
699 | goto err_ib_sched; | |||
700 | } | |||
701 | ||||
702 | /* Drop the initial kref_init count (see drm_sched_main as example) */ | |||
703 | dma_fence_put(f); | |||
704 | ret = dma_fence_wait(f, false0); | |||
705 | ||||
706 | err_ib_sched: | |||
707 | amdgpu_job_free(job); | |||
708 | err: | |||
709 | return ret; | |||
710 | } | |||
711 | ||||
712 | void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool_Bool idle) | |||
713 | { | |||
714 | /* Temporary workaround to fix issues observed in some | |||
715 | * compute applications when GFXOFF is enabled on GFX11. | |||
716 | */ | |||
717 | if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0])((adev->ip_versions[GC_HWIP][0]) >> 16) == 11) { | |||
718 | pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled")do { } while(0); | |||
719 | amdgpu_gfx_off_ctrl(adev, idle); | |||
720 | } | |||
721 | amdgpu_dpm_switch_power_profile(adev, | |||
722 | PP_SMC_POWER_PROFILE_COMPUTE, | |||
723 | !idle); | |||
724 | } | |||
725 | ||||
726 | bool_Bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) | |||
727 | { | |||
728 | if (adev->kfd.dev) | |||
729 | return vmid >= adev->vm_manager.first_kfd_vmid; | |||
730 | ||||
731 | return false0; | |||
732 | } | |||
733 | ||||
734 | int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, | |||
735 | uint16_t vmid) | |||
736 | { | |||
737 | if (adev->family == AMDGPU_FAMILY_AI141) { | |||
738 | int i; | |||
739 | ||||
740 | for (i = 0; i < adev->num_vmhubs; i++) | |||
741 | amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0)((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), ( i), (0))); | |||
742 | } else { | |||
743 | amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0)((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), ( 0), (0))); | |||
744 | } | |||
745 | ||||
746 | return 0; | |||
747 | } | |||
748 | ||||
749 | int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, | |||
750 | uint16_t pasid, enum TLB_FLUSH_TYPE flush_type) | |||
751 | { | |||
752 | bool_Bool all_hub = false0; | |||
753 | ||||
754 | if (adev->family == AMDGPU_FAMILY_AI141 || | |||
755 | adev->family == AMDGPU_FAMILY_RV142) | |||
756 | all_hub = true1; | |||
757 | ||||
758 | return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub)((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid ((adev), (pasid ), (flush_type), (all_hub))); | |||
759 | } | |||
760 | ||||
761 | bool_Bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) | |||
762 | { | |||
763 | return adev->have_atomics_support; | |||
764 | } | |||
765 | ||||
766 | void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool_Bool reset) | |||
767 | { | |||
768 | struct ras_err_data err_data = {0, 0, 0, NULL((void *)0)}; | |||
769 | ||||
770 | amdgpu_umc_poison_handler(adev, &err_data, reset); | |||
771 | } | |||
772 | ||||
773 | bool_Bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) | |||
774 | { | |||
775 | if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) | |||
776 | return adev->gfx.ras->query_utcl2_poison_status(adev); | |||
777 | else | |||
778 | return false0; | |||
779 | } |