File:    dev/pci/drm/amd/amdgpu/amdgpu_vm_pt.c
Warning: line 240, column 22: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long long'
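How the flagged value can arise, as far as the listing shows: amdgpu_vm_pt_level_shift() (line 49) returns ~0 for a level it does not recognize. amdgpu_vm_pt_sibling() computes shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1) (line 233) and then evaluates 1ULL << shift (line 240). Because shift is an unsigned int, ~0 is 4294967295, and shifting an unsigned long long by 64 or more bits is undefined behaviour in C. A minimal sketch of the flagged pattern, using simplified, hypothetical level numbering in place of the AMDGPU_VM_* enum values:

    #include <stdint.h>

    /* Stand-in for amdgpu_vm_pt_level_shift(); ~0 marks an invalid level. */
    static unsigned int level_shift(unsigned int level)
    {
            switch (level) {
            case 0:                 /* page directory levels */
            case 1:
            case 2:
                    return 9 * (2 - level) + 9;
            case 3:                 /* leaf page table */
                    return 0;
            default:
                    return ~0;      /* 4294967295 as an unsigned int */
            }
    }

    /* Stand-in for the arithmetic at lines 240-241. */
    static uint64_t advance_pfn(uint64_t pfn, unsigned int level)
    {
            unsigned int shift = level_shift(level - 1);

            /* Undefined behaviour when shift >= 64, i.e. whenever
             * level_shift() took its default branch above.
             */
            pfn += 1ULL << shift;
            return pfn & ~((1ULL << shift) - 1);
    }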
   1 | // SPDX-License-Identifier: GPL-2.0 OR MIT
   2 | /*
   3 |  * Copyright 2022 Advanced Micro Devices, Inc.
   4 |  *
   5 |  * Permission is hereby granted, free of charge, to any person obtaining a
   6 |  * copy of this software and associated documentation files (the "Software"),
   7 |  * to deal in the Software without restriction, including without limitation
   8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   9 |  * and/or sell copies of the Software, and to permit persons to whom the
  10 |  * Software is furnished to do so, subject to the following conditions:
  11 |  *
  12 |  * The above copyright notice and this permission notice shall be included in
  13 |  * all copies or substantial portions of the Software.
  14 |  *
  15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  18 |  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  19 |  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  20 |  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  21 |  * OTHER DEALINGS IN THE SOFTWARE.
  22 |  */
  23 |
  24 | #include <drm/drm_drv.h>
  25 |
  26 | #include "amdgpu.h"
  27 | #include "amdgpu_trace.h"
  28 | #include "amdgpu_vm.h"
  29 |
  30 | /*
  31 |  * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
  32 |  */
  33 | struct amdgpu_vm_pt_cursor {
  34 |         uint64_t pfn;
  35 |         struct amdgpu_vm_bo_base *parent;
  36 |         struct amdgpu_vm_bo_base *entry;
  37 |         unsigned int level;
  38 | };
  39 |
  40 | /**
  41 |  * amdgpu_vm_pt_level_shift - return the addr shift for each level
  42 |  *
  43 |  * @adev: amdgpu_device pointer
  44 |  * @level: VMPT level
  45 |  *
  46 |  * Returns:
  47 |  * The number of bits the pfn needs to be right shifted for a level.
  48 |  */
  49 | static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
  50 |                                              unsigned int level)
  51 | {
  52 |         switch (level) {
  53 |         case AMDGPU_VM_PDB2:
  54 |         case AMDGPU_VM_PDB1:
  55 |         case AMDGPU_VM_PDB0:
  56 |                 return 9 * (AMDGPU_VM_PDB0 - level) +
  57 |                         adev->vm_manager.block_size;
  58 |         case AMDGPU_VM_PTB:
  59 |                 return 0;
  60 |         default:
  61 |                 return ~0;
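     |                 ^-- note: for an unrecognized level this returns ~0, which callers store in an unsigned int; that is the 4294967295 shift count flagged at line 240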
  62 |         }
  63 | }
  64 |
  65 | /**
  66 |  * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
  67 |  *
  68 |  * @adev: amdgpu_device pointer
  69 |  * @level: VMPT level
  70 |  *
  71 |  * Returns:
  72 |  * The number of entries in a page directory or page table.
  73 |  */
  74 | static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
  75 |                                              unsigned int level)
  76 | {
  77 |         unsigned int shift;
  78 |
  79 |         shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
  80 |         if (level == adev->vm_manager.root_level)
  81 |                 /* For the root directory */
  82 |                 return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
  83 |                         >> shift;
  84 |         else if (level != AMDGPU_VM_PTB)
  85 |                 /* Everything in between */
  86 |                 return 512;
  87 |
  88 |         /* For the page tables on the leaves */
  89 |         return AMDGPU_VM_PTE_COUNT(adev);
  90 | }
  91 |
  92 | /**
  93 |  * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
  94 |  *
  95 |  * @adev: amdgpu_device pointer
  96 |  *
  97 |  * Returns:
  98 |  * The number of entries in the root page directory which need the ATS setting.
  99 |  */
 100 | static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
 101 | {
 102 |         unsigned int shift;
 103 |
 104 |         shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
 105 |         return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
 106 | }
 107 |
 108 | /**
 109 |  * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
 110 |  *
 111 |  * @adev: amdgpu_device pointer
 112 |  * @level: VMPT level
 113 |  *
 114 |  * Returns:
 115 |  * The mask to extract the entry number of a PD/PT from an address.
 116 |  */
 117 | static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
 118 |                                           unsigned int level)
 119 | {
 120 |         if (level <= adev->vm_manager.root_level)
 121 |                 return 0xffffffff;
 122 |         else if (level != AMDGPU_VM_PTB)
 123 |                 return 0x1ff;
 124 |         else
 125 |                 return AMDGPU_VM_PTE_COUNT(adev) - 1;
 126 | }
 127 |
 128 | /**
 129 |  * amdgpu_vm_pt_size - returns the size of the page table in bytes
 130 |  *
 131 |  * @adev: amdgpu_device pointer
 132 |  * @level: VMPT level
 133 |  *
 134 |  * Returns:
 135 |  * The size of the BO for a page directory or page table in bytes.
 136 |  */
 137 | static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
 138 |                                       unsigned int level)
 139 | {
 140 |         return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
 141 | }
 142 |
 143 | /**
 144 |  * amdgpu_vm_pt_parent - get the parent page directory
 145 |  *
 146 |  * @pt: child page table
 147 |  *
 148 |  * Helper to get the parent entry for the child page table. NULL if we are at
 149 |  * the root page directory.
 150 |  */
 151 | static struct amdgpu_vm_bo_base *
 152 | amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
 153 | {
 154 |         struct amdgpu_bo *parent = pt->bo->parent;
 155 |
 156 |         if (!parent)
 157 |                 return NULL;
 158 |
 159 |         return parent->vm_bo;
 160 | }
 161 |
 162 | /**
 163 |  * amdgpu_vm_pt_start - start PD/PT walk
 164 |  *
 165 |  * @adev: amdgpu_device pointer
 166 |  * @vm: amdgpu_vm structure
 167 |  * @start: start address of the walk
 168 |  * @cursor: state to initialize
 169 |  *
 170 |  * Initialize an amdgpu_vm_pt_cursor to start a walk.
 171 |  */
 172 | static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
 173 |                                struct amdgpu_vm *vm, uint64_t start,
 174 |                                struct amdgpu_vm_pt_cursor *cursor)
 175 | {
 176 |         cursor->pfn = start;
 177 |         cursor->parent = NULL;
 178 |         cursor->entry = &vm->root;
 179 |         cursor->level = adev->vm_manager.root_level;
 180 | }
 181 |
 182 | /**
 183 |  * amdgpu_vm_pt_descendant - go to child node
 184 |  *
 185 |  * @adev: amdgpu_device pointer
 186 |  * @cursor: current state
 187 |  *
 188 |  * Walk to the child node of the current node.
 189 |  * Returns:
 190 |  * True if the walk was possible, false otherwise.
 191 |  */
 192 | static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
 193 |                                     struct amdgpu_vm_pt_cursor *cursor)
 194 | {
 195 |         unsigned int mask, shift, idx;
 196 |
 197 |         if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
 198 |             !cursor->entry->bo)
 199 |                 return false;
 200 |
 201 |         mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
 202 |         shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
 203 |
 204 |         ++cursor->level;
 205 |         idx = (cursor->pfn >> shift) & mask;
 206 |         cursor->parent = cursor->entry;
 207 |         cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
 208 |         return true;
 209 | }
 210 |
 211 | /**
 212 |  * amdgpu_vm_pt_sibling - go to sibling node
 213 |  *
 214 |  * @adev: amdgpu_device pointer
 215 |  * @cursor: current state
 216 |  *
 217 |  * Walk to the sibling node of the current node.
 218 |  * Returns:
 219 |  * True if the walk was possible, false otherwise.
 220 |  */
 221 | static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
 222 |                                  struct amdgpu_vm_pt_cursor *cursor)
 223 | {
 224 |
 225 |         unsigned int shift, num_entries;
 226 |         struct amdgpu_bo_vm *parent;
 227 |
 228 |         /* Root doesn't have a sibling */
 229 |         if (!cursor->parent)
 230 |                 return false;
 231 |
 232 |         /* Go to our parents and see if we got a sibling */
 233 |         shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
 234 |         num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
 235 |         parent = to_amdgpu_bo_vm(cursor->parent->bo);
 236 |
 237 |         if (cursor->entry == &parent->entries[num_entries - 1])
 238 |                 return false;
 239 |
 240 |         cursor->pfn += 1ULL << shift;
     |                        ^-- warning: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long long'
 241 |         cursor->pfn &= ~((1ULL << shift) - 1);
 242 |         ++cursor->entry;
 243 |         return true;
 244 | }
 245 |
 246 | /**
 247 |  * amdgpu_vm_pt_ancestor - go to parent node
 248 |  *
 249 |  * @cursor: current state
 250 |  *
 251 |  * Walk to the parent node of the current node.
 252 |  * Returns:
 253 |  * True if the walk was possible, false otherwise.
 254 |  */
 255 | static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
 256 | {
 257 |         if (!cursor->parent)
 258 |                 return false;
 259 |
 260 |         --cursor->level;
 261 |         cursor->entry = cursor->parent;
 262 |         cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
 263 |         return true;
 264 | }
 265 |
 266 | /**
 267 |  * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 268 |  *
 269 |  * @adev: amdgpu_device pointer
 270 |  * @cursor: current state
 271 |  *
 272 |  * Walk the PD/PT tree to the next node.
 273 |  */
 274 | static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
 275 |                               struct amdgpu_vm_pt_cursor *cursor)
 276 | {
 277 |         /* First try a newborn child */
 278 |         if (amdgpu_vm_pt_descendant(adev, cursor))
 279 |                 return;
 280 |
 281 |         /* If that didn't work try to find a sibling */
 282 |         while (!amdgpu_vm_pt_sibling(adev, cursor)) {
 283 |                 /* No sibling, go to our parents and grandparents */
 284 |                 if (!amdgpu_vm_pt_ancestor(cursor)) {
 285 |                         cursor->pfn = ~0ll;
 286 |                         return;
 287 |                 }
 288 |         }
 289 | }
 290 |
 291 | /**
 292 |  * amdgpu_vm_pt_first_dfs - start a depth-first search
 293 |  *
 294 |  * @adev: amdgpu_device structure
 295 |  * @vm: amdgpu_vm structure
 296 |  * @start: optional cursor to start with
 297 |  * @cursor: state to initialize
 298 |  *
 299 |  * Starts a depth-first traversal of the PD/PT tree.
 300 |  */
 301 | static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
 302 |                                    struct amdgpu_vm *vm,
 303 |                                    struct amdgpu_vm_pt_cursor *start,
 304 |                                    struct amdgpu_vm_pt_cursor *cursor)
 305 | {
 306 |         if (start)
 307 |                 *cursor = *start;
 308 |         else
 309 |                 amdgpu_vm_pt_start(adev, vm, 0, cursor);
 310 |
 311 |         while (amdgpu_vm_pt_descendant(adev, cursor))
 312 |                 ;
 313 | }
 314 |
 315 | /**
 316 |  * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 317 |  *
 318 |  * @start: starting point for the search
 319 |  * @entry: current entry
 320 |  *
 321 |  * Returns:
 322 |  * True when the search should continue, false otherwise.
 323 |  */
 324 | static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
 325 |                                       struct amdgpu_vm_bo_base *entry)
 326 | {
 327 |         return entry && (!start || entry != start->entry);
 328 | }
 329 |
 330 | /**
 331 |  * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 332 |  *
 333 |  * @adev: amdgpu_device structure
 334 |  * @cursor: current state
 335 |  *
 336 |  * Move the cursor to the next node in a depth-first search.
 337 |  */
 338 | static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
 339 |                                   struct amdgpu_vm_pt_cursor *cursor)
 340 | {
 341 |         if (!cursor->entry)
 342 |                 return;
 343 |
 344 |         if (!cursor->parent)
 345 |                 cursor->entry = NULL;
 346 |         else if (amdgpu_vm_pt_sibling(adev, cursor))
 347 |                 while (amdgpu_vm_pt_descendant(adev, cursor))
 348 |                         ;
 349 |         else
 350 |                 amdgpu_vm_pt_ancestor(cursor);
 351 | }
 352 |
 353 | /*
 354 |  * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 355 |  */
 356 | #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)          \
 357 |         for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),          \
 358 |              (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
 359 |              amdgpu_vm_pt_continue_dfs((start), (entry));                       \
 360 |              (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
 361 |
 362 | /**
 363 |  * amdgpu_vm_pt_clear - initially clear the PDs/PTs
 364 |  *
 365 |  * @adev: amdgpu_device pointer
 366 |  * @vm: VM to clear BO from
 367 |  * @vmbo: BO to clear
 368 |  * @immediate: use an immediate update
 369 |  *
 370 |  * Root PD needs to be reserved when calling this.
 371 |  *
 372 |  * Returns:
 373 |  * 0 on success, errno otherwise.
 374 |  */
 375 | int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 376 |                        struct amdgpu_bo_vm *vmbo, bool immediate)
 377 | {
 378 |         unsigned int level = adev->vm_manager.root_level;
 379 |         struct ttm_operation_ctx ctx = { true, false };
 380 |         struct amdgpu_vm_update_params params;
 381 |         struct amdgpu_bo *ancestor = &vmbo->bo;
 382 |         unsigned int entries, ats_entries;
 383 |         struct amdgpu_bo *bo = &vmbo->bo;
 384 |         uint64_t addr;
 385 |         int r, idx;
 386 |
 387 |         /* Figure out our place in the hierarchy */
 388 |         if (ancestor->parent) {
 389 |                 ++level;
 390 |                 while (ancestor->parent->parent) {
 391 |                         ++level;
 392 |                         ancestor = ancestor->parent;
 393 |                 }
 394 |         }
 395 |
 396 |         entries = amdgpu_bo_size(bo) / 8;
 397 |         if (!vm->pte_support_ats) {
 398 |                 ats_entries = 0;
 399 |
 400 |         } else if (!bo->parent) {
 401 |                 ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
 402 |                 ats_entries = min(ats_entries, entries);
 403 |                 entries -= ats_entries;
 404 |
 405 |         } else {
 406 |                 struct amdgpu_vm_bo_base *pt;
 407 |
 408 |                 pt = ancestor->vm_bo;
 409 |                 ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
 410 |                 if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
 411 |                     ats_entries) {
 412 |                         ats_entries = 0;
 413 |                 } else {
 414 |                         ats_entries = entries;
 415 |                         entries = 0;
 416 |                 }
 417 |         }
 418 |
 419 |         r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 420 |         if (r)
 421 |                 return r;
 422 |
 423 |         if (vmbo->shadow) {
 424 |                 struct amdgpu_bo *shadow = vmbo->shadow;
 425 |
 426 |                 r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
 427 |                 if (r)
 428 |                         return r;
 429 |         }
 430 |
 431 |         if (!drm_dev_enter(adev_to_drm(adev), &idx))
 432 |                 return -ENODEV;
 433 |
 434 |         r = vm->update_funcs->map_table(vmbo);
 435 |         if (r)
 436 |                 goto exit;
 437 |
 438 |         memset(&params, 0, sizeof(params));
 439 |         params.adev = adev;
 440 |         params.vm = vm;
 441 |         params.immediate = immediate;
 442 |
 443 |         r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 444 |         if (r)
 445 |                 goto exit;
 446 |
 447 |         addr = 0;
 448 |         if (ats_entries) {
 449 |                 uint64_t value = 0, flags;
 450 |
 451 |                 flags = AMDGPU_PTE_DEFAULT_ATC;
 452 |                 if (level != AMDGPU_VM_PTB) {
 453 |                         /* Handle leaf PDEs as PTEs */
 454 |                         flags |= AMDGPU_PDE_PTE;
 455 |                         amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
 456 |                 }
 457 |
 458 |                 r = vm->update_funcs->update(&params, vmbo, addr, 0,
 459 |                                              ats_entries, value, flags);
 460 |                 if (r)
 461 |                         goto exit;
 462 |
 463 |                 addr += ats_entries * 8;
 464 |         }
 465 |
 466 |         if (entries) {
 467 |                 uint64_t value = 0, flags = 0;
 468 |
 469 |                 if (adev->asic_type >= CHIP_VEGA10) {
 470 |                         if (level != AMDGPU_VM_PTB) {
 471 |                                 /* Handle leaf PDEs as PTEs */
 472 |                                 flags |= AMDGPU_PDE_PTE;
 473 |                                 amdgpu_gmc_get_vm_pde(adev, level,
 474 |                                                       &value, &flags);
 475 |                         } else {
 476 |                                 /* Workaround for fault priority problem on GMC9 */
 477 |                                 flags = AMDGPU_PTE_EXECUTABLE;
 478 |                         }
 479 |                 }
 480 |
 481 |                 r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
 482 |                                              value, flags);
 483 |                 if (r)
 484 |                         goto exit;
 485 |         }
 486 |
 487 |         r = vm->update_funcs->commit(&params, NULL);
 488 | exit:
 489 |         drm_dev_exit(idx);
 490 |         return r;
 491 | }
 492 |
 493 | /**
 494 |  * amdgpu_vm_pt_create - create bo for PD/PT
 495 |  *
 496 |  * @adev: amdgpu_device pointer
 497 |  * @vm: requesting vm
 498 |  * @level: the page table level
 499 |  * @immediate: use an immediate update
 500 |  * @vmbo: pointer to the buffer object pointer
 501 |  */
 502 | int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 503 |                         int level, bool immediate, struct amdgpu_bo_vm **vmbo)
 504 | {
 505 |         struct amdgpu_bo_param bp;
 506 |         struct amdgpu_bo *bo;
 507 |         struct dma_resv *resv;
 508 |         unsigned int num_entries;
 509 |         int r;
 510 |
 511 |         memset(&bp, 0, sizeof(bp));
 512 |
 513 |         bp.size = amdgpu_vm_pt_size(adev, level);
 514 |         bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
 515 |         bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
 516 |         bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
 517 |         bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 518 |                 AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 519 |
 520 |         if (level < AMDGPU_VM_PTB)
 521 |                 num_entries = amdgpu_vm_pt_num_entries(adev, level);
 522 |         else
 523 |                 num_entries = 0;
 524 |
 525 |         bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
 526 |
 527 |         if (vm->use_cpu_for_update)
 528 |                 bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 529 |
 530 |         bp.type = ttm_bo_type_kernel;
 531 |         bp.no_wait_gpu = immediate;
 532 |         if (vm->root.bo)
 533 |                 bp.resv = vm->root.bo->tbo.base.resv;
 534 |
 535 |         r = amdgpu_bo_create_vm(adev, &bp, vmbo);
 536 |         if (r)
 537 |                 return r;
 538 |
 539 |         bo = &(*vmbo)->bo;
 540 |         if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
 541 |                 (*vmbo)->shadow = NULL;
 542 |                 return 0;
 543 |         }
 544 |
 545 |         if (!bp.resv)
 546 |                 WARN_ON(dma_resv_lock(bo->tbo.base.resv,
 547 |                                       NULL));
 548 |         resv = bp.resv;
 549 |         memset(&bp, 0, sizeof(bp));
 550 |         bp.size = amdgpu_vm_pt_size(adev, level);
 551 |         bp.domain = AMDGPU_GEM_DOMAIN_GTT;
 552 |         bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 553 |         bp.type = ttm_bo_type_kernel;
 554 |         bp.resv = bo->tbo.base.resv;
 555 |         bp.bo_ptr_size = sizeof(struct amdgpu_bo);
 556 |
 557 |         r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
 558 |
 559 |         if (!resv)
 560 |                 dma_resv_unlock(bo->tbo.base.resv);
 561 |
 562 |         if (r) {
 563 |                 amdgpu_bo_unref(&bo);
 564 |                 return r;
 565 |         }
 566 |
 567 |         amdgpu_bo_add_to_shadow_list(*vmbo);
 568 |
 569 |         return 0;
 570 | }
 571 |
 572 | /**
 573 |  * amdgpu_vm_pt_alloc - Allocate a specific page table
 574 |  *
 575 |  * @adev: amdgpu_device pointer
 576 |  * @vm: VM to allocate page tables for
 577 |  * @cursor: Which page table to allocate
 578 |  * @immediate: use an immediate update
 579 |  *
 580 |  * Make sure a specific page table or directory is allocated.
 581 |  *
 582 |  * Returns:
 583 |  * 1 if page table needed to be allocated, 0 if page table was already
 584 |  * allocated, negative errno if an error occurred.
 585 |  */
 586 | static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
 587 |                               struct amdgpu_vm *vm,
 588 |                               struct amdgpu_vm_pt_cursor *cursor,
 589 |                               bool immediate)
 590 | {
 591 |         struct amdgpu_vm_bo_base *entry = cursor->entry;
 592 |         struct amdgpu_bo *pt_bo;
 593 |         struct amdgpu_bo_vm *pt;
 594 |         int r;
 595 |
 596 |         if (entry->bo)
 597 |                 return 0;
 598 |
 599 |         amdgpu_vm_eviction_unlock(vm);
 600 |         r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
 601 |         amdgpu_vm_eviction_lock(vm);
 602 |         if (r)
 603 |                 return r;
 604 |
 605 |         /* Keep a reference to the root directory to avoid
 606 |          * freeing it up in the wrong order.
 607 |          */
 608 |         pt_bo = &pt->bo;
 609 |         pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
 610 |         amdgpu_vm_bo_base_init(entry, vm, pt_bo);
 611 |         r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
 612 |         if (r)
 613 |                 goto error_free_pt;
 614 |
 615 |         return 0;
 616 |
 617 | error_free_pt:
 618 |         amdgpu_bo_unref(&pt->shadow);
 619 |         amdgpu_bo_unref(&pt_bo);
 620 |         return r;
 621 | }
 622 |
 623 | /**
 624 |  * amdgpu_vm_pt_free - free one PD/PT
 625 |  *
 626 |  * @entry: PDE to free
 627 |  */
 628 | static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
 629 | {
 630 |         struct amdgpu_bo *shadow;
 631 |
 632 |         if (!entry->bo)
 633 |                 return;
 634 |
 635 |         entry->bo->vm_bo = NULL;
 636 |         shadow = amdgpu_bo_shadowed(entry->bo);
 637 |         if (shadow) {
 638 |                 ttm_bo_set_bulk_move(&shadow->tbo, NULL);
 639 |                 amdgpu_bo_unref(&shadow);
 640 |         }
 641 |         ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
 642 |
 643 |         spin_lock(&entry->vm->status_lock);
 644 |         list_del(&entry->vm_status);
 645 |         spin_unlock(&entry->vm->status_lock);
 646 |         amdgpu_bo_unref(&entry->bo);
 647 | }
 648 |
 649 | void amdgpu_vm_pt_free_work(struct work_struct *work)
 650 | {
 651 |         struct amdgpu_vm_bo_base *entry, *next;
 652 |         struct amdgpu_vm *vm;
 653 |         DRM_LIST_HEAD(pt_freed);
 654 |
 655 |         vm = container_of(work, struct amdgpu_vm, pt_free_work);
 656 |
 657 |         spin_lock(&vm->status_lock);
 658 |         list_splice_init(&vm->pt_freed, &pt_freed);
 659 |         spin_unlock(&vm->status_lock);
 660 |
 661 |         /* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */
 662 |         amdgpu_bo_reserve(vm->root.bo, true);
 663 |
 664 |         list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
 665 |                 amdgpu_vm_pt_free(entry);
 666 |
 667 |         amdgpu_bo_unreserve(vm->root.bo);
 668 | }
 669 |
 670 | /**
 671 |  * amdgpu_vm_pt_free_dfs - free PD/PT levels
 672 |  *
 673 |  * @adev: amdgpu device structure
 674 |  * @vm: amdgpu vm structure
 675 |  * @start: optional cursor where to start freeing PDs/PTs
 676 |  *
 677 |  * Free the page directory or page table level and all sub levels.
 678 |  */
 679 | static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
 680 |                                   struct amdgpu_vm *vm,
 681 |                                   struct amdgpu_vm_pt_cursor *start,
 682 |                                   bool unlocked)
 683 | {
 684 |         struct amdgpu_vm_pt_cursor cursor;
 685 |         struct amdgpu_vm_bo_base *entry;
 686 |
 687 |         if (unlocked) {
 688 |                 spin_lock(&vm->status_lock);
 689 |                 for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 690 |                         list_move(&entry->vm_status, &vm->pt_freed);
 691 |
 692 |                 if (start)
 693 |                         list_move(&start->entry->vm_status, &vm->pt_freed);
 694 |                 spin_unlock(&vm->status_lock);
 695 |                 schedule_work(&vm->pt_free_work);
 696 |                 return;
 697 |         }
 698 |
 699 |         for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
 700 |                 amdgpu_vm_pt_free(entry);
 701 |
 702 |         if (start)
 703 |                 amdgpu_vm_pt_free(start->entry);
 704 | }
 705 |
 706 | /**
 707 |  * amdgpu_vm_pt_free_root - free root PD
 708 |  * @adev: amdgpu device structure
 709 |  * @vm: amdgpu vm structure
 710 |  *
 711 |  * Free the root page directory and everything below it.
 712 |  */
 713 | void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 714 | {
 715 |         amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
 716 | }
 717 |
 718 | /**
 719 |  * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
 720 |  *
 721 |  * @adev: amdgpu_device pointer
 722 |  * @vm: the VM to check
 723 |  *
 724 |  * Check all entries of the root PD. If any subsequent PDs are allocated,
 725 |  * page tables are being created and filled, and the VM is not
 726 |  * clean.
 727 |  *
 728 |  * Returns:
 729 |  * True if this VM is clean.
 730 |  */
 731 | bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
 732 |                                 struct amdgpu_vm *vm)
 733 | {
 734 |         enum amdgpu_vm_level root = adev->vm_manager.root_level;
 735 |         unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
 736 |         unsigned int i = 0;
 737 |
 738 |         for (i = 0; i < entries; i++) {
 739 |                 if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
 740 |                         return false;
 741 |         }
 742 |         return true;
 743 | }
 744 |
 745 | /**
 746 |  * amdgpu_vm_pde_update - update a single level in the hierarchy
 747 |  *
 748 |  * @params: parameters for the update
 749 |  * @entry: entry to update
 750 |  *
 751 |  * Makes sure the requested entry in parent is up to date.
 752 |  */
 753 | int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
 754 |                          struct amdgpu_vm_bo_base *entry)
 755 | {
 756 |         struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
 757 |         struct amdgpu_bo *bo = parent->bo, *pbo;
 758 |         struct amdgpu_vm *vm = params->vm;
 759 |         uint64_t pde, pt, flags;
 760 |         unsigned int level;
 761 |
 762 |         for (level = 0, pbo = bo->parent; pbo; ++level)
 763 |                 pbo = pbo->parent;
 764 |
 765 |         level += params->adev->vm_manager.root_level;
 766 |         amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
 767 |         pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
 768 |         return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
 769 |                                         1, 0, flags);
 770 | }
 771 |
 772 | /*
 773 |  * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
 774 |  *
 775 |  * Make sure to set the right flags for the PTEs at the desired level.
 776 |  */
 777 | static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
 778 |                                        struct amdgpu_bo_vm *pt,
 779 |                                        unsigned int level,
 780 |                                        uint64_t pe, uint64_t addr,
 781 |                                        unsigned int count, uint32_t incr,
 782 |                                        uint64_t flags)
 783 |
 784 | {
 785 |         if (level != AMDGPU_VM_PTB) {
 786 |                 flags |= AMDGPU_PDE_PTE;
 787 |                 amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
 788 |
 789 |         } else if (params->adev->asic_type >= CHIP_VEGA10 &&
 790 |                    !(flags & AMDGPU_PTE_VALID) &&
 791 |                    !(flags & AMDGPU_PTE_PRT)) {
 792 |
 793 |                 /* Workaround for fault priority problem on GMC9 */
 794 |                 flags |= AMDGPU_PTE_EXECUTABLE;
 795 |         }
 796 |
 797 |         params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
 798 |                                          flags);
 799 | }
 800 |
 801 | /**
 802 |  * amdgpu_vm_pte_fragment - get fragment for PTEs
 803 |  *
 804 |  * @params: see amdgpu_vm_update_params definition
 805 |  * @start: first PTE to handle
 806 |  * @end: last PTE to handle
 807 |  * @flags: hw mapping flags
 808 |  * @frag: resulting fragment size
 809 |  * @frag_end: end of this fragment
 810 |  *
 811 |  * Returns the first possible fragment for the start and end address.
 812 |  */
 813 | static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
 814 |                                    uint64_t start, uint64_t end, uint64_t flags,
 815 |                                    unsigned int *frag, uint64_t *frag_end)
 816 | {
 817 |         /**
 818 |          * The MC L1 TLB supports variable sized pages, based on a fragment
 819 |          * field in the PTE. When this field is set to a non-zero value, page
 820 |          * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
 821 |          * flags are considered valid for all PTEs within the fragment range
 822 |          * and corresponding mappings are assumed to be physically contiguous.
 823 |          *
 824 |          * The L1 TLB can store a single PTE for the whole fragment,
 825 |          * significantly increasing the space available for translation
 826 |          * caching. This leads to large improvements in throughput when the
 827 |          * TLB is under pressure.
 828 |          *
 829 |          * The L2 TLB distributes small and large fragments into two
 830 |          * asymmetric partitions. The large fragment cache is significantly
 831 |          * larger. Thus, we try to use large fragments wherever possible.
 832 |          * Userspace can support this by aligning virtual base address and
 833 |          * allocation size to the fragment size.
 834 |          *
 835 |          * Starting with Vega10 the fragment size only controls the L1. The L2
 836 |          * is now directly fed with small/huge/giant pages from the walker.
 837 |          */
 838 |         unsigned int max_frag;
 839 |
 840 |         if (params->adev->asic_type < CHIP_VEGA10)
 841 |                 max_frag = params->adev->vm_manager.fragment_size;
 842 |         else
 843 |                 max_frag = 31;
 844 |
 845 |         /* system pages are not contiguous */
 846 |         if (params->pages_addr) {
 847 |                 *frag = 0;
 848 |                 *frag_end = end;
 849 |                 return;
 850 |         }
 851 |
 852 |         /* This intentionally wraps around if no bit is set */
 853 |         *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
 854 |         if (*frag >= max_frag) {
 855 |                 *frag = max_frag;
 856 |                 *frag_end = end & ~((1ULL << max_frag) - 1);
 857 |         } else {
 858 |                 *frag_end = start + (1 << *frag);
 859 |         }
 860 | }
 861 |
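A quick worked example of the fragment math above, with illustrative values rather than anything taken from the report: for start = 0x400 and end = 0x800, ffs(0x400) - 1 = 10 and fls64(0x800 - 0x400) - 1 = 10, so *frag = 10. On Vega10 and later max_frag is 31, so the else branch applies and *frag_end = 0x400 + (1 << 10) = 0x800: the whole range is covered by a single fragment of 2^10 pages.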
 862 | /**
 863 |  * amdgpu_vm_ptes_update - make sure that page tables are valid
 864 |  *
 865 |  * @params: see amdgpu_vm_update_params definition
 866 |  * @start: start of GPU address range
 867 |  * @end: end of GPU address range
 868 |  * @dst: destination address to map to, the next dst inside the function
 869 |  * @flags: mapping flags
 870 |  *
 871 |  * Update the page tables in the range @start - @end.
 872 |  *
 873 |  * Returns:
 874 |  * 0 for success, -EINVAL for failure.
 875 |  */
 876 | int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 877 |                           uint64_t start, uint64_t end,
 878 |                           uint64_t dst, uint64_t flags)
 879 | {
 880 |         struct amdgpu_device *adev = params->adev;
 881 |         struct amdgpu_vm_pt_cursor cursor;
 882 |         uint64_t frag_start = start, frag_end;
 883 |         unsigned int frag;
 884 |         int r;
 885 |
 886 |         /* figure out the initial fragment */
 887 |         amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
 888 |                                &frag_end);
 889 |
 890 |         /* walk over the address space and update the PTs */
 891 |         amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
 892 |         while (cursor.pfn < end) {
 893 |                 unsigned int shift, parent_shift, mask;
 894 |                 uint64_t incr, entry_end, pe_start;
 895 |                 struct amdgpu_bo *pt;
 896 |
 897 |                 if (!params->unlocked) {
 898 |                         /* make sure that the page tables covering the
 899 |                          * address range are actually allocated
 900 |                          */
 901 |                         r = amdgpu_vm_pt_alloc(params->adev, params->vm,
 902 |                                                &cursor, params->immediate);
 903 |                         if (r)
 904 |                                 return r;
 905 |                 }
 906 |
 907 |                 shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
 908 |                 parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
 909 |                 if (params->unlocked) {
 910 |                         /* Unlocked updates are only allowed on the leaves */
 911 |                         if (amdgpu_vm_pt_descendant(adev, &cursor))
 912 |                                 continue;
 913 |                 } else if (adev->asic_type < CHIP_VEGA10 &&
 914 |                            (flags & AMDGPU_PTE_VALID)) {
 915 |                         /* No huge page support before GMC v9 */
 916 |                         if (cursor.level != AMDGPU_VM_PTB) {
 917 |                                 if (!amdgpu_vm_pt_descendant(adev, &cursor))
 918 |                                         return -ENOENT;
 919 |                                 continue;
 920 |                         }
 921 |                 } else if (frag < shift) {
 922 |                         /* We can't use this level when the fragment size is
 923 |                          * smaller than the address shift. Go to the next
 924 |                          * child entry and try again.
 925 |                          */
 926 |                         if (amdgpu_vm_pt_descendant(adev, &cursor))
 927 |                                 continue;
 928 |                 } else if (frag >= parent_shift) {
 929 |                         /* If the fragment size is even larger than the parent
 930 |                          * shift we should go up one level and check it again.
 931 |                          */
 932 |                         if (!amdgpu_vm_pt_ancestor(&cursor))
 933 |                                 return -EINVAL;
 934 |                         continue;
 935 |                 }
 936 |
 937 |                 pt = cursor.entry->bo;
 938 |                 if (!pt) {
 939 |                         /* We need all PDs and PTs for mapping something, */
 940 |                         if (flags & AMDGPU_PTE_VALID)
 941 |                                 return -ENOENT;
 942 |
 943 |                         /* but unmapping something can happen at a higher
 944 |                          * level.
 945 |                          */
 946 |                         if (!amdgpu_vm_pt_ancestor(&cursor))
 947 |                                 return -EINVAL;
 948 |
 949 |                         pt = cursor.entry->bo;
 950 |                         shift = parent_shift;
 951 |                         frag_end = max(frag_end, roundup2(frag_start + 1,
 952 |                                                           1ULL << shift));
 953 |                 }
 954 |
 955 |                 /* Looks good so far, calculate parameters for the update */
 956 |                 incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
 957 |                 mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
 958 |                 pe_start = ((cursor.pfn >> shift) & mask) * 8;
 959 |                 entry_end = ((uint64_t)mask + 1) << shift;
 960 |                 entry_end += cursor.pfn & ~(entry_end - 1);
 961 |                 entry_end = min(entry_end, end);
 962 |
 963 |                 do {
 964 |                         struct amdgpu_vm *vm = params->vm;
 965 |                         uint64_t upd_end = min(entry_end, frag_end);
 966 |                         unsigned int nptes = (upd_end - frag_start) >> shift;
 967 |                         uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
 968 |
 969 |                         /* This can happen when we set higher level PDs to
 970 |                          * silent to stop fault floods.
 971 |                          */
 972 |                         nptes = max(nptes, 1u);
 973 |
 974 |                         trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
 975 |                                                     min(nptes, 32u), dst, incr,
 976 |                                                     upd_flags,
 977 |                                                     vm->task_info.tgid,
 978 |                                                     vm->immediate.fence_context);
 979 |                         amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
 980 |                                                    cursor.level, pe_start, dst,
 981 |                                                    nptes, incr, upd_flags);
 982 |
 983 |                         pe_start += nptes * 8;
 984 |                         dst += nptes * incr;
 985 |
 986 |                         frag_start = upd_end;
 987 |                         if (frag_start >= frag_end) {
 988 |                                 /* figure out the next fragment */
 989 |                                 amdgpu_vm_pte_fragment(params, frag_start, end,
 990 |                                                        flags, &frag, &frag_end);
 991 |                                 if (frag < shift)
 992 |                                         break;
 993 |                         }
 994 |                 } while (frag_start < entry_end);
 995 |
 996 |                 if (amdgpu_vm_pt_descendant(adev, &cursor)) {
 997 |                         /* Free all child entries.
 998 |                          * Update the tables with the flags and addresses and free up subsequent
 999 |                          * tables in the case of huge pages or freed up areas.
1000 |                          * This is the maximum you can free, because all other page tables are not
1001 |                          * completely covered by the range and so potentially still in use.
1002 |                          */
1003 |                         while (cursor.pfn < frag_start) {
1004 |                                 /* Make sure previous mapping is freed */
1005 |                                 if (cursor.entry->bo) {
1006 |                                         params->table_freed = true;
1007 |                                         amdgpu_vm_pt_free_dfs(adev, params->vm,
1008 |                                                               &cursor,
1009 |                                                               params->unlocked);
1010 |                                 }
1011 |                                 amdgpu_vm_pt_next(adev, &cursor);
1012 |                         }
1013 |
1014 |                 } else if (frag >= shift) {
1015 |                         /* or just move on to the next on the same level. */
1016 |                         amdgpu_vm_pt_next(adev, &cursor);
1017 |                 }
1018 |         }
1019 |
1020 |         return 0;
1021 | }
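One possible hardening, sketched here rather than taken from any tree: amdgpu_vm_pt_level_shift() uses ~0 as its "unknown level" sentinel, so a caller that feeds the result into a 64-bit shift could reject out-of-range values first, which would also satisfy the analyzer:

    /* Hypothetical guard in amdgpu_vm_pt_sibling(), ahead of line 240. */
    shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
    if (shift >= BITS_PER_LONG_LONG)        /* invalid level, cannot shift */
            return false;

Whether the default branch is actually reachable depends on cursor->level ever leaving the AMDGPU_VM_PDB2..AMDGPU_VM_PTB range, which the page table walker is not supposed to produce, so this may be a false positive that the guard merely documents.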