File: dev/pci/drm/amd/amdgpu/amdgpu_vm_pt.c
Warning: line 82, column 10: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long long'
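The flagged shift originates in amdgpu_vm_pt_level_shift() (line 49): its default case returns ~0, which as an unsigned int is 4294967295. amdgpu_vm_pt_num_entries() (line 74) computes that shift for adev->vm_manager.root_level and then evaluates 1ULL << shift at line 82. C makes a shift undefined when the count is greater than or equal to the width of the promoted left operand (64 bits for unsigned long long), and the analyzer cannot see an invariant guaranteeing that root_level is always one of the handled PDB/PTB levels, so it reports the default-case value reaching the shift. Below is a minimal standalone sketch of the same pattern, with a hypothetical level_shift() standing in for the driver function and an explicit guard showing the condition that would have to be ruled out:

    #include <stdio.h>

    /* Hypothetical stand-in for amdgpu_vm_pt_level_shift(): three PDB
     * levels, one PTB level, ~0 as an "unknown level" sentinel
     * (block_size assumed to be 9). */
    static unsigned int level_shift(unsigned int level)
    {
            switch (level) {
            case 0: case 1: case 2:
                    return 9 * (2 - level) + 9;
            case 3:
                    return 0;
            default:
                    return ~0;      /* UINT_MAX == 4294967295 */
            }
    }

    int main(void)
    {
            unsigned int shift = level_shift(42);   /* out-of-range level */

            if (shift >= 64) {
                    /* without this guard, 1ULL << shift is undefined */
                    fprintf(stderr, "shift %u out of range\n", shift);
                    return 1;
            }
            printf("entries = %llu\n", 1ULL << shift);
            return 0;
    }

In the driver the root level is expected to be one of the PDB levels, so the default branch should be unreachable on this path; the report is about that invariant not being visible to the analyzer.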
1 | // SPDX-License-Identifier: GPL-2.0 OR MIT
2 | /*
3 |  * Copyright 2022 Advanced Micro Devices, Inc.
4 |  *
5 |  * Permission is hereby granted, free of charge, to any person obtaining a
6 |  * copy of this software and associated documentation files (the "Software"),
7 |  * to deal in the Software without restriction, including without limitation
8 |  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 |  * and/or sell copies of the Software, and to permit persons to whom the
10 |  * Software is furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in
13 |  * all copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 |  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 |  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 |  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 |  * OTHER DEALINGS IN THE SOFTWARE.
22 |  */
23 |
24 | #include <drm/drm_drv.h>
25 |
26 | #include "amdgpu.h"
27 | #include "amdgpu_trace.h"
28 | #include "amdgpu_vm.h"
29 |
30 | /*
31 |  * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
32 |  */
33 | struct amdgpu_vm_pt_cursor {
34 | 	uint64_t pfn;
35 | 	struct amdgpu_vm_bo_base *parent;
36 | 	struct amdgpu_vm_bo_base *entry;
37 | 	unsigned int level;
38 | };
39 |
40 | /**
41 |  * amdgpu_vm_pt_level_shift - return the addr shift for each level
42 |  *
43 |  * @adev: amdgpu_device pointer
44 |  * @level: VMPT level
45 |  *
46 |  * Returns:
47 |  * The number of bits the pfn needs to be right shifted for a level.
48 |  */
49 | static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
50 | 					     unsigned int level)
51 | {
52 | 	switch (level) {
53 | 	case AMDGPU_VM_PDB2:
54 | 	case AMDGPU_VM_PDB1:
55 | 	case AMDGPU_VM_PDB0:
56 | 		return 9 * (AMDGPU_VM_PDB0 - level) +
57 | 			adev->vm_manager.block_size;
58 | 	case AMDGPU_VM_PTB:
59 | 		return 0;
60 | 	default:
61 | 		return ~0;
62 | 	}
63 | }
64 |
65 | /**
66 |  * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
67 |  *
68 |  * @adev: amdgpu_device pointer
69 |  * @level: VMPT level
70 |  *
71 |  * Returns:
72 |  * The number of entries in a page directory or page table.
73 |  */
74 | static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
75 | 					     unsigned int level)
76 | {
77 | 	unsigned int shift;
78 |
79 | 	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
80 | 	if (level == adev->vm_manager.root_level)
81 | 		/* For the root directory */
82 | 		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
83 | 			>> shift;
84 | 	else if (level != AMDGPU_VM_PTB)
85 | 		/* Everything in between */
86 | 		return 512;
87 |
88 | 	/* For the page tables on the leaves */
89 | 	return AMDGPU_VM_PTE_COUNT(adev);
90 | }
91 |
92 | /**
93 |  * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
94 |  *
95 |  * @adev: amdgpu_device pointer
96 |  *
97 |  * Returns:
98 |  * The number of entries in the root page directory which need the ATS setting.
99 |  */
100 | static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
101 | {
102 | 	unsigned int shift;
103 |
104 | 	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
105 | 	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
106 | }
107 |
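As a worked example of the arithmetic above (an assumed configuration, not values read from any device): with root_level = AMDGPU_VM_PDB2 and block_size = 9, amdgpu_vm_pt_level_shift() gives 9 * 2 + 9 = 27, and AMDGPU_GMC_HOLE_START (0x0000800000000000, i.e. 2^47) shifted right by 27 + AMDGPU_GPU_PAGE_SHIFT (12) yields 2^8 = 256 ATS entries in the root PD:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long hole_start = 0x0000800000000000ULL; /* 2^47 */
            unsigned int shift = 9 * 2 + 9; /* assumed PDB2 root, block_size 9 */

            /* prints: ats entries = 256 */
            printf("ats entries = %llu\n", hole_start >> (shift + 12));
            return 0;
    }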
108 | /**
109 |  * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
110 |  *
111 |  * @adev: amdgpu_device pointer
112 |  * @level: VMPT level
113 |  *
114 |  * Returns:
115 |  * The mask to extract the entry number of a PD/PT from an address.
116 |  */
117 | static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
118 | 					  unsigned int level)
119 | {
120 | 	if (level <= adev->vm_manager.root_level)
121 | 		return 0xffffffff;
122 | 	else if (level != AMDGPU_VM_PTB)
123 | 		return 0x1ff;
124 | 	else
125 | 		return AMDGPU_VM_PTE_COUNT(adev) - 1;
126 | }
127 |
128 | /**
129 |  * amdgpu_vm_pt_size - returns the size of the page table in bytes
130 |  *
131 |  * @adev: amdgpu_device pointer
132 |  * @level: VMPT level
133 |  *
134 |  * Returns:
135 |  * The size of the BO for a page directory or page table in bytes.
136 |  */
137 | static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
138 | 				      unsigned int level)
139 | {
140 | 	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
141 | }
142 |
143 | /**
144 |  * amdgpu_vm_pt_parent - get the parent page directory
145 |  *
146 |  * @pt: child page table
147 |  *
148 |  * Helper to get the parent entry for the child page table. NULL if we are at
149 |  * the root page directory.
150 |  */
151 | static struct amdgpu_vm_bo_base *
152 | amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
153 | {
154 | 	struct amdgpu_bo *parent = pt->bo->parent;
155 |
156 | 	if (!parent)
157 | 		return NULL;
158 |
159 | 	return parent->vm_bo;
160 | }
161 |
162 | /**
163 |  * amdgpu_vm_pt_start - start PD/PT walk
164 |  *
165 |  * @adev: amdgpu_device pointer
166 |  * @vm: amdgpu_vm structure
167 |  * @start: start address of the walk
168 |  * @cursor: state to initialize
169 |  *
170 |  * Initialize an amdgpu_vm_pt_cursor to start a walk.
171 |  */
172 | static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
173 | 			       struct amdgpu_vm *vm, uint64_t start,
174 | 			       struct amdgpu_vm_pt_cursor *cursor)
175 | {
176 | 	cursor->pfn = start;
177 | 	cursor->parent = NULL;
178 | 	cursor->entry = &vm->root;
179 | 	cursor->level = adev->vm_manager.root_level;
180 | }
181 |
182 | /**
183 |  * amdgpu_vm_pt_descendant - go to child node
184 |  *
185 |  * @adev: amdgpu_device pointer
186 |  * @cursor: current state
187 |  *
188 |  * Walk to the child node of the current node.
189 |  * Returns:
190 |  * True if the walk was possible, false otherwise.
191 |  */
192 | static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
193 | 				    struct amdgpu_vm_pt_cursor *cursor)
194 | {
195 | 	unsigned int mask, shift, idx;
196 |
197 | 	if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
198 | 	    !cursor->entry->bo)
199 | 		return false;
200 |
201 | 	mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
202 | 	shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
203 |
204 | 	++cursor->level;
205 | 	idx = (cursor->pfn >> shift) & mask;
206 | 	cursor->parent = cursor->entry;
207 | 	cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
208 | 	return true;
209 | }
210 |
211 | /**
212 |  * amdgpu_vm_pt_sibling - go to sibling node
213 |  *
214 |  * @adev: amdgpu_device pointer
215 |  * @cursor: current state
216 |  *
217 |  * Walk to the sibling node of the current node.
218 |  * Returns:
219 |  * True if the walk was possible, false otherwise.
220 |  */
221 | static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
222 | 				 struct amdgpu_vm_pt_cursor *cursor)
223 | {
224 |
225 | 	unsigned int shift, num_entries;
226 | 	struct amdgpu_bo_vm *parent;
227 |
228 | 	/* Root doesn't have a sibling */
229 | 	if (!cursor->parent)
230 | 		return false;
231 |
232 | 	/* Go to our parents and see if we got a sibling */
233 | 	shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
234 | 	num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
235 | 	parent = to_amdgpu_bo_vm(cursor->parent->bo);
236 |
237 | 	if (cursor->entry == &parent->entries[num_entries - 1])
238 | 		return false;
239 |
240 | 	cursor->pfn += 1ULL << shift;
241 | 	cursor->pfn &= ~((1ULL << shift) - 1);
242 | 	++cursor->entry;
243 | 	return true;
244 | }
245 |
246 | /**
247 |  * amdgpu_vm_pt_ancestor - go to parent node
248 |  *
249 |  * @cursor: current state
250 |  *
251 |  * Walk to the parent node of the current node.
252 |  * Returns:
253 |  * True if the walk was possible, false otherwise.
254 |  */
255 | static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
256 | {
257 | 	if (!cursor->parent)
258 | 		return false;
259 |
260 | 	--cursor->level;
261 | 	cursor->entry = cursor->parent;
262 | 	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
263 | 	return true;
264 | }
265 |
266 | /**
267 |  * amdgpu_vm_pt_next - get next PD/PT in hierarchy
268 |  *
269 |  * @adev: amdgpu_device pointer
270 |  * @cursor: current state
271 |  *
272 |  * Walk the PD/PT tree to the next node.
273 |  */
274 | static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
275 | 			      struct amdgpu_vm_pt_cursor *cursor)
276 | {
277 | 	/* First try a newborn child */
278 | 	if (amdgpu_vm_pt_descendant(adev, cursor))
279 | 		return;
280 |
281 | 	/* If that didn't work, try to find a sibling */
282 | 	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
283 | 		/* No sibling, go to our parents and grandparents */
284 | 		if (!amdgpu_vm_pt_ancestor(cursor)) {
285 | 			cursor->pfn = ~0ll;
286 | 			return;
287 | 		}
288 | 	}
289 | }
290 |
291 | /**
292 |  * amdgpu_vm_pt_first_dfs - start a depth-first search
293 |  *
294 |  * @adev: amdgpu_device structure
295 |  * @vm: amdgpu_vm structure
296 |  * @start: optional cursor to start with
297 |  * @cursor: state to initialize
298 |  *
299 |  * Starts a depth-first traversal of the PD/PT tree.
300 |  */
301 | static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
302 | 				   struct amdgpu_vm *vm,
303 | 				   struct amdgpu_vm_pt_cursor *start,
304 | 				   struct amdgpu_vm_pt_cursor *cursor)
305 | {
306 | 	if (start)
307 | 		*cursor = *start;
308 | 	else
309 | 		amdgpu_vm_pt_start(adev, vm, 0, cursor);
310 |
311 | 	while (amdgpu_vm_pt_descendant(adev, cursor))
312 | 		;
313 | }
314 |
315 | /**
316 |  * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
317 |  *
318 |  * @start: starting point for the search
319 |  * @entry: current entry
320 |  *
321 |  * Returns:
322 |  * True when the search should continue, false otherwise.
323 |  */
324 | static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
325 | 				      struct amdgpu_vm_bo_base *entry)
326 | {
327 | 	return entry && (!start || entry != start->entry);
328 | }
329 |
330 | /**
331 |  * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
332 |  *
333 |  * @adev: amdgpu_device structure
334 |  * @cursor: current state
335 |  *
336 |  * Move the cursor to the next node in a depth-first search.
337 |  */
338 | static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
339 | 				  struct amdgpu_vm_pt_cursor *cursor)
340 | {
341 | 	if (!cursor->entry)
342 | 		return;
343 |
344 | 	if (!cursor->parent)
345 | 		cursor->entry = NULL;
346 | 	else if (amdgpu_vm_pt_sibling(adev, cursor))
347 | 		while (amdgpu_vm_pt_descendant(adev, cursor))
348 | 			;
349 | 	else
350 | 		amdgpu_vm_pt_ancestor(cursor);
351 | }
352 |
353 | /*
354 |  * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
355 |  */
356 | #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)		\
357 | 	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),		\
358 | 	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
359 | 	     amdgpu_vm_pt_continue_dfs((start), (entry));			\
360 | 	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
361 |
362 | /**
363 |  * amdgpu_vm_pt_clear - initially clear the PDs/PTs
364 |  *
365 |  * @adev: amdgpu_device pointer
366 |  * @vm: VM to clear BO from
367 |  * @vmbo: BO to clear
368 |  * @immediate: use an immediate update
369 |  *
370 |  * Root PD needs to be reserved when calling this.
371 |  *
372 |  * Returns:
373 |  * 0 on success, errno otherwise.
374 |  */
375 | int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
376 | 		       struct amdgpu_bo_vm *vmbo, bool immediate)
377 | {
378 | 	unsigned int level = adev->vm_manager.root_level;
379 | 	struct ttm_operation_ctx ctx = { true, false };
380 | 	struct amdgpu_vm_update_params params;
381 | 	struct amdgpu_bo *ancestor = &vmbo->bo;
382 | 	unsigned int entries, ats_entries;
383 | 	struct amdgpu_bo *bo = &vmbo->bo;
384 | 	uint64_t addr;
385 | 	int r, idx;
386 |
387 | 	/* Figure out our place in the hierarchy */
388 | 	if (ancestor->parent) {
389 | 		++level;
390 | 		while (ancestor->parent->parent) {
391 | 			++level;
392 | 			ancestor = ancestor->parent;
393 | 		}
394 | 	}
395 |
396 | 	entries = amdgpu_bo_size(bo) / 8;
397 | 	if (!vm->pte_support_ats) {
398 | 		ats_entries = 0;
399 |
400 | 	} else if (!bo->parent) {
401 | 		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
402 | 		ats_entries = min(ats_entries, entries);
403 | 		entries -= ats_entries;
404 |
405 | 	} else {
406 | 		struct amdgpu_vm_bo_base *pt;
407 |
408 | 		pt = ancestor->vm_bo;
409 | 		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
410 | 		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
411 | 		    ats_entries) {
412 | 			ats_entries = 0;
413 | 		} else {
414 | 			ats_entries = entries;
415 | 			entries = 0;
416 | 		}
417 | 	}
418 |
419 | 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
420 | 	if (r)
421 | 		return r;
422 |
423 | 	if (vmbo->shadow) {
424 | 		struct amdgpu_bo *shadow = vmbo->shadow;
425 |
426 | 		r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
427 | 		if (r)
428 | 			return r;
429 | 	}
430 |
431 | 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
432 | 		return -ENODEV;
433 |
434 | 	r = vm->update_funcs->map_table(vmbo);
435 | 	if (r)
436 | 		goto exit;
437 |
438 | 	memset(&params, 0, sizeof(params));
439 | 	params.adev = adev;
440 | 	params.vm = vm;
441 | 	params.immediate = immediate;
442 |
443 | 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
444 | 	if (r)
445 | 		goto exit;
446 |
447 | 	addr = 0;
448 | 	if (ats_entries) {
449 | 		uint64_t value = 0, flags;
450 |
451 | 		flags = AMDGPU_PTE_DEFAULT_ATC;
452 | 		if (level != AMDGPU_VM_PTB) {
453 | 			/* Handle leaf PDEs as PTEs */
454 | 			flags |= AMDGPU_PDE_PTE;
455 | 			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
456 | 		}
457 |
458 | 		r = vm->update_funcs->update(&params, vmbo, addr, 0,
459 | 					     ats_entries, value, flags);
460 | 		if (r)
461 | 			goto exit;
462 |
463 | 		addr += ats_entries * 8;
464 | 	}
465 |
466 | 	if (entries) {
467 | 		uint64_t value = 0, flags = 0;
468 |
469 | 		if (adev->asic_type >= CHIP_VEGA10) {
470 | 			if (level != AMDGPU_VM_PTB) {
471 | 				/* Handle leaf PDEs as PTEs */
472 | 				flags |= AMDGPU_PDE_PTE;
473 | 				amdgpu_gmc_get_vm_pde(adev, level,
474 | 						      &value, &flags);
475 | 			} else {
476 | 				/* Workaround for fault priority problem on GMC9 */
477 | 				flags = AMDGPU_PTE_EXECUTABLE;
478 | 			}
479 | 		}
480 |
481 | 		r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
482 | 					     value, flags);
483 | 		if (r)
484 | 			goto exit;
485 | 	}
486 |
487 | 	r = vm->update_funcs->commit(&params, NULL);
488 | exit:
489 | 	drm_dev_exit(idx);
490 | 	return r;
491 | }
492 |
493 | /**
494 |  * amdgpu_vm_pt_create - create bo for PD/PT
495 |  *
496 |  * @adev: amdgpu_device pointer
497 |  * @vm: requesting vm
498 |  * @level: the page table level
499 |  * @immediate: use an immediate update
500 |  * @vmbo: pointer to the buffer object pointer
501 |  */
502 | int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
503 | 			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
504 | {
505 | 	struct amdgpu_bo_param bp;
506 | 	struct amdgpu_bo *bo;
507 | 	struct dma_resv *resv;
508 | 	unsigned int num_entries;
509 | 	int r;
510 |
511 | 	memset(&bp, 0, sizeof(bp));
512 |
513 | 	bp.size = amdgpu_vm_pt_size(adev, level);
514 | 	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
515 | 	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
516 | 	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
517 | 	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
518 | 		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
519 |
520 | 	if (level < AMDGPU_VM_PTB)
521 | 		num_entries = amdgpu_vm_pt_num_entries(adev, level);
522 | 	else
523 | 		num_entries = 0;
524 |
525 | 	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
526 |
527 | 	if (vm->use_cpu_for_update)
528 | 		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
529 |
530 | 	bp.type = ttm_bo_type_kernel;
531 | 	bp.no_wait_gpu = immediate;
532 | 	if (vm->root.bo)
533 | 		bp.resv = vm->root.bo->tbo.base.resv;
534 |
535 | 	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
536 | 	if (r)
537 | 		return r;
538 |
539 | 	bo = &(*vmbo)->bo;
540 | 	if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
541 | 		(*vmbo)->shadow = NULL;
542 | 		return 0;
543 | 	}
544 |
545 | 	if (!bp.resv)
546 | 		WARN_ON(dma_resv_lock(bo->tbo.base.resv,
547 | 				      NULL));
548 | 	resv = bp.resv;
549 | 	memset(&bp, 0, sizeof(bp));
550 | 	bp.size = amdgpu_vm_pt_size(adev, level);
551 | 	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
552 | 	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
553 | 	bp.type = ttm_bo_type_kernel;
554 | 	bp.resv = bo->tbo.base.resv;
555 | 	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
556 |
557 | 	r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
558 |
559 | 	if (!resv)
560 | 		dma_resv_unlock(bo->tbo.base.resv);
561 |
562 | 	if (r) {
563 | 		amdgpu_bo_unref(&bo);
564 | 		return r;
565 | 	}
566 |
567 | 	amdgpu_bo_add_to_shadow_list(*vmbo);
568 |
569 | 	return 0;
570 | }
571 |
572 | /**
573 |  * amdgpu_vm_pt_alloc - Allocate a specific page table
574 |  *
575 |  * @adev: amdgpu_device pointer
576 |  * @vm: VM to allocate page tables for
577 |  * @cursor: Which page table to allocate
578 |  * @immediate: use an immediate update
579 |  *
580 |  * Make sure a specific page table or directory is allocated.
581 |  *
582 |  * Returns:
583 |  * 0 if the page table was allocated or already present, negative errno
584 |  * if an error occurred.
585 |  */
586 | static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
587 | 			      struct amdgpu_vm *vm,
588 | 			      struct amdgpu_vm_pt_cursor *cursor,
589 | 			      bool immediate)
590 | {
591 | 	struct amdgpu_vm_bo_base *entry = cursor->entry;
592 | 	struct amdgpu_bo *pt_bo;
593 | 	struct amdgpu_bo_vm *pt;
594 | 	int r;
595 |
596 | 	if (entry->bo)
597 | 		return 0;
598 |
599 | 	amdgpu_vm_eviction_unlock(vm);
600 | 	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
601 | 	amdgpu_vm_eviction_lock(vm);
602 | 	if (r)
603 | 		return r;
604 |
605 | 	/* Keep a reference to the root directory to avoid
606 | 	 * freeing them up in the wrong order.
607 | 	 */
608 | 	pt_bo = &pt->bo;
609 | 	pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
610 | 	amdgpu_vm_bo_base_init(entry, vm, pt_bo);
611 | 	r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
612 | 	if (r)
613 | 		goto error_free_pt;
614 |
615 | 	return 0;
616 |
617 | error_free_pt:
618 | 	amdgpu_bo_unref(&pt->shadow);
619 | 	amdgpu_bo_unref(&pt_bo);
620 | 	return r;
621 | }
622 |
623 | /**
624 |  * amdgpu_vm_pt_free - free one PD/PT
625 |  *
626 |  * @entry: PDE to free
627 |  */
628 | static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
629 | {
630 | 	struct amdgpu_bo *shadow;
631 |
632 | 	if (!entry->bo)
633 | 		return;
634 |
635 | 	entry->bo->vm_bo = NULL;
636 | 	shadow = amdgpu_bo_shadowed(entry->bo);
637 | 	if (shadow) {
638 | 		ttm_bo_set_bulk_move(&shadow->tbo, NULL);
639 | 		amdgpu_bo_unref(&shadow);
640 | 	}
641 | 	ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
642 |
643 | 	spin_lock(&entry->vm->status_lock);
644 | 	list_del(&entry->vm_status);
645 | 	spin_unlock(&entry->vm->status_lock);
646 | 	amdgpu_bo_unref(&entry->bo);
647 | }
648 |
649 | void amdgpu_vm_pt_free_work(struct work_struct *work)
650 | {
651 | 	struct amdgpu_vm_bo_base *entry, *next;
652 | 	struct amdgpu_vm *vm;
653 | 	DRM_LIST_HEAD(pt_freed);
654 |
655 | 	vm = container_of(work, struct amdgpu_vm, pt_free_work);
656 |
657 | 	spin_lock(&vm->status_lock);
658 | 	list_splice_init(&vm->pt_freed, &pt_freed);
659 | 	spin_unlock(&vm->status_lock);
660 |
661 | 	/* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */
662 | 	amdgpu_bo_reserve(vm->root.bo, true);
663 |
664 | 	list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
665 | 		amdgpu_vm_pt_free(entry);
666 |
667 | 	amdgpu_bo_unreserve(vm->root.bo);
668 | }
669 |
670 | /**
671 |  * amdgpu_vm_pt_free_dfs - free PD/PT levels
672 |  *
673 |  * @adev: amdgpu device structure
674 |  * @vm: amdgpu vm structure
675 |  * @start: optional cursor where to start freeing PDs/PTs
676 |  *
677 |  * Free the page directory or page table level and all sub levels.
678 |  */
679 | static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
680 | 				  struct amdgpu_vm *vm,
681 | 				  struct amdgpu_vm_pt_cursor *start,
682 | 				  bool unlocked)
683 | {
684 | 	struct amdgpu_vm_pt_cursor cursor;
685 | 	struct amdgpu_vm_bo_base *entry;
686 |
687 | 	if (unlocked) {
688 | 		spin_lock(&vm->status_lock);
689 | 		for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
690 | 			list_move(&entry->vm_status, &vm->pt_freed);
691 |
692 | 		if (start)
693 | 			list_move(&start->entry->vm_status, &vm->pt_freed);
694 | 		spin_unlock(&vm->status_lock);
695 | 		schedule_work(&vm->pt_free_work);
696 | 		return;
697 | 	}
698 |
699 | 	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
700 | 		amdgpu_vm_pt_free(entry);
701 |
702 | 	if (start)
703 | 		amdgpu_vm_pt_free(start->entry);
704 | }
705 |
706 | /**
707 |  * amdgpu_vm_pt_free_root - free root PD
708 |  * @adev: amdgpu device structure
709 |  * @vm: amdgpu vm structure
710 |  *
711 |  * Free the root page directory and everything below it.
712 |  */
713 | void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
714 | {
715 | 	amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
716 | }
717 |
718 | /**
719 |  * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
720 |  *
721 |  * @adev: amdgpu_device pointer
722 |  * @vm: the VM to check
723 |  *
724 |  * Check all entries of the root PD. If any subsequent PDs are allocated,
725 |  * it means page tables are being created and filled, and the VM is not
726 |  * clean.
727 |  *
728 |  * Returns:
729 |  * True if this VM is clean.
730 |  */
731 | bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
732 | 				struct amdgpu_vm *vm)
733 | {
734 | 	enum amdgpu_vm_level root = adev->vm_manager.root_level;
735 | 	unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
736 | 	unsigned int i = 0;
737 |
738 | 	for (i = 0; i < entries; i++) {
739 | 		if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
740 | 			return false;
741 | 	}
742 | 	return true;
743 | }
744 |
745 | /**
746 |  * amdgpu_vm_pde_update - update a single level in the hierarchy
747 |  *
748 |  * @params: parameters for the update
749 |  * @entry: entry to update
750 |  *
751 |  * Makes sure the requested entry in parent is up to date.
752 |  */
753 | int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
754 | 			 struct amdgpu_vm_bo_base *entry)
755 | {
756 | 	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
757 | 	struct amdgpu_bo *bo = parent->bo, *pbo;
758 | 	struct amdgpu_vm *vm = params->vm;
759 | 	uint64_t pde, pt, flags;
760 | 	unsigned int level;
761 |
762 | 	for (level = 0, pbo = bo->parent; pbo; ++level)
763 | 		pbo = pbo->parent;
764 |
765 | 	level += params->adev->vm_manager.root_level;
766 | 	amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
767 | 	pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
768 | 	return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
769 | 					1, 0, flags);
770 | }
771 |
772 | /*
773 |  * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
774 |  *
775 |  * Make sure to set the right flags for the PTEs at the desired level.
776 |  */
777 | static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
778 | 				       struct amdgpu_bo_vm *pt,
779 | 				       unsigned int level,
780 | 				       uint64_t pe, uint64_t addr,
781 | 				       unsigned int count, uint32_t incr,
782 | 				       uint64_t flags)
783 |
784 | {
785 | 	if (level != AMDGPU_VM_PTB) {
786 | 		flags |= AMDGPU_PDE_PTE;
787 | 		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
788 |
789 | 	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
790 | 		   !(flags & AMDGPU_PTE_VALID) &&
791 | 		   !(flags & AMDGPU_PTE_PRT)) {
792 |
793 | 		/* Workaround for fault priority problem on GMC9 */
794 | 		flags |= AMDGPU_PTE_EXECUTABLE;
795 | 	}
796 |
797 | 	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
798 | 					 flags);
799 | }
800 |
801 | /**
802 |  * amdgpu_vm_pte_fragment - get fragment for PTEs
803 |  *
804 |  * @params: see amdgpu_vm_update_params definition
805 |  * @start: first PTE to handle
806 |  * @end: last PTE to handle
807 |  * @flags: hw mapping flags
808 |  * @frag: resulting fragment size
809 |  * @frag_end: end of this fragment
810 |  *
811 |  * Returns the first possible fragment for the start and end address.
812 |  */
813 | static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
814 | 				   uint64_t start, uint64_t end, uint64_t flags,
815 | 				   unsigned int *frag, uint64_t *frag_end)
816 | {
817 | 	/**
818 | 	 * The MC L1 TLB supports variable sized pages, based on a fragment
819 | 	 * field in the PTE. When this field is set to a non-zero value, page
820 | 	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
821 | 	 * flags are considered valid for all PTEs within the fragment range
822 | 	 * and corresponding mappings are assumed to be physically contiguous.
823 | 	 *
824 | 	 * The L1 TLB can store a single PTE for the whole fragment,
825 | 	 * significantly increasing the space available for translation
826 | 	 * caching. This leads to large improvements in throughput when the
827 | 	 * TLB is under pressure.
828 | 	 *
829 | 	 * The L2 TLB distributes small and large fragments into two
830 | 	 * asymmetric partitions. The large fragment cache is significantly
831 | 	 * larger. Thus, we try to use large fragments wherever possible.
832 | 	 * Userspace can support this by aligning the virtual base address and
833 | 	 * allocation size to the fragment size.
834 | 	 *
835 | 	 * Starting with Vega10 the fragment size only controls the L1. The L2
836 | 	 * is now directly fed with small/huge/giant pages from the walker.
837 | 	 */
838 | 	unsigned int max_frag;
839 |
840 | 	if (params->adev->asic_type < CHIP_VEGA10)
841 | 		max_frag = params->adev->vm_manager.fragment_size;
842 | 	else
843 | 		max_frag = 31;
844 |
845 | 	/* system pages are not contiguous */
846 | 	if (params->pages_addr) {
847 | 		*frag = 0;
848 | 		*frag_end = end;
849 | 		return;
850 | 	}
851 |
852 | 	/* This intentionally wraps around if no bit is set */
853 | 	*frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
854 | 	if (*frag >= max_frag) {
855 | 		*frag = max_frag;
856 | 		*frag_end = end & ~((1ULL << max_frag) - 1);
857 | 	} else {
858 | 		*frag_end = start + (1 << *frag);
859 | 	}
860 | }
861 |
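A standalone rendering of the fragment computation above, for one assumed input (start pfn 0x800, end pfn 0x1000, no system pages, Vega10+ max_frag of 31); __builtin_ffsll and __builtin_clzll stand in for the kernel's ffs()/fls64(). Both the lowest set bit of start and the range size permit a fragment of 2^11 pages, so the whole range becomes a single fragment:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t start = 0x800, end = 0x1000;   /* assumed pfn range */
            unsigned int max_frag = 31;             /* Vega10 and newer */
            unsigned int lsb = __builtin_ffsll(start) - 1;        /* ffs() - 1 */
            unsigned int msb = 63 - __builtin_clzll(end - start); /* fls64() - 1 */
            unsigned int frag = lsb < msb ? lsb : msb;
            uint64_t frag_end;

            if (frag >= max_frag) {
                    frag = max_frag;
                    frag_end = end & ~((1ULL << max_frag) - 1);
            } else {
                    frag_end = start + (1ULL << frag);
            }
            /* prints: frag=11 frag_end=0x1000 */
            printf("frag=%u frag_end=%#llx\n", frag,
                (unsigned long long)frag_end);
            return 0;
    }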
862 | /**
863 |  * amdgpu_vm_ptes_update - make sure that page tables are valid
864 |  *
865 |  * @params: see amdgpu_vm_update_params definition
866 |  * @start: start of GPU address range
867 |  * @end: end of GPU address range
868 |  * @dst: destination address to map to; advanced as the function walks the range
869 |  * @flags: mapping flags
870 |  *
871 |  * Update the page tables in the range @start - @end.
872 |  *
873 |  * Returns:
874 |  * 0 for success, negative errno for failure.
875 |  */
876 | int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
877 | 			  uint64_t start, uint64_t end,
878 | 			  uint64_t dst, uint64_t flags)
879 | {
880 | 	struct amdgpu_device *adev = params->adev;
881 | 	struct amdgpu_vm_pt_cursor cursor;
882 | 	uint64_t frag_start = start, frag_end;
883 | 	unsigned int frag;
884 | 	int r;
885 |
886 | 	/* figure out the initial fragment */
887 | 	amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
888 | 			       &frag_end);
889 |
890 | 	/* walk over the address space and update the PTs */
891 | 	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
892 | 	while (cursor.pfn < end) {
893 | 		unsigned int shift, parent_shift, mask;
894 | 		uint64_t incr, entry_end, pe_start;
895 | 		struct amdgpu_bo *pt;
896 |
897 | 		if (!params->unlocked) {
898 | 			/* make sure that the page tables covering the
899 | 			 * address range are actually allocated
900 | 			 */
901 | 			r = amdgpu_vm_pt_alloc(params->adev, params->vm,
902 | 					       &cursor, params->immediate);
903 | 			if (r)
904 | 				return r;
905 | 		}
906 |
907 | 		shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
908 | 		parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
909 | 		if (params->unlocked) {
910 | 			/* Unlocked updates are only allowed on the leaves */
911 | 			if (amdgpu_vm_pt_descendant(adev, &cursor))
912 | 				continue;
913 | 		} else if (adev->asic_type < CHIP_VEGA10 &&
914 | 			   (flags & AMDGPU_PTE_VALID)) {
915 | 			/* No huge page support before GMC v9 */
916 | 			if (cursor.level != AMDGPU_VM_PTB) {
917 | 				if (!amdgpu_vm_pt_descendant(adev, &cursor))
918 | 					return -ENOENT;
919 | 				continue;
920 | 			}
921 | 		} else if (frag < shift) {
922 | 			/* We can't use this level when the fragment size is
923 | 			 * smaller than the address shift. Go to the next
924 | 			 * child entry and try again.
925 | 			 */
926 | 			if (amdgpu_vm_pt_descendant(adev, &cursor))
927 | 				continue;
928 | 		} else if (frag >= parent_shift) {
929 | 			/* If the fragment size is even larger than the parent
930 | 			 * shift we should go up one level and check it again.
931 | 			 */
932 | 			if (!amdgpu_vm_pt_ancestor(&cursor))
933 | 				return -EINVAL;
934 | 			continue;
935 | 		}
936 |
937 | 		pt = cursor.entry->bo;
938 | 		if (!pt) {
939 | 			/* We need all PDs and PTs for mapping something, */
940 | 			if (flags & AMDGPU_PTE_VALID)
941 | 				return -ENOENT;
942 |
943 | 			/* but unmapping something can happen at a higher
944 | 			 * level.
945 | 			 */
946 | 			if (!amdgpu_vm_pt_ancestor(&cursor))
947 | 				return -EINVAL;
948 |
949 | 			pt = cursor.entry->bo;
950 | 			shift = parent_shift;
951 | 			frag_end = max(frag_end, roundup2(frag_start + 1,
952 | 						  1ULL << shift));
953 | 		}
954 |
955 | 		/* Looks good so far, calculate parameters for the update */
956 | 		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
957 | 		mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
958 | 		pe_start = ((cursor.pfn >> shift) & mask) * 8;
959 | 		entry_end = ((uint64_t)mask + 1) << shift;
960 | 		entry_end += cursor.pfn & ~(entry_end - 1);
961 | 		entry_end = min(entry_end, end);
962 |
963 | 		do {
964 | 			struct amdgpu_vm *vm = params->vm;
965 | 			uint64_t upd_end = min(entry_end, frag_end);
966 | 			unsigned int nptes = (upd_end - frag_start) >> shift;
967 | 			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
968 |
969 | 			/* This can happen when we set higher level PDs to
970 | 			 * silent to stop fault floods.
971 | 			 */
972 | 			nptes = max(nptes, 1u);
973 |
974 | 			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
975 | 						    min(nptes, 32u), dst, incr,
976 | 						    upd_flags,
977 | 						    vm->task_info.tgid,
978 | 						    vm->immediate.fence_context);
979 | 			amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
980 | 						   cursor.level, pe_start, dst,
981 | 						   nptes, incr, upd_flags);
982 |
983 | 			pe_start += nptes * 8;
984 | 			dst += nptes * incr;
985 |
986 | 			frag_start = upd_end;
987 | 			if (frag_start >= frag_end) {
988 | 				/* figure out the next fragment */
989 | 				amdgpu_vm_pte_fragment(params, frag_start, end,
990 | 						       flags, &frag, &frag_end);
991 | 				if (frag < shift)
992 | 					break;
993 | 			}
994 | 		} while (frag_start < entry_end);
995 |
996 | 		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
997 | 			/* Free all child entries.
998 | 			 * Update the tables with the flags and addresses and free up subsequent
999 | 			 * tables in the case of huge pages or freed up areas.
1000 | 			 * This is the maximum you can free, because all other page tables are not
1001 | 			 * completely covered by the range and so potentially still in use.
1002 | 			 */
1003 | 			while (cursor.pfn < frag_start) {
1004 | 				/* Make sure previous mapping is freed */
1005 | 				if (cursor.entry->bo) {
1006 | 					params->table_freed = true;
1007 | 					amdgpu_vm_pt_free_dfs(adev, params->vm,
1008 | 							      &cursor,
1009 | 							      params->unlocked);
1010 | 				}
1011 | 				amdgpu_vm_pt_next(adev, &cursor);
1012 | 			}
1013 |
1014 | 		} else if (frag >= shift) {
1015 | 			/* or just move on to the next on the same level. */
1016 | 			amdgpu_vm_pt_next(adev, &cursor);
1017 | 		}
1018 | 	}
1019 |
1020 | 	return 0;
1021 | }
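One way to address the report (an illustrative sketch only, not the upstream fix) is to make the bound on the shift explicit in amdgpu_vm_pt_num_entries() before it is used, so the undefined-shift path is provably dead:

    shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
    if (WARN_ON(shift >= 8 * sizeof(unsigned long long)))
            return 0;       /* unknown root level; avoid the undefined shift */

Alternatively, the ~0 sentinel in amdgpu_vm_pt_level_shift() could be replaced by a value callers check for explicitly; either variant gives the analyzer the invariant it is currently missing.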