| File: | dev/pci/drm/amd/amdgpu/amdgpu_vm_pt.c |
| Warning: | line 205, column 21 The result of the right shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'uint64_t' |
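The report points at file line 205, idx = (cursor->pfn >> shift) & mask;, in amdgpu_vm_pt_descendant(). The shift count comes from amdgpu_vm_pt_level_shift(), whose default: branch (line 61) returns ~0, i.e. 4294967295, for an unexpected VMPT level; shifting a uint64_t by 64 or more bits is undefined behaviour, which is what the analyzer flags. The standalone C sketch below only illustrates the pattern and one possible guard; the helper name, the block_size value of 9 and the bail-out policy are assumptions made for the example, not the driver's actual code or fix.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for amdgpu_vm_pt_level_shift(): like the driver's
     * default: branch, it returns ~0 when the level is not recognized. */
    static unsigned int level_shift(unsigned int level)
    {
        switch (level) {
        case 0: case 1: case 2:            /* PDB2..PDB0, block_size 9 assumed */
            return 9 * (2 - level) + 9;
        case 3:                            /* PTB */
            return 0;
        default:
            return ~0;                     /* 4294967295 */
        }
    }

    int main(void)
    {
        uint64_t pfn = 0x12345;
        unsigned int shift = level_shift(7);   /* bogus level */

        /* Shifting a uint64_t by 64 or more bits is undefined behaviour,
         * so check the value before using it, as the analyzer suggests. */
        if (shift >= 64) {
            fprintf(stderr, "invalid level shift %u\n", shift);
            return 1;
        }
        printf("idx = %llu\n", (unsigned long long)((pfn >> shift) & 0x1ff));
        return 0;
    }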
| 1 | // SPDX-License-Identifier: GPL-2.0 OR MIT | |||
| 2 | /* | |||
| 3 | * Copyright 2022 Advanced Micro Devices, Inc. | |||
| 4 | * | |||
| 5 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
| 6 | * copy of this software and associated documentation files (the "Software"), | |||
| 7 | * to deal in the Software without restriction, including without limitation | |||
| 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| 9 | * and/or sell copies of the Software, and to permit persons to whom the | |||
| 10 | * Software is furnished to do so, subject to the following conditions: | |||
| 11 | * | |||
| 12 | * The above copyright notice and this permission notice shall be included in | |||
| 13 | * all copies or substantial portions of the Software. | |||
| 14 | * | |||
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| 18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |||
| 19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |||
| 20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |||
| 21 | * OTHER DEALINGS IN THE SOFTWARE. | |||
| 22 | */ | |||
| 23 | ||||
| 24 | #include <drm/drm_drv.h> | |||
| 25 | ||||
| 26 | #include "amdgpu.h" | |||
| 27 | #include "amdgpu_trace.h" | |||
| 28 | #include "amdgpu_vm.h" | |||
| 29 | ||||
| 30 | /* | |||
| 31 | * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt | |||
| 32 | */ | |||
| 33 | struct amdgpu_vm_pt_cursor { | |||
| 34 | uint64_t pfn; | |||
| 35 | struct amdgpu_vm_bo_base *parent; | |||
| 36 | struct amdgpu_vm_bo_base *entry; | |||
| 37 | unsigned int level; | |||
| 38 | }; | |||
| 39 | ||||
| 40 | /** | |||
| 41 | * amdgpu_vm_pt_level_shift - return the addr shift for each level | |||
| 42 | * | |||
| 43 | * @adev: amdgpu_device pointer | |||
| 44 | * @level: VMPT level | |||
| 45 | * | |||
| 46 | * Returns: | |||
| 47 | * The number of bits the pfn needs to be right shifted for a level. | |||
| 48 | */ | |||
| 49 | static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, | |||
| 50 | unsigned int level) | |||
| 51 | { | |||
| 52 | switch (level) { | |||
| 53 | case AMDGPU_VM_PDB2: | |||
| 54 | case AMDGPU_VM_PDB1: | |||
| 55 | case AMDGPU_VM_PDB0: | |||
| 56 | return 9 * (AMDGPU_VM_PDB0 - level) + | |||
| 57 | adev->vm_manager.block_size; | |||
| 58 | case AMDGPU_VM_PTB: | |||
| 59 | return 0; | |||
| 60 | default: | |||
| 61 | return ~0; | |||
| 62 | } | |||
| 63 | } | |||
| 64 | ||||
| 65 | /** | |||
| 66 | * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT | |||
| 67 | * | |||
| 68 | * @adev: amdgpu_device pointer | |||
| 69 | * @level: VMPT level | |||
| 70 | * | |||
| 71 | * Returns: | |||
| 72 | * The number of entries in a page directory or page table. | |||
| 73 | */ | |||
| 74 | static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, | |||
| 75 | unsigned int level) | |||
| 76 | { | |||
| 77 | unsigned int shift; | |||
| 78 | ||||
| 79 | shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); | |||
| 80 | if (level == adev->vm_manager.root_level) | |||
| 81 | /* For the root directory */ | |||
| 82 | return round_up(adev->vm_manager.max_pfn, 1ULL << shift) | |||
| 83 | >> shift; | |||
| 84 | else if (level != AMDGPU_VM_PTB) | |||
| 85 | /* Everything in between */ | |||
| 86 | return 512; | |||
| 87 | ||||
| 88 | /* For the page tables on the leaves */ | |||
| 89 | return AMDGPU_VM_PTE_COUNT(adev); | |||
| 90 | } | |||
| 91 | ||||
| 92 | /** | |||
| 93 | * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD | |||
| 94 | * | |||
| 95 | * @adev: amdgpu_device pointer | |||
| 96 | * | |||
| 97 | * Returns: | |||
| 98 | * The number of entries in the root page directory which need the ATS setting. | |||
| 99 | */ | |||
| 100 | static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) | |||
| 101 | { | |||
| 102 | unsigned int shift; | |||
| 103 | ||||
| 104 | shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); | |||
| 105 | return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); | |||
| 106 | } | |||
| 107 | ||||
| 108 | /** | |||
| 109 | * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT | |||
| 110 | * | |||
| 111 | * @adev: amdgpu_device pointer | |||
| 112 | * @level: VMPT level | |||
| 113 | * | |||
| 114 | * Returns: | |||
| 115 | * The mask to extract the entry number of a PD/PT from an address. | |||
| 116 | */ | |||
| 117 | static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev, | |||
| 118 | unsigned int level) | |||
| 119 | { | |||
| 120 | if (level <= adev->vm_manager.root_level) | |||
| 121 | return 0xffffffff; | |||
| 122 | else if (level != AMDGPU_VM_PTB) | |||
| 123 | return 0x1ff; | |||
| 124 | else | |||
| 125 | return AMDGPU_VM_PTE_COUNT(adev) - 1; | |||
| 126 | } | |||
| 127 | ||||
| 128 | /** | |||
| 129 | * amdgpu_vm_pt_size - returns the size of the page table in bytes | |||
| 130 | * | |||
| 131 | * @adev: amdgpu_device pointer | |||
| 132 | * @level: VMPT level | |||
| 133 | * | |||
| 134 | * Returns: | |||
| 135 | * The size of the BO for a page directory or page table in bytes. | |||
| 136 | */ | |||
| 137 | static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev, | |||
| 138 | unsigned int level) | |||
| 139 | { | |||
| 140 | return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8); | |||
| 141 | } | |||
| 142 | ||||
| 143 | /** | |||
| 144 | * amdgpu_vm_pt_parent - get the parent page directory | |||
| 145 | * | |||
| 146 | * @pt: child page table | |||
| 147 | * | |||
| 148 | * Helper to get the parent entry for the child page table. NULL if we are at | |||
| 149 | * the root page directory. | |||
| 150 | */ | |||
| 151 | static struct amdgpu_vm_bo_base * | |||
| 152 | amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) | |||
| 153 | { | |||
| 154 | struct amdgpu_bo *parent = pt->bo->parent; | |||
| 155 | ||||
| 156 | if (!parent) | |||
| 157 | return NULL; | |||
| 158 | ||||
| 159 | return parent->vm_bo; | |||
| 160 | } | |||
| 161 | ||||
| 162 | /** | |||
| 163 | * amdgpu_vm_pt_start - start PD/PT walk | |||
| 164 | * | |||
| 165 | * @adev: amdgpu_device pointer | |||
| 166 | * @vm: amdgpu_vm structure | |||
| 167 | * @start: start address of the walk | |||
| 168 | * @cursor: state to initialize | |||
| 169 | * | |||
| 170 | * Initialize an amdgpu_vm_pt_cursor to start a walk. | |||
| 171 | */ | |||
| 172 | static void amdgpu_vm_pt_start(struct amdgpu_device *adev, | |||
| 173 | struct amdgpu_vm *vm, uint64_t start, | |||
| 174 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 175 | { | |||
| 176 | cursor->pfn = start; | |||
| 177 | cursor->parent = NULL; | |||
| 178 | cursor->entry = &vm->root; | |||
| 179 | cursor->level = adev->vm_manager.root_level; | |||
| 180 | } | |||
| 181 | ||||
| 182 | /** | |||
| 183 | * amdgpu_vm_pt_descendant - go to child node | |||
| 184 | * | |||
| 185 | * @adev: amdgpu_device pointer | |||
| 186 | * @cursor: current state | |||
| 187 | * | |||
| 188 | * Walk to the child node of the current node. | |||
| 189 | * Returns: | |||
| 190 | * True if the walk was possible, false otherwise. | |||
| 191 | */ | |||
| 192 | static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, | |||
| 193 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 194 | { | |||
| 195 | unsigned int mask, shift, idx; | |||
| 196 | ||||
| 197 | if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || | |||
| 198 | !cursor->entry->bo) | |||
| 199 | return false; | |||
| 200 | ||||
| 201 | mask = amdgpu_vm_pt_entries_mask(adev, cursor->level); | |||
| 202 | shift = amdgpu_vm_pt_level_shift(adev, cursor->level); | |||
| 203 | ||||
| 204 | ++cursor->level; | |||
| 205 | idx = (cursor->pfn >> shift) & mask; | |||
| 206 | cursor->parent = cursor->entry; | |||
| 207 | cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; | |||
| 208 | return true; | |||
| 209 | } | |||
| 210 | ||||
| 211 | /** | |||
| 212 | * amdgpu_vm_pt_sibling - go to sibling node | |||
| 213 | * | |||
| 214 | * @adev: amdgpu_device pointer | |||
| 215 | * @cursor: current state | |||
| 216 | * | |||
| 217 | * Walk to the sibling node of the current node. | |||
| 218 | * Returns: | |||
| 219 | * True if the walk was possible, false otherwise. | |||
| 220 | */ | |||
| 221 | static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, | |||
| 222 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 223 | { | |||
| 224 | ||||
| 225 | unsigned int shift, num_entries; | |||
| 226 | struct amdgpu_bo_vm *parent; | |||
| 227 | ||||
| 228 | /* Root doesn't have a sibling */ | |||
| 229 | if (!cursor->parent) | |||
| 230 | return false; | |||
| 231 | ||||
| 232 | /* Go to our parents and see if we got a sibling */ | |||
| 233 | shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1); | |||
| 234 | num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1); | |||
| 235 | parent = to_amdgpu_bo_vm(cursor->parent->bo); | |||
| 236 | ||||
| 237 | if (cursor->entry == &parent->entries[num_entries - 1]) | |||
| 238 | return false; | |||
| 239 | ||||
| 240 | cursor->pfn += 1ULL << shift; | |||
| 241 | cursor->pfn &= ~((1ULL << shift) - 1); | |||
| 242 | ++cursor->entry; | |||
| 243 | return true; | |||
| 244 | } | |||
| 245 | ||||
| 246 | /** | |||
| 247 | * amdgpu_vm_pt_ancestor - go to parent node | |||
| 248 | * | |||
| 249 | * @cursor: current state | |||
| 250 | * | |||
| 251 | * Walk to the parent node of the current node. | |||
| 252 | * Returns: | |||
| 253 | * True if the walk was possible, false otherwise. | |||
| 254 | */ | |||
| 255 | static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) | |||
| 256 | { | |||
| 257 | if (!cursor->parent) | |||
| 258 | return false; | |||
| 259 | ||||
| 260 | --cursor->level; | |||
| 261 | cursor->entry = cursor->parent; | |||
| 262 | cursor->parent = amdgpu_vm_pt_parent(cursor->parent); | |||
| 263 | return true; | |||
| 264 | } | |||
| 265 | ||||
| 266 | /** | |||
| 267 | * amdgpu_vm_pt_next - get next PD/PT in hierarchy | |||
| 268 | * | |||
| 269 | * @adev: amdgpu_device pointer | |||
| 270 | * @cursor: current state | |||
| 271 | * | |||
| 272 | * Walk the PD/PT tree to the next node. | |||
| 273 | */ | |||
| 274 | static void amdgpu_vm_pt_next(struct amdgpu_device *adev, | |||
| 275 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 276 | { | |||
| 277 | /* First try a newborn child */ | |||
| 278 | if (amdgpu_vm_pt_descendant(adev, cursor)) | |||
| 279 | return; | |||
| 280 | ||||
| 281 | /* If that didn't work, try to find a sibling */ | |||
| 282 | while (!amdgpu_vm_pt_sibling(adev, cursor)) { | |||
| 283 | /* No sibling, go to our parents and grandparents */ | |||
| 284 | if (!amdgpu_vm_pt_ancestor(cursor)) { | |||
| 285 | cursor->pfn = ~0ll; | |||
| 286 | return; | |||
| 287 | } | |||
| 288 | } | |||
| 289 | } | |||
| 290 | ||||
| 291 | /** | |||
| 292 | * amdgpu_vm_pt_first_dfs - start a depth first search | |||
| 293 | * | |||
| 294 | * @adev: amdgpu_device structure | |||
| 295 | * @vm: amdgpu_vm structure | |||
| 296 | * @start: optional cursor to start with | |||
| 297 | * @cursor: state to initialize | |||
| 298 | * | |||
| 299 | * Starts a depth first traversal of the PD/PT tree. | |||
| 300 | */ | |||
| 301 | static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, | |||
| 302 | struct amdgpu_vm *vm, | |||
| 303 | struct amdgpu_vm_pt_cursor *start, | |||
| 304 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 305 | { | |||
| 306 | if (start) | |||
| 307 | *cursor = *start; | |||
| 308 | else | |||
| 309 | amdgpu_vm_pt_start(adev, vm, 0, cursor); | |||
| 310 | ||||
| 311 | while (amdgpu_vm_pt_descendant(adev, cursor)) | |||
| 312 | ; | |||
| 313 | } | |||
| 314 | ||||
| 315 | /** | |||
| 316 | * amdgpu_vm_pt_continue_dfs - check if the depth first search should continue | |||
| 317 | * | |||
| 318 | * @start: starting point for the search | |||
| 319 | * @entry: current entry | |||
| 320 | * | |||
| 321 | * Returns: | |||
| 322 | * True when the search should continue, false otherwise. | |||
| 323 | */ | |||
| 324 | static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, | |||
| 325 | struct amdgpu_vm_bo_base *entry) | |||
| 326 | { | |||
| 327 | return entry && (!start || entry != start->entry); | |||
| 328 | } | |||
| 329 | ||||
| 330 | /** | |||
| 331 | * amdgpu_vm_pt_next_dfs - get the next node for a depth first search | |||
| 332 | * | |||
| 333 | * @adev: amdgpu_device structure | |||
| 334 | * @cursor: current state | |||
| 335 | * | |||
| 336 | * Move the cursor to the next node in a depth first search. | |||
| 337 | */ | |||
| 338 | static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, | |||
| 339 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 340 | { | |||
| 341 | if (!cursor->entry) | |||
| 342 | return; | |||
| 343 | ||||
| 344 | if (!cursor->parent) | |||
| 345 | cursor->entry = NULL; | |||
| 346 | else if (amdgpu_vm_pt_sibling(adev, cursor)) | |||
| 347 | while (amdgpu_vm_pt_descendant(adev, cursor)) | |||
| 348 | ; | |||
| 349 | else | |||
| 350 | amdgpu_vm_pt_ancestor(cursor); | |||
| 351 | } | |||
| 352 | ||||
| 353 | /* | |||
| 354 | * for_each_amdgpu_vm_pt_dfs_safe - safe depth first search of all PDs/PTs | |||
| 355 | */ | |||
| 356 | #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ | |||
| 357 | for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ | |||
| 358 | (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ | |||
| 359 | amdgpu_vm_pt_continue_dfs((start), (entry)); \ | |||
| 360 | (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) | |||
| 361 | ||||
| 362 | /** | |||
| 363 | * amdgpu_vm_pt_clear - initially clear the PDs/PTs | |||
| 364 | * | |||
| 365 | * @adev: amdgpu_device pointer | |||
| 366 | * @vm: VM to clear BO from | |||
| 367 | * @vmbo: BO to clear | |||
| 368 | * @immediate: use an immediate update | |||
| 369 | * | |||
| 370 | * Root PD needs to be reserved when calling this. | |||
| 371 | * | |||
| 372 | * Returns: | |||
| 373 | * 0 on success, errno otherwise. | |||
| 374 | */ | |||
| 375 | int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 376 | struct amdgpu_bo_vm *vmbo, bool immediate) | |||
| 377 | { | |||
| 378 | unsigned int level = adev->vm_manager.root_level; | |||
| 379 | struct ttm_operation_ctx ctx = { true, false }; | |||
| 380 | struct amdgpu_vm_update_params params; | |||
| 381 | struct amdgpu_bo *ancestor = &vmbo->bo; | |||
| 382 | unsigned int entries, ats_entries; | |||
| 383 | struct amdgpu_bo *bo = &vmbo->bo; | |||
| 384 | uint64_t addr; | |||
| 385 | int r, idx; | |||
| 386 | ||||
| 387 | /* Figure out our place in the hierarchy */ | |||
| 388 | if (ancestor->parent) { | |||
| 389 | ++level; | |||
| 390 | while (ancestor->parent->parent) { | |||
| 391 | ++level; | |||
| 392 | ancestor = ancestor->parent; | |||
| 393 | } | |||
| 394 | } | |||
| 395 | ||||
| 396 | entries = amdgpu_bo_size(bo) / 8; | |||
| 397 | if (!vm->pte_support_ats) { | |||
| 398 | ats_entries = 0; | |||
| 399 | ||||
| 400 | } else if (!bo->parent) { | |||
| 401 | ats_entries = amdgpu_vm_pt_num_ats_entries(adev); | |||
| 402 | ats_entries = min(ats_entries, entries); | |||
| 403 | entries -= ats_entries; | |||
| 404 | ||||
| 405 | } else { | |||
| 406 | struct amdgpu_vm_bo_base *pt; | |||
| 407 | ||||
| 408 | pt = ancestor->vm_bo; | |||
| 409 | ats_entries = amdgpu_vm_pt_num_ats_entries(adev); | |||
| 410 | if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= | |||
| 411 | ats_entries) { | |||
| 412 | ats_entries = 0; | |||
| 413 | } else { | |||
| 414 | ats_entries = entries; | |||
| 415 | entries = 0; | |||
| 416 | } | |||
| 417 | } | |||
| 418 | ||||
| 419 | r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | |||
| 420 | if (r) | |||
| 421 | return r; | |||
| 422 | ||||
| 423 | if (vmbo->shadow) { | |||
| 424 | struct amdgpu_bo *shadow = vmbo->shadow; | |||
| 425 | ||||
| 426 | r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); | |||
| 427 | if (r) | |||
| 428 | return r; | |||
| 429 | } | |||
| 430 | ||||
| 431 | if (!drm_dev_enter(adev_to_drm(adev), &idx)) | |||
| 432 | return -ENODEV; | |||
| 433 | ||||
| 434 | r = vm->update_funcs->map_table(vmbo); | |||
| 435 | if (r) | |||
| 436 | goto exit; | |||
| 437 | ||||
| 438 | memset(&params, 0, sizeof(params)); | |||
| 439 | params.adev = adev; | |||
| 440 | params.vm = vm; | |||
| 441 | params.immediate = immediate; | |||
| 442 | ||||
| 443 | r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); | |||
| 444 | if (r) | |||
| 445 | goto exit; | |||
| 446 | ||||
| 447 | addr = 0; | |||
| 448 | if (ats_entries) { | |||
| 449 | uint64_t value = 0, flags; | |||
| 450 | ||||
| 451 | flags = AMDGPU_PTE_DEFAULT_ATC; | |||
| 452 | if (level != AMDGPU_VM_PTB) { | |||
| 453 | /* Handle leaf PDEs as PTEs */ | |||
| 454 | flags |= AMDGPU_PDE_PTE; | |||
| 455 | amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); | |||
| 456 | } | |||
| 457 | ||||
| 458 | r = vm->update_funcs->update(&params, vmbo, addr, 0, | |||
| 459 | ats_entries, value, flags); | |||
| 460 | if (r) | |||
| 461 | goto exit; | |||
| 462 | ||||
| 463 | addr += ats_entries * 8; | |||
| 464 | } | |||
| 465 | ||||
| 466 | if (entries) { | |||
| 467 | uint64_t value = 0, flags = 0; | |||
| 468 | ||||
| 469 | if (adev->asic_type >= CHIP_VEGA10) { | |||
| 470 | if (level != AMDGPU_VM_PTB) { | |||
| 471 | /* Handle leaf PDEs as PTEs */ | |||
| 472 | flags |= AMDGPU_PDE_PTE; | |||
| 473 | amdgpu_gmc_get_vm_pde(adev, level, | |||
| 474 | &value, &flags); | |||
| 475 | } else { | |||
| 476 | /* Workaround for fault priority problem on GMC9 */ | |||
| 477 | flags = AMDGPU_PTE_EXECUTABLE; | |||
| 478 | } | |||
| 479 | } | |||
| 480 | ||||
| 481 | r = vm->update_funcs->update(&params, vmbo, addr, 0, entries, | |||
| 482 | value, flags); | |||
| 483 | if (r) | |||
| 484 | goto exit; | |||
| 485 | } | |||
| 486 | ||||
| 487 | r = vm->update_funcs->commit(&params, NULL); | |||
| 488 | exit: | |||
| 489 | drm_dev_exit(idx); | |||
| 490 | return r; | |||
| 491 | } | |||
| 492 | ||||
| 493 | /** | |||
| 494 | * amdgpu_vm_pt_create - create bo for PD/PT | |||
| 495 | * | |||
| 496 | * @adev: amdgpu_device pointer | |||
| 497 | * @vm: requesting vm | |||
| 498 | * @level: the page table level | |||
| 499 | * @immediate: use an immediate update | |||
| 500 | * @vmbo: pointer to the buffer object pointer | |||
| 501 | */ | |||
| 502 | int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 503 | int level, bool immediate, struct amdgpu_bo_vm **vmbo) | |||
| 504 | { | |||
| 505 | struct amdgpu_bo_param bp; | |||
| 506 | struct amdgpu_bo *bo; | |||
| 507 | struct dma_resv *resv; | |||
| 508 | unsigned int num_entries; | |||
| 509 | int r; | |||
| 510 | ||||
| 511 | memset(&bp, 0, sizeof(bp)); | |||
| 512 | ||||
| 513 | bp.size = amdgpu_vm_pt_size(adev, level); | |||
| 514 | bp.byte_align = AMDGPU_GPU_PAGE_SIZE; | |||
| 515 | bp.domain = AMDGPU_GEM_DOMAIN_VRAM; | |||
| 516 | bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); | |||
| 517 | bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | | |||
| 518 | AMDGPU_GEM_CREATE_CPU_GTT_USWC; | |||
| 519 | ||||
| 520 | if (level < AMDGPU_VM_PTB) | |||
| 521 | num_entries = amdgpu_vm_pt_num_entries(adev, level); | |||
| 522 | else | |||
| 523 | num_entries = 0; | |||
| 524 | ||||
| 525 | bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); | |||
| 526 | ||||
| 527 | if (vm->use_cpu_for_update) | |||
| 528 | bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; | |||
| 529 | ||||
| 530 | bp.type = ttm_bo_type_kernel; | |||
| 531 | bp.no_wait_gpu = immediate; | |||
| 532 | if (vm->root.bo) | |||
| 533 | bp.resv = vm->root.bo->tbo.base.resv; | |||
| 534 | ||||
| 535 | r = amdgpu_bo_create_vm(adev, &bp, vmbo); | |||
| 536 | if (r) | |||
| 537 | return r; | |||
| 538 | ||||
| 539 | bo = &(*vmbo)->bo; | |||
| 540 | if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { | |||
| 541 | (*vmbo)->shadow = NULL; | |||
| 542 | return 0; | |||
| 543 | } | |||
| 544 | ||||
| 545 | if (!bp.resv) | |||
| 546 | WARN_ON(dma_resv_lock(bo->tbo.base.resv, | |||
| 547 | NULL)); | |||
| 548 | resv = bp.resv; | |||
| 549 | memset(&bp, 0, sizeof(bp))__builtin_memset((&bp), (0), (sizeof(bp))); | |||
| 550 | bp.size = amdgpu_vm_pt_size(adev, level); | |||
| 551 | bp.domain = AMDGPU_GEM_DOMAIN_GTT; | |||
| 552 | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; | |||
| 553 | bp.type = ttm_bo_type_kernel; | |||
| 554 | bp.resv = bo->tbo.base.resv; | |||
| 555 | bp.bo_ptr_size = sizeof(struct amdgpu_bo); | |||
| 556 | ||||
| 557 | r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); | |||
| 558 | ||||
| 559 | if (!resv) | |||
| 560 | dma_resv_unlock(bo->tbo.base.resv); | |||
| 561 | ||||
| 562 | if (r) { | |||
| 563 | amdgpu_bo_unref(&bo); | |||
| 564 | return r; | |||
| 565 | } | |||
| 566 | ||||
| 567 | amdgpu_bo_add_to_shadow_list(*vmbo); | |||
| 568 | ||||
| 569 | return 0; | |||
| 570 | } | |||
| 571 | ||||
| 572 | /** | |||
| 573 | * amdgpu_vm_pt_alloc - Allocate a specific page table | |||
| 574 | * | |||
| 575 | * @adev: amdgpu_device pointer | |||
| 576 | * @vm: VM to allocate page tables for | |||
| 577 | * @cursor: Which page table to allocate | |||
| 578 | * @immediate: use an immediate update | |||
| 579 | * | |||
| 580 | * Make sure a specific page table or directory is allocated. | |||
| 581 | * | |||
| 582 | * Returns: | |||
| 583 | * 1 if page table needed to be allocated, 0 if page table was already | |||
| 584 | * allocated, negative errno if an error occurred. | |||
| 585 | */ | |||
| 586 | static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, | |||
| 587 | struct amdgpu_vm *vm, | |||
| 588 | struct amdgpu_vm_pt_cursor *cursor, | |||
| 589 | bool immediate) | |||
| 590 | { | |||
| 591 | struct amdgpu_vm_bo_base *entry = cursor->entry; | |||
| 592 | struct amdgpu_bo *pt_bo; | |||
| 593 | struct amdgpu_bo_vm *pt; | |||
| 594 | int r; | |||
| 595 | ||||
| 596 | if (entry->bo) | |||
| 597 | return 0; | |||
| 598 | ||||
| 599 | amdgpu_vm_eviction_unlock(vm); | |||
| 600 | r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); | |||
| 601 | amdgpu_vm_eviction_lock(vm); | |||
| 602 | if (r) | |||
| 603 | return r; | |||
| 604 | ||||
| 605 | /* Keep a reference to the root directory to avoid | |||
| 606 | * freeing them up in the wrong order. | |||
| 607 | */ | |||
| 608 | pt_bo = &pt->bo; | |||
| 609 | pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); | |||
| 610 | amdgpu_vm_bo_base_init(entry, vm, pt_bo); | |||
| 611 | r = amdgpu_vm_pt_clear(adev, vm, pt, immediate); | |||
| 612 | if (r) | |||
| 613 | goto error_free_pt; | |||
| 614 | ||||
| 615 | return 0; | |||
| 616 | ||||
| 617 | error_free_pt: | |||
| 618 | amdgpu_bo_unref(&pt->shadow); | |||
| 619 | amdgpu_bo_unref(&pt_bo); | |||
| 620 | return r; | |||
| 621 | } | |||
| 622 | ||||
| 623 | /** | |||
| 624 | * amdgpu_vm_pt_free - free one PD/PT | |||
| 625 | * | |||
| 626 | * @entry: PDE to free | |||
| 627 | */ | |||
| 628 | static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) | |||
| 629 | { | |||
| 630 | struct amdgpu_bo *shadow; | |||
| 631 | ||||
| 632 | if (!entry->bo) | |||
| 633 | return; | |||
| 634 | ||||
| 635 | entry->bo->vm_bo = NULL; | |||
| 636 | shadow = amdgpu_bo_shadowed(entry->bo); | |||
| 637 | if (shadow) { | |||
| 638 | ttm_bo_set_bulk_move(&shadow->tbo, NULL); | |||
| 639 | amdgpu_bo_unref(&shadow); | |||
| 640 | } | |||
| 641 | ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); | |||
| 642 | ||||
| 643 | spin_lock(&entry->vm->status_lock); | |||
| 644 | list_del(&entry->vm_status); | |||
| 645 | spin_unlock(&entry->vm->status_lock); | |||
| 646 | amdgpu_bo_unref(&entry->bo); | |||
| 647 | } | |||
| 648 | ||||
| 649 | void amdgpu_vm_pt_free_work(struct work_struct *work) | |||
| 650 | { | |||
| 651 | struct amdgpu_vm_bo_base *entry, *next; | |||
| 652 | struct amdgpu_vm *vm; | |||
| 653 | DRM_LIST_HEAD(pt_freed); | |||
| 654 | ||||
| 655 | vm = container_of(work, struct amdgpu_vm, pt_free_work); | |||
| 656 | ||||
| 657 | spin_lock(&vm->status_lock); | |||
| 658 | list_splice_init(&vm->pt_freed, &pt_freed); | |||
| 659 | spin_unlock(&vm->status_lock); | |||
| 660 | ||||
| 661 | /* flush_work in amdgpu_vm_fini ensures vm->root.bo is valid. */ | |||
| 662 | amdgpu_bo_reserve(vm->root.bo, true); | |||
| 663 | ||||
| 664 | list_for_each_entry_safe(entry, next, &pt_freed, vm_status) | |||
| 665 | amdgpu_vm_pt_free(entry); | |||
| 666 | ||||
| 667 | amdgpu_bo_unreserve(vm->root.bo); | |||
| 668 | } | |||
| 669 | ||||
| 670 | /** | |||
| 671 | * amdgpu_vm_pt_free_dfs - free PD/PT levels | |||
| 672 | * | |||
| 673 | * @adev: amdgpu device structure | |||
| 674 | * @vm: amdgpu vm structure | |||
| 675 | * @start: optional cursor where to start freeing PDs/PTs | |||
| 676 | * | |||
| 677 | * Free the page directory or page table level and all sub levels. | |||
| 678 | */ | |||
| 679 | static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, | |||
| 680 | struct amdgpu_vm *vm, | |||
| 681 | struct amdgpu_vm_pt_cursor *start, | |||
| 682 | bool unlocked) | |||
| 683 | { | |||
| 684 | struct amdgpu_vm_pt_cursor cursor; | |||
| 685 | struct amdgpu_vm_bo_base *entry; | |||
| 686 | ||||
| 687 | if (unlocked) { | |||
| 688 | spin_lock(&vm->status_lock); | |||
| 689 | for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) | |||
| 690 | list_move(&entry->vm_status, &vm->pt_freed); | |||
| 691 | ||||
| 692 | if (start) | |||
| 693 | list_move(&start->entry->vm_status, &vm->pt_freed); | |||
| 694 | spin_unlock(&vm->status_lock); | |||
| 695 | schedule_work(&vm->pt_free_work); | |||
| 696 | return; | |||
| 697 | } | |||
| 698 | ||||
| 699 | for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) | |||
| 700 | amdgpu_vm_pt_free(entry); | |||
| 701 | ||||
| 702 | if (start) | |||
| 703 | amdgpu_vm_pt_free(start->entry); | |||
| 704 | } | |||
| 705 | ||||
| 706 | /** | |||
| 707 | * amdgpu_vm_pt_free_root - free root PD | |||
| 708 | * @adev: amdgpu device structure | |||
| 709 | * @vm: amdgpu vm structure | |||
| 710 | * | |||
| 711 | * Free the root page directory and everything below it. | |||
| 712 | */ | |||
| 713 | void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 714 | { | |||
| 715 | amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); | |||
| 716 | } | |||
| 717 | ||||
| 718 | /** | |||
| 719 | * amdgpu_vm_pt_is_root_clean - check if a root PD is clean | |||
| 720 | * | |||
| 721 | * @adev: amdgpu_device pointer | |||
| 722 | * @vm: the VM to check | |||
| 723 | * | |||
| 724 | * Check all entries of the root PD. If any subsequent PDs are allocated, | |||
| 725 | * page tables are being created and filled, so this is not a clean | |||
| 726 | * VM. | |||
| 727 | * | |||
| 728 | * Returns: | |||
| 729 | * True if this VM is clean | |||
| 730 | */ | |||
| 731 | bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, | |||
| 732 | struct amdgpu_vm *vm) | |||
| 733 | { | |||
| 734 | enum amdgpu_vm_level root = adev->vm_manager.root_level; | |||
| 735 | unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); | |||
| 736 | unsigned int i = 0; | |||
| 737 | ||||
| 738 | for (i = 0; i < entries; i++) { | |||
| 739 | if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) | |||
| 740 | return false; | |||
| 741 | } | |||
| 742 | return true; | |||
| 743 | } | |||
| 744 | ||||
| 745 | /** | |||
| 746 | * amdgpu_vm_pde_update - update a single level in the hierarchy | |||
| 747 | * | |||
| 748 | * @params: parameters for the update | |||
| 749 | * @entry: entry to update | |||
| 750 | * | |||
| 751 | * Makes sure the requested entry in parent is up to date. | |||
| 752 | */ | |||
| 753 | int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, | |||
| 754 | struct amdgpu_vm_bo_base *entry) | |||
| 755 | { | |||
| 756 | struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); | |||
| 757 | struct amdgpu_bo *bo = parent->bo, *pbo; | |||
| 758 | struct amdgpu_vm *vm = params->vm; | |||
| 759 | uint64_t pde, pt, flags; | |||
| 760 | unsigned int level; | |||
| 761 | ||||
| 762 | for (level = 0, pbo = bo->parent; pbo; ++level) | |||
| 763 | pbo = pbo->parent; | |||
| 764 | ||||
| 765 | level += params->adev->vm_manager.root_level; | |||
| 766 | amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); | |||
| 767 | pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; | |||
| 768 | return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, | |||
| 769 | 1, 0, flags); | |||
| 770 | } | |||
| 771 | ||||
| 772 | /* | |||
| 773 | * amdgpu_vm_pte_update_flags - figure out flags for PTE updates | |||
| 774 | * | |||
| 775 | * Make sure to set the right flags for the PTEs at the desired level. | |||
| 776 | */ | |||
| 777 | static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, | |||
| 778 | struct amdgpu_bo_vm *pt, | |||
| 779 | unsigned int level, | |||
| 780 | uint64_t pe, uint64_t addr, | |||
| 781 | unsigned int count, uint32_t incr, | |||
| 782 | uint64_t flags) | |||
| 783 | ||||
| 784 | { | |||
| 785 | if (level != AMDGPU_VM_PTB) { | |||
| 786 | flags |= AMDGPU_PDE_PTE; | |||
| 787 | amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); | |||
| 788 | ||||
| 789 | } else if (params->adev->asic_type >= CHIP_VEGA10 && | |||
| 790 | !(flags & AMDGPU_PTE_VALID) && | |||
| 791 | !(flags & AMDGPU_PTE_PRT)) { | |||
| 792 | ||||
| 793 | /* Workaround for fault priority problem on GMC9 */ | |||
| 794 | flags |= AMDGPU_PTE_EXECUTABLE; | |||
| 795 | } | |||
| 796 | ||||
| 797 | params->vm->update_funcs->update(params, pt, pe, addr, count, incr, | |||
| 798 | flags); | |||
| 799 | } | |||
| 800 | ||||
| 801 | /** | |||
| 802 | * amdgpu_vm_pte_fragment - get fragment for PTEs | |||
| 803 | * | |||
| 804 | * @params: see amdgpu_vm_update_params definition | |||
| 805 | * @start: first PTE to handle | |||
| 806 | * @end: last PTE to handle | |||
| 807 | * @flags: hw mapping flags | |||
| 808 | * @frag: resulting fragment size | |||
| 809 | * @frag_end: end of this fragment | |||
| 810 | * | |||
| 811 | * Returns the first possible fragment for the start and end address. | |||
| 812 | */ | |||
| 813 | static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params, | |||
| 814 | uint64_t start, uint64_t end, uint64_t flags, | |||
| 815 | unsigned int *frag, uint64_t *frag_end) | |||
| 816 | { | |||
| 817 | /** | |||
| 818 | * The MC L1 TLB supports variable sized pages, based on a fragment | |||
| 819 | * field in the PTE. When this field is set to a non-zero value, page | |||
| 820 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE | |||
| 821 | * flags are considered valid for all PTEs within the fragment range | |||
| 822 | * and corresponding mappings are assumed to be physically contiguous. | |||
| 823 | * | |||
| 824 | * The L1 TLB can store a single PTE for the whole fragment, | |||
| 825 | * significantly increasing the space available for translation | |||
| 826 | * caching. This leads to large improvements in throughput when the | |||
| 827 | * TLB is under pressure. | |||
| 828 | * | |||
| 829 | * The L2 TLB distributes small and large fragments into two | |||
| 830 | * asymmetric partitions. The large fragment cache is significantly | |||
| 831 | * larger. Thus, we try to use large fragments wherever possible. | |||
| 832 | * Userspace can support this by aligning virtual base address and | |||
| 833 | * allocation size to the fragment size. | |||
| 834 | * | |||
| 835 | * Starting with Vega10 the fragment size only controls the L1. The L2 | |||
| 836 | * is now directly fed with small/huge/giant pages from the walker. | |||
| 837 | */ | |||
| 838 | unsigned int max_frag; | |||
| 839 | ||||
| 840 | if (params->adev->asic_type < CHIP_VEGA10) | |||
| 841 | max_frag = params->adev->vm_manager.fragment_size; | |||
| 842 | else | |||
| 843 | max_frag = 31; | |||
| 844 | ||||
| 845 | /* system pages are not contiguous */ | |||
| 846 | if (params->pages_addr) { | |||
| 847 | *frag = 0; | |||
| 848 | *frag_end = end; | |||
| 849 | return; | |||
| 850 | } | |||
| 851 | ||||
| 852 | /* This intentionally wraps around if no bit is set */ | |||
| 853 | *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1); | |||
| 854 | if (*frag >= max_frag) { | |||
| 855 | *frag = max_frag; | |||
| 856 | *frag_end = end & ~((1ULL << max_frag) - 1); | |||
| 857 | } else { | |||
| 858 | *frag_end = start + (1 << *frag); | |||
| 859 | } | |||
| 860 | } | |||
| 861 | ||||
| 862 | /** | |||
| 863 | * amdgpu_vm_ptes_update - make sure that page tables are valid | |||
| 864 | * | |||
| 865 | * @params: see amdgpu_vm_update_params definition | |||
| 866 | * @start: start of GPU address range | |||
| 867 | * @end: end of GPU address range | |||
| 868 | * @dst: destination address to map to, the next dst inside the function | |||
| 869 | * @flags: mapping flags | |||
| 870 | * | |||
| 871 | * Update the page tables in the range @start - @end. | |||
| 872 | * | |||
| 873 | * Returns: | |||
| 874 | * 0 for success, -EINVAL for failure. | |||
| 875 | */ | |||
| 876 | int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, | |||
| 877 | uint64_t start, uint64_t end, | |||
| 878 | uint64_t dst, uint64_t flags) | |||
| 879 | { | |||
| 880 | struct amdgpu_device *adev = params->adev; | |||
| 881 | struct amdgpu_vm_pt_cursor cursor; | |||
| 882 | uint64_t frag_start = start, frag_end; | |||
| 883 | unsigned int frag; | |||
| 884 | int r; | |||
| 885 | ||||
| 886 | /* figure out the initial fragment */ | |||
| 887 | amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag, | |||
| 888 | &frag_end); | |||
| 889 | ||||
| 890 | /* walk over the address space and update the PTs */ | |||
| 891 | amdgpu_vm_pt_start(adev, params->vm, start, &cursor); | |||
| 892 | while (cursor.pfn < end) { | |||
| 893 | unsigned int shift, parent_shift, mask; | |||
| 894 | uint64_t incr, entry_end, pe_start; | |||
| 895 | struct amdgpu_bo *pt; | |||
| 896 | ||||
| 897 | if (!params->unlocked) { | |||
| 898 | /* make sure that the page tables covering the | |||
| 899 | * address range are actually allocated | |||
| 900 | */ | |||
| 901 | r = amdgpu_vm_pt_alloc(params->adev, params->vm, | |||
| 902 | &cursor, params->immediate); | |||
| 903 | if (r) | |||
| 904 | return r; | |||
| 905 | } | |||
| 906 | ||||
| 907 | shift = amdgpu_vm_pt_level_shift(adev, cursor.level); | |||
| 908 | parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); | |||
| 909 | if (params->unlocked) { | |||
| 910 | /* Unlocked updates are only allowed on the leaves */ | |||
| 911 | if (amdgpu_vm_pt_descendant(adev, &cursor)) | |||
| 912 | continue; | |||
| 913 | } else if (adev->asic_type < CHIP_VEGA10 && | |||
| 914 | (flags & AMDGPU_PTE_VALID)) { | |||
| 915 | /* No huge page support before GMC v9 */ | |||
| 916 | if (cursor.level != AMDGPU_VM_PTB) { | |||
| 917 | if (!amdgpu_vm_pt_descendant(adev, &cursor)) | |||
| 918 | return -ENOENT; | |||
| 919 | continue; | |||
| 920 | } | |||
| 921 | } else if (frag < shift) { | |||
| 922 | /* We can't use this level when the fragment size is | |||
| 923 | * smaller than the address shift. Go to the next | |||
| 924 | * child entry and try again. | |||
| 925 | */ | |||
| 926 | if (amdgpu_vm_pt_descendant(adev, &cursor)) | |||
| 927 | continue; | |||
| 928 | } else if (frag >= parent_shift) { | |||
| 929 | /* If the fragment size is even larger than the parent | |||
| 930 | * shift we should go up one level and check it again. | |||
| 931 | */ | |||
| 932 | if (!amdgpu_vm_pt_ancestor(&cursor)) | |||
| 933 | return -EINVAL; | |||
| 934 | continue; | |||
| 935 | } | |||
| 936 | ||||
| 937 | pt = cursor.entry->bo; | |||
| 938 | if (!pt) { | |||
| 939 | /* We need all PDs and PTs for mapping something, */ | |||
| 940 | if (flags & AMDGPU_PTE_VALID) | |||
| 941 | return -ENOENT; | |||
| 942 | ||||
| 943 | /* but unmapping something can happen at a higher | |||
| 944 | * level. | |||
| 945 | */ | |||
| 946 | if (!amdgpu_vm_pt_ancestor(&cursor)) | |||
| 947 | return -EINVAL; | |||
| 948 | ||||
| 949 | pt = cursor.entry->bo; | |||
| 950 | shift = parent_shift; | |||
| 951 | frag_end = max(frag_end, roundup2(frag_start + 1, | |||
| 952 | 1ULL << shift)); | |||
| 953 | } | |||
| 954 | ||||
| 955 | /* Looks good so far, calculate parameters for the update */ | |||
| 956 | incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; | |||
| 957 | mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); | |||
| 958 | pe_start = ((cursor.pfn >> shift) & mask) * 8; | |||
| 959 | entry_end = ((uint64_t)mask + 1) << shift; | |||
| 960 | entry_end += cursor.pfn & ~(entry_end - 1); | |||
| 961 | entry_end = min(entry_end, end); | |||
| 962 | ||||
| 963 | do { | |||
| 964 | struct amdgpu_vm *vm = params->vm; | |||
| 965 | uint64_t upd_end = min(entry_end, frag_end); | |||
| 966 | unsigned int nptes = (upd_end - frag_start) >> shift; | |||
| 967 | uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); | |||
| 968 | ||||
| 969 | /* This can happen when we set higher level PDs to | |||
| 970 | * silent to stop fault floods. | |||
| 971 | */ | |||
| 972 | nptes = max(nptes, 1u); | |||
| 973 | ||||
| 974 | trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, | |||
| 975 | min(nptes, 32u), dst, incr, | |||
| 976 | upd_flags, | |||
| 977 | vm->task_info.tgid, | |||
| 978 | vm->immediate.fence_context); | |||
| 979 | amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), | |||
| 980 | cursor.level, pe_start, dst, | |||
| 981 | nptes, incr, upd_flags); | |||
| 982 | ||||
| 983 | pe_start += nptes * 8; | |||
| 984 | dst += nptes * incr; | |||
| 985 | ||||
| 986 | frag_start = upd_end; | |||
| 987 | if (frag_start >= frag_end) { | |||
| 988 | /* figure out the next fragment */ | |||
| 989 | amdgpu_vm_pte_fragment(params, frag_start, end, | |||
| 990 | flags, &frag, &frag_end); | |||
| 991 | if (frag < shift) | |||
| 992 | break; | |||
| 993 | } | |||
| 994 | } while (frag_start < entry_end); | |||
| 995 | ||||
| 996 | if (amdgpu_vm_pt_descendant(adev, &cursor)) { | |||
| 997 | /* Free all child entries. | |||
| 998 | * Update the tables with the flags and addresses and free up subsequent | |||
| 999 | * tables in the case of huge pages or freed up areas. | |||
| 1000 | * This is the maximum you can free, because all other page tables are not | |||
| 1001 | * completely covered by the range and so potentially still in use. | |||
| 1002 | */ | |||
| 1003 | while (cursor.pfn < frag_start) { | |||
| 1004 | /* Make sure previous mapping is freed */ | |||
| 1005 | if (cursor.entry->bo) { | |||
| 1006 | params->table_freed = true; | |||
| 1007 | amdgpu_vm_pt_free_dfs(adev, params->vm, | |||
| 1008 | &cursor, | |||
| 1009 | params->unlocked); | |||
| 1010 | } | |||
| 1011 | amdgpu_vm_pt_next(adev, &cursor); | |||
| 1012 | } | |||
| 1013 | ||||
| 1014 | } else if (frag >= shift) { | |||
| 1015 | /* or just move on to the next on the same level. */ | |||
| 1016 | amdgpu_vm_pt_next(adev, &cursor); | |||
| 1017 | } | |||
| 1018 | } | |||
| 1019 | ||||
| 1020 | return 0; | |||
| 1021 | } |