| File: | dev/pci/drm/amd/amdgpu/amdgpu_vm.c |
| Warning: | line 222, column 10: The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long long' |
| 1 | /* | |||
| 2 | * Copyright 2008 Advanced Micro Devices, Inc. | |||
| 3 | * Copyright 2008 Red Hat Inc. | |||
| 4 | * Copyright 2009 Jerome Glisse. | |||
| 5 | * | |||
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
| 7 | * copy of this software and associated documentation files (the "Software"), | |||
| 8 | * to deal in the Software without restriction, including without limitation | |||
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
| 10 | * and/or sell copies of the Software, and to permit persons to whom the | |||
| 11 | * Software is furnished to do so, subject to the following conditions: | |||
| 12 | * | |||
| 13 | * The above copyright notice and this permission notice shall be included in | |||
| 14 | * all copies or substantial portions of the Software. | |||
| 15 | * | |||
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
| 19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |||
| 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |||
| 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |||
| 22 | * OTHER DEALINGS IN THE SOFTWARE. | |||
| 23 | * | |||
| 24 | * Authors: Dave Airlie | |||
| 25 | * Alex Deucher | |||
| 26 | * Jerome Glisse | |||
| 27 | */ | |||
| 28 | #include <linux/dma-fence-array.h> | |||
| 29 | #include <linux/interval_tree_generic.h> | |||
| 30 | #include <linux/idr.h> | |||
| 31 | #include <linux/dma-buf.h> | |||
| 32 | ||||
| 33 | #include <drm/amdgpu_drm.h> | |||
| 34 | #include "amdgpu.h" | |||
| 35 | #include "amdgpu_trace.h" | |||
| 36 | #include "amdgpu_amdkfd.h" | |||
| 37 | #include "amdgpu_gmc.h" | |||
| 38 | #include "amdgpu_xgmi.h" | |||
| 39 | #include "amdgpu_dma_buf.h" | |||
| 40 | ||||
| 41 | /** | |||
| 42 | * DOC: GPUVM | |||
| 43 | * | |||
| 44 | * GPUVM is similar to the legacy GART on older ASICs; however, | |||
| 45 | * rather than there being a single global GART table | |||
| 46 | * for the entire GPU, there are multiple VM page tables active | |||
| 47 | * at any given time. The VM page tables can contain a mix of | |||
| 48 | * VRAM pages and system memory pages, and the system memory pages | |||
| 49 | * can be mapped as snooped (cached system pages) or unsnooped | |||
| 50 | * (uncached system pages). | |||
| 51 | * Each VM has an ID associated with it and there is a page table | |||
| 52 | * associated with each VMID. When executing a command buffer, | |||
| 53 | * the kernel tells the ring which VMID to use for that command | |||
| 54 | * buffer. VMIDs are allocated dynamically as commands are submitted. | |||
| 55 | * The userspace drivers maintain their own address space and the kernel | |||
| 56 | * sets up their page tables accordingly when they submit their | |||
| 57 | * command buffers and a VMID is assigned. | |||
| 58 | * Cayman/Trinity support up to 8 active VMs at any given time; | |||
| 59 | * SI supports 16. | |||
| 60 | */ | |||
| 61 | ||||
| 62 | #define START(node) ((node)->start) | |||
| 63 | #define LAST(node) ((node)->last) | |||
| 64 | ||||
| 65 | #ifdef __linux__ | |||
| 66 | INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, | |||
| 67 | START, LAST, static, amdgpu_vm_it) | |||
| 68 | #else | |||
| 69 | static struct amdgpu_bo_va_mapping * | |||
| 70 | amdgpu_vm_it_iter_first(struct rb_root_cached *root, uint64_t start, | |||
| 71 | uint64_t last) | |||
| 72 | { | |||
| 73 | struct amdgpu_bo_va_mapping *node; | |||
| 74 | struct rb_node *rb; | |||
| 75 | ||||
| 76 | for (rb = rb_first_cached(root)linux_root_RB_MINMAX((struct linux_root *)(&(root)->rb_root ), -1); rb; rb = rb_next(rb)linux_root_RB_NEXT((rb))) { | |||
| 77 | node = rb_entry(rb, typeof(*node), rb)({ const __typeof( ((typeof(*node) *)0)->rb ) *__mptr = (rb ); (typeof(*node) *)( (char *)__mptr - __builtin_offsetof(typeof (*node), rb) );}); | |||
| 78 | if (LAST(node) >= start && START(node) <= last) | |||
| 79 | return node; | |||
| 80 | } | |||
| 81 | return NULL((void *)0); | |||
| 82 | } | |||
| 83 | ||||
| 84 | static struct amdgpu_bo_va_mapping * | |||
| 85 | amdgpu_vm_it_iter_next(struct amdgpu_bo_va_mapping *node, uint64_t start, | |||
| 86 | uint64_t last) | |||
| 87 | { | |||
| 88 | STUB()do { printf("%s: stub\n", __func__); } while(0); | |||
| 89 | struct rb_node *rb = &node->rb; | |||
| 90 | ||||
| 91 | for (rb = rb_next(rb)linux_root_RB_NEXT((rb)); rb; rb = rb_next(rb)linux_root_RB_NEXT((rb))) { | |||
| 92 | node = rb_entry(rb, typeof(*node), rb)({ const __typeof( ((typeof(*node) *)0)->rb ) *__mptr = (rb ); (typeof(*node) *)( (char *)__mptr - __builtin_offsetof(typeof (*node), rb) );}); | |||
| 93 | if (LAST(node) >= start && START(node) <= last) | |||
| 94 | return node; | |||
| 95 | } | |||
| 96 | return NULL((void *)0); | |||
| 97 | } | |||
| 98 | ||||
| 99 | static void | |||
| 100 | amdgpu_vm_it_remove(struct amdgpu_bo_va_mapping *node, | |||
| 101 | struct rb_root_cached *root) | |||
| 102 | { | |||
| 103 | rb_erase_cached(&node->rb, root)linux_root_RB_REMOVE((struct linux_root *)(&(root)->rb_root ), (&node->rb)); | |||
| 104 | } | |||
| 105 | ||||
| 106 | static void | |||
| 107 | amdgpu_vm_it_insert(struct amdgpu_bo_va_mapping *node, | |||
| 108 | struct rb_root_cached *root) | |||
| 109 | { | |||
| 110 | struct rb_node **iter = &root->rb_root.rb_node; | |||
| 111 | struct rb_node *parent = NULL((void *)0); | |||
| 112 | struct amdgpu_bo_va_mapping *iter_node; | |||
| 113 | ||||
| 114 | while (*iter) { | |||
| 115 | parent = *iter; | |||
| 116 | iter_node = rb_entry(*iter, struct amdgpu_bo_va_mapping, rb)({ const __typeof( ((struct amdgpu_bo_va_mapping *)0)->rb ) *__mptr = (*iter); (struct amdgpu_bo_va_mapping *)( (char *) __mptr - __builtin_offsetof(struct amdgpu_bo_va_mapping, rb) ) ;}); | |||
| 117 | ||||
| 118 | if (node->start < iter_node->start) | |||
| 119 | iter = &(*iter)->rb_left__entry.rbe_left; | |||
| 120 | else | |||
| 121 | iter = &(*iter)->rb_right__entry.rbe_right; | |||
| 122 | } | |||
| 123 | ||||
| 124 | rb_link_node(&node->rb, parent, iter); | |||
| 125 | rb_insert_color_cached(&node->rb, root, false)linux_root_RB_INSERT_COLOR((struct linux_root *)(&(root)-> rb_root), (&node->rb)); | |||
| 126 | } | |||
| 127 | #endif | |||
| 128 | ||||
| 129 | #undef START | |||
| 130 | #undef LAST | |||
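On Linux these iterators come from INTERVAL_TREE_DEFINE(); the OpenBSD fallback above simply walks the whole rb-tree and applies a closed-interval overlap test to each node. A minimal standalone sketch of that overlap test (hypothetical names, not driver code):

/* Standalone sketch: the overlap test used by the fallback iterators above.
 * Two closed ranges [a_start, a_last] and [b_start, b_last] overlap iff each
 * one starts no later than the other one ends. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool ranges_overlap(uint64_t a_start, uint64_t a_last,
                           uint64_t b_start, uint64_t b_last)
{
        return a_last >= b_start && a_start <= b_last;
}

int main(void)
{
        /* A mapping covering pfns 0x100..0x1ff overlaps a query for 0x180..0x200. */
        printf("%d\n", ranges_overlap(0x100, 0x1ff, 0x180, 0x200)); /* 1 */
        printf("%d\n", ranges_overlap(0x100, 0x1ff, 0x200, 0x2ff)); /* 0 */
        return 0;
}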
| 131 | ||||
| 132 | /** | |||
| 133 | * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback | |||
| 134 | */ | |||
| 135 | struct amdgpu_prt_cb { | |||
| 136 | ||||
| 137 | /** | |||
| 138 | * @adev: amdgpu device | |||
| 139 | */ | |||
| 140 | struct amdgpu_device *adev; | |||
| 141 | ||||
| 142 | /** | |||
| 143 | * @cb: callback | |||
| 144 | */ | |||
| 145 | struct dma_fence_cb cb; | |||
| 146 | }; | |||
| 147 | ||||
| 148 | /* | |||
| 149 | * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS | |||
| 150 | * happens while holding this lock anywhere to prevent deadlocks when | |||
| 151 | * an MMU notifier runs in reclaim-FS context. | |||
| 152 | */ | |||
| 153 | static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) | |||
| 154 | { | |||
| 155 | mutex_lock(&vm->eviction_lock)rw_enter_write(&vm->eviction_lock); | |||
| 156 | #ifdef notyet | |||
| 157 | vm->saved_flags = memalloc_nofs_save(); | |||
| 158 | #endif | |||
| 159 | } | |||
| 160 | ||||
| 161 | static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) | |||
| 162 | { | |||
| 163 | if (mutex_trylock(&vm->eviction_lock)(rw_enter(&vm->eviction_lock, 0x0001UL | 0x0040UL) == 0 )) { | |||
| 164 | #ifdef notyet | |||
| 165 | vm->saved_flags = memalloc_nofs_save(); | |||
| 166 | #endif | |||
| 167 | return 1; | |||
| 168 | } | |||
| 169 | return 0; | |||
| 170 | } | |||
| 171 | ||||
| 172 | static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) | |||
| 173 | { | |||
| 174 | #ifdef notyet | |||
| 175 | memalloc_nofs_restore(vm->saved_flags); | |||
| 176 | #endif | |||
| 177 | mutex_unlock(&vm->eviction_lock)rw_exit_write(&vm->eviction_lock); | |||
| 178 | } | |||
| 179 | ||||
| 180 | /** | |||
| 181 | * amdgpu_vm_level_shift - return the addr shift for each level | |||
| 182 | * | |||
| 183 | * @adev: amdgpu_device pointer | |||
| 184 | * @level: VMPT level | |||
| 185 | * | |||
| 186 | * Returns: | |||
| 187 | * The number of bits the pfn needs to be right shifted for a level. | |||
| 188 | */ | |||
| 189 | static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, | |||
| 190 | unsigned level) | |||
| 191 | { | |||
| 192 | switch (level) { | |||
| 193 | case AMDGPU_VM_PDB2: | |||
| 194 | case AMDGPU_VM_PDB1: | |||
| 195 | case AMDGPU_VM_PDB0: | |||
| 196 | return 9 * (AMDGPU_VM_PDB0 - level) + | |||
| 197 | adev->vm_manager.block_size; | |||
| 198 | case AMDGPU_VM_PTB: | |||
| 199 | return 0; | |||
| 200 | default: | |||
| 201 | return ~0; | |||
| 202 | } | |||
| 203 | } | |||
| 204 | ||||
| 205 | /** | |||
| 206 | * amdgpu_vm_num_entries - return the number of entries in a PD/PT | |||
| 207 | * | |||
| 208 | * @adev: amdgpu_device pointer | |||
| 209 | * @level: VMPT level | |||
| 210 | * | |||
| 211 | * Returns: | |||
| 212 | * The number of entries in a page directory or page table. | |||
| 213 | */ | |||
| 214 | static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, | |||
| 215 | unsigned level) | |||
| 216 | { | |||
| 217 | unsigned shift = amdgpu_vm_level_shift(adev, | |||
| 218 | adev->vm_manager.root_level); | |||
| 219 | ||||
| 220 | if (level == adev->vm_manager.root_level) | |||
| 221 | /* For the root directory */ | |||
| 222 | return round_up(adev->vm_manager.max_pfn, 1ULL << shift)((((adev->vm_manager.max_pfn) + ((1ULL << shift) - 1)) / (1ULL << shift)) * (1ULL << shift)) | |||
| 223 | >> shift; | |||
| 224 | else if (level != AMDGPU_VM_PTB) | |||
| 225 | /* Everything in between */ | |||
| 226 | return 512; | |||
| 227 | else | |||
| 228 | /* For the page tables on the leaves */ | |||
| 229 | return AMDGPU_VM_PTE_COUNT(adev)(1 << (adev)->vm_manager.block_size); | |||
| 230 | } | |||
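This function is where the analyzer warning at the top of the report points: for source line 222 it evidently assumes that root_level can hit the default case of amdgpu_vm_level_shift(), which returns ~0, so the following 1ULL << shift shifts by 4294967295 and is undefined behaviour, since a shift count must be smaller than the width of unsigned long long. A standalone sketch of that pattern and of the kind of guard that avoids it (hypothetical values, not a proposed driver fix):

/* Standalone sketch of the pattern flagged at line 222 (made-up values, not
 * driver code): if the level-shift helper falls through to its default case
 * and returns ~0, the subsequent 1ULL << shift is undefined behaviour. */
#include <stdint.h>
#include <stdio.h>

static unsigned level_shift(unsigned level)
{
        switch (level) {
        case 0: case 1: case 2:
                return 9 * (2 - level) + 9;   /* e.g. block_size == 9 */
        case 3:
                return 0;                     /* leaf page table */
        default:
                return ~0;                    /* "impossible" level */
        }
}

int main(void)
{
        unsigned shift = level_shift(4);      /* out-of-range level */

        if (shift >= 64) {
                /* A guard like this is what would silence the analyzer. */
                printf("invalid shift %u, refusing to compute\n", shift);
                return 1;
        }
        printf("entries per table: %llu\n", 1ULL << shift);
        return 0;
}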
| 231 | ||||
| 232 | /** | |||
| 233 | * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD | |||
| 234 | * | |||
| 235 | * @adev: amdgpu_device pointer | |||
| 236 | * | |||
| 237 | * Returns: | |||
| 238 | * The number of entries in the root page directory which need the ATS setting. | |||
| 239 | */ | |||
| 240 | static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) | |||
| 241 | { | |||
| 242 | unsigned shift; | |||
| 243 | ||||
| 244 | shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); | |||
| 245 | return AMDGPU_GMC_HOLE_START0x0000800000000000ULL >> (shift + AMDGPU_GPU_PAGE_SHIFT12); | |||
| 246 | } | |||
| 247 | ||||
| 248 | /** | |||
| 249 | * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT | |||
| 250 | * | |||
| 251 | * @adev: amdgpu_device pointer | |||
| 252 | * @level: VMPT level | |||
| 253 | * | |||
| 254 | * Returns: | |||
| 255 | * The mask to extract the entry number of a PD/PT from an address. | |||
| 256 | */ | |||
| 257 | static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, | |||
| 258 | unsigned int level) | |||
| 259 | { | |||
| 260 | if (level <= adev->vm_manager.root_level) | |||
| 261 | return 0xffffffff; | |||
| 262 | else if (level != AMDGPU_VM_PTB) | |||
| 263 | return 0x1ff; | |||
| 264 | else | |||
| 265 | return AMDGPU_VM_PTE_COUNT(adev)(1 << (adev)->vm_manager.block_size) - 1; | |||
| 266 | } | |||
| 267 | ||||
| 268 | /** | |||
| 269 | * amdgpu_vm_bo_size - returns the size of the BOs in bytes | |||
| 270 | * | |||
| 271 | * @adev: amdgpu_device pointer | |||
| 272 | * @level: VMPT level | |||
| 273 | * | |||
| 274 | * Returns: | |||
| 275 | * The size of the BO for a page directory or page table in bytes. | |||
| 276 | */ | |||
| 277 | static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) | |||
| 278 | { | |||
| 279 | return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8)(((amdgpu_vm_num_entries(adev, level) * 8) + (4096 - 1)) & ~(4096 - 1)); | |||
| 280 | } | |||
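Taken together, amdgpu_vm_level_shift(), amdgpu_vm_entries_mask() and amdgpu_vm_bo_size() describe how a GPU page frame number is split into per-level directory/table indices and how large the backing BO for each level is. A standalone sketch with assumed parameters (block_size of 9, a four-level layout, a made-up pfn), not the driver's actual configuration:

/* Standalone sketch (assumed parameters, not driver code): decompose a GPU
 * page frame number into per-level indices the way the shift/mask helpers
 * above do, with block_size == 9 and levels 0..3 (root .. leaf). */
#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE 9u                 /* log2 of PTEs per leaf page table */

static unsigned level_shift(unsigned level) /* 0 = root ... 3 = leaf */
{
        return (level < 3) ? 9 * (2 - level) + BLOCK_SIZE : 0;
}

int main(void)
{
        uint64_t pfn = 0x12345678ULL;

        for (unsigned level = 0; level <= 3; level++) {
                unsigned shift = level_shift(level);
                uint64_t mask = (level == 0) ? ~0u :
                                (level < 3)  ? 0x1ff : (1u << BLOCK_SIZE) - 1;
                printf("level %u: shift %2u index 0x%llx\n",
                       level, shift, (unsigned long long)((pfn >> shift) & mask));
        }

        /* One full PD/PT level holds 512 entries of 8 bytes -> one 4 KiB page. */
        printf("bo size: %d bytes\n", 512 * 8);
        return 0;
}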
| 281 | ||||
| 282 | /** | |||
| 283 | * amdgpu_vm_bo_evicted - vm_bo is evicted | |||
| 284 | * | |||
| 285 | * @vm_bo: vm_bo which is evicted | |||
| 286 | * | |||
| 287 | * State for PDs/PTs and per VM BOs which are not at the location they should | |||
| 288 | * be. | |||
| 289 | */ | |||
| 290 | static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) | |||
| 291 | { | |||
| 292 | struct amdgpu_vm *vm = vm_bo->vm; | |||
| 293 | struct amdgpu_bo *bo = vm_bo->bo; | |||
| 294 | ||||
| 295 | vm_bo->moved = true1; | |||
| 296 | if (bo->tbo.type == ttm_bo_type_kernel) | |||
| 297 | list_move(&vm_bo->vm_status, &vm->evicted); | |||
| 298 | else | |||
| 299 | list_move_tail(&vm_bo->vm_status, &vm->evicted); | |||
| 300 | } | |||
| 301 | /** | |||
| 302 | * amdgpu_vm_bo_moved - vm_bo is moved | |||
| 303 | * | |||
| 304 | * @vm_bo: vm_bo which is moved | |||
| 305 | * | |||
| 306 | * State for per VM BOs which are moved, but that change is not yet reflected | |||
| 307 | * in the page tables. | |||
| 308 | */ | |||
| 309 | static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) | |||
| 310 | { | |||
| 311 | list_move(&vm_bo->vm_status, &vm_bo->vm->moved); | |||
| 312 | } | |||
| 313 | ||||
| 314 | /** | |||
| 315 | * amdgpu_vm_bo_idle - vm_bo is idle | |||
| 316 | * | |||
| 317 | * @vm_bo: vm_bo which is now idle | |||
| 318 | * | |||
| 319 | * State for PDs/PTs and per VM BOs which have gone through the state machine | |||
| 320 | * and are now idle. | |||
| 321 | */ | |||
| 322 | static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) | |||
| 323 | { | |||
| 324 | list_move(&vm_bo->vm_status, &vm_bo->vm->idle); | |||
| 325 | vm_bo->moved = false0; | |||
| 326 | } | |||
| 327 | ||||
| 328 | /** | |||
| 329 | * amdgpu_vm_bo_invalidated - vm_bo is invalidated | |||
| 330 | * | |||
| 331 | * @vm_bo: vm_bo which is now invalidated | |||
| 332 | * | |||
| 333 | * State for normal BOs which are invalidated and that change is not yet reflected | |||
| 334 | * in the PTs. | |||
| 335 | */ | |||
| 336 | static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) | |||
| 337 | { | |||
| 338 | spin_lock(&vm_bo->vm->invalidated_lock)mtx_enter(&vm_bo->vm->invalidated_lock); | |||
| 339 | list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); | |||
| 340 | spin_unlock(&vm_bo->vm->invalidated_lock)mtx_leave(&vm_bo->vm->invalidated_lock); | |||
| 341 | } | |||
| 342 | ||||
| 343 | /** | |||
| 344 | * amdgpu_vm_bo_relocated - vm_bo is relocated | |||
| 345 | * | |||
| 346 | * @vm_bo: vm_bo which is relocated | |||
| 347 | * | |||
| 348 | * State for PDs/PTs which needs to update their parent PD. | |||
| 349 | * For the root PD, just move to idle state. | |||
| 350 | */ | |||
| 351 | static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) | |||
| 352 | { | |||
| 353 | if (vm_bo->bo->parent) | |||
| 354 | list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); | |||
| 355 | else | |||
| 356 | amdgpu_vm_bo_idle(vm_bo); | |||
| 357 | } | |||
| 358 | ||||
| 359 | /** | |||
| 360 | * amdgpu_vm_bo_done - vm_bo is done | |||
| 361 | * | |||
| 362 | * @vm_bo: vm_bo which is now done | |||
| 363 | * | |||
| 364 | * State for normal BOs which are invalidated and that change has been updated | |||
| 365 | * in the PTs. | |||
| 366 | */ | |||
| 367 | static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) | |||
| 368 | { | |||
| 369 | spin_lock(&vm_bo->vm->invalidated_lock)mtx_enter(&vm_bo->vm->invalidated_lock); | |||
| 370 | list_del_init(&vm_bo->vm_status); | |||
| 371 | spin_unlock(&vm_bo->vm->invalidated_lock)mtx_leave(&vm_bo->vm->invalidated_lock); | |||
| 372 | } | |||
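The six helpers above (evicted, moved, idle, invalidated, relocated, done) form a small per-BO state machine; each helper simply moves the vm_bo onto the list named after its new state. A rough summary as a commented C enum; this is an interpretation of the code above, not an authoritative diagram:

/* Rough summary of the per-BO state lists driven by the helpers above
 * (an interpretation of this file, not an authoritative diagram):
 *
 *   evicted     -> BO (or PD/PT) is not where it should be; must be validated
 *   moved       -> per-VM BO moved, page tables not yet updated
 *   invalidated -> regular BO invalidated, PTEs not yet updated
 *   relocated   -> PD/PT whose parent directory entry must be rewritten
 *   idle        -> everything up to date
 *   done        -> invalidated BO whose PTE update has completed
 */
enum vm_bo_state {
        VM_BO_EVICTED,
        VM_BO_MOVED,
        VM_BO_INVALIDATED,
        VM_BO_RELOCATED,
        VM_BO_IDLE,
        VM_BO_DONE,
};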
| 373 | ||||
| 374 | /** | |||
| 375 | * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm | |||
| 376 | * | |||
| 377 | * @base: base structure for tracking BO usage in a VM | |||
| 378 | * @vm: vm to which bo is to be added | |||
| 379 | * @bo: amdgpu buffer object | |||
| 380 | * | |||
| 381 | * Initialize a bo_va_base structure and add it to the appropriate lists | |||
| 382 | * | |||
| 383 | */ | |||
| 384 | static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, | |||
| 385 | struct amdgpu_vm *vm, | |||
| 386 | struct amdgpu_bo *bo) | |||
| 387 | { | |||
| 388 | base->vm = vm; | |||
| 389 | base->bo = bo; | |||
| 390 | base->next = NULL((void *)0); | |||
| 391 | INIT_LIST_HEAD(&base->vm_status); | |||
| 392 | ||||
| 393 | if (!bo) | |||
| 394 | return; | |||
| 395 | base->next = bo->vm_bo; | |||
| 396 | bo->vm_bo = base; | |||
| 397 | ||||
| 398 | if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) | |||
| 399 | return; | |||
| 400 | ||||
| 401 | vm->bulk_moveable = false0; | |||
| 402 | if (bo->tbo.type == ttm_bo_type_kernel && bo->parent) | |||
| 403 | amdgpu_vm_bo_relocated(base); | |||
| 404 | else | |||
| 405 | amdgpu_vm_bo_idle(base); | |||
| 406 | ||||
| 407 | if (bo->preferred_domains & | |||
| 408 | amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) | |||
| 409 | return; | |||
| 410 | ||||
| 411 | /* | |||
| 412 | * we checked all the prerequisites, but it looks like this per vm bo | |||
| 413 | * is currently evicted. add the bo to the evicted list to make sure it | |||
| 414 | * is validated on next vm use to avoid fault. | |||
| 415 | * */ | |||
| 416 | amdgpu_vm_bo_evicted(base); | |||
| 417 | } | |||
| 418 | ||||
| 419 | /** | |||
| 420 | * amdgpu_vm_pt_parent - get the parent page directory | |||
| 421 | * | |||
| 422 | * @pt: child page table | |||
| 423 | * | |||
| 424 | * Helper to get the parent entry for the child page table. NULL if we are at | |||
| 425 | * the root page directory. | |||
| 426 | */ | |||
| 427 | static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) | |||
| 428 | { | |||
| 429 | struct amdgpu_bo *parent = pt->base.bo->parent; | |||
| 430 | ||||
| 431 | if (!parent) | |||
| 432 | return NULL((void *)0); | |||
| 433 | ||||
| 434 | return container_of(parent->vm_bo, struct amdgpu_vm_pt, base)({ const __typeof( ((struct amdgpu_vm_pt *)0)->base ) *__mptr = (parent->vm_bo); (struct amdgpu_vm_pt *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_vm_pt, base) );}); | |||
| 435 | } | |||
| 436 | ||||
| 437 | /* | |||
| 438 | * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt | |||
| 439 | */ | |||
| 440 | struct amdgpu_vm_pt_cursor { | |||
| 441 | uint64_t pfn; | |||
| 442 | struct amdgpu_vm_pt *parent; | |||
| 443 | struct amdgpu_vm_pt *entry; | |||
| 444 | unsigned level; | |||
| 445 | }; | |||
| 446 | ||||
| 447 | /** | |||
| 448 | * amdgpu_vm_pt_start - start PD/PT walk | |||
| 449 | * | |||
| 450 | * @adev: amdgpu_device pointer | |||
| 451 | * @vm: amdgpu_vm structure | |||
| 452 | * @start: start address of the walk | |||
| 453 | * @cursor: state to initialize | |||
| 454 | * | |||
| 455 | * Initialize a amdgpu_vm_pt_cursor to start a walk. | |||
| 456 | */ | |||
| 457 | static void amdgpu_vm_pt_start(struct amdgpu_device *adev, | |||
| 458 | struct amdgpu_vm *vm, uint64_t start, | |||
| 459 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 460 | { | |||
| 461 | cursor->pfn = start; | |||
| 462 | cursor->parent = NULL((void *)0); | |||
| 463 | cursor->entry = &vm->root; | |||
| 464 | cursor->level = adev->vm_manager.root_level; | |||
| 465 | } | |||
| 466 | ||||
| 467 | /** | |||
| 468 | * amdgpu_vm_pt_descendant - go to child node | |||
| 469 | * | |||
| 470 | * @adev: amdgpu_device pointer | |||
| 471 | * @cursor: current state | |||
| 472 | * | |||
| 473 | * Walk to the child node of the current node. | |||
| 474 | * Returns: | |||
| 475 | * True if the walk was possible, false otherwise. | |||
| 476 | */ | |||
| 477 | static bool_Bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, | |||
| 478 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 479 | { | |||
| 480 | unsigned mask, shift, idx; | |||
| 481 | ||||
| 482 | if (!cursor->entry->entries) | |||
| 483 | return false0; | |||
| 484 | ||||
| 485 | BUG_ON(!cursor->entry->base.bo)((!(!cursor->entry->base.bo)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/dev/pci/drm/amd/amdgpu/amdgpu_vm.c", 485, "!(!cursor->entry->base.bo)" )); | |||
| 486 | mask = amdgpu_vm_entries_mask(adev, cursor->level); | |||
| 487 | shift = amdgpu_vm_level_shift(adev, cursor->level); | |||
| 488 | ||||
| 489 | ++cursor->level; | |||
| 490 | idx = (cursor->pfn >> shift) & mask; | |||
| 491 | cursor->parent = cursor->entry; | |||
| 492 | cursor->entry = &cursor->entry->entries[idx]; | |||
| 493 | return true1; | |||
| 494 | } | |||
| 495 | ||||
| 496 | /** | |||
| 497 | * amdgpu_vm_pt_sibling - go to sibling node | |||
| 498 | * | |||
| 499 | * @adev: amdgpu_device pointer | |||
| 500 | * @cursor: current state | |||
| 501 | * | |||
| 502 | * Walk to the sibling node of the current node. | |||
| 503 | * Returns: | |||
| 504 | * True if the walk was possible, false otherwise. | |||
| 505 | */ | |||
| 506 | static bool_Bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, | |||
| 507 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 508 | { | |||
| 509 | unsigned shift, num_entries; | |||
| 510 | ||||
| 511 | /* Root doesn't have a sibling */ | |||
| 512 | if (!cursor->parent) | |||
| 513 | return false0; | |||
| 514 | ||||
| 515 | /* Go to our parents and see if we got a sibling */ | |||
| 516 | shift = amdgpu_vm_level_shift(adev, cursor->level - 1); | |||
| 517 | num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); | |||
| 518 | ||||
| 519 | if (cursor->entry == &cursor->parent->entries[num_entries - 1]) | |||
| 520 | return false0; | |||
| 521 | ||||
| 522 | cursor->pfn += 1ULL << shift; | |||
| 523 | cursor->pfn &= ~((1ULL << shift) - 1); | |||
| 524 | ++cursor->entry; | |||
| 525 | return true1; | |||
| 526 | } | |||
| 527 | ||||
| 528 | /** | |||
| 529 | * amdgpu_vm_pt_ancestor - go to parent node | |||
| 530 | * | |||
| 531 | * @cursor: current state | |||
| 532 | * | |||
| 533 | * Walk to the parent node of the current node. | |||
| 534 | * Returns: | |||
| 535 | * True if the walk was possible, false otherwise. | |||
| 536 | */ | |||
| 537 | static bool_Bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) | |||
| 538 | { | |||
| 539 | if (!cursor->parent) | |||
| 540 | return false0; | |||
| 541 | ||||
| 542 | --cursor->level; | |||
| 543 | cursor->entry = cursor->parent; | |||
| 544 | cursor->parent = amdgpu_vm_pt_parent(cursor->parent); | |||
| 545 | return true1; | |||
| 546 | } | |||
| 547 | ||||
| 548 | /** | |||
| 549 | * amdgpu_vm_pt_next - get next PD/PT in hierarchy | |||
| 550 | * | |||
| 551 | * @adev: amdgpu_device pointer | |||
| 552 | * @cursor: current state | |||
| 553 | * | |||
| 554 | * Walk the PD/PT tree to the next node. | |||
| 555 | */ | |||
| 556 | static void amdgpu_vm_pt_next(struct amdgpu_device *adev, | |||
| 557 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 558 | { | |||
| 559 | /* First try a newborn child */ | |||
| 560 | if (amdgpu_vm_pt_descendant(adev, cursor)) | |||
| 561 | return; | |||
| 562 | ||||
| 563 | /* If that didn't work, try to find a sibling */ | |||
| 564 | while (!amdgpu_vm_pt_sibling(adev, cursor)) { | |||
| 565 | /* No sibling, go to our parents and grandparents */ | |||
| 566 | if (!amdgpu_vm_pt_ancestor(cursor)) { | |||
| 567 | cursor->pfn = ~0ll; | |||
| 568 | return; | |||
| 569 | } | |||
| 570 | } | |||
| 571 | } | |||
| 572 | ||||
| 573 | /** | |||
| 574 | * amdgpu_vm_pt_first_dfs - start a depth-first search | |||
| 575 | * | |||
| 576 | * @adev: amdgpu_device structure | |||
| 577 | * @vm: amdgpu_vm structure | |||
| 578 | * @start: optional cursor to start with | |||
| 579 | * @cursor: state to initialize | |||
| 580 | * | |||
| 581 | * Starts a depth-first traversal of the PD/PT tree. | |||
| 582 | */ | |||
| 583 | static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, | |||
| 584 | struct amdgpu_vm *vm, | |||
| 585 | struct amdgpu_vm_pt_cursor *start, | |||
| 586 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 587 | { | |||
| 588 | if (start) | |||
| 589 | *cursor = *start; | |||
| 590 | else | |||
| 591 | amdgpu_vm_pt_start(adev, vm, 0, cursor); | |||
| 592 | while (amdgpu_vm_pt_descendant(adev, cursor)); | |||
| 593 | } | |||
| 594 | ||||
| 595 | /** | |||
| 596 | * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue | |||
| 597 | * | |||
| 598 | * @start: starting point for the search | |||
| 599 | * @entry: current entry | |||
| 600 | * | |||
| 601 | * Returns: | |||
| 602 | * True when the search should continue, false otherwise. | |||
| 603 | */ | |||
| 604 | static bool_Bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, | |||
| 605 | struct amdgpu_vm_pt *entry) | |||
| 606 | { | |||
| 607 | return entry && (!start || entry != start->entry); | |||
| 608 | } | |||
| 609 | ||||
| 610 | /** | |||
| 611 | * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search | |||
| 612 | * | |||
| 613 | * @adev: amdgpu_device structure | |||
| 614 | * @cursor: current state | |||
| 615 | * | |||
| 616 | * Move the cursor to the next node in a depth-first search. | |||
| 617 | */ | |||
| 618 | static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, | |||
| 619 | struct amdgpu_vm_pt_cursor *cursor) | |||
| 620 | { | |||
| 621 | if (!cursor->entry) | |||
| 622 | return; | |||
| 623 | ||||
| 624 | if (!cursor->parent) | |||
| 625 | cursor->entry = NULL((void *)0); | |||
| 626 | else if (amdgpu_vm_pt_sibling(adev, cursor)) | |||
| 627 | while (amdgpu_vm_pt_descendant(adev, cursor)); | |||
| 628 | else | |||
| 629 | amdgpu_vm_pt_ancestor(cursor); | |||
| 630 | } | |||
| 631 | ||||
| 632 | /* | |||
| 633 | * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs | |||
| 634 | */ | |||
| 635 | #define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor )), (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), & (cursor)); amdgpu_vm_pt_continue_dfs((start), (entry)); (entry ) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor ))) \ | |||
| 636 | for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ | |||
| 637 | (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ | |||
| 638 | amdgpu_vm_pt_continue_dfs((start), (entry)); \ | |||
| 639 | (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) | |||
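The cursor helpers above walk the PD/PT hierarchy iteratively: try a descendant first, otherwise a sibling, otherwise climb back up to an ancestor and retry. A standalone sketch of the same walking order on a plain pointer-based tree (hypothetical node type, not the driver's structures):

/* Standalone sketch (hypothetical node type, not driver code): the same
 * iterative "descendant, else sibling, else ancestor" walk the cursor
 * helpers above use, applied to a simple pointer-based tree. */
#include <stddef.h>
#include <stdio.h>

struct node {
        const char *name;
        struct node *parent;
        struct node *first_child;
        struct node *next_sibling;
};

static struct node *walk_next(struct node *n)
{
        if (n->first_child)                   /* try a descendant first */
                return n->first_child;
        while (n && !n->next_sibling)         /* no sibling: climb up */
                n = n->parent;
        return n ? n->next_sibling : NULL;    /* then take the sibling */
}

int main(void)
{
        struct node root = { "root", NULL, NULL, NULL };
        struct node a = { "a", &root, NULL, NULL }, b = { "b", &root, NULL, NULL };
        struct node a1 = { "a1", &a, NULL, NULL };

        root.first_child = &a; a.next_sibling = &b; a.first_child = &a1;

        for (struct node *n = &root; n; n = walk_next(n))
                printf("%s\n", n->name);      /* prints: root a a1 b */
        return 0;
}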
| 640 | ||||
| 641 | /** | |||
| 642 | * amdgpu_vm_get_pd_bo - add the VM PD to a validation list | |||
| 643 | * | |||
| 644 | * @vm: vm providing the BOs | |||
| 645 | * @validated: head of validation list | |||
| 646 | * @entry: entry to add | |||
| 647 | * | |||
| 648 | * Add the page directory to the list of BOs to | |||
| 649 | * validate for command submission. | |||
| 650 | */ | |||
| 651 | void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, | |||
| 652 | struct list_head *validated, | |||
| 653 | struct amdgpu_bo_list_entry *entry) | |||
| 654 | { | |||
| 655 | entry->priority = 0; | |||
| 656 | entry->tv.bo = &vm->root.base.bo->tbo; | |||
| 657 | /* Two for VM updates, one for TTM and one for the CS job */ | |||
| 658 | entry->tv.num_shared = 4; | |||
| 659 | entry->user_pages = NULL((void *)0); | |||
| 660 | list_add(&entry->tv.head, validated); | |||
| 661 | } | |||
| 662 | ||||
| 663 | /** | |||
| 664 | * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag | |||
| 665 | * | |||
| 666 | * @bo: BO which was removed from the LRU | |||
| 667 | * | |||
| 668 | * Make sure the bulk_moveable flag is updated when a BO is removed from the | |||
| 669 | * LRU. | |||
| 670 | */ | |||
| 671 | void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) | |||
| 672 | { | |||
| 673 | struct amdgpu_bo *abo; | |||
| 674 | struct amdgpu_vm_bo_base *bo_base; | |||
| 675 | ||||
| 676 | if (!amdgpu_bo_is_amdgpu_bo(bo)) | |||
| 677 | return; | |||
| 678 | ||||
| 679 | if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT(1 << 21)) | |||
| 680 | return; | |||
| 681 | ||||
| 682 | abo = ttm_to_amdgpu_bo(bo); | |||
| 683 | if (!abo->parent) | |||
| 684 | return; | |||
| 685 | for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { | |||
| 686 | struct amdgpu_vm *vm = bo_base->vm; | |||
| 687 | ||||
| 688 | if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) | |||
| 689 | vm->bulk_moveable = false0; | |||
| 690 | } | |||
| 691 | ||||
| 692 | } | |||
| 693 | /** | |||
| 694 | * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU | |||
| 695 | * | |||
| 696 | * @adev: amdgpu device pointer | |||
| 697 | * @vm: vm providing the BOs | |||
| 698 | * | |||
| 699 | * Move all BOs to the end of LRU and remember their positions to put them | |||
| 700 | * together. | |||
| 701 | */ | |||
| 702 | void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, | |||
| 703 | struct amdgpu_vm *vm) | |||
| 704 | { | |||
| 705 | struct amdgpu_vm_bo_base *bo_base; | |||
| 706 | ||||
| 707 | if (vm->bulk_moveable) { | |||
| 708 | spin_lock(&ttm_bo_glob.lru_lock)mtx_enter(&ttm_bo_glob.lru_lock); | |||
| 709 | ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); | |||
| 710 | spin_unlock(&ttm_bo_glob.lru_lock)mtx_leave(&ttm_bo_glob.lru_lock); | |||
| 711 | return; | |||
| 712 | } | |||
| 713 | ||||
| 714 | memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move))__builtin_memset((&vm->lru_bulk_move), (0), (sizeof(vm ->lru_bulk_move))); | |||
| 715 | ||||
| 716 | spin_lock(&ttm_bo_glob.lru_lock)mtx_enter(&ttm_bo_glob.lru_lock); | |||
| 717 | list_for_each_entry(bo_base, &vm->idle, vm_status)for (bo_base = ({ const __typeof( ((__typeof(*bo_base) *)0)-> vm_status ) *__mptr = ((&vm->idle)->next); (__typeof (*bo_base) *)( (char *)__mptr - __builtin_offsetof(__typeof(* bo_base), vm_status) );}); &bo_base->vm_status != (& vm->idle); bo_base = ({ const __typeof( ((__typeof(*bo_base ) *)0)->vm_status ) *__mptr = (bo_base->vm_status.next) ; (__typeof(*bo_base) *)( (char *)__mptr - __builtin_offsetof (__typeof(*bo_base), vm_status) );})) { | |||
| 718 | struct amdgpu_bo *bo = bo_base->bo; | |||
| 719 | ||||
| 720 | if (!bo->parent) | |||
| 721 | continue; | |||
| 722 | ||||
| 723 | ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move); | |||
| 724 | if (bo->shadow) | |||
| 725 | ttm_bo_move_to_lru_tail(&bo->shadow->tbo, | |||
| 726 | &vm->lru_bulk_move); | |||
| 727 | } | |||
| 728 | spin_unlock(&ttm_bo_glob.lru_lock)mtx_leave(&ttm_bo_glob.lru_lock); | |||
| 729 | ||||
| 730 | vm->bulk_moveable = true1; | |||
| 731 | } | |||
| 732 | ||||
| 733 | /** | |||
| 734 | * amdgpu_vm_validate_pt_bos - validate the page table BOs | |||
| 735 | * | |||
| 736 | * @adev: amdgpu device pointer | |||
| 737 | * @vm: vm providing the BOs | |||
| 738 | * @validate: callback to do the validation | |||
| 739 | * @param: parameter for the validation callback | |||
| 740 | * | |||
| 741 | * Validate the page table BOs on command submission if necessary. | |||
| 742 | * | |||
| 743 | * Returns: | |||
| 744 | * Validation result. | |||
| 745 | */ | |||
| 746 | int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 747 | int (*validate)(void *p, struct amdgpu_bo *bo), | |||
| 748 | void *param) | |||
| 749 | { | |||
| 750 | struct amdgpu_vm_bo_base *bo_base, *tmp; | |||
| 751 | int r; | |||
| 752 | ||||
| 753 | vm->bulk_moveable &= list_empty(&vm->evicted); | |||
| 754 | ||||
| 755 | list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status)for (bo_base = ({ const __typeof( ((__typeof(*bo_base) *)0)-> vm_status ) *__mptr = ((&vm->evicted)->next); (__typeof (*bo_base) *)( (char *)__mptr - __builtin_offsetof(__typeof(* bo_base), vm_status) );}), tmp = ({ const __typeof( ((__typeof (*bo_base) *)0)->vm_status ) *__mptr = (bo_base->vm_status .next); (__typeof(*bo_base) *)( (char *)__mptr - __builtin_offsetof (__typeof(*bo_base), vm_status) );}); &bo_base->vm_status != (&vm->evicted); bo_base = tmp, tmp = ({ const __typeof ( ((__typeof(*tmp) *)0)->vm_status ) *__mptr = (tmp->vm_status .next); (__typeof(*tmp) *)( (char *)__mptr - __builtin_offsetof (__typeof(*tmp), vm_status) );})) { | |||
| 756 | struct amdgpu_bo *bo = bo_base->bo; | |||
| 757 | ||||
| 758 | r = validate(param, bo); | |||
| 759 | if (r) | |||
| 760 | return r; | |||
| 761 | ||||
| 762 | if (bo->tbo.type != ttm_bo_type_kernel) { | |||
| 763 | amdgpu_vm_bo_moved(bo_base); | |||
| 764 | } else { | |||
| 765 | vm->update_funcs->map_table(bo); | |||
| 766 | amdgpu_vm_bo_relocated(bo_base); | |||
| 767 | } | |||
| 768 | } | |||
| 769 | ||||
| 770 | amdgpu_vm_eviction_lock(vm); | |||
| 771 | vm->evicting = false0; | |||
| 772 | amdgpu_vm_eviction_unlock(vm); | |||
| 773 | ||||
| 774 | return 0; | |||
| 775 | } | |||
| 776 | ||||
| 777 | /** | |||
| 778 | * amdgpu_vm_ready - check VM is ready for updates | |||
| 779 | * | |||
| 780 | * @vm: VM to check | |||
| 781 | * | |||
| 782 | * Check if all VM PDs/PTs are ready for updates | |||
| 783 | * | |||
| 784 | * Returns: | |||
| 785 | * True if eviction list is empty. | |||
| 786 | */ | |||
| 787 | bool_Bool amdgpu_vm_ready(struct amdgpu_vm *vm) | |||
| 788 | { | |||
| 789 | return list_empty(&vm->evicted); | |||
| 790 | } | |||
| 791 | ||||
| 792 | /** | |||
| 793 | * amdgpu_vm_clear_bo - initially clear the PDs/PTs | |||
| 794 | * | |||
| 795 | * @adev: amdgpu_device pointer | |||
| 796 | * @vm: VM to clear BO from | |||
| 797 | * @bo: BO to clear | |||
| 798 | * @immediate: use an immediate update | |||
| 799 | * | |||
| 800 | * Root PD needs to be reserved when calling this. | |||
| 801 | * | |||
| 802 | * Returns: | |||
| 803 | * 0 on success, errno otherwise. | |||
| 804 | */ | |||
| 805 | static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, | |||
| 806 | struct amdgpu_vm *vm, | |||
| 807 | struct amdgpu_bo *bo, | |||
| 808 | bool_Bool immediate) | |||
| 809 | { | |||
| 810 | struct ttm_operation_ctx ctx = { true1, false0 }; | |||
| 811 | unsigned level = adev->vm_manager.root_level; | |||
| 812 | struct amdgpu_vm_update_params params; | |||
| 813 | struct amdgpu_bo *ancestor = bo; | |||
| 814 | unsigned entries, ats_entries; | |||
| 815 | uint64_t addr; | |||
| 816 | int r; | |||
| 817 | ||||
| 818 | /* Figure out our place in the hierarchy */ | |||
| 819 | if (ancestor->parent) { | |||
| 820 | ++level; | |||
| 821 | while (ancestor->parent->parent) { | |||
| 822 | ++level; | |||
| 823 | ancestor = ancestor->parent; | |||
| 824 | } | |||
| 825 | } | |||
| 826 | ||||
| 827 | entries = amdgpu_bo_size(bo) / 8; | |||
| 828 | if (!vm->pte_support_ats) { | |||
| 829 | ats_entries = 0; | |||
| 830 | ||||
| 831 | } else if (!bo->parent) { | |||
| 832 | ats_entries = amdgpu_vm_num_ats_entries(adev); | |||
| 833 | ats_entries = min(ats_entries, entries)(((ats_entries)<(entries))?(ats_entries):(entries)); | |||
| 834 | entries -= ats_entries; | |||
| 835 | ||||
| 836 | } else { | |||
| 837 | struct amdgpu_vm_pt *pt; | |||
| 838 | ||||
| 839 | pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base)({ const __typeof( ((struct amdgpu_vm_pt *)0)->base ) *__mptr = (ancestor->vm_bo); (struct amdgpu_vm_pt *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_vm_pt, base) );}); | |||
| 840 | ats_entries = amdgpu_vm_num_ats_entries(adev); | |||
| 841 | if ((pt - vm->root.entries) >= ats_entries) { | |||
| 842 | ats_entries = 0; | |||
| 843 | } else { | |||
| 844 | ats_entries = entries; | |||
| 845 | entries = 0; | |||
| 846 | } | |||
| 847 | } | |||
| 848 | ||||
| 849 | r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); | |||
| 850 | if (r) | |||
| 851 | return r; | |||
| 852 | ||||
| 853 | if (bo->shadow) { | |||
| 854 | r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement, | |||
| 855 | &ctx); | |||
| 856 | if (r) | |||
| 857 | return r; | |||
| 858 | } | |||
| 859 | ||||
| 860 | r = vm->update_funcs->map_table(bo); | |||
| 861 | if (r) | |||
| 862 | return r; | |||
| 863 | ||||
| 864 | memset(¶ms, 0, sizeof(params))__builtin_memset((¶ms), (0), (sizeof(params))); | |||
| 865 | params.adev = adev; | |||
| 866 | params.vm = vm; | |||
| 867 | params.immediate = immediate; | |||
| 868 | ||||
| 869 | r = vm->update_funcs->prepare(¶ms, NULL((void *)0), AMDGPU_SYNC_EXPLICIT); | |||
| 870 | if (r) | |||
| 871 | return r; | |||
| 872 | ||||
| 873 | addr = 0; | |||
| 874 | if (ats_entries) { | |||
| 875 | uint64_t value = 0, flags; | |||
| 876 | ||||
| 877 | flags = AMDGPU_PTE_DEFAULT_ATC((1ULL << 1) | (1ULL << 2) | (1ULL << 4) | ( 1ULL << 5) | (1ULL << 6) | ((uint64_t)(2) << 57)); | |||
| 878 | if (level != AMDGPU_VM_PTB) { | |||
| 879 | /* Handle leaf PDEs as PTEs */ | |||
| 880 | flags |= AMDGPU_PDE_PTE(1ULL << 54); | |||
| 881 | amdgpu_gmc_get_vm_pde(adev, level, &value, &flags)(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (& value), (&flags)); | |||
| 882 | } | |||
| 883 | ||||
| 884 | r = vm->update_funcs->update(¶ms, bo, addr, 0, ats_entries, | |||
| 885 | value, flags); | |||
| 886 | if (r) | |||
| 887 | return r; | |||
| 888 | ||||
| 889 | addr += ats_entries * 8; | |||
| 890 | } | |||
| 891 | ||||
| 892 | if (entries) { | |||
| 893 | uint64_t value = 0, flags = 0; | |||
| 894 | ||||
| 895 | if (adev->asic_type >= CHIP_VEGA10) { | |||
| 896 | if (level != AMDGPU_VM_PTB) { | |||
| 897 | /* Handle leaf PDEs as PTEs */ | |||
| 898 | flags |= AMDGPU_PDE_PTE(1ULL << 54); | |||
| 899 | amdgpu_gmc_get_vm_pde(adev, level,(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (& value), (&flags)) | |||
| 900 | &value, &flags)(adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (& value), (&flags)); | |||
| 901 | } else { | |||
| 902 | /* Workaround for fault priority problem on GMC9 */ | |||
| 903 | flags = AMDGPU_PTE_EXECUTABLE(1ULL << 4); | |||
| 904 | } | |||
| 905 | } | |||
| 906 | ||||
| 907 | r = vm->update_funcs->update(¶ms, bo, addr, 0, entries, | |||
| 908 | value, flags); | |||
| 909 | if (r) | |||
| 910 | return r; | |||
| 911 | } | |||
| 912 | ||||
| 913 | return vm->update_funcs->commit(¶ms, NULL((void *)0)); | |||
| 914 | } | |||
| 915 | ||||
| 916 | /** | |||
| 917 | * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation | |||
| 918 | * | |||
| 919 | * @adev: amdgpu_device pointer | |||
| 920 | * @vm: requesting vm | |||
| 921 | * @level: the page table level | |||
| 922 | * @immediate: use an immediate update | |||
| 923 | * @bp: resulting BO allocation parameters | |||
| 924 | */ | |||
| 925 | static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 926 | int level, bool_Bool immediate, | |||
| 927 | struct amdgpu_bo_param *bp) | |||
| 928 | { | |||
| 929 | memset(bp, 0, sizeof(*bp))__builtin_memset((bp), (0), (sizeof(*bp))); | |||
| 930 | ||||
| 931 | bp->size = amdgpu_vm_bo_size(adev, level); | |||
| 932 | bp->byte_align = AMDGPU_GPU_PAGE_SIZE4096; | |||
| 933 | bp->domain = AMDGPU_GEM_DOMAIN_VRAM0x4; | |||
| 934 | bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); | |||
| 935 | bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS(1 << 5) | | |||
| 936 | AMDGPU_GEM_CREATE_CPU_GTT_USWC(1 << 2); | |||
| 937 | if (vm->use_cpu_for_update) | |||
| 938 | bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED(1 << 0); | |||
| 939 | else if (!vm->root.base.bo || vm->root.base.bo->shadow) | |||
| 940 | bp->flags |= AMDGPU_GEM_CREATE_SHADOW(1 << 4); | |||
| 941 | bp->type = ttm_bo_type_kernel; | |||
| 942 | bp->no_wait_gpu = immediate; | |||
| 943 | if (vm->root.base.bo) | |||
| 944 | bp->resv = vm->root.base.bo->tbo.base.resv; | |||
| 945 | } | |||
| 946 | ||||
| 947 | /** | |||
| 948 | * amdgpu_vm_alloc_pts - Allocate a specific page table | |||
| 949 | * | |||
| 950 | * @adev: amdgpu_device pointer | |||
| 951 | * @vm: VM to allocate page tables for | |||
| 952 | * @cursor: Which page table to allocate | |||
| 953 | * @immediate: use an immediate update | |||
| 954 | * | |||
| 955 | * Make sure a specific page table or directory is allocated. | |||
| 956 | * | |||
| 957 | * Returns: | |||
| 958 | * 1 if page table needed to be allocated, 0 if page table was already | |||
| 959 | * allocated, negative errno if an error occurred. | |||
| 960 | */ | |||
| 961 | static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, | |||
| 962 | struct amdgpu_vm *vm, | |||
| 963 | struct amdgpu_vm_pt_cursor *cursor, | |||
| 964 | bool_Bool immediate) | |||
| 965 | { | |||
| 966 | struct amdgpu_vm_pt *entry = cursor->entry; | |||
| 967 | struct amdgpu_bo_param bp; | |||
| 968 | struct amdgpu_bo *pt; | |||
| 969 | int r; | |||
| 970 | ||||
| 971 | if (cursor->level < AMDGPU_VM_PTB && !entry->entries) { | |||
| 972 | unsigned num_entries; | |||
| 973 | ||||
| 974 | num_entries = amdgpu_vm_num_entries(adev, cursor->level); | |||
| 975 | entry->entries = kvmalloc_array(num_entries, | |||
| 976 | sizeof(*entry->entries), | |||
| 977 | GFP_KERNEL(0x0001 | 0x0004) | __GFP_ZERO0x0008); | |||
| 978 | if (!entry->entries) | |||
| 979 | return -ENOMEM12; | |||
| 980 | } | |||
| 981 | ||||
| 982 | if (entry->base.bo) | |||
| 983 | return 0; | |||
| 984 | ||||
| 985 | amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp); | |||
| 986 | ||||
| 987 | r = amdgpu_bo_create(adev, &bp, &pt); | |||
| 988 | if (r) | |||
| 989 | return r; | |||
| 990 | ||||
| 991 | /* Keep a reference to the root directory to avoid | |||
| 992 | * freeing them up in the wrong order. | |||
| 993 | */ | |||
| 994 | pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); | |||
| 995 | amdgpu_vm_bo_base_init(&entry->base, vm, pt); | |||
| 996 | ||||
| 997 | r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); | |||
| 998 | if (r) | |||
| 999 | goto error_free_pt; | |||
| 1000 | ||||
| 1001 | return 0; | |||
| 1002 | ||||
| 1003 | error_free_pt: | |||
| 1004 | amdgpu_bo_unref(&pt->shadow); | |||
| 1005 | amdgpu_bo_unref(&pt); | |||
| 1006 | return r; | |||
| 1007 | } | |||
| 1008 | ||||
| 1009 | /** | |||
| 1010 | * amdgpu_vm_free_table - free one PD/PT | |||
| 1011 | * | |||
| 1012 | * @entry: PDE to free | |||
| 1013 | */ | |||
| 1014 | static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry) | |||
| 1015 | { | |||
| 1016 | if (entry->base.bo) { | |||
| 1017 | entry->base.bo->vm_bo = NULL((void *)0); | |||
| 1018 | list_del(&entry->base.vm_status); | |||
| 1019 | amdgpu_bo_unref(&entry->base.bo->shadow); | |||
| 1020 | amdgpu_bo_unref(&entry->base.bo); | |||
| 1021 | } | |||
| 1022 | kvfree(entry->entries); | |||
| 1023 | entry->entries = NULL((void *)0); | |||
| 1024 | } | |||
| 1025 | ||||
| 1026 | /** | |||
| 1027 | * amdgpu_vm_free_pts - free PD/PT levels | |||
| 1028 | * | |||
| 1029 | * @adev: amdgpu device structure | |||
| 1030 | * @vm: amdgpu vm structure | |||
| 1031 | * @start: optional cursor where to start freeing PDs/PTs | |||
| 1032 | * | |||
| 1033 | * Free the page directory or page table level and all sub levels. | |||
| 1034 | */ | |||
| 1035 | static void amdgpu_vm_free_pts(struct amdgpu_device *adev, | |||
| 1036 | struct amdgpu_vm *vm, | |||
| 1037 | struct amdgpu_vm_pt_cursor *start) | |||
| 1038 | { | |||
| 1039 | struct amdgpu_vm_pt_cursor cursor; | |||
| 1040 | struct amdgpu_vm_pt *entry; | |||
| 1041 | ||||
| 1042 | vm->bulk_moveable = false0; | |||
| 1043 | ||||
| 1044 | for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor )), (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), & (cursor)); amdgpu_vm_pt_continue_dfs((start), (entry)); (entry ) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor ))) | |||
| 1045 | amdgpu_vm_free_table(entry); | |||
| 1046 | ||||
| 1047 | if (start) | |||
| 1048 | amdgpu_vm_free_table(start->entry); | |||
| 1049 | } | |||
| 1050 | ||||
| 1051 | /** | |||
| 1052 | * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug | |||
| 1053 | * | |||
| 1054 | * @adev: amdgpu_device pointer | |||
| 1055 | */ | |||
| 1056 | void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) | |||
| 1057 | { | |||
| 1058 | const struct amdgpu_ip_block *ip_block; | |||
| 1059 | bool_Bool has_compute_vm_bug; | |||
| 1060 | struct amdgpu_ring *ring; | |||
| 1061 | int i; | |||
| 1062 | ||||
| 1063 | has_compute_vm_bug = false0; | |||
| 1064 | ||||
| 1065 | ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); | |||
| 1066 | if (ip_block) { | |||
| 1067 | /* Compute has a VM bug for GFX version < 7. | |||
| 1068 | Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ | |||
| 1069 | if (ip_block->version->major <= 7) | |||
| 1070 | has_compute_vm_bug = true1; | |||
| 1071 | else if (ip_block->version->major == 8) | |||
| 1072 | if (adev->gfx.mec_fw_version < 673) | |||
| 1073 | has_compute_vm_bug = true1; | |||
| 1074 | } | |||
| 1075 | ||||
| 1076 | for (i = 0; i < adev->num_rings; i++) { | |||
| 1077 | ring = adev->rings[i]; | |||
| 1078 | if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) | |||
| 1079 | /* only compute rings */ | |||
| 1080 | ring->has_compute_vm_bug = has_compute_vm_bug; | |||
| 1081 | else | |||
| 1082 | ring->has_compute_vm_bug = false0; | |||
| 1083 | } | |||
| 1084 | } | |||
| 1085 | ||||
| 1086 | /** | |||
| 1087 | * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. | |||
| 1088 | * | |||
| 1089 | * @ring: ring on which the job will be submitted | |||
| 1090 | * @job: job to submit | |||
| 1091 | * | |||
| 1092 | * Returns: | |||
| 1093 | * True if sync is needed. | |||
| 1094 | */ | |||
| 1095 | bool_Bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, | |||
| 1096 | struct amdgpu_job *job) | |||
| 1097 | { | |||
| 1098 | struct amdgpu_device *adev = ring->adev; | |||
| 1099 | unsigned vmhub = ring->funcs->vmhub; | |||
| 1100 | struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; | |||
| 1101 | struct amdgpu_vmid *id; | |||
| 1102 | bool_Bool gds_switch_needed; | |||
| 1103 | bool_Bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; | |||
| 1104 | ||||
| 1105 | if (job->vmid == 0) | |||
| 1106 | return false0; | |||
| 1107 | id = &id_mgr->ids[job->vmid]; | |||
| 1108 | gds_switch_needed = ring->funcs->emit_gds_switch && ( | |||
| 1109 | id->gds_base != job->gds_base || | |||
| 1110 | id->gds_size != job->gds_size || | |||
| 1111 | id->gws_base != job->gws_base || | |||
| 1112 | id->gws_size != job->gws_size || | |||
| 1113 | id->oa_base != job->oa_base || | |||
| 1114 | id->oa_size != job->oa_size); | |||
| 1115 | ||||
| 1116 | if (amdgpu_vmid_had_gpu_reset(adev, id)) | |||
| 1117 | return true1; | |||
| 1118 | ||||
| 1119 | return vm_flush_needed || gds_switch_needed; | |||
| 1120 | } | |||
| 1121 | ||||
| 1122 | /** | |||
| 1123 | * amdgpu_vm_flush - hardware flush the vm | |||
| 1124 | * | |||
| 1125 | * @ring: ring to use for flush | |||
| 1126 | * @job: related job | |||
| 1127 | * @need_pipe_sync: is pipe sync needed | |||
| 1128 | * | |||
| 1129 | * Emit a VM flush when it is necessary. | |||
| 1130 | * | |||
| 1131 | * Returns: | |||
| 1132 | * 0 on success, errno otherwise. | |||
| 1133 | */ | |||
| 1134 | int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, | |||
| 1135 | bool_Bool need_pipe_sync) | |||
| 1136 | { | |||
| 1137 | struct amdgpu_device *adev = ring->adev; | |||
| 1138 | unsigned vmhub = ring->funcs->vmhub; | |||
| 1139 | struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; | |||
| 1140 | struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; | |||
| 1141 | bool_Bool gds_switch_needed = ring->funcs->emit_gds_switch && ( | |||
| 1142 | id->gds_base != job->gds_base || | |||
| 1143 | id->gds_size != job->gds_size || | |||
| 1144 | id->gws_base != job->gws_base || | |||
| 1145 | id->gws_size != job->gws_size || | |||
| 1146 | id->oa_base != job->oa_base || | |||
| 1147 | id->oa_size != job->oa_size); | |||
| 1148 | bool_Bool vm_flush_needed = job->vm_needs_flush; | |||
| 1149 | struct dma_fence *fence = NULL((void *)0); | |||
| 1150 | bool_Bool pasid_mapping_needed = false0; | |||
| 1151 | unsigned patch_offset = 0; | |||
| 1152 | bool_Bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL((void *)0))); | |||
| 1153 | int r; | |||
| 1154 | ||||
| 1155 | if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid) | |||
| 1156 | adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); | |||
| 1157 | ||||
| 1158 | if (amdgpu_vmid_had_gpu_reset(adev, id)) { | |||
| 1159 | gds_switch_needed = true1; | |||
| 1160 | vm_flush_needed = true1; | |||
| 1161 | pasid_mapping_needed = true1; | |||
| 1162 | } | |||
| 1163 | ||||
| 1164 | mutex_lock(&id_mgr->lock)rw_enter_write(&id_mgr->lock); | |||
| 1165 | if (id->pasid != job->pasid || !id->pasid_mapping || | |||
| 1166 | !dma_fence_is_signaled(id->pasid_mapping)) | |||
| 1167 | pasid_mapping_needed = true1; | |||
| 1168 | mutex_unlock(&id_mgr->lock)rw_exit_write(&id_mgr->lock); | |||
| 1169 | ||||
| 1170 | gds_switch_needed &= !!ring->funcs->emit_gds_switch; | |||
| 1171 | vm_flush_needed &= !!ring->funcs->emit_vm_flush && | |||
| 1172 | job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET0x7fffffffffffffffL; | |||
| 1173 | pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && | |||
| 1174 | ring->funcs->emit_wreg; | |||
| 1175 | ||||
| 1176 | if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) | |||
| 1177 | return 0; | |||
| 1178 | ||||
| 1179 | if (ring->funcs->init_cond_exec) | |||
| 1180 | patch_offset = amdgpu_ring_init_cond_exec(ring)(ring)->funcs->init_cond_exec((ring)); | |||
| 1181 | ||||
| 1182 | if (need_pipe_sync) | |||
| 1183 | amdgpu_ring_emit_pipeline_sync(ring)(ring)->funcs->emit_pipeline_sync((ring)); | |||
| 1184 | ||||
| 1185 | if (vm_flush_needed) { | |||
| 1186 | trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); | |||
| 1187 | amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr)(ring)->funcs->emit_vm_flush((ring), (job->vmid), (job ->vm_pd_addr)); | |||
| 1188 | } | |||
| 1189 | ||||
| 1190 | if (pasid_mapping_needed) | |||
| 1191 | amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid)(ring)->adev->gmc.gmc_funcs->emit_pasid_mapping((ring ), (job->vmid), (job->pasid)); | |||
| 1192 | ||||
| 1193 | if (vm_flush_needed || pasid_mapping_needed) { | |||
| 1194 | r = amdgpu_fence_emit(ring, &fence, 0); | |||
| 1195 | if (r) | |||
| 1196 | return r; | |||
| 1197 | } | |||
| 1198 | ||||
| 1199 | if (vm_flush_needed) { | |||
| 1200 | mutex_lock(&id_mgr->lock)rw_enter_write(&id_mgr->lock); | |||
| 1201 | dma_fence_put(id->last_flush); | |||
| 1202 | id->last_flush = dma_fence_get(fence); | |||
| 1203 | id->current_gpu_reset_count = | |||
| 1204 | atomic_read(&adev->gpu_reset_counter)({ typeof(*(&adev->gpu_reset_counter)) __tmp = *(volatile typeof(*(&adev->gpu_reset_counter)) *)&(*(&adev ->gpu_reset_counter)); membar_datadep_consumer(); __tmp; } ); | |||
| 1205 | mutex_unlock(&id_mgr->lock)rw_exit_write(&id_mgr->lock); | |||
| 1206 | } | |||
| 1207 | ||||
| 1208 | if (pasid_mapping_needed) { | |||
| 1209 | mutex_lock(&id_mgr->lock)rw_enter_write(&id_mgr->lock); | |||
| 1210 | id->pasid = job->pasid; | |||
| 1211 | dma_fence_put(id->pasid_mapping); | |||
| 1212 | id->pasid_mapping = dma_fence_get(fence); | |||
| 1213 | mutex_unlock(&id_mgr->lock)rw_exit_write(&id_mgr->lock); | |||
| 1214 | } | |||
| 1215 | dma_fence_put(fence); | |||
| 1216 | ||||
| 1217 | if (ring->funcs->emit_gds_switch && gds_switch_needed) { | |||
| 1218 | id->gds_base = job->gds_base; | |||
| 1219 | id->gds_size = job->gds_size; | |||
| 1220 | id->gws_base = job->gws_base; | |||
| 1221 | id->gws_size = job->gws_size; | |||
| 1222 | id->oa_base = job->oa_base; | |||
| 1223 | id->oa_size = job->oa_size; | |||
| 1224 | amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,(ring)->funcs->emit_gds_switch((ring), (job->vmid), ( job->gds_base), (job->gds_size), (job->gws_base), (job ->gws_size), (job->oa_base), (job->oa_size)) | |||
| 1225 | job->gds_size, job->gws_base,(ring)->funcs->emit_gds_switch((ring), (job->vmid), ( job->gds_base), (job->gds_size), (job->gws_base), (job ->gws_size), (job->oa_base), (job->oa_size)) | |||
| 1226 | job->gws_size, job->oa_base,(ring)->funcs->emit_gds_switch((ring), (job->vmid), ( job->gds_base), (job->gds_size), (job->gws_base), (job ->gws_size), (job->oa_base), (job->oa_size)) | |||
| 1227 | job->oa_size)(ring)->funcs->emit_gds_switch((ring), (job->vmid), ( job->gds_base), (job->gds_size), (job->gws_base), (job ->gws_size), (job->oa_base), (job->oa_size)); | |||
| 1228 | } | |||
| 1229 | ||||
| 1230 | if (ring->funcs->patch_cond_exec) | |||
| 1231 | amdgpu_ring_patch_cond_exec(ring, patch_offset)(ring)->funcs->patch_cond_exec((ring),(patch_offset)); | |||
| 1232 | ||||
| 1233 | /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ | |||
| 1234 | if (ring->funcs->emit_switch_buffer) { | |||
| 1235 | amdgpu_ring_emit_switch_buffer(ring)(ring)->funcs->emit_switch_buffer((ring)); | |||
| 1236 | amdgpu_ring_emit_switch_buffer(ring)(ring)->funcs->emit_switch_buffer((ring)); | |||
| 1237 | } | |||
| 1238 | return 0; | |||
| 1239 | } | |||
| 1240 | ||||
| 1241 | /** | |||
| 1242 | * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo | |||
| 1243 | * | |||
| 1244 | * @vm: requested vm | |||
| 1245 | * @bo: requested buffer object | |||
| 1246 | * | |||
| 1247 | * Find @bo inside the requested vm. | |||
| 1248 | * Search inside the @bos vm list for the requested vm | |||
| 1249 | * Returns the found bo_va or NULL if none is found | |||
| 1250 | * | |||
| 1251 | * Object has to be reserved! | |||
| 1252 | * | |||
| 1253 | * Returns: | |||
| 1254 | * Found bo_va or NULL. | |||
| 1255 | */ | |||
| 1256 | struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, | |||
| 1257 | struct amdgpu_bo *bo) | |||
| 1258 | { | |||
| 1259 | struct amdgpu_vm_bo_base *base; | |||
| 1260 | ||||
| 1261 | for (base = bo->vm_bo; base; base = base->next) { | |||
| 1262 | if (base->vm != vm) | |||
| 1263 | continue; | |||
| 1264 | ||||
| 1265 | return container_of(base, struct amdgpu_bo_va, base)({ const __typeof( ((struct amdgpu_bo_va *)0)->base ) *__mptr = (base); (struct amdgpu_bo_va *)( (char *)__mptr - __builtin_offsetof (struct amdgpu_bo_va, base) );}); | |||
| 1266 | } | |||
| 1267 | return NULL((void *)0); | |||
| 1268 | } | |||
| 1269 | ||||
| 1270 | /** | |||
| 1271 | * amdgpu_vm_map_gart - Resolve gart mapping of addr | |||
| 1272 | * | |||
| 1273 | * @pages_addr: optional DMA address to use for lookup | |||
| 1274 | * @addr: the unmapped addr | |||
| 1275 | * | |||
| 1276 | * Look up the physical address of the page that the pte resolves | |||
| 1277 | * to. | |||
| 1278 | * | |||
| 1279 | * Returns: | |||
| 1280 | * The pointer for the page table entry. | |||
| 1281 | */ | |||
| 1282 | uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) | |||
| 1283 | { | |||
| 1284 | uint64_t result; | |||
| 1285 | ||||
| 1286 | /* page table offset */ | |||
| 1287 | result = pages_addr[addr >> PAGE_SHIFT12]; | |||
| 1288 | ||||
| 1289 | /* in case cpu page size != gpu page size*/ | |||
| 1290 | result |= addr & (~LINUX_PAGE_MASK(~((1 << 12) - 1))); | |||
| 1291 | ||||
| 1292 | result &= 0xFFFFFFFFFFFFF000ULL; | |||
| 1293 | ||||
| 1294 | return result; | |||
| 1295 | } | |||
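amdgpu_vm_map_gart() resolves a byte offset inside a GART-backed range to the DMA address of the backing system page, keeps the offset within that CPU page, and then masks the result down to the address bits a PTE can hold. A standalone sketch with made-up DMA addresses (not driver code):

/* Standalone sketch (fake DMA addresses, not driver code): resolve a byte
 * offset through a per-page DMA address array the way amdgpu_vm_map_gart()
 * does -- index by the 4 KiB page, keep the in-page offset, then mask to the
 * address bits a PTE can hold. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1ULL << PAGE_SHIFT) - 1))

int main(void)
{
        /* DMA addresses of three backing system pages (made-up values). */
        uint64_t pages_addr[] = { 0xabc000, 0xdef000, 0x123000 };
        uint64_t addr = (1ULL << PAGE_SHIFT) + 0x345;       /* page 1, offset 0x345 */

        uint64_t result = pages_addr[addr >> PAGE_SHIFT];   /* page table offset */
        result |= addr & ~PAGE_MASK;                        /* CPU page offset  */
        result &= 0xFFFFFFFFFFFFF000ULL;                    /* PTE address bits */

        printf("0x%llx\n", (unsigned long long)result);     /* 0xdef000 */
        return 0;
}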
| 1296 | ||||
| 1297 | /** | |||
| 1298 | * amdgpu_vm_update_pde - update a single level in the hierarchy | |||
| 1299 | * | |||
| 1300 | * @params: parameters for the update | |||
| 1301 | * @vm: requested vm | |||
| 1302 | * @entry: entry to update | |||
| 1303 | * | |||
| 1304 | * Makes sure the requested entry in parent is up to date. | |||
| 1305 | */ | |||
| 1306 | static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, | |||
| 1307 | struct amdgpu_vm *vm, | |||
| 1308 | struct amdgpu_vm_pt *entry) | |||
| 1309 | { | |||
| 1310 | struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry); | |||
| 1311 | struct amdgpu_bo *bo = parent->base.bo, *pbo; | |||
| 1312 | uint64_t pde, pt, flags; | |||
| 1313 | unsigned level; | |||
| 1314 | ||||
| 1315 | for (level = 0, pbo = bo->parent; pbo; ++level) | |||
| 1316 | pbo = pbo->parent; | |||
| 1317 | ||||
| 1318 | level += params->adev->vm_manager.root_level; | |||
| 1319 | amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags); | |||
| 1320 | pde = (entry - parent->entries) * 8; | |||
| 1321 | return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags); | |||
| 1322 | } | |||
| 1323 | ||||
| 1324 | /** | |||
| 1325 | * amdgpu_vm_invalidate_pds - mark all PDs as invalid | |||
| 1326 | * | |||
| 1327 | * @adev: amdgpu_device pointer | |||
| 1328 | * @vm: related vm | |||
| 1329 | * | |||
| 1330 | * Mark all PD levels as invalid after an error. | |||
| 1331 | */ | |||
| 1332 | static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, | |||
| 1333 | struct amdgpu_vm *vm) | |||
| 1334 | { | |||
| 1335 | struct amdgpu_vm_pt_cursor cursor; | |||
| 1336 | struct amdgpu_vm_pt *entry; | |||
| 1337 | ||||
| 1338 | for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)for (amdgpu_vm_pt_first_dfs((adev), (vm), (((void *)0)), & (cursor)), (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev ), &(cursor)); amdgpu_vm_pt_continue_dfs((((void *)0)), ( entry)); (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev ), &(cursor))) | |||
| 1339 | if (entry->base.bo && !entry->base.moved) | |||
| 1340 | amdgpu_vm_bo_relocated(&entry->base); | |||
| 1341 | } | |||
| 1342 | ||||
| 1343 | /** | |||
| 1344 | * amdgpu_vm_update_pdes - make sure that all directories are valid | |||
| 1345 | * | |||
| 1346 | * @adev: amdgpu_device pointer | |||
| 1347 | * @vm: requested vm | |||
| 1348 | * @immediate: submit immediately to the paging queue | |||
| 1349 | * | |||
| 1350 | * Makes sure all directories are up to date. | |||
| 1351 | * | |||
| 1352 | * Returns: | |||
| 1353 | * 0 for success, error for failure. | |||
| 1354 | */ | |||
| 1355 | int amdgpu_vm_update_pdes(struct amdgpu_device *adev, | |||
| 1356 | struct amdgpu_vm *vm, bool_Bool immediate) | |||
| 1357 | { | |||
| 1358 | struct amdgpu_vm_update_params params; | |||
| 1359 | int r; | |||
| 1360 | ||||
| 1361 | if (list_empty(&vm->relocated)) | |||
| 1362 | return 0; | |||
| 1363 | ||||
| 1364 | memset(¶ms, 0, sizeof(params))__builtin_memset((¶ms), (0), (sizeof(params))); | |||
| 1365 | params.adev = adev; | |||
| 1366 | params.vm = vm; | |||
| 1367 | params.immediate = immediate; | |||
| 1368 | ||||
| 1369 | r = vm->update_funcs->prepare(¶ms, NULL((void *)0), AMDGPU_SYNC_EXPLICIT); | |||
| 1370 | if (r) | |||
| 1371 | return r; | |||
| 1372 | ||||
| 1373 | while (!list_empty(&vm->relocated)) { | |||
| 1374 | struct amdgpu_vm_pt *entry; | |||
| 1375 | ||||
| 1376 | entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,({ const __typeof( ((struct amdgpu_vm_pt *)0)->base.vm_status ) *__mptr = ((&vm->relocated)->next); (struct amdgpu_vm_pt *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_vm_pt, base.vm_status) );}) | |||
| 1377 | base.vm_status)({ const __typeof( ((struct amdgpu_vm_pt *)0)->base.vm_status ) *__mptr = ((&vm->relocated)->next); (struct amdgpu_vm_pt *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_vm_pt, base.vm_status) );}); | |||
| 1378 | amdgpu_vm_bo_idle(&entry->base); | |||
| 1379 | ||||
| 1380 | r = amdgpu_vm_update_pde(¶ms, vm, entry); | |||
| 1381 | if (r) | |||
| 1382 | goto error; | |||
| 1383 | } | |||
| 1384 | ||||
| 1385 | r = vm->update_funcs->commit(¶ms, &vm->last_update); | |||
| 1386 | if (r) | |||
| 1387 | goto error; | |||
| 1388 | return 0; | |||
| 1389 | ||||
| 1390 | error: | |||
| 1391 | amdgpu_vm_invalidate_pds(adev, vm); | |||
| 1392 | return r; | |||
| 1393 | } | |||
| 1394 | ||||
| 1395 | /* | |||
| 1396 | * amdgpu_vm_update_flags - figure out flags for PTE updates | |||
| 1397 | * | |||
| 1398 | * Make sure to set the right flags for the PTEs at the desired level. | |||
| 1399 | */ | |||
| 1400 | static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, | |||
| 1401 | struct amdgpu_bo *bo, unsigned level, | |||
| 1402 | uint64_t pe, uint64_t addr, | |||
| 1403 | unsigned count, uint32_t incr, | |||
| 1404 | uint64_t flags) | |||
| 1405 | ||||
| 1406 | { | |||
| 1407 | if (level != AMDGPU_VM_PTB) { | |||
| 1408 | flags |= AMDGPU_PDE_PTE(1ULL << 54); | |||
| 1409 | amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags)(params->adev)->gmc.gmc_funcs->get_vm_pde((params-> adev), (level), (&addr), (&flags)); | |||
| 1410 | ||||
| 1411 | } else if (params->adev->asic_type >= CHIP_VEGA10 && | |||
| 1412 | !(flags & AMDGPU_PTE_VALID(1ULL << 0)) && | |||
| 1413 | !(flags & AMDGPU_PTE_PRT(1ULL << 51))) { | |||
| 1414 | ||||
| 1415 | /* Workaround for fault priority problem on GMC9 */ | |||
| 1416 | flags |= AMDGPU_PTE_EXECUTABLE(1ULL << 4); | |||
| 1417 | } | |||
| 1418 | ||||
| 1419 | params->vm->update_funcs->update(params, bo, pe, addr, count, incr, | |||
| 1420 | flags); | |||
| 1421 | } | |||
| 1422 | ||||
| 1423 | /** | |||
| 1424 | * amdgpu_vm_fragment - get fragment for PTEs | |||
| 1425 | * | |||
| 1426 | * @params: see amdgpu_vm_update_params definition | |||
| 1427 | * @start: first PTE to handle | |||
| 1428 | * @end: last PTE to handle | |||
| 1429 | * @flags: hw mapping flags | |||
| 1430 | * @frag: resulting fragment size | |||
| 1431 | * @frag_end: end of this fragment | |||
| 1432 | * | |||
| 1433 | * Returns the first possible fragment for the start and end address. | |||
| 1434 | */ | |||
| 1435 | static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, | |||
| 1436 | uint64_t start, uint64_t end, uint64_t flags, | |||
| 1437 | unsigned int *frag, uint64_t *frag_end) | |||
| 1438 | { | |||
| 1439 | /** | |||
| 1440 | * The MC L1 TLB supports variable sized pages, based on a fragment | |||
| 1441 | * field in the PTE. When this field is set to a non-zero value, page | |||
| 1442 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE | |||
| 1443 | * flags are considered valid for all PTEs within the fragment range | |||
| 1444 | * and corresponding mappings are assumed to be physically contiguous. | |||
| 1445 | * | |||
| 1446 | * The L1 TLB can store a single PTE for the whole fragment, | |||
| 1447 | * significantly increasing the space available for translation | |||
| 1448 | * caching. This leads to large improvements in throughput when the | |||
| 1449 | * TLB is under pressure. | |||
| 1450 | * | |||
| 1451 | * The L2 TLB distributes small and large fragments into two | |||
| 1452 | * asymmetric partitions. The large fragment cache is significantly | |||
| 1453 | * larger. Thus, we try to use large fragments wherever possible. | |||
| 1454 | * Userspace can support this by aligning virtual base address and | |||
| 1455 | * allocation size to the fragment size. | |||
| 1456 | * | |||
| 1457 | * Starting with Vega10 the fragment size only controls the L1. The L2 | |||
| 1458 | * is now directly fed with small/huge/giant pages from the walker. | |||
| 1459 | */ | |||
| 1460 | unsigned max_frag; | |||
| 1461 | ||||
| 1462 | if (params->adev->asic_type < CHIP_VEGA10) | |||
| 1463 | max_frag = params->adev->vm_manager.fragment_size; | |||
| 1464 | else | |||
| 1465 | max_frag = 31; | |||
| 1466 | ||||
| 1467 | /* system pages are not physically contiguous */ | |||
| 1468 | if (params->pages_addr) { | |||
| 1469 | *frag = 0; | |||
| 1470 | *frag_end = end; | |||
| 1471 | return; | |||
| 1472 | } | |||
| 1473 | ||||
| 1474 | /* This intentionally wraps around if no bit is set */ | |||
| 1475 | *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1)((((unsigned)ffs(start) - 1)<((unsigned)fls64(end - start) - 1))?((unsigned)ffs(start) - 1):((unsigned)fls64(end - start ) - 1)); | |||
| 1476 | if (*frag >= max_frag) { | |||
| 1477 | *frag = max_frag; | |||
| 1478 | *frag_end = end & ~((1ULL << max_frag) - 1); | |||
| 1479 | } else { | |||
| 1480 | *frag_end = start + (1 << *frag); | |||
| 1481 | } | |||
| 1482 | } | |||
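A minimal standalone sketch of the fragment selection above, using GCC/Clang builtins in place of the kernel's ffs()/fls64(); the start/end pfns and the max_frag of 5 in main() are made up for illustration. The chosen fragment is the largest power-of-two block that is both aligned to the start address and no larger than the remaining range, clamped to max_frag.

#include <stdint.h>
#include <stdio.h>

static void sketch_fragment(uint64_t start, uint64_t end, unsigned max_frag,
			    unsigned *frag, uint64_t *frag_end)
{
	/* alignment of @start; wraps around when start == 0 so the
	 * range-size term wins, matching the driver's intentional wrap */
	unsigned align_order = (unsigned)__builtin_ffsll((long long)start) - 1;
	/* largest power of two not bigger than the remaining range */
	unsigned size_order = 63 - (unsigned)__builtin_clzll(end - start);

	*frag = align_order < size_order ? align_order : size_order;
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1ULL << *frag);
	}
}

int main(void)
{
	unsigned frag;
	uint64_t frag_end;

	/* range of GPU pages [0x200, 0x600) with a made-up max_frag of 5 */
	sketch_fragment(0x200, 0x600, 5, &frag, &frag_end);
	printf("frag %u, frag_end 0x%llx\n", frag, (unsigned long long)frag_end);
	return 0;
}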
| 1483 | ||||
| 1484 | /** | |||
| 1485 | * amdgpu_vm_update_ptes - make sure that page tables are valid | |||
| 1486 | * | |||
| 1487 | * @params: see amdgpu_vm_update_params definition | |||
| 1488 | * @start: start of GPU address range | |||
| 1489 | * @end: end of GPU address range | |||
| 1490 | * @dst: destination address to map to; advanced to the next dst inside the function | |||
| 1491 | * @flags: mapping flags | |||
| 1492 | * | |||
| 1493 | * Update the page tables in the range @start - @end. | |||
| 1494 | * | |||
| 1495 | * Returns: | |||
| 1496 | * 0 for success, -EINVAL for failure. | |||
| 1497 | */ | |||
| 1498 | static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, | |||
| 1499 | uint64_t start, uint64_t end, | |||
| 1500 | uint64_t dst, uint64_t flags) | |||
| 1501 | { | |||
| 1502 | struct amdgpu_device *adev = params->adev; | |||
| 1503 | struct amdgpu_vm_pt_cursor cursor; | |||
| 1504 | uint64_t frag_start = start, frag_end; | |||
| 1505 | unsigned int frag; | |||
| 1506 | int r; | |||
| 1507 | ||||
| 1508 | /* figure out the initial fragment */ | |||
| 1509 | amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); | |||
| 1510 | ||||
| 1511 | /* walk over the address space and update the PTs */ | |||
| 1512 | amdgpu_vm_pt_start(adev, params->vm, start, &cursor); | |||
| 1513 | while (cursor.pfn < end) { | |||
| 1514 | unsigned shift, parent_shift, mask; | |||
| 1515 | uint64_t incr, entry_end, pe_start; | |||
| 1516 | struct amdgpu_bo *pt; | |||
| 1517 | ||||
| 1518 | if (!params->unlocked) { | |||
| 1519 | /* make sure that the page tables covering the | |||
| 1520 | * address range are actually allocated | |||
| 1521 | */ | |||
| 1522 | r = amdgpu_vm_alloc_pts(params->adev, params->vm, | |||
| 1523 | &cursor, params->immediate); | |||
| 1524 | if (r) | |||
| 1525 | return r; | |||
| 1526 | } | |||
| 1527 | ||||
| 1528 | shift = amdgpu_vm_level_shift(adev, cursor.level); | |||
| 1529 | parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); | |||
| 1530 | if (params->unlocked) { | |||
| 1531 | /* Unlocked updates are only allowed on the leaves */ | |||
| 1532 | if (amdgpu_vm_pt_descendant(adev, &cursor)) | |||
| 1533 | continue; | |||
| 1534 | } else if (adev->asic_type < CHIP_VEGA10 && | |||
| 1535 | (flags & AMDGPU_PTE_VALID(1ULL << 0))) { | |||
| 1536 | /* No huge page support before GMC v9 */ | |||
| 1537 | if (cursor.level != AMDGPU_VM_PTB) { | |||
| 1538 | if (!amdgpu_vm_pt_descendant(adev, &cursor)) | |||
| 1539 | return -ENOENT2; | |||
| 1540 | continue; | |||
| 1541 | } | |||
| 1542 | } else if (frag < shift) { | |||
| 1543 | /* We can't use this level when the fragment size is | |||
| 1544 | * smaller than the address shift. Go to the next | |||
| 1545 | * child entry and try again. | |||
| 1546 | */ | |||
| 1547 | if (amdgpu_vm_pt_descendant(adev, &cursor)) | |||
| 1548 | continue; | |||
| 1549 | } else if (frag >= parent_shift) { | |||
| 1550 | /* If the fragment size is even larger than the parent | |||
| 1551 | * shift we should go up one level and check it again. | |||
| 1552 | */ | |||
| 1553 | if (!amdgpu_vm_pt_ancestor(&cursor)) | |||
| 1554 | return -EINVAL22; | |||
| 1555 | continue; | |||
| 1556 | } | |||
| 1557 | ||||
| 1558 | pt = cursor.entry->base.bo; | |||
| 1559 | if (!pt) { | |||
| 1560 | /* We need all PDs and PTs for mapping something, */ | |||
| 1561 | if (flags & AMDGPU_PTE_VALID(1ULL << 0)) | |||
| 1562 | return -ENOENT2; | |||
| 1563 | ||||
| 1564 | /* but unmapping something can happen at a higher | |||
| 1565 | * level. | |||
| 1566 | */ | |||
| 1567 | if (!amdgpu_vm_pt_ancestor(&cursor)) | |||
| 1568 | return -EINVAL22; | |||
| 1569 | ||||
| 1570 | pt = cursor.entry->base.bo; | |||
| 1571 | shift = parent_shift; | |||
| 1572 | frag_end = max(frag_end, roundup2(frag_start + 1,(((frag_end)>((((frag_start + 1) + ((1ULL << shift) - 1)) & (~((__typeof(frag_start + 1))(1ULL << shift) - 1)))))?(frag_end):((((frag_start + 1) + ((1ULL << shift ) - 1)) & (~((__typeof(frag_start + 1))(1ULL << shift ) - 1))))) | |||
| 1573 | 1ULL << shift))(((frag_end)>((((frag_start + 1) + ((1ULL << shift) - 1)) & (~((__typeof(frag_start + 1))(1ULL << shift) - 1)))))?(frag_end):((((frag_start + 1) + ((1ULL << shift ) - 1)) & (~((__typeof(frag_start + 1))(1ULL << shift ) - 1))))); | |||
| 1574 | } | |||
| 1575 | ||||
| 1576 | /* Looks good so far, calculate parameters for the update */ | |||
| 1577 | incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE4096 << shift; | |||
| 1578 | mask = amdgpu_vm_entries_mask(adev, cursor.level); | |||
| 1579 | pe_start = ((cursor.pfn >> shift) & mask) * 8; | |||
| 1580 | entry_end = ((uint64_t)mask + 1) << shift; | |||
| 1581 | entry_end += cursor.pfn & ~(entry_end - 1); | |||
| 1582 | entry_end = min(entry_end, end)(((entry_end)<(end))?(entry_end):(end)); | |||
| 1583 | ||||
| 1584 | do { | |||
| 1585 | struct amdgpu_vm *vm = params->vm; | |||
| 1586 | uint64_t upd_end = min(entry_end, frag_end)(((entry_end)<(frag_end))?(entry_end):(frag_end)); | |||
| 1587 | unsigned nptes = (upd_end - frag_start) >> shift; | |||
| 1588 | uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag)((frag & 0x1fULL) << 7); | |||
| 1589 | ||||
| 1590 | /* This can happen when we set higher level PDs to | |||
| 1591 | * silent to stop fault floods. | |||
| 1592 | */ | |||
| 1593 | nptes = max(nptes, 1u)(((nptes)>(1u))?(nptes):(1u)); | |||
| 1594 | ||||
| 1595 | trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, | |||
| 1596 | nptes, dst, incr, upd_flags, | |||
| 1597 | vm->task_info.pid, | |||
| 1598 | vm->immediate.fence_context); | |||
| 1599 | amdgpu_vm_update_flags(params, pt, cursor.level, | |||
| 1600 | pe_start, dst, nptes, incr, | |||
| 1601 | upd_flags); | |||
| 1602 | ||||
| 1603 | pe_start += nptes * 8; | |||
| 1604 | dst += nptes * incr; | |||
| 1605 | ||||
| 1606 | frag_start = upd_end; | |||
| 1607 | if (frag_start >= frag_end) { | |||
| 1608 | /* figure out the next fragment */ | |||
| 1609 | amdgpu_vm_fragment(params, frag_start, end, | |||
| 1610 | flags, &frag, &frag_end); | |||
| 1611 | if (frag < shift) | |||
| 1612 | break; | |||
| 1613 | } | |||
| 1614 | } while (frag_start < entry_end); | |||
| 1615 | ||||
| 1616 | if (amdgpu_vm_pt_descendant(adev, &cursor)) { | |||
| 1617 | /* Free all child entries. | |||
| 1618 | * Update the tables with the flags and addresses and free up subsequent | |||
| 1619 | * tables in the case of huge pages or freed up areas. | |||
| 1620 | * This is the maximum you can free, because all other page tables are not | |||
| 1621 | * completely covered by the range and so potentially still in use. | |||
| 1622 | */ | |||
| 1623 | while (cursor.pfn < frag_start) { | |||
| 1624 | amdgpu_vm_free_pts(adev, params->vm, &cursor); | |||
| 1625 | amdgpu_vm_pt_next(adev, &cursor); | |||
| 1626 | } | |||
| 1627 | ||||
| 1628 | } else if (frag >= shift) { | |||
| 1629 | /* or just move on to the next on the same level. */ | |||
| 1630 | amdgpu_vm_pt_next(adev, &cursor); | |||
| 1631 | } | |||
| 1632 | } | |||
| 1633 | ||||
| 1634 | return 0; | |||
| 1635 | } | |||
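To make the bookkeeping in the loop above easier to follow, here is a standalone sketch of the per-iteration arithmetic for one hypothetical level: a 512-entry table (mask == 0x1ff) whose entries each cover 1 << 9 GPU pages (shift == 9). The pfn and end values are made up; the sketch only reproduces the incr/pe_start/entry_end calculations, not the walker itself.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t gpu_page_size = 4096;	/* AMDGPU_GPU_PAGE_SIZE */
	const unsigned shift = 9;		/* assumed level shift */
	const uint64_t mask = 0x1ff;		/* assumed 512 entries per table */
	uint64_t pfn = 0x12345, end = 0x13000;	/* made-up walk position */

	/* GPU address range covered by one entry at this level */
	uint64_t incr = gpu_page_size << shift;
	/* byte offset of the first entry to update inside its table */
	uint64_t pe_start = ((pfn >> shift) & mask) * 8;
	/* first pfn that is no longer covered by the current table */
	uint64_t entry_end = (mask + 1) << shift;

	entry_end += pfn & ~(entry_end - 1);
	if (entry_end > end)
		entry_end = end;

	printf("incr 0x%llx, pe_start 0x%llx, entry_end 0x%llx\n",
	       (unsigned long long)incr, (unsigned long long)pe_start,
	       (unsigned long long)entry_end);
	return 0;
}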
| 1636 | ||||
| 1637 | /** | |||
| 1638 | * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table | |||
| 1639 | * | |||
| 1640 | * @adev: amdgpu_device pointer | |||
| 1641 | * @vm: requested vm | |||
| 1642 | * @immediate: immediate submission in a page fault | |||
| 1643 | * @unlocked: unlocked invalidation during MM callback | |||
| 1644 | * @resv: fences we need to sync to | |||
| 1645 | * @start: start of mapped range | |||
| 1646 | * @last: last mapped entry | |||
| 1647 | * @flags: flags for the entries | |||
| 1648 | * @addr: addr to set the area to | |||
| 1649 | * @pages_addr: DMA addresses to use for mapping | |||
| 1650 | * @fence: optional resulting fence | |||
| 1651 | * | |||
| 1652 | * Fill in the page table entries between @start and @last. | |||
| 1653 | * | |||
| 1654 | * Returns: | |||
| 1655 | * 0 for success, -EINVAL for failure. | |||
| 1656 | */ | |||
| 1657 | static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, | |||
| 1658 | struct amdgpu_vm *vm, bool_Bool immediate, | |||
| 1659 | bool_Bool unlocked, struct dma_resv *resv, | |||
| 1660 | uint64_t start, uint64_t last, | |||
| 1661 | uint64_t flags, uint64_t addr, | |||
| 1662 | dma_addr_t *pages_addr, | |||
| 1663 | struct dma_fence **fence) | |||
| 1664 | { | |||
| 1665 | struct amdgpu_vm_update_params params; | |||
| 1666 | enum amdgpu_sync_mode sync_mode; | |||
| 1667 | int r; | |||
| 1668 | ||||
| 1669 | memset(¶ms, 0, sizeof(params))__builtin_memset((¶ms), (0), (sizeof(params))); | |||
| 1670 | params.adev = adev; | |||
| 1671 | params.vm = vm; | |||
| 1672 | params.immediate = immediate; | |||
| 1673 | params.pages_addr = pages_addr; | |||
| 1674 | params.unlocked = unlocked; | |||
| 1675 | ||||
| 1676 | /* Implicitly sync to command submissions in the same VM before | |||
| 1677 | * unmapping. Sync to moving fences before mapping. | |||
| 1678 | */ | |||
| 1679 | if (!(flags & AMDGPU_PTE_VALID(1ULL << 0))) | |||
| 1680 | sync_mode = AMDGPU_SYNC_EQ_OWNER; | |||
| 1681 | else | |||
| 1682 | sync_mode = AMDGPU_SYNC_EXPLICIT; | |||
| 1683 | ||||
| 1684 | amdgpu_vm_eviction_lock(vm); | |||
| 1685 | if (vm->evicting) { | |||
| 1686 | r = -EBUSY16; | |||
| 1687 | goto error_unlock; | |||
| 1688 | } | |||
| 1689 | ||||
| 1690 | if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { | |||
| 1691 | struct dma_fence *tmp = dma_fence_get_stub(); | |||
| 1692 | ||||
| 1693 | amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true1); | |||
| 1694 | swap(vm->last_unlocked, tmp)do { __typeof(vm->last_unlocked) __tmp = (vm->last_unlocked ); (vm->last_unlocked) = (tmp); (tmp) = __tmp; } while(0); | |||
| 1695 | dma_fence_put(tmp); | |||
| 1696 | } | |||
| 1697 | ||||
| 1698 | r = vm->update_funcs->prepare(¶ms, resv, sync_mode); | |||
| 1699 | if (r) | |||
| 1700 | goto error_unlock; | |||
| 1701 | ||||
| 1702 | r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); | |||
| 1703 | if (r) | |||
| 1704 | goto error_unlock; | |||
| 1705 | ||||
| 1706 | r = vm->update_funcs->commit(¶ms, fence); | |||
| 1707 | ||||
| 1708 | error_unlock: | |||
| 1709 | amdgpu_vm_eviction_unlock(vm); | |||
| 1710 | return r; | |||
| 1711 | } | |||
| 1712 | ||||
| 1713 | /** | |||
| 1714 | * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks | |||
| 1715 | * | |||
| 1716 | * @adev: amdgpu_device pointer | |||
| 1717 | * @resv: fences we need to sync to | |||
| 1718 | * @pages_addr: DMA addresses to use for mapping | |||
| 1719 | * @vm: requested vm | |||
| 1720 | * @mapping: mapped range and flags to use for the update | |||
| 1721 | * @flags: HW flags for the mapping | |||
| 1722 | * @bo_adev: amdgpu_device pointer of the device the BO was actually allocated on | |||
| 1723 | * @nodes: array of drm_mm_nodes with the MC addresses | |||
| 1724 | * @fence: optional resulting fence | |||
| 1725 | * | |||
| 1726 | * Split the mapping into smaller chunks so that each update fits | |||
| 1727 | * into a SDMA IB. | |||
| 1728 | * | |||
| 1729 | * Returns: | |||
| 1730 | * 0 for success, -EINVAL for failure. | |||
| 1731 | */ | |||
| 1732 | static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, | |||
| 1733 | struct dma_resv *resv, | |||
| 1734 | dma_addr_t *pages_addr, | |||
| 1735 | struct amdgpu_vm *vm, | |||
| 1736 | struct amdgpu_bo_va_mapping *mapping, | |||
| 1737 | uint64_t flags, | |||
| 1738 | struct amdgpu_device *bo_adev, | |||
| 1739 | struct drm_mm_node *nodes, | |||
| 1740 | struct dma_fence **fence) | |||
| 1741 | { | |||
| 1742 | unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; | |||
| 1743 | uint64_t pfn, start = mapping->start; | |||
| 1744 | int r; | |||
| 1745 | ||||
| 1746 | /* Normally bo_va->flags only contains the READABLE and WRITEABLE bits here, | |||
| 1747 | * but just in case, filter the flags first | |||
| 1748 | */ | |||
| 1749 | if (!(mapping->flags & AMDGPU_PTE_READABLE(1ULL << 5))) | |||
| 1750 | flags &= ~AMDGPU_PTE_READABLE(1ULL << 5); | |||
| 1751 | if (!(mapping->flags & AMDGPU_PTE_WRITEABLE(1ULL << 6))) | |||
| 1752 | flags &= ~AMDGPU_PTE_WRITEABLE(1ULL << 6); | |||
| 1753 | ||||
| 1754 | /* Apply ASIC specific mapping flags */ | |||
| 1755 | amdgpu_gmc_get_vm_pte(adev, mapping, &flags)(adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (& flags)); | |||
| 1756 | ||||
| 1757 | trace_amdgpu_vm_bo_update(mapping); | |||
| 1758 | ||||
| 1759 | pfn = mapping->offset >> PAGE_SHIFT12; | |||
| 1760 | if (nodes) { | |||
| 1761 | while (pfn >= nodes->size) { | |||
| 1762 | pfn -= nodes->size; | |||
| 1763 | ++nodes; | |||
| 1764 | } | |||
| 1765 | } | |||
| 1766 | ||||
| 1767 | do { | |||
| 1768 | dma_addr_t *dma_addr = NULL((void *)0); | |||
| 1769 | uint64_t max_entries; | |||
| 1770 | uint64_t addr, last; | |||
| 1771 | ||||
| 1772 | max_entries = mapping->last - start + 1; | |||
| 1773 | if (nodes) { | |||
| 1774 | addr = nodes->start << PAGE_SHIFT12; | |||
| 1775 | max_entries = min((nodes->size - pfn) *((((nodes->size - pfn) * ((1 << 12) / 4096))<(max_entries ))?((nodes->size - pfn) * ((1 << 12) / 4096)):(max_entries )) | |||
| 1776 | AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries)((((nodes->size - pfn) * ((1 << 12) / 4096))<(max_entries ))?((nodes->size - pfn) * ((1 << 12) / 4096)):(max_entries )); | |||
| 1777 | } else { | |||
| 1778 | addr = 0; | |||
| 1779 | } | |||
| 1780 | ||||
| 1781 | if (pages_addr) { | |||
| 1782 | uint64_t count; | |||
| 1783 | ||||
| 1784 | for (count = 1; | |||
| 1785 | count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE((1 << 12) / 4096); | |||
| 1786 | ++count) { | |||
| 1787 | uint64_t idx = pfn + count; | |||
| 1788 | ||||
| 1789 | if (pages_addr[idx] != | |||
| 1790 | (pages_addr[idx - 1] + PAGE_SIZE(1 << 12))) | |||
| 1791 | break; | |||
| 1792 | } | |||
| 1793 | ||||
| 1794 | if (count < min_linear_pages) { | |||
| 1795 | addr = pfn << PAGE_SHIFT12; | |||
| 1796 | dma_addr = pages_addr; | |||
| 1797 | } else { | |||
| 1798 | addr = pages_addr[pfn]; | |||
| 1799 | max_entries = count * | |||
| 1800 | AMDGPU_GPU_PAGES_IN_CPU_PAGE((1 << 12) / 4096); | |||
| 1801 | } | |||
| 1802 | ||||
| 1803 | } else if (flags & (AMDGPU_PTE_VALID(1ULL << 0) | AMDGPU_PTE_PRT(1ULL << 51))) { | |||
| 1804 | addr += bo_adev->vm_manager.vram_base_offset; | |||
| 1805 | addr += pfn << PAGE_SHIFT12; | |||
| 1806 | } | |||
| 1807 | ||||
| 1808 | last = start + max_entries - 1; | |||
| 1809 | r = amdgpu_vm_bo_update_mapping(adev, vm, false0, false0, resv, | |||
| 1810 | start, last, flags, addr, | |||
| 1811 | dma_addr, fence); | |||
| 1812 | if (r) | |||
| 1813 | return r; | |||
| 1814 | ||||
| 1815 | pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE((1 << 12) / 4096); | |||
| 1816 | if (nodes && nodes->size == pfn) { | |||
| 1817 | pfn = 0; | |||
| 1818 | ++nodes; | |||
| 1819 | } | |||
| 1820 | start = last + 1; | |||
| 1821 | ||||
| 1822 | } while (unlikely(start != mapping->last + 1)__builtin_expect(!!(start != mapping->last + 1), 0)); | |||
| 1823 | ||||
| 1824 | return 0; | |||
| 1825 | } | |||
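The inner contiguity scan above decides whether a chunk of system pages can be mapped linearly or has to go through the per-page DMA address table. Below is a standalone sketch of just that scan; the 4 KiB page size and the sample DMA addresses are assumptions for illustration only.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096ULL	/* assumed CPU page size */

/* count how many CPU pages starting at @pfn are physically contiguous */
static uint64_t contiguous_pages(const uint64_t *pages_addr, size_t pfn,
				 uint64_t max_entries)
{
	uint64_t count;

	for (count = 1; count < max_entries; ++count) {
		size_t idx = pfn + count;

		if (pages_addr[idx] != pages_addr[idx - 1] + SKETCH_PAGE_SIZE)
			break;	/* physical discontinuity ends the run */
	}
	return count;
}

int main(void)
{
	/* three contiguous pages followed by a gap */
	uint64_t pages[] = { 0x10000, 0x11000, 0x12000, 0x40000 };

	printf("%llu contiguous pages\n",
	       (unsigned long long)contiguous_pages(pages, 0, 4));
	return 0;
}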
| 1826 | ||||
| 1827 | /** | |||
| 1828 | * amdgpu_vm_bo_update - update all BO mappings in the vm page table | |||
| 1829 | * | |||
| 1830 | * @adev: amdgpu_device pointer | |||
| 1831 | * @bo_va: requested BO and VM object | |||
| 1832 | * @clear: if true clear the entries | |||
| 1833 | * | |||
| 1834 | * Fill in the page table entries for @bo_va. | |||
| 1835 | * | |||
| 1836 | * Returns: | |||
| 1837 | * 0 for success, -EINVAL for failure. | |||
| 1838 | */ | |||
| 1839 | int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, | |||
| 1840 | bool_Bool clear) | |||
| 1841 | { | |||
| 1842 | struct amdgpu_bo *bo = bo_va->base.bo; | |||
| 1843 | struct amdgpu_vm *vm = bo_va->base.vm; | |||
| 1844 | struct amdgpu_bo_va_mapping *mapping; | |||
| 1845 | dma_addr_t *pages_addr = NULL((void *)0); | |||
| 1846 | struct ttm_resource *mem; | |||
| 1847 | struct drm_mm_node *nodes; | |||
| 1848 | struct dma_fence **last_update; | |||
| 1849 | struct dma_resv *resv; | |||
| 1850 | uint64_t flags; | |||
| 1851 | struct amdgpu_device *bo_adev = adev; | |||
| 1852 | int r; | |||
| 1853 | ||||
| 1854 | if (clear || !bo) { | |||
| 1855 | mem = NULL((void *)0); | |||
| 1856 | nodes = NULL((void *)0); | |||
| 1857 | resv = vm->root.base.bo->tbo.base.resv; | |||
| 1858 | } else { | |||
| 1859 | struct drm_gem_object *obj = &bo->tbo.base; | |||
| 1860 | struct ttm_dma_tt *ttm; | |||
| 1861 | ||||
| 1862 | resv = bo->tbo.base.resv; | |||
| 1863 | #ifdef notyet | |||
| 1864 | if (obj->import_attach && bo_va->is_xgmi) { | |||
| 1865 | struct dma_buf *dma_buf = obj->import_attach->dmabuf; | |||
| 1866 | struct drm_gem_object *gobj = dma_buf->priv; | |||
| 1867 | struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj)({ const __typeof( ((struct amdgpu_bo *)0)->tbo.base ) *__mptr = ((gobj)); (struct amdgpu_bo *)( (char *)__mptr - __builtin_offsetof (struct amdgpu_bo, tbo.base) );}); | |||
| 1868 | ||||
| 1869 | if (abo->tbo.mem.mem_type == TTM_PL_VRAM2) | |||
| 1870 | bo = gem_to_amdgpu_bo(gobj)({ const __typeof( ((struct amdgpu_bo *)0)->tbo.base ) *__mptr = ((gobj)); (struct amdgpu_bo *)( (char *)__mptr - __builtin_offsetof (struct amdgpu_bo, tbo.base) );}); | |||
| 1871 | } | |||
| 1872 | #endif | |||
| 1873 | mem = &bo->tbo.mem; | |||
| 1874 | nodes = mem->mm_node; | |||
| 1875 | if (mem->mem_type == TTM_PL_TT1) { | |||
| 1876 | ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm)({ const __typeof( ((struct ttm_dma_tt *)0)->ttm ) *__mptr = (bo->tbo.ttm); (struct ttm_dma_tt *)( (char *)__mptr - __builtin_offsetof (struct ttm_dma_tt, ttm) );}); | |||
| 1877 | pages_addr = ttm->dma_address; | |||
| 1878 | } | |||
| 1879 | } | |||
| 1880 | ||||
| 1881 | if (bo) { | |||
| 1882 | flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); | |||
| 1883 | ||||
| 1884 | if (amdgpu_bo_encrypted(bo)) | |||
| 1885 | flags |= AMDGPU_PTE_TMZ(1ULL << 3); | |||
| 1886 | ||||
| 1887 | bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); | |||
| 1888 | } else { | |||
| 1889 | flags = 0x0; | |||
| 1890 | } | |||
| 1891 | ||||
| 1892 | if (clear || (bo && bo->tbo.base.resv == | |||
| 1893 | vm->root.base.bo->tbo.base.resv)) | |||
| 1894 | last_update = &vm->last_update; | |||
| 1895 | else | |||
| 1896 | last_update = &bo_va->last_pt_update; | |||
| 1897 | ||||
| 1898 | if (!clear && bo_va->base.moved) { | |||
| 1899 | bo_va->base.moved = false0; | |||
| 1900 | list_splice_init(&bo_va->valids, &bo_va->invalids); | |||
| 1901 | ||||
| 1902 | } else if (bo_va->cleared != clear) { | |||
| 1903 | list_splice_init(&bo_va->valids, &bo_va->invalids); | |||
| 1904 | } | |||
| 1905 | ||||
| 1906 | list_for_each_entry(mapping, &bo_va->invalids, list)for (mapping = ({ const __typeof( ((__typeof(*mapping) *)0)-> list ) *__mptr = ((&bo_va->invalids)->next); (__typeof (*mapping) *)( (char *)__mptr - __builtin_offsetof(__typeof(* mapping), list) );}); &mapping->list != (&bo_va-> invalids); mapping = ({ const __typeof( ((__typeof(*mapping) * )0)->list ) *__mptr = (mapping->list.next); (__typeof(* mapping) *)( (char *)__mptr - __builtin_offsetof(__typeof(*mapping ), list) );})) { | |||
| 1907 | r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm, | |||
| 1908 | mapping, flags, bo_adev, nodes, | |||
| 1909 | last_update); | |||
| 1910 | if (r) | |||
| 1911 | return r; | |||
| 1912 | } | |||
| 1913 | ||||
| 1914 | /* If the BO is not in its preferred location add it back to | |||
| 1915 | * the evicted list so that it gets validated again on the | |||
| 1916 | * next command submission. | |||
| 1917 | */ | |||
| 1918 | if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { | |||
| 1919 | uint32_t mem_type = bo->tbo.mem.mem_type; | |||
| 1920 | ||||
| 1921 | if (!(bo->preferred_domains & | |||
| 1922 | amdgpu_mem_type_to_domain(mem_type))) | |||
| 1923 | amdgpu_vm_bo_evicted(&bo_va->base); | |||
| 1924 | else | |||
| 1925 | amdgpu_vm_bo_idle(&bo_va->base); | |||
| 1926 | } else { | |||
| 1927 | amdgpu_vm_bo_done(&bo_va->base); | |||
| 1928 | } | |||
| 1929 | ||||
| 1930 | list_splice_init(&bo_va->invalids, &bo_va->valids); | |||
| 1931 | bo_va->cleared = clear; | |||
| 1932 | ||||
| 1933 | if (trace_amdgpu_vm_bo_mapping_enabled()) { | |||
| 1934 | list_for_each_entry(mapping, &bo_va->valids, list)for (mapping = ({ const __typeof( ((__typeof(*mapping) *)0)-> list ) *__mptr = ((&bo_va->valids)->next); (__typeof (*mapping) *)( (char *)__mptr - __builtin_offsetof(__typeof(* mapping), list) );}); &mapping->list != (&bo_va-> valids); mapping = ({ const __typeof( ((__typeof(*mapping) *) 0)->list ) *__mptr = (mapping->list.next); (__typeof(*mapping ) *)( (char *)__mptr - __builtin_offsetof(__typeof(*mapping), list) );})) | |||
| 1935 | trace_amdgpu_vm_bo_mapping(mapping); | |||
| 1936 | } | |||
| 1937 | ||||
| 1938 | return 0; | |||
| 1939 | } | |||
| 1940 | ||||
| 1941 | /** | |||
| 1942 | * amdgpu_vm_update_prt_state - update the global PRT state | |||
| 1943 | * | |||
| 1944 | * @adev: amdgpu_device pointer | |||
| 1945 | */ | |||
| 1946 | static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) | |||
| 1947 | { | |||
| 1948 | unsigned long flags; | |||
| 1949 | bool_Bool enable; | |||
| 1950 | ||||
| 1951 | spin_lock_irqsave(&adev->vm_manager.prt_lock, flags)do { flags = 0; mtx_enter(&adev->vm_manager.prt_lock); } while (0); | |||
| 1952 | enable = !!atomic_read(&adev->vm_manager.num_prt_users)({ typeof(*(&adev->vm_manager.num_prt_users)) __tmp = * (volatile typeof(*(&adev->vm_manager.num_prt_users)) * )&(*(&adev->vm_manager.num_prt_users)); membar_datadep_consumer (); __tmp; }); | |||
| 1953 | adev->gmc.gmc_funcs->set_prt(adev, enable); | |||
| 1954 | spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags)do { (void)(flags); mtx_leave(&adev->vm_manager.prt_lock ); } while (0); | |||
| 1955 | } | |||
| 1956 | ||||
| 1957 | /** | |||
| 1958 | * amdgpu_vm_prt_get - add a PRT user | |||
| 1959 | * | |||
| 1960 | * @adev: amdgpu_device pointer | |||
| 1961 | */ | |||
| 1962 | static void amdgpu_vm_prt_get(struct amdgpu_device *adev) | |||
| 1963 | { | |||
| 1964 | if (!adev->gmc.gmc_funcs->set_prt) | |||
| 1965 | return; | |||
| 1966 | ||||
| 1967 | if (atomic_inc_return(&adev->vm_manager.num_prt_users)__sync_add_and_fetch((&adev->vm_manager.num_prt_users) , 1) == 1) | |||
| 1968 | amdgpu_vm_update_prt_state(adev); | |||
| 1969 | } | |||
| 1970 | ||||
| 1971 | /** | |||
| 1972 | * amdgpu_vm_prt_put - drop a PRT user | |||
| 1973 | * | |||
| 1974 | * @adev: amdgpu_device pointer | |||
| 1975 | */ | |||
| 1976 | static void amdgpu_vm_prt_put(struct amdgpu_device *adev) | |||
| 1977 | { | |||
| 1978 | if (atomic_dec_return(&adev->vm_manager.num_prt_users)__sync_sub_and_fetch((&adev->vm_manager.num_prt_users) , 1) == 0) | |||
| 1979 | amdgpu_vm_update_prt_state(adev); | |||
| 1980 | } | |||
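The two helpers above implement a toggle-on-transition refcount: the global PRT enable state only changes when the user count goes 0 -> 1 or 1 -> 0. A minimal sketch of that pattern using C11 atomics follows; it stands in for the kernel's atomic_inc_return()/atomic_dec_return() and deliberately omits the prt_lock serialization done in amdgpu_vm_update_prt_state(), so it is an illustration, not driver code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int num_prt_users;

static void sketch_set_prt(bool enable)
{
	/* stand-in for the hardware callback gmc_funcs->set_prt() */
	printf("PRT support %s\n", enable ? "enabled" : "disabled");
}

static void sketch_prt_get(void)
{
	if (atomic_fetch_add(&num_prt_users, 1) + 1 == 1)
		sketch_set_prt(true);	/* first user switches it on */
}

static void sketch_prt_put(void)
{
	if (atomic_fetch_sub(&num_prt_users, 1) - 1 == 0)
		sketch_set_prt(false);	/* last user switches it off */
}

int main(void)
{
	sketch_prt_get();	/* enables */
	sketch_prt_get();	/* no state change */
	sketch_prt_put();	/* no state change */
	sketch_prt_put();	/* disables */
	return 0;
}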
| 1981 | ||||
| 1982 | /** | |||
| 1983 | * amdgpu_vm_prt_cb - callback for updating the PRT status | |||
| 1984 | * | |||
| 1985 | * @fence: fence for the callback | |||
| 1986 | * @_cb: the callback function | |||
| 1987 | */ | |||
| 1988 | static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) | |||
| 1989 | { | |||
| 1990 | struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb)({ const __typeof( ((struct amdgpu_prt_cb *)0)->cb ) *__mptr = (_cb); (struct amdgpu_prt_cb *)( (char *)__mptr - __builtin_offsetof (struct amdgpu_prt_cb, cb) );}); | |||
| 1991 | ||||
| 1992 | amdgpu_vm_prt_put(cb->adev); | |||
| 1993 | kfree(cb); | |||
| 1994 | } | |||
| 1995 | ||||
| 1996 | /** | |||
| 1997 | * amdgpu_vm_add_prt_cb - add callback for updating the PRT status | |||
| 1998 | * | |||
| 1999 | * @adev: amdgpu_device pointer | |||
| 2000 | * @fence: fence for the callback | |||
| 2001 | */ | |||
| 2002 | static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, | |||
| 2003 | struct dma_fence *fence) | |||
| 2004 | { | |||
| 2005 | struct amdgpu_prt_cb *cb; | |||
| 2006 | ||||
| 2007 | if (!adev->gmc.gmc_funcs->set_prt) | |||
| 2008 | return; | |||
| 2009 | ||||
| 2010 | cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL(0x0001 | 0x0004)); | |||
| 2011 | if (!cb) { | |||
| 2012 | /* Last resort when we are OOM */ | |||
| 2013 | if (fence) | |||
| 2014 | dma_fence_wait(fence, false0); | |||
| 2015 | ||||
| 2016 | amdgpu_vm_prt_put(adev); | |||
| 2017 | } else { | |||
| 2018 | cb->adev = adev; | |||
| 2019 | if (!fence || dma_fence_add_callback(fence, &cb->cb, | |||
| 2020 | amdgpu_vm_prt_cb)) | |||
| 2021 | amdgpu_vm_prt_cb(fence, &cb->cb); | |||
| 2022 | } | |||
| 2023 | } | |||
| 2024 | ||||
| 2025 | /** | |||
| 2026 | * amdgpu_vm_free_mapping - free a mapping | |||
| 2027 | * | |||
| 2028 | * @adev: amdgpu_device pointer | |||
| 2029 | * @vm: requested vm | |||
| 2030 | * @mapping: mapping to be freed | |||
| 2031 | * @fence: fence of the unmap operation | |||
| 2032 | * | |||
| 2033 | * Free a mapping and make sure we decrease the PRT usage count if applicable. | |||
| 2034 | */ | |||
| 2035 | static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, | |||
| 2036 | struct amdgpu_vm *vm, | |||
| 2037 | struct amdgpu_bo_va_mapping *mapping, | |||
| 2038 | struct dma_fence *fence) | |||
| 2039 | { | |||
| 2040 | if (mapping->flags & AMDGPU_PTE_PRT(1ULL << 51)) | |||
| 2041 | amdgpu_vm_add_prt_cb(adev, fence); | |||
| 2042 | kfree(mapping); | |||
| 2043 | } | |||
| 2044 | ||||
| 2045 | /** | |||
| 2046 | * amdgpu_vm_prt_fini - finish all prt mappings | |||
| 2047 | * | |||
| 2048 | * @adev: amdgpu_device pointer | |||
| 2049 | * @vm: requested vm | |||
| 2050 | * | |||
| 2051 | * Register a cleanup callback to disable PRT support after VM dies. | |||
| 2052 | */ | |||
| 2053 | static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 2054 | { | |||
| 2055 | struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; | |||
| 2056 | struct dma_fence *excl, **shared; | |||
| 2057 | unsigned i, shared_count; | |||
| 2058 | int r; | |||
| 2059 | ||||
| 2060 | r = dma_resv_get_fences_rcu(resv, &excl, | |||
| 2061 | &shared_count, &shared); | |||
| 2062 | if (r) { | |||
| 2063 | /* Not enough memory to grab the fence list, as last resort | |||
| 2064 | * block for all the fences to complete. | |||
| 2065 | */ | |||
| 2066 | dma_resv_wait_timeout_rcu(resv, true1, false0, | |||
| 2067 | MAX_SCHEDULE_TIMEOUT(0x7fffffff)); | |||
| 2068 | return; | |||
| 2069 | } | |||
| 2070 | ||||
| 2071 | /* Add a callback for each fence in the reservation object */ | |||
| 2072 | amdgpu_vm_prt_get(adev); | |||
| 2073 | amdgpu_vm_add_prt_cb(adev, excl); | |||
| 2074 | ||||
| 2075 | for (i = 0; i < shared_count; ++i) { | |||
| 2076 | amdgpu_vm_prt_get(adev); | |||
| 2077 | amdgpu_vm_add_prt_cb(adev, shared[i]); | |||
| 2078 | } | |||
| 2079 | ||||
| 2080 | kfree(shared); | |||
| 2081 | } | |||
| 2082 | ||||
| 2083 | /** | |||
| 2084 | * amdgpu_vm_clear_freed - clear freed BOs in the PT | |||
| 2085 | * | |||
| 2086 | * @adev: amdgpu_device pointer | |||
| 2087 | * @vm: requested vm | |||
| 2088 | * @fence: optional resulting fence (unchanged if no work needed to be done | |||
| 2089 | * or if an error occurred) | |||
| 2090 | * | |||
| 2091 | * Make sure all freed BOs are cleared in the PT. | |||
| 2092 | * PTs have to be reserved and mutex must be locked! | |||
| 2093 | * | |||
| 2094 | * Returns: | |||
| 2095 | * 0 for success. | |||
| 2096 | * | |||
| 2097 | */ | |||
| 2098 | int amdgpu_vm_clear_freed(struct amdgpu_device *adev, | |||
| 2099 | struct amdgpu_vm *vm, | |||
| 2100 | struct dma_fence **fence) | |||
| 2101 | { | |||
| 2102 | struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; | |||
| 2103 | struct amdgpu_bo_va_mapping *mapping; | |||
| 2104 | uint64_t init_pte_value = 0; | |||
| 2105 | struct dma_fence *f = NULL((void *)0); | |||
| 2106 | int r; | |||
| 2107 | ||||
| 2108 | while (!list_empty(&vm->freed)) { | |||
| 2109 | mapping = list_first_entry(&vm->freed,({ const __typeof( ((struct amdgpu_bo_va_mapping *)0)->list ) *__mptr = ((&vm->freed)->next); (struct amdgpu_bo_va_mapping *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_bo_va_mapping , list) );}) | |||
| 2110 | struct amdgpu_bo_va_mapping, list)({ const __typeof( ((struct amdgpu_bo_va_mapping *)0)->list ) *__mptr = ((&vm->freed)->next); (struct amdgpu_bo_va_mapping *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_bo_va_mapping , list) );}); | |||
| 2111 | list_del(&mapping->list); | |||
| 2112 | ||||
| 2113 | if (vm->pte_support_ats && | |||
| 2114 | mapping->start < AMDGPU_GMC_HOLE_START0x0000800000000000ULL) | |||
| 2115 | init_pte_value = AMDGPU_PTE_DEFAULT_ATC((1ULL << 1) | (1ULL << 2) | (1ULL << 4) | ( 1ULL << 5) | (1ULL << 6) | ((uint64_t)(2) << 57)); | |||
| 2116 | ||||
| 2117 | r = amdgpu_vm_bo_update_mapping(adev, vm, false0, false0, resv, | |||
| 2118 | mapping->start, mapping->last, | |||
| 2119 | init_pte_value, 0, NULL((void *)0), &f); | |||
| 2120 | amdgpu_vm_free_mapping(adev, vm, mapping, f); | |||
| 2121 | if (r) { | |||
| 2122 | dma_fence_put(f); | |||
| 2123 | return r; | |||
| 2124 | } | |||
| 2125 | } | |||
| 2126 | ||||
| 2127 | if (fence && f) { | |||
| 2128 | dma_fence_put(*fence); | |||
| 2129 | *fence = f; | |||
| 2130 | } else { | |||
| 2131 | dma_fence_put(f); | |||
| 2132 | } | |||
| 2133 | ||||
| 2134 | return 0; | |||
| 2135 | ||||
| 2136 | } | |||
| 2137 | ||||
| 2138 | /** | |||
| 2139 | * amdgpu_vm_handle_moved - handle moved BOs in the PT | |||
| 2140 | * | |||
| 2141 | * @adev: amdgpu_device pointer | |||
| 2142 | * @vm: requested vm | |||
| 2143 | * | |||
| 2144 | * Make sure all BOs which are moved are updated in the PTs. | |||
| 2145 | * | |||
| 2146 | * Returns: | |||
| 2147 | * 0 for success. | |||
| 2148 | * | |||
| 2149 | * PTs have to be reserved! | |||
| 2150 | */ | |||
| 2151 | int amdgpu_vm_handle_moved(struct amdgpu_device *adev, | |||
| 2152 | struct amdgpu_vm *vm) | |||
| 2153 | { | |||
| 2154 | struct amdgpu_bo_va *bo_va, *tmp; | |||
| 2155 | struct dma_resv *resv; | |||
| 2156 | bool_Bool clear; | |||
| 2157 | int r; | |||
| 2158 | ||||
| 2159 | list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status)for (bo_va = ({ const __typeof( ((__typeof(*bo_va) *)0)->base .vm_status ) *__mptr = ((&vm->moved)->next); (__typeof (*bo_va) *)( (char *)__mptr - __builtin_offsetof(__typeof(*bo_va ), base.vm_status) );}), tmp = ({ const __typeof( ((__typeof( *bo_va) *)0)->base.vm_status ) *__mptr = (bo_va->base.vm_status .next); (__typeof(*bo_va) *)( (char *)__mptr - __builtin_offsetof (__typeof(*bo_va), base.vm_status) );}); &bo_va->base. vm_status != (&vm->moved); bo_va = tmp, tmp = ({ const __typeof( ((__typeof(*tmp) *)0)->base.vm_status ) *__mptr = (tmp->base.vm_status.next); (__typeof(*tmp) *)( (char * )__mptr - __builtin_offsetof(__typeof(*tmp), base.vm_status) ) ;})) { | |||
| 2160 | /* Per VM BOs never need to be cleared in the page tables */ | |||
| 2161 | r = amdgpu_vm_bo_update(adev, bo_va, false0); | |||
| 2162 | if (r) | |||
| 2163 | return r; | |||
| 2164 | } | |||
| 2165 | ||||
| 2166 | spin_lock(&vm->invalidated_lock)mtx_enter(&vm->invalidated_lock); | |||
| 2167 | while (!list_empty(&vm->invalidated)) { | |||
| 2168 | bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,({ const __typeof( ((struct amdgpu_bo_va *)0)->base.vm_status ) *__mptr = ((&vm->invalidated)->next); (struct amdgpu_bo_va *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_bo_va, base.vm_status) );}) | |||
| 2169 | base.vm_status)({ const __typeof( ((struct amdgpu_bo_va *)0)->base.vm_status ) *__mptr = ((&vm->invalidated)->next); (struct amdgpu_bo_va *)( (char *)__mptr - __builtin_offsetof(struct amdgpu_bo_va, base.vm_status) );}); | |||
| 2170 | resv = bo_va->base.bo->tbo.base.resv; | |||
| 2171 | spin_unlock(&vm->invalidated_lock)mtx_leave(&vm->invalidated_lock); | |||
| 2172 | ||||
| 2173 | /* Try to reserve the BO to avoid clearing its ptes */ | |||
| 2174 | if (!amdgpu_vm_debug && dma_resv_trylock(resv)) | |||
| 2175 | clear = false0; | |||
| 2176 | /* Somebody else is using the BO right now */ | |||
| 2177 | else | |||
| 2178 | clear = true1; | |||
| 2179 | ||||
| 2180 | r = amdgpu_vm_bo_update(adev, bo_va, clear); | |||
| 2181 | if (r) | |||
| 2182 | return r; | |||
| 2183 | ||||
| 2184 | if (!clear) | |||
| 2185 | dma_resv_unlock(resv); | |||
| 2186 | spin_lock(&vm->invalidated_lock)mtx_enter(&vm->invalidated_lock); | |||
| 2187 | } | |||
| 2188 | spin_unlock(&vm->invalidated_lock)mtx_leave(&vm->invalidated_lock); | |||
| 2189 | ||||
| 2190 | return 0; | |||
| 2191 | } | |||
| 2192 | ||||
| 2193 | /** | |||
| 2194 | * amdgpu_vm_bo_add - add a bo to a specific vm | |||
| 2195 | * | |||
| 2196 | * @adev: amdgpu_device pointer | |||
| 2197 | * @vm: requested vm | |||
| 2198 | * @bo: amdgpu buffer object | |||
| 2199 | * | |||
| 2200 | * Add @bo into the requested vm. | |||
| 2201 | * Add @bo to the list of bos associated with the vm | |||
| 2202 | * | |||
| 2203 | * Returns: | |||
| 2204 | * Newly added bo_va or NULL for failure | |||
| 2205 | * | |||
| 2206 | * Object has to be reserved! | |||
| 2207 | */ | |||
| 2208 | struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, | |||
| 2209 | struct amdgpu_vm *vm, | |||
| 2210 | struct amdgpu_bo *bo) | |||
| 2211 | { | |||
| 2212 | struct amdgpu_bo_va *bo_va; | |||
| 2213 | ||||
| 2214 | bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL(0x0001 | 0x0004)); | |||
| 2215 | if (bo_va == NULL((void *)0)) { | |||
| 2216 | return NULL((void *)0); | |||
| 2217 | } | |||
| 2218 | amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); | |||
| 2219 | ||||
| 2220 | bo_va->ref_count = 1; | |||
| 2221 | INIT_LIST_HEAD(&bo_va->valids); | |||
| 2222 | INIT_LIST_HEAD(&bo_va->invalids); | |||
| 2223 | ||||
| 2224 | if (!bo) | |||
| 2225 | return bo_va; | |||
| 2226 | ||||
| 2227 | if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) { | |||
| 2228 | bo_va->is_xgmi = true1; | |||
| 2229 | /* Power up XGMI if it can be potentially used */ | |||
| 2230 | amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20); | |||
| 2231 | } | |||
| 2232 | ||||
| 2233 | return bo_va; | |||
| 2234 | } | |||
| 2235 | ||||
| 2236 | ||||
| 2237 | /** | |||
| 2238 | * amdgpu_vm_bo_insert_map - insert a new mapping | |||
| 2239 | * | |||
| 2240 | * @adev: amdgpu_device pointer | |||
| 2241 | * @bo_va: bo_va to store the address | |||
| 2242 | * @mapping: the mapping to insert | |||
| 2243 | * | |||
| 2244 | * Insert a new mapping into all structures. | |||
| 2245 | */ | |||
| 2246 | static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, | |||
| 2247 | struct amdgpu_bo_va *bo_va, | |||
| 2248 | struct amdgpu_bo_va_mapping *mapping) | |||
| 2249 | { | |||
| 2250 | struct amdgpu_vm *vm = bo_va->base.vm; | |||
| 2251 | struct amdgpu_bo *bo = bo_va->base.bo; | |||
| 2252 | ||||
| 2253 | mapping->bo_va = bo_va; | |||
| 2254 | list_add(&mapping->list, &bo_va->invalids); | |||
| 2255 | amdgpu_vm_it_insert(mapping, &vm->va); | |||
| 2256 | ||||
| 2257 | if (mapping->flags & AMDGPU_PTE_PRT(1ULL << 51)) | |||
| 2258 | amdgpu_vm_prt_get(adev); | |||
| 2259 | ||||
| 2260 | if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && | |||
| 2261 | !bo_va->base.moved) { | |||
| 2262 | list_move(&bo_va->base.vm_status, &vm->moved); | |||
| 2263 | } | |||
| 2264 | trace_amdgpu_vm_bo_map(bo_va, mapping); | |||
| 2265 | } | |||
| 2266 | ||||
| 2267 | /** | |||
| 2268 | * amdgpu_vm_bo_map - map bo inside a vm | |||
| 2269 | * | |||
| 2270 | * @adev: amdgpu_device pointer | |||
| 2271 | * @bo_va: bo_va to store the address | |||
| 2272 | * @saddr: where to map the BO | |||
| 2273 | * @offset: requested offset in the BO | |||
| 2274 | * @size: BO size in bytes | |||
| 2275 | * @flags: attributes of pages (read/write/valid/etc.) | |||
| 2276 | * | |||
| 2277 | * Add a mapping of the BO at the specified addr into the VM. | |||
| 2278 | * | |||
| 2279 | * Returns: | |||
| 2280 | * 0 for success, error for failure. | |||
| 2281 | * | |||
| 2282 | * Object has to be reserved and unreserved outside! | |||
| 2283 | */ | |||
| 2284 | int amdgpu_vm_bo_map(struct amdgpu_device *adev, | |||
| 2285 | struct amdgpu_bo_va *bo_va, | |||
| 2286 | uint64_t saddr, uint64_t offset, | |||
| 2287 | uint64_t size, uint64_t flags) | |||
| 2288 | { | |||
| 2289 | struct amdgpu_bo_va_mapping *mapping, *tmp; | |||
| 2290 | struct amdgpu_bo *bo = bo_va->base.bo; | |||
| 2291 | struct amdgpu_vm *vm = bo_va->base.vm; | |||
| 2292 | uint64_t eaddr; | |||
| 2293 | ||||
| 2294 | /* validate the parameters */ | |||
| 2295 | if (saddr & ~LINUX_PAGE_MASK(~((1 << 12) - 1)) || offset & ~LINUX_PAGE_MASK(~((1 << 12) - 1)) || | |||
| 2296 | size == 0 || size & ~LINUX_PAGE_MASK(~((1 << 12) - 1))) | |||
| 2297 | return -EINVAL22; | |||
| 2298 | ||||
| 2299 | /* make sure object fit at this offset */ | |||
| 2300 | eaddr = saddr + size - 1; | |||
| 2301 | if (saddr >= eaddr || | |||
| 2302 | (bo && offset + size > amdgpu_bo_size(bo)) || | |||
| 2303 | (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT12)) | |||
| 2304 | return -EINVAL22; | |||
| 2305 | ||||
| 2306 | saddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2307 | eaddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2308 | ||||
| 2309 | tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); | |||
| 2310 | if (tmp) { | |||
| 2311 | /* bo and tmp overlap, invalid addr */ | |||
| 2312 | dev_err(adev->dev, "bo %p va 0x%010llx-0x%010llx conflict with "printf("drm:pid%d:%s *ERROR* " "bo %p va 0x%010llx-0x%010llx conflict with " "0x%010llx-0x%010llx\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__ , bo, saddr, eaddr, tmp->start, tmp->last + 1 ) | |||
| 2313 | "0x%010llx-0x%010llx\n", bo, saddr, eaddr,printf("drm:pid%d:%s *ERROR* " "bo %p va 0x%010llx-0x%010llx conflict with " "0x%010llx-0x%010llx\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__ , bo, saddr, eaddr, tmp->start, tmp->last + 1 ) | |||
| 2314 | tmp->start, tmp->last + 1)printf("drm:pid%d:%s *ERROR* " "bo %p va 0x%010llx-0x%010llx conflict with " "0x%010llx-0x%010llx\n", ({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid , __func__ , bo, saddr, eaddr, tmp->start, tmp->last + 1 ); | |||
| 2315 | return -EINVAL22; | |||
| 2316 | } | |||
| 2317 | ||||
| 2318 | mapping = kmalloc(sizeof(*mapping), GFP_KERNEL(0x0001 | 0x0004)); | |||
| 2319 | if (!mapping) | |||
| 2320 | return -ENOMEM12; | |||
| 2321 | ||||
| 2322 | mapping->start = saddr; | |||
| 2323 | mapping->last = eaddr; | |||
| 2324 | mapping->offset = offset; | |||
| 2325 | mapping->flags = flags; | |||
| 2326 | ||||
| 2327 | amdgpu_vm_bo_insert_map(adev, bo_va, mapping); | |||
| 2328 | ||||
| 2329 | return 0; | |||
| 2330 | } | |||
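A standalone sketch of the parameter validation this function performs before allocating the mapping: page alignment of address, offset and size, plus range checks against the BO size and the VM address space. It deliberately omits the interval-tree overlap check; the 4 KiB page size and the sample values in main() are assumptions for illustration.

#include <stdint.h>
#include <stdbool.h>

#define SKETCH_PAGE_MASK (~0xfffULL)	/* assumed 4 KiB pages */

static bool sketch_map_args_ok(uint64_t saddr, uint64_t offset, uint64_t size,
			       uint64_t bo_size, uint64_t vm_end)
{
	/* address, offset and size must be page aligned, size non-zero */
	if ((saddr & ~SKETCH_PAGE_MASK) || (offset & ~SKETCH_PAGE_MASK) ||
	    size == 0 || (size & ~SKETCH_PAGE_MASK))
		return false;

	uint64_t eaddr = saddr + size - 1;

	if (saddr >= eaddr ||		/* range wrapped around */
	    offset + size > bo_size ||	/* runs past the end of the BO */
	    eaddr >= vm_end)		/* runs past the VM address space */
		return false;

	return true;
}

int main(void)
{
	/* page aligned, in-range request: expected to pass */
	return sketch_map_args_ok(0x100000, 0, 0x4000, 0x8000, 1ULL << 47) ? 0 : 1;
}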
| 2331 | ||||
| 2332 | /** | |||
| 2333 | * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings | |||
| 2334 | * | |||
| 2335 | * @adev: amdgpu_device pointer | |||
| 2336 | * @bo_va: bo_va to store the address | |||
| 2337 | * @saddr: where to map the BO | |||
| 2338 | * @offset: requested offset in the BO | |||
| 2339 | * @size: BO size in bytes | |||
| 2340 | * @flags: attributes of pages (read/write/valid/etc.) | |||
| 2341 | * | |||
| 2342 | * Add a mapping of the BO at the specified addr into the VM. Replace existing | |||
| 2343 | * mappings as we do so. | |||
| 2344 | * | |||
| 2345 | * Returns: | |||
| 2346 | * 0 for success, error for failure. | |||
| 2347 | * | |||
| 2348 | * Object has to be reserved and unreserved outside! | |||
| 2349 | */ | |||
| 2350 | int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, | |||
| 2351 | struct amdgpu_bo_va *bo_va, | |||
| 2352 | uint64_t saddr, uint64_t offset, | |||
| 2353 | uint64_t size, uint64_t flags) | |||
| 2354 | { | |||
| 2355 | struct amdgpu_bo_va_mapping *mapping; | |||
| 2356 | struct amdgpu_bo *bo = bo_va->base.bo; | |||
| 2357 | uint64_t eaddr; | |||
| 2358 | int r; | |||
| 2359 | ||||
| 2360 | /* validate the parameters */ | |||
| 2361 | if (saddr & ~LINUX_PAGE_MASK(~((1 << 12) - 1)) || offset & ~LINUX_PAGE_MASK(~((1 << 12) - 1)) || | |||
| 2362 | size == 0 || size & ~LINUX_PAGE_MASK(~((1 << 12) - 1))) | |||
| 2363 | return -EINVAL22; | |||
| 2364 | ||||
| 2365 | /* make sure object fit at this offset */ | |||
| 2366 | eaddr = saddr + size - 1; | |||
| 2367 | if (saddr >= eaddr || | |||
| 2368 | (bo && offset + size > amdgpu_bo_size(bo)) || | |||
| 2369 | (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT12)) | |||
| 2370 | return -EINVAL22; | |||
| 2371 | ||||
| 2372 | /* Allocate all the needed memory */ | |||
| 2373 | mapping = kmalloc(sizeof(*mapping), GFP_KERNEL(0x0001 | 0x0004)); | |||
| 2374 | if (!mapping) | |||
| 2375 | return -ENOMEM12; | |||
| 2376 | ||||
| 2377 | r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); | |||
| 2378 | if (r) { | |||
| 2379 | kfree(mapping); | |||
| 2380 | return r; | |||
| 2381 | } | |||
| 2382 | ||||
| 2383 | saddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2384 | eaddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2385 | ||||
| 2386 | mapping->start = saddr; | |||
| 2387 | mapping->last = eaddr; | |||
| 2388 | mapping->offset = offset; | |||
| 2389 | mapping->flags = flags; | |||
| 2390 | ||||
| 2391 | amdgpu_vm_bo_insert_map(adev, bo_va, mapping); | |||
| 2392 | ||||
| 2393 | return 0; | |||
| 2394 | } | |||
| 2395 | ||||
| 2396 | /** | |||
| 2397 | * amdgpu_vm_bo_unmap - remove bo mapping from vm | |||
| 2398 | * | |||
| 2399 | * @adev: amdgpu_device pointer | |||
| 2400 | * @bo_va: bo_va to remove the address from | |||
| 2401 | * @saddr: where the BO is mapped | |||
| 2402 | * | |||
| 2403 | * Remove a mapping of the BO at the specified addr from the VM. | |||
| 2404 | * | |||
| 2405 | * Returns: | |||
| 2406 | * 0 for success, error for failure. | |||
| 2407 | * | |||
| 2408 | * Object has to be reserved and unreserved outside! | |||
| 2409 | */ | |||
| 2410 | int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, | |||
| 2411 | struct amdgpu_bo_va *bo_va, | |||
| 2412 | uint64_t saddr) | |||
| 2413 | { | |||
| 2414 | struct amdgpu_bo_va_mapping *mapping; | |||
| 2415 | struct amdgpu_vm *vm = bo_va->base.vm; | |||
| 2416 | bool_Bool valid = true1; | |||
| 2417 | ||||
| 2418 | saddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2419 | ||||
| 2420 | list_for_each_entry(mapping, &bo_va->valids, list)for (mapping = ({ const __typeof( ((__typeof(*mapping) *)0)-> list ) *__mptr = ((&bo_va->valids)->next); (__typeof (*mapping) *)( (char *)__mptr - __builtin_offsetof(__typeof(* mapping), list) );}); &mapping->list != (&bo_va-> valids); mapping = ({ const __typeof( ((__typeof(*mapping) *) 0)->list ) *__mptr = (mapping->list.next); (__typeof(*mapping ) *)( (char *)__mptr - __builtin_offsetof(__typeof(*mapping), list) );})) { | |||
| 2421 | if (mapping->start == saddr) | |||
| 2422 | break; | |||
| 2423 | } | |||
| 2424 | ||||
| 2425 | if (&mapping->list == &bo_va->valids) { | |||
| 2426 | valid = false0; | |||
| 2427 | ||||
| 2428 | list_for_each_entry(mapping, &bo_va->invalids, list)for (mapping = ({ const __typeof( ((__typeof(*mapping) *)0)-> list ) *__mptr = ((&bo_va->invalids)->next); (__typeof (*mapping) *)( (char *)__mptr - __builtin_offsetof(__typeof(* mapping), list) );}); &mapping->list != (&bo_va-> invalids); mapping = ({ const __typeof( ((__typeof(*mapping) * )0)->list ) *__mptr = (mapping->list.next); (__typeof(* mapping) *)( (char *)__mptr - __builtin_offsetof(__typeof(*mapping ), list) );})) { | |||
| 2429 | if (mapping->start == saddr) | |||
| 2430 | break; | |||
| 2431 | } | |||
| 2432 | ||||
| 2433 | if (&mapping->list == &bo_va->invalids) | |||
| 2434 | return -ENOENT2; | |||
| 2435 | } | |||
| 2436 | ||||
| 2437 | list_del(&mapping->list); | |||
| 2438 | amdgpu_vm_it_remove(mapping, &vm->va); | |||
| 2439 | mapping->bo_va = NULL((void *)0); | |||
| 2440 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | |||
| 2441 | ||||
| 2442 | if (valid) | |||
| 2443 | list_add(&mapping->list, &vm->freed); | |||
| 2444 | else | |||
| 2445 | amdgpu_vm_free_mapping(adev, vm, mapping, | |||
| 2446 | bo_va->last_pt_update); | |||
| 2447 | ||||
| 2448 | return 0; | |||
| 2449 | } | |||
| 2450 | ||||
| 2451 | /** | |||
| 2452 | * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range | |||
| 2453 | * | |||
| 2454 | * @adev: amdgpu_device pointer | |||
| 2455 | * @vm: VM structure to use | |||
| 2456 | * @saddr: start of the range | |||
| 2457 | * @size: size of the range | |||
| 2458 | * | |||
| 2459 | * Remove all mappings in a range, split them as appropriate. | |||
| 2460 | * | |||
| 2461 | * Returns: | |||
| 2462 | * 0 for success, error for failure. | |||
| 2463 | */ | |||
| 2464 | int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, | |||
| 2465 | struct amdgpu_vm *vm, | |||
| 2466 | uint64_t saddr, uint64_t size) | |||
| 2467 | { | |||
| 2468 | struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; | |||
| 2469 | DRM_LIST_HEAD(removed)struct list_head removed = { &(removed), &(removed) }; | |||
| 2470 | uint64_t eaddr; | |||
| 2471 | ||||
| 2472 | eaddr = saddr + size - 1; | |||
| 2473 | saddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2474 | eaddr /= AMDGPU_GPU_PAGE_SIZE4096; | |||
| 2475 | ||||
| 2476 | /* Allocate all the needed memory */ | |||
| 2477 | before = kzalloc(sizeof(*before), GFP_KERNEL(0x0001 | 0x0004)); | |||
| 2478 | if (!before) | |||
| 2479 | return -ENOMEM12; | |||
| 2480 | INIT_LIST_HEAD(&before->list); | |||
| 2481 | ||||
| 2482 | after = kzalloc(sizeof(*after), GFP_KERNEL(0x0001 | 0x0004)); | |||
| 2483 | if (!after) { | |||
| 2484 | kfree(before); | |||
| 2485 | return -ENOMEM12; | |||
| 2486 | } | |||
| 2487 | INIT_LIST_HEAD(&after->list); | |||
| 2488 | ||||
| 2489 | /* Now gather all removed mappings */ | |||
| 2490 | tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); | |||
| 2491 | while (tmp) { | |||
| 2492 | /* Remember mapping split at the start */ | |||
| 2493 | if (tmp->start < saddr) { | |||
| 2494 | before->start = tmp->start; | |||
| 2495 | before->last = saddr - 1; | |||
| 2496 | before->offset = tmp->offset; | |||
| 2497 | before->flags = tmp->flags; | |||
| 2498 | before->bo_va = tmp->bo_va; | |||
| 2499 | list_add(&before->list, &tmp->bo_va->invalids); | |||
| 2500 | } | |||
| 2501 | ||||
| 2502 | /* Remember mapping split at the end */ | |||
| 2503 | if (tmp->last > eaddr) { | |||
| 2504 | after->start = eaddr + 1; | |||
| 2505 | after->last = tmp->last; | |||
| 2506 | after->offset = tmp->offset; | |||
| 2507 | after->offset += (after->start - tmp->start) << PAGE_SHIFT12; | |||
| 2508 | after->flags = tmp->flags; | |||
| 2509 | after->bo_va = tmp->bo_va; | |||
| 2510 | list_add(&after->list, &tmp->bo_va->invalids); | |||
| 2511 | } | |||
| 2512 | ||||
| 2513 | list_del(&tmp->list); | |||
| 2514 | list_add(&tmp->list, &removed); | |||
| 2515 | ||||
| 2516 | tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); | |||
| 2517 | } | |||
| 2518 | ||||
| 2519 | /* And free them up */ | |||
| 2520 | list_for_each_entry_safe(tmp, next, &removed, list)for (tmp = ({ const __typeof( ((__typeof(*tmp) *)0)->list ) *__mptr = ((&removed)->next); (__typeof(*tmp) *)( (char *)__mptr - __builtin_offsetof(__typeof(*tmp), list) );}), next = ({ const __typeof( ((__typeof(*tmp) *)0)->list ) *__mptr = (tmp->list.next); (__typeof(*tmp) *)( (char *)__mptr - __builtin_offsetof (__typeof(*tmp), list) );}); &tmp->list != (&removed ); tmp = next, next = ({ const __typeof( ((__typeof(*next) *) 0)->list ) *__mptr = (next->list.next); (__typeof(*next ) *)( (char *)__mptr - __builtin_offsetof(__typeof(*next), list ) );})) { | |||
| 2521 | amdgpu_vm_it_remove(tmp, &vm->va); | |||
| 2522 | list_del(&tmp->list); | |||
| 2523 | ||||
| 2524 | if (tmp->start < saddr) | |||
| 2525 | tmp->start = saddr; | |||
| 2526 | if (tmp->last > eaddr) | |||
| 2527 | tmp->last = eaddr; | |||
| 2528 | ||||
| 2529 | tmp->bo_va = NULL((void *)0); | |||
| 2530 | list_add(&tmp->list, &vm->freed); | |||
| 2531 | trace_amdgpu_vm_bo_unmap(NULL((void *)0), tmp); | |||
| 2532 | } | |||
| 2533 | ||||
| 2534 | /* Insert partial mapping before the range */ | |||
| 2535 | if (!list_empty(&before->list)) { | |||
| 2536 | amdgpu_vm_it_insert(before, &vm->va); | |||
| 2537 | if (before->flags & AMDGPU_PTE_PRT(1ULL << 51)) | |||
| 2538 | amdgpu_vm_prt_get(adev); | |||
| 2539 | } else { | |||
| 2540 | kfree(before); | |||
| 2541 | } | |||
| 2542 | ||||
| 2543 | /* Insert partial mapping after the range */ | |||
| 2544 | if (!list_empty(&after->list)) { | |||
| 2545 | amdgpu_vm_it_insert(after, &vm->va); | |||
| 2546 | if (after->flags & AMDGPU_PTE_PRT(1ULL << 51)) | |||
| 2547 | amdgpu_vm_prt_get(adev); | |||
| 2548 | } else { | |||
| 2549 | kfree(after); | |||
| 2550 | } | |||
| 2551 | ||||
| 2552 | return 0; | |||
| 2553 | } | |||
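A standalone worked example of the split logic above: clearing the middle of one existing mapping leaves a "before" piece below the range and an "after" piece above it, with the "after" BO offset advanced by the number of skipped pages shifted by PAGE_SHIFT. All values and the 4 KiB page shift are illustrative, not taken from real driver state.

#include <stdint.h>
#include <stdio.h>

struct piece {
	uint64_t start, last, offset;
};

int main(void)
{
	/* existing mapping: GPU pages 0x100..0x4ff backed from BO offset 0 */
	struct piece map = { 0x100, 0x4ff, 0 };
	uint64_t saddr = 0x200, eaddr = 0x2ff;	/* range being cleared */

	if (map.start < saddr) {
		/* keep everything below the cleared range */
		struct piece before = { map.start, saddr - 1, map.offset };

		printf("before: 0x%llx-0x%llx offset 0x%llx\n",
		       (unsigned long long)before.start,
		       (unsigned long long)before.last,
		       (unsigned long long)before.offset);
	}
	if (map.last > eaddr) {
		/* keep everything above it, with the BO offset advanced */
		struct piece after = {
			eaddr + 1, map.last,
			map.offset + ((eaddr + 1 - map.start) << 12)
		};

		printf("after:  0x%llx-0x%llx offset 0x%llx\n",
		       (unsigned long long)after.start,
		       (unsigned long long)after.last,
		       (unsigned long long)after.offset);
	}
	return 0;
}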
| 2554 | ||||
| 2555 | /** | |||
| 2556 | * amdgpu_vm_bo_lookup_mapping - find mapping by address | |||
| 2557 | * | |||
| 2558 | * @vm: the requested VM | |||
| 2559 | * @addr: the address | |||
| 2560 | * | |||
| 2561 | * Find a mapping by its address. | |||
| 2562 | * | |||
| 2563 | * Returns: | |||
| 2564 | * The amdgpu_bo_va_mapping matching for addr or NULL | |||
| 2565 | * | |||
| 2566 | */ | |||
| 2567 | struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, | |||
| 2568 | uint64_t addr) | |||
| 2569 | { | |||
| 2570 | return amdgpu_vm_it_iter_first(&vm->va, addr, addr); | |||
| 2571 | } | |||
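| | /* | |||
| | * Caller-side sketch (illustrative only, not part of this file; the helper | |||
| | * name is made up): the interval tree is keyed by GPU page number, so a | |||
| | * byte address is shifted down by the GPU page shift before the lookup. | |||
| | */ | |||
| | #if 0 | |||
| | static bool example_addr_is_mapped(struct amdgpu_vm *vm, uint64_t byte_addr) | |||
| | { | |||
| | /* NULL means no bo_va mapping covers that page */ | |||
| | return amdgpu_vm_bo_lookup_mapping(vm, | |||
| | byte_addr >> AMDGPU_GPU_PAGE_SHIFT) != NULL; | |||
| | } | |||
| | #endif | |||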
| 2572 | ||||
| 2573 | /** | |||
| 2574 | * amdgpu_vm_bo_trace_cs - trace all reserved mappings | |||
| 2575 | * | |||
| 2576 | * @vm: the requested vm | |||
| 2577 | * @ticket: CS ticket | |||
| 2578 | * | |||
| 2579 | * Trace all mappings of BOs reserved during a command submission. | |||
| 2580 | */ | |||
| 2581 | void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) | |||
| 2582 | { | |||
| 2583 | struct amdgpu_bo_va_mapping *mapping; | |||
| 2584 | ||||
| 2585 | if (!trace_amdgpu_vm_bo_cs_enabled()) | |||
| 2586 | return; | |||
| 2587 | ||||
| 2588 | for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping; | |||
| 2589 | mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) { | |||
| 2590 | if (mapping->bo_va && mapping->bo_va->base.bo) { | |||
| 2591 | struct amdgpu_bo *bo; | |||
| 2592 | ||||
| 2593 | bo = mapping->bo_va->base.bo; | |||
| 2594 | if (dma_resv_locking_ctx(bo->tbo.base.resv) != | |||
| 2595 | ticket) | |||
| 2596 | continue; | |||
| 2597 | } | |||
| 2598 | ||||
| 2599 | trace_amdgpu_vm_bo_cs(mapping); | |||
| 2600 | } | |||
| 2601 | } | |||
| 2602 | ||||
| 2603 | /** | |||
| 2604 | * amdgpu_vm_bo_rmv - remove a bo from a specific vm | |||
| 2605 | * | |||
| 2606 | * @adev: amdgpu_device pointer | |||
| 2607 | * @bo_va: requested bo_va | |||
| 2608 | * | |||
| 2609 | * Remove @bo_va->bo from the requested vm. | |||
| 2610 | * | |||
| 2611 | * Object has to be reserved! | |||
| 2612 | */ | |||
| 2613 | void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, | |||
| 2614 | struct amdgpu_bo_va *bo_va) | |||
| 2615 | { | |||
| 2616 | struct amdgpu_bo_va_mapping *mapping, *next; | |||
| 2617 | struct amdgpu_bo *bo = bo_va->base.bo; | |||
| 2618 | struct amdgpu_vm *vm = bo_va->base.vm; | |||
| 2619 | struct amdgpu_vm_bo_base **base; | |||
| 2620 | ||||
| 2621 | if (bo) { | |||
| 2622 | if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) | |||
| 2623 | vm->bulk_moveable = false; | |||
| 2624 | ||||
| 2625 | for (base = &bo_va->base.bo->vm_bo; *base; | |||
| 2626 | base = &(*base)->next) { | |||
| 2627 | if (*base != &bo_va->base) | |||
| 2628 | continue; | |||
| 2629 | ||||
| 2630 | *base = bo_va->base.next; | |||
| 2631 | break; | |||
| 2632 | } | |||
| 2633 | } | |||
| 2634 | ||||
| 2635 | spin_lock(&vm->invalidated_lock); | |||
| 2636 | list_del(&bo_va->base.vm_status); | |||
| 2637 | spin_unlock(&vm->invalidated_lock); | |||
| 2638 | ||||
| 2639 | list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { | |||
| 2640 | list_del(&mapping->list); | |||
| 2641 | amdgpu_vm_it_remove(mapping, &vm->va); | |||
| 2642 | mapping->bo_va = NULL; | |||
| 2643 | trace_amdgpu_vm_bo_unmap(bo_va, mapping); | |||
| 2644 | list_add(&mapping->list, &vm->freed); | |||
| 2645 | } | |||
| 2646 | list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { | |||
| 2647 | list_del(&mapping->list); | |||
| 2648 | amdgpu_vm_it_remove(mapping, &vm->va); | |||
| 2649 | amdgpu_vm_free_mapping(adev, vm, mapping, | |||
| 2650 | bo_va->last_pt_update); | |||
| 2651 | } | |||
| 2652 | ||||
| 2653 | dma_fence_put(bo_va->last_pt_update); | |||
| 2654 | ||||
| 2655 | if (bo && bo_va->is_xgmi) | |||
| 2656 | amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN); | |||
| 2657 | ||||
| 2658 | kfree(bo_va); | |||
| 2659 | } | |||
| 2660 | ||||
| 2661 | /** | |||
| 2662 | * amdgpu_vm_evictable - check if we can evict a VM | |||
| 2663 | * | |||
| 2664 | * @bo: A page table of the VM. | |||
| 2665 | * | |||
| 2666 | * Check if it is possible to evict a VM. | |||
| 2667 | */ | |||
| 2668 | bool amdgpu_vm_evictable(struct amdgpu_bo *bo) | |||
| 2669 | { | |||
| 2670 | struct amdgpu_vm_bo_base *bo_base = bo->vm_bo; | |||
| 2671 | ||||
| 2672 | /* Page tables of a destroyed VM can go away immediately */ | |||
| 2673 | if (!bo_base || !bo_base->vm) | |||
| 2674 | return true; | |||
| 2675 | ||||
| 2676 | /* Don't evict VM page tables while they are busy */ | |||
| 2677 | if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true)) | |||
| 2678 | return false; | |||
| 2679 | ||||
| 2680 | /* Try to block ongoing updates */ | |||
| 2681 | if (!amdgpu_vm_eviction_trylock(bo_base->vm)) | |||
| 2682 | return false; | |||
| 2683 | ||||
| 2684 | /* Don't evict VM page tables while they are updated */ | |||
| 2685 | if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) { | |||
| 2686 | amdgpu_vm_eviction_unlock(bo_base->vm); | |||
| 2687 | return false; | |||
| 2688 | } | |||
| 2689 | ||||
| 2690 | bo_base->vm->evicting = true; | |||
| 2691 | amdgpu_vm_eviction_unlock(bo_base->vm); | |||
| 2692 | return true; | |||
| 2693 | } | |||
| 2694 | ||||
| 2695 | /** | |||
| 2696 | * amdgpu_vm_bo_invalidate - mark the bo as invalid | |||
| 2697 | * | |||
| 2698 | * @adev: amdgpu_device pointer | |||
| 2699 | * @bo: amdgpu buffer object | |||
| 2700 | * @evicted: is the BO evicted | |||
| 2701 | * | |||
| 2702 | * Mark @bo as invalid. | |||
| 2703 | */ | |||
| 2704 | void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, | |||
| 2705 | struct amdgpu_bo *bo, bool evicted) | |||
| 2706 | { | |||
| 2707 | struct amdgpu_vm_bo_base *bo_base; | |||
| 2708 | ||||
| 2709 | /* shadow bo doesn't have bo base, its validation needs its parent */ | |||
| 2710 | if (bo->parent && bo->parent->shadow == bo) | |||
| 2711 | bo = bo->parent; | |||
| 2712 | ||||
| 2713 | for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { | |||
| 2714 | struct amdgpu_vm *vm = bo_base->vm; | |||
| 2715 | ||||
| 2716 | if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { | |||
| 2717 | amdgpu_vm_bo_evicted(bo_base); | |||
| 2718 | continue; | |||
| 2719 | } | |||
| 2720 | ||||
| 2721 | if (bo_base->moved) | |||
| 2722 | continue; | |||
| 2723 | bo_base->moved = true; | |||
| 2724 | ||||
| 2725 | if (bo->tbo.type == ttm_bo_type_kernel) | |||
| 2726 | amdgpu_vm_bo_relocated(bo_base); | |||
| 2727 | else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) | |||
| 2728 | amdgpu_vm_bo_moved(bo_base); | |||
| 2729 | else | |||
| 2730 | amdgpu_vm_bo_invalidated(bo_base); | |||
| 2731 | } | |||
| 2732 | } | |||
| 2733 | ||||
| 2734 | /** | |||
| 2735 | * amdgpu_vm_get_block_size - calculate VM page table size as power of two | |||
| 2736 | * | |||
| 2737 | * @vm_size: VM size | |||
| 2738 | * | |||
| 2739 | * Returns: | |||
| 2740 | * VM page table as power of two | |||
| 2741 | */ | |||
| 2742 | static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) | |||
| 2743 | { | |||
| 2744 | /* Total bits covered by PD + PTs */ | |||
| 2745 | unsigned bits = ilog2(vm_size) + 18; | |||
| 2746 | ||||
| 2747 | /* Make sure the PD is 4K in size up to 8GB address space. | |||
| 2748 | Above that split equal between PD and PTs */ | |||
| 2749 | if (vm_size <= 8) | |||
| 2750 | return (bits - 9); | |||
| 2751 | else | |||
| 2752 | return ((bits + 3) / 2); | |||
| 2753 | } | |||
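| | /* | |||
| | * Quick illustration of the math above (example values, not from the source): | |||
| | * vm_size = 8 GB covers ilog2(8) + 18 = 21 bits of 4K pages; a 4K PD holds | |||
| | * 512 entries, i.e. 9 of those bits, leaving a block size of 21 - 9 = 12. | |||
| | * For vm_size = 256 GB, bits = 26 and the PTs get (26 + 3) / 2 = 14 bits, | |||
| | * with the remainder going to the PD. | |||
| | */ | |||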
| 2754 | ||||
| 2755 | /** | |||
| 2756 | * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size | |||
| 2757 | * | |||
| 2758 | * @adev: amdgpu_device pointer | |||
| 2759 | * @min_vm_size: the minimum vm size in GB if it's set auto | |||
| 2760 | * @fragment_size_default: Default PTE fragment size | |||
| 2761 | * @max_level: max VMPT level | |||
| 2762 | * @max_bits: max address space size in bits | |||
| 2763 | * | |||
| 2764 | */ | |||
| 2765 | void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, | |||
| 2766 | uint32_t fragment_size_default, unsigned max_level, | |||
| 2767 | unsigned max_bits) | |||
| 2768 | { | |||
| 2769 | unsigned int max_size = 1 << (max_bits - 30); | |||
| 2770 | unsigned int vm_size; | |||
| 2771 | uint64_t tmp; | |||
| 2772 | ||||
| 2773 | /* adjust vm size first */ | |||
| 2774 | if (amdgpu_vm_size != -1) { | |||
| 2775 | vm_size = amdgpu_vm_size; | |||
| 2776 | if (vm_size > max_size) { | |||
| 2777 | dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n",printf("drm:pid%d:%s *WARNING* " "VM size (%d) too large, max is %u GB\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_vm_size , max_size) | |||
| 2778 | amdgpu_vm_size, max_size)printf("drm:pid%d:%s *WARNING* " "VM size (%d) too large, max is %u GB\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , amdgpu_vm_size , max_size); | |||
| 2779 | vm_size = max_size; | |||
| 2780 | } | |||
| 2781 | } else { | |||
| 2782 | #ifdef __linux__ | |||
| 2783 | struct sysinfo si; | |||
| 2784 | #endif | |||
| 2785 | unsigned int phys_ram_gb; | |||
| 2786 | ||||
| 2787 | /* Optimal VM size depends on the amount of physical | |||
| 2788 | * RAM available. Underlying requirements and | |||
| 2789 | * assumptions: | |||
| 2790 | * | |||
| 2791 | * - Need to map system memory and VRAM from all GPUs | |||
| 2792 | * - VRAM from other GPUs not known here | |||
| 2793 | * - Assume VRAM <= system memory | |||
| 2794 | * - On GFX8 and older, VM space can be segmented for | |||
| 2795 | * different MTYPEs | |||
| 2796 | * - Need to allow room for fragmentation, guard pages etc. | |||
| 2797 | * | |||
| 2798 | * This adds up to a rough guess of system memory x3. | |||
| 2799 | * Round up to power of two to maximize the available | |||
| 2800 | * VM size with the given page table size. | |||
| 2801 | */ | |||
| 2802 | #ifdef __linux__ | |||
| 2803 | si_meminfo(&si); | |||
| 2804 | phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + | |||
| 2805 | (1 << 30) - 1) >> 30; | |||
| 2806 | #else | |||
| 2807 | phys_ram_gb = ((uint64_t)ptoa(physmem) + | |||
| 2808 | (1 << 30) - 1) >> 30; | |||
| 2809 | #endif | |||
| 2810 | vm_size = roundup_pow_of_two( | |||
| 2811 | min(max(phys_ram_gb * 3, min_vm_size), max_size)); | |||
| 2812 | } | |||
| 2813 | ||||
| 2814 | adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; | |||
| 2815 | ||||
| 2816 | tmp = roundup_pow_of_two(adev->vm_manager.max_pfn); | |||
| 2817 | if (amdgpu_vm_block_size != -1) | |||
| 2818 | tmp >>= amdgpu_vm_block_size - 9; | |||
| 2819 | tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; | |||
| 2820 | adev->vm_manager.num_level = min(max_level, (unsigned)tmp); | |||
| 2821 | switch (adev->vm_manager.num_level) { | |||
| 2822 | case 3: | |||
| 2823 | adev->vm_manager.root_level = AMDGPU_VM_PDB2; | |||
| 2824 | break; | |||
| 2825 | case 2: | |||
| 2826 | adev->vm_manager.root_level = AMDGPU_VM_PDB1; | |||
| 2827 | break; | |||
| 2828 | case 1: | |||
| 2829 | adev->vm_manager.root_level = AMDGPU_VM_PDB0; | |||
| 2830 | break; | |||
| 2831 | default: | |||
| 2832 | dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n")printf("drm:pid%d:%s *ERROR* " "VMPT only supports 2~4+1 levels\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__); | |||
| 2833 | } | |||
| 2834 | /* block size depends on vm size and hw setup*/ | |||
| 2835 | if (amdgpu_vm_block_size != -1) | |||
| 2836 | adev->vm_manager.block_size = | |||
| 2837 | min((unsigned)amdgpu_vm_block_size, max_bits | |||
| 2838 | - AMDGPU_GPU_PAGE_SHIFT | |||
| 2839 | - 9 * adev->vm_manager.num_level); | |||
| 2840 | else if (adev->vm_manager.num_level > 1) | |||
| 2841 | adev->vm_manager.block_size = 9; | |||
| 2842 | else | |||
| 2843 | adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp); | |||
| 2844 | ||||
| 2845 | if (amdgpu_vm_fragment_size == -1) | |||
| 2846 | adev->vm_manager.fragment_size = fragment_size_default; | |||
| 2847 | else | |||
| 2848 | adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; | |||
| 2849 | ||||
| 2850 | DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n",printk("\0016" "[" "drm" "] " "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n" , vm_size, adev->vm_manager.num_level + 1, adev->vm_manager .block_size, adev->vm_manager.fragment_size) | |||
| 2851 | vm_size, adev->vm_manager.num_level + 1,printk("\0016" "[" "drm" "] " "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n" , vm_size, adev->vm_manager.num_level + 1, adev->vm_manager .block_size, adev->vm_manager.fragment_size) | |||
| 2852 | adev->vm_manager.block_size,printk("\0016" "[" "drm" "] " "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n" , vm_size, adev->vm_manager.num_level + 1, adev->vm_manager .block_size, adev->vm_manager.fragment_size) | |||
| 2853 | adev->vm_manager.fragment_size)printk("\0016" "[" "drm" "] " "vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n" , vm_size, adev->vm_manager.num_level + 1, adev->vm_manager .block_size, adev->vm_manager.fragment_size); | |||
| 2854 | } | |||
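| | /* | |||
| | * Worked example with assumed numbers (16 GB of system RAM, default module | |||
| | * parameters, min_vm_size below 48): phys_ram_gb * 3 = 48 rounds up to a | |||
| | * 64 GB VM, max_pfn = 64 << 18 = 2^24 pages, fls64(2^24) - 1 = 24, and | |||
| | * DIV_ROUND_UP(24, 9) - 1 = 2, so num_level becomes min(max_level, 2). | |||
| | */ | |||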
| 2855 | ||||
| 2856 | /** | |||
| 2857 | * amdgpu_vm_wait_idle - wait for the VM to become idle | |||
| 2858 | * | |||
| 2859 | * @vm: VM object to wait for | |||
| 2860 | * @timeout: timeout to wait for VM to become idle | |||
| 2861 | */ | |||
| 2862 | long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) | |||
| 2863 | { | |||
| 2864 | timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, | |||
| 2865 | true, true, timeout); | |||
| 2866 | if (timeout <= 0) | |||
| 2867 | return timeout; | |||
| 2868 | ||||
| 2869 | return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); | |||
| 2870 | } | |||
| 2871 | ||||
| 2872 | /** | |||
| 2873 | * amdgpu_vm_init - initialize a vm instance | |||
| 2874 | * | |||
| 2875 | * @adev: amdgpu_device pointer | |||
| 2876 | * @vm: requested vm | |||
| 2877 | * @vm_context: Indicates if it is a GFX or Compute context | |||
| 2878 | * @pasid: Process address space identifier | |||
| 2879 | * | |||
| 2880 | * Init @vm fields. | |||
| 2881 | * | |||
| 2882 | * Returns: | |||
| 2883 | * 0 for success, error for failure. | |||
| 2884 | */ | |||
| 2885 | int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 2886 | int vm_context, u32 pasid) | |||
| 2887 | { | |||
| 2888 | struct amdgpu_bo_param bp; | |||
| 2889 | struct amdgpu_bo *root; | |||
| 2890 | int r, i; | |||
| 2891 | ||||
| 2892 | vm->va = RB_ROOT_CACHED; | |||
| 2893 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | |||
| 2894 | vm->reserved_vmid[i] = NULL; | |||
| 2895 | INIT_LIST_HEAD(&vm->evicted); | |||
| 2896 | INIT_LIST_HEAD(&vm->relocated); | |||
| 2897 | INIT_LIST_HEAD(&vm->moved); | |||
| 2898 | INIT_LIST_HEAD(&vm->idle); | |||
| 2899 | INIT_LIST_HEAD(&vm->invalidated); | |||
| 2900 | mtx_init(&vm->invalidated_lock, IPL_NONE); | |||
| 2901 | INIT_LIST_HEAD(&vm->freed); | |||
| 2902 | ||||
| 2903 | ||||
| 2904 | /* create scheduler entities for page table updates */ | |||
| 2905 | r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, | |||
| 2906 | adev->vm_manager.vm_pte_scheds, | |||
| 2907 | adev->vm_manager.vm_pte_num_scheds, NULL); | |||
| 2908 | if (r) | |||
| 2909 | return r; | |||
| 2910 | ||||
| 2911 | r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, | |||
| 2912 | adev->vm_manager.vm_pte_scheds, | |||
| 2913 | adev->vm_manager.vm_pte_num_scheds, NULL); | |||
| 2914 | if (r) | |||
| 2915 | goto error_free_immediate; | |||
| 2916 | ||||
| 2917 | vm->pte_support_ats = false; | |||
| 2918 | vm->is_compute_context = false; | |||
| 2919 | ||||
| 2920 | if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { | |||
| 2921 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & | |||
| 2922 | AMDGPU_VM_USE_CPU_FOR_COMPUTE); | |||
| 2923 | ||||
| 2924 | if (adev->asic_type == CHIP_RAVEN) | |||
| 2925 | vm->pte_support_ats = true; | |||
| 2926 | } else { | |||
| 2927 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & | |||
| 2928 | AMDGPU_VM_USE_CPU_FOR_GFX); | |||
| 2929 | } | |||
| 2930 | DRM_DEBUG_DRIVER("VM update mode is %s\n", | |||
| 2931 | vm->use_cpu_for_update ? "CPU" : "SDMA"); | |||
| 2932 | WARN_ONCE((vm->use_cpu_for_update && | |||
| 2933 | !amdgpu_gmc_vram_full_visible(&adev->gmc)), | |||
| 2934 | "CPU update of VM recommended only for large BAR system\n"); | |||
| 2935 | ||||
| 2936 | if (vm->use_cpu_for_update) | |||
| 2937 | vm->update_funcs = &amdgpu_vm_cpu_funcs; | |||
| 2938 | else | |||
| 2939 | vm->update_funcs = &amdgpu_vm_sdma_funcs; | |||
| 2940 | vm->last_update = NULL; | |||
| 2941 | vm->last_unlocked = dma_fence_get_stub(); | |||
| 2942 | ||||
| 2943 | rw_init(&vm->eviction_lock, "avmev"); | |||
| 2944 | vm->evicting = false; | |||
| 2945 | ||||
| 2946 | amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); | |||
| 2947 | if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) | |||
| 2948 | bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; | |||
| 2949 | r = amdgpu_bo_create(adev, &bp, &root); | |||
| 2950 | if (r) | |||
| 2951 | goto error_free_delayed; | |||
| 2952 | ||||
| 2953 | r = amdgpu_bo_reserve(root, true); | |||
| 2954 | if (r) | |||
| 2955 | goto error_free_root; | |||
| 2956 | ||||
| 2957 | r = dma_resv_reserve_shared(root->tbo.base.resv, 1); | |||
| 2958 | if (r) | |||
| 2959 | goto error_unreserve; | |||
| 2960 | ||||
| 2961 | amdgpu_vm_bo_base_init(&vm->root.base, vm, root); | |||
| 2962 | ||||
| 2963 | r = amdgpu_vm_clear_bo(adev, vm, root, false); | |||
| 2964 | if (r) | |||
| 2965 | goto error_unreserve; | |||
| 2966 | ||||
| 2967 | amdgpu_bo_unreserve(vm->root.base.bo); | |||
| 2968 | ||||
| 2969 | if (pasid) { | |||
| 2970 | unsigned long flags; | |||
| 2971 | ||||
| 2972 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 2973 | r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, | |||
| 2974 | GFP_ATOMIC); | |||
| 2975 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 2976 | if (r < 0) | |||
| 2977 | goto error_free_root; | |||
| 2978 | ||||
| 2979 | vm->pasid = pasid; | |||
| 2980 | } | |||
| 2981 | ||||
| 2982 | #ifdef __linux__ | |||
| 2983 | INIT_KFIFO(vm->faults); | |||
| 2984 | #else | |||
| 2985 | SIMPLEQ_INIT(&vm->faults); | |||
| 2986 | #endif | |||
| 2987 | ||||
| 2988 | return 0; | |||
| 2989 | ||||
| 2990 | error_unreserve: | |||
| 2991 | amdgpu_bo_unreserve(vm->root.base.bo); | |||
| 2992 | ||||
| 2993 | error_free_root: | |||
| 2994 | amdgpu_bo_unref(&vm->root.base.bo->shadow); | |||
| 2995 | amdgpu_bo_unref(&vm->root.base.bo); | |||
| 2996 | vm->root.base.bo = NULL; | |||
| 2997 | ||||
| 2998 | error_free_delayed: | |||
| 2999 | dma_fence_put(vm->last_unlocked); | |||
| 3000 | drm_sched_entity_destroy(&vm->delayed); | |||
| 3001 | ||||
| 3002 | error_free_immediate: | |||
| 3003 | drm_sched_entity_destroy(&vm->immediate); | |||
| 3004 | ||||
| 3005 | return r; | |||
| 3006 | } | |||
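| | /* | |||
| | * Rough lifecycle sketch (assumed caller, modeled on a driver open path; not | |||
| | * code from this file): a VM is created with a context type and an optional | |||
| | * pasid, and must later be torn down with amdgpu_vm_fini(). | |||
| | */ | |||
| | #if 0 | |||
| | r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid); | |||
| | if (r) | |||
| | return r; | |||
| | /* ... command submission against fpriv->vm ... */ | |||
| | amdgpu_vm_fini(adev, &fpriv->vm); | |||
| | #endif | |||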
| 3007 | ||||
| 3008 | /** | |||
| 3009 | * amdgpu_vm_check_clean_reserved - check if a VM is clean | |||
| 3010 | * | |||
| 3011 | * @adev: amdgpu_device pointer | |||
| 3012 | * @vm: the VM to check | |||
| 3013 | * | |||
| 3014 | * Check all entries of the root PD. If any subsequent PDs are allocated, | |||
| 3015 | * page tables are being created and filled, so the VM is not a clean | |||
| 3016 | * VM. | |||
| 3017 | * | |||
| 3018 | * Returns: | |||
| 3019 | * 0 if this VM is clean | |||
| 3020 | */ | |||
| 3021 | static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, | |||
| 3022 | struct amdgpu_vm *vm) | |||
| 3023 | { | |||
| 3024 | enum amdgpu_vm_level root = adev->vm_manager.root_level; | |||
| 3025 | unsigned int entries = amdgpu_vm_num_entries(adev, root); | |||
| 3026 | unsigned int i = 0; | |||
| 3027 | ||||
| 3028 | if (!(vm->root.entries)) | |||
| 3029 | return 0; | |||
| 3030 | ||||
| 3031 | for (i = 0; i < entries; i++) { | |||
| 3032 | if (vm->root.entries[i].base.bo) | |||
| 3033 | return -EINVAL; | |||
| 3034 | } | |||
| 3035 | ||||
| 3036 | return 0; | |||
| 3037 | } | |||
| 3038 | ||||
| 3039 | /** | |||
| 3040 | * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM | |||
| 3041 | * | |||
| 3042 | * @adev: amdgpu_device pointer | |||
| 3043 | * @vm: requested vm | |||
| 3044 | * @pasid: pasid to use | |||
| 3045 | * | |||
| 3046 | * This only works on GFX VMs that don't have any BOs added and no | |||
| 3047 | * page tables allocated yet. | |||
| 3048 | * | |||
| 3049 | * Changes the following VM parameters: | |||
| 3050 | * - use_cpu_for_update | |||
| 3051 | * - pte_supports_ats | |||
| 3052 | * - pasid (old PASID is released, because compute manages its own PASIDs) | |||
| 3053 | * | |||
| 3054 | * Reinitializes the page directory to reflect the changed ATS | |||
| 3055 | * setting. | |||
| 3056 | * | |||
| 3057 | * Returns: | |||
| 3058 | * 0 for success, -errno for errors. | |||
| 3059 | */ | |||
| 3060 | int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, | |||
| 3061 | u32 pasid) | |||
| 3062 | { | |||
| 3063 | bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); | |||
| 3064 | int r; | |||
| 3065 | ||||
| 3066 | r = amdgpu_bo_reserve(vm->root.base.bo, true); | |||
| 3067 | if (r) | |||
| 3068 | return r; | |||
| 3069 | ||||
| 3070 | /* Sanity checks */ | |||
| 3071 | r = amdgpu_vm_check_clean_reserved(adev, vm); | |||
| 3072 | if (r) | |||
| 3073 | goto unreserve_bo; | |||
| 3074 | ||||
| 3075 | if (pasid) { | |||
| 3076 | unsigned long flags; | |||
| 3077 | ||||
| 3078 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 3079 | r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, | |||
| 3080 | GFP_ATOMIC); | |||
| 3081 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 3082 | ||||
| 3083 | if (r == -ENOSPC) | |||
| 3084 | goto unreserve_bo; | |||
| 3085 | r = 0; | |||
| 3086 | } | |||
| 3087 | ||||
| 3088 | /* Check if PD needs to be reinitialized and do it before | |||
| 3089 | * changing any other state, in case it fails. | |||
| 3090 | */ | |||
| 3091 | if (pte_support_ats != vm->pte_support_ats) { | |||
| 3092 | vm->pte_support_ats = pte_support_ats; | |||
| 3093 | r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); | |||
| 3094 | if (r) | |||
| 3095 | goto free_idr; | |||
| 3096 | } | |||
| 3097 | ||||
| 3098 | /* Update VM state */ | |||
| 3099 | vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & | |||
| 3100 | AMDGPU_VM_USE_CPU_FOR_COMPUTE); | |||
| 3101 | DRM_DEBUG_DRIVER("VM update mode is %s\n", | |||
| 3102 | vm->use_cpu_for_update ? "CPU" : "SDMA"); | |||
| 3103 | WARN_ONCE((vm->use_cpu_for_update && | |||
| 3104 | !amdgpu_gmc_vram_full_visible(&adev->gmc)), | |||
| 3105 | "CPU update of VM recommended only for large BAR system\n"); | |||
| 3106 | ||||
| 3107 | if (vm->use_cpu_for_update) { | |||
| 3108 | /* Sync with last SDMA update/clear before switching to CPU */ | |||
| 3109 | r = amdgpu_bo_sync_wait(vm->root.base.bo, | |||
| 3110 | AMDGPU_FENCE_OWNER_UNDEFINED, true); | |||
| 3111 | if (r) | |||
| 3112 | goto free_idr; | |||
| 3113 | ||||
| 3114 | vm->update_funcs = &amdgpu_vm_cpu_funcs; | |||
| 3115 | } else { | |||
| 3116 | vm->update_funcs = &amdgpu_vm_sdma_funcs; | |||
| 3117 | } | |||
| 3118 | dma_fence_put(vm->last_update); | |||
| 3119 | vm->last_update = NULL; | |||
| 3120 | vm->is_compute_context = true; | |||
| 3121 | ||||
| 3122 | if (vm->pasid) { | |||
| 3123 | unsigned long flags; | |||
| 3124 | ||||
| 3125 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 3126 | idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); | |||
| 3127 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 3128 | ||||
| 3129 | /* Free the original amdgpu allocated pasid | |||
| 3130 | * Will be replaced with kfd allocated pasid | |||
| 3131 | */ | |||
| 3132 | amdgpu_pasid_free(vm->pasid); | |||
| 3133 | vm->pasid = 0; | |||
| 3134 | } | |||
| 3135 | ||||
| 3136 | /* Free the shadow bo for compute VM */ | |||
| 3137 | amdgpu_bo_unref(&vm->root.base.bo->shadow); | |||
| 3138 | ||||
| 3139 | if (pasid) | |||
| 3140 | vm->pasid = pasid; | |||
| 3141 | ||||
| 3142 | goto unreserve_bo; | |||
| 3143 | ||||
| 3144 | free_idr: | |||
| 3145 | if (pasid) { | |||
| 3146 | unsigned long flags; | |||
| 3147 | ||||
| 3148 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 3149 | idr_remove(&adev->vm_manager.pasid_idr, pasid); | |||
| 3150 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 3151 | } | |||
| 3152 | unreserve_bo: | |||
| 3153 | amdgpu_bo_unreserve(vm->root.base.bo); | |||
| 3154 | return r; | |||
| 3155 | } | |||
| 3156 | ||||
| 3157 | /** | |||
| 3158 | * amdgpu_vm_release_compute - release a compute vm | |||
| 3159 | * @adev: amdgpu_device pointer | |||
| 3160 | * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute | |||
| 3161 | * | |||
| 3162 | * This is the counterpart of amdgpu_vm_make_compute. It decouples the compute | |||
| 3163 | * pasid from the vm. Compute should stop using the vm after this call. | |||
| 3164 | */ | |||
| 3165 | void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 3166 | { | |||
| 3167 | if (vm->pasid) { | |||
| 3168 | unsigned long flags; | |||
| 3169 | ||||
| 3170 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 3171 | idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); | |||
| 3172 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 3173 | } | |||
| 3174 | vm->pasid = 0; | |||
| 3175 | vm->is_compute_context = false; | |||
| 3176 | } | |||
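| | /* | |||
| | * Pairing note (summarized from the two helpers above, not new driver code): | |||
| | * amdgpu_vm_make_compute() may only be applied to a freshly initialized GFX | |||
| | * VM with no BOs or page tables yet, and amdgpu_vm_release_compute() undoes | |||
| | * the pasid coupling before the VM is finally torn down with amdgpu_vm_fini(). | |||
| | */ | |||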
| 3177 | ||||
| 3178 | /** | |||
| 3179 | * amdgpu_vm_fini - tear down a vm instance | |||
| 3180 | * | |||
| 3181 | * @adev: amdgpu_device pointer | |||
| 3182 | * @vm: requested vm | |||
| 3183 | * | |||
| 3184 | * Tear down @vm. | |||
| 3185 | * Unbind the VM and remove all bos from the vm bo list | |||
| 3186 | */ | |||
| 3187 | void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) | |||
| 3188 | { | |||
| 3189 | struct amdgpu_bo_va_mapping *mapping, *tmp; | |||
| 3190 | bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; | |||
| 3191 | struct amdgpu_bo *root; | |||
| 3192 | int i; | |||
| 3193 | ||||
| 3194 | amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); | |||
| 3195 | ||||
| 3196 | root = amdgpu_bo_ref(vm->root.base.bo); | |||
| 3197 | amdgpu_bo_reserve(root, true); | |||
| 3198 | if (vm->pasid) { | |||
| 3199 | unsigned long flags; | |||
| 3200 | ||||
| 3201 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 3202 | idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); | |||
| 3203 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 3204 | vm->pasid = 0; | |||
| 3205 | } | |||
| 3206 | ||||
| 3207 | dma_fence_wait(vm->last_unlocked, false); | |||
| 3208 | dma_fence_put(vm->last_unlocked); | |||
| 3209 | ||||
| 3210 | list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { | |||
| 3211 | if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { | |||
| 3212 | amdgpu_vm_prt_fini(adev, vm); | |||
| 3213 | prt_fini_needed = false; | |||
| 3214 | } | |||
| 3215 | ||||
| 3216 | list_del(&mapping->list); | |||
| 3217 | amdgpu_vm_free_mapping(adev, vm, mapping, NULL); | |||
| 3218 | } | |||
| 3219 | ||||
| 3220 | amdgpu_vm_free_pts(adev, vm, NULL); | |||
| 3221 | amdgpu_bo_unreserve(root); | |||
| 3222 | amdgpu_bo_unref(&root); | |||
| 3223 | WARN_ON(vm->root.base.bo); | |||
| 3224 | ||||
| 3225 | drm_sched_entity_destroy(&vm->immediate); | |||
| 3226 | drm_sched_entity_destroy(&vm->delayed); | |||
| 3227 | ||||
| 3228 | if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { | |||
| 3229 | dev_err(adev->dev, "still active bo inside vm\n"); | |||
| 3230 | } | |||
| 3231 | rbtree_postorder_for_each_entry_safe(mapping, tmp, | |||
| 3232 | &vm->va.rb_root, rb) { | |||
| 3233 | /* Don't remove the mapping here, we don't want to trigger a | |||
| 3234 | * rebalance and the tree is about to be destroyed anyway. | |||
| 3235 | */ | |||
| 3236 | list_del(&mapping->list); | |||
| 3237 | kfree(mapping); | |||
| 3238 | } | |||
| 3239 | ||||
| 3240 | dma_fence_put(vm->last_update); | |||
| 3241 | for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) | |||
| 3242 | amdgpu_vmid_free_reserved(adev, vm, i); | |||
| 3243 | } | |||
| 3244 | ||||
| 3245 | /** | |||
| 3246 | * amdgpu_vm_manager_init - init the VM manager | |||
| 3247 | * | |||
| 3248 | * @adev: amdgpu_device pointer | |||
| 3249 | * | |||
| 3250 | * Initialize the VM manager structures | |||
| 3251 | */ | |||
| 3252 | void amdgpu_vm_manager_init(struct amdgpu_device *adev) | |||
| 3253 | { | |||
| 3254 | unsigned i; | |||
| 3255 | ||||
| 3256 | /* Concurrent flushes are only possible starting with Vega10 and | |||
| 3257 | * are broken on Navi10 and Navi14. | |||
| 3258 | */ | |||
| 3259 | adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 || | |||
| 3260 | adev->asic_type == CHIP_NAVI10 || | |||
| 3261 | adev->asic_type == CHIP_NAVI14); | |||
| 3262 | amdgpu_vmid_mgr_init(adev); | |||
| 3263 | ||||
| 3264 | adev->vm_manager.fence_context = | |||
| 3265 | dma_fence_context_alloc(AMDGPU_MAX_RINGS); | |||
| 3266 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) | |||
| 3267 | adev->vm_manager.seqno[i] = 0; | |||
| 3268 | ||||
| 3269 | mtx_init(&adev->vm_manager.prt_lock, IPL_TTY); | |||
| 3270 | atomic_set(&adev->vm_manager.num_prt_users, 0); | |||
| 3271 | ||||
| 3272 | /* If not overridden by the user, by default, only in large BAR systems | |||
| 3273 | * Compute VM tables will be updated by CPU | |||
| 3274 | */ | |||
| 3275 | #ifdef CONFIG_X86_64 | |||
| 3276 | if (amdgpu_vm_update_mode == -1) { | |||
| 3277 | if (amdgpu_gmc_vram_full_visible(&adev->gmc)) | |||
| 3278 | adev->vm_manager.vm_update_mode = | |||
| 3279 | AMDGPU_VM_USE_CPU_FOR_COMPUTE; | |||
| 3280 | else | |||
| 3281 | adev->vm_manager.vm_update_mode = 0; | |||
| 3282 | } else | |||
| 3283 | adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; | |||
| 3284 | #else | |||
| 3285 | adev->vm_manager.vm_update_mode = 0; | |||
| 3286 | #endif | |||
| 3287 | ||||
| 3288 | idr_init(&adev->vm_manager.pasid_idr); | |||
| 3289 | mtx_init(&adev->vm_manager.pasid_lock, IPL_TTY); | |||
| 3290 | } | |||
| 3291 | ||||
| 3292 | /** | |||
| 3293 | * amdgpu_vm_manager_fini - cleanup VM manager | |||
| 3294 | * | |||
| 3295 | * @adev: amdgpu_device pointer | |||
| 3296 | * | |||
| 3297 | * Cleanup the VM manager and free resources. | |||
| 3298 | */ | |||
| 3299 | void amdgpu_vm_manager_fini(struct amdgpu_device *adev) | |||
| 3300 | { | |||
| 3301 | WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); | |||
| 3302 | idr_destroy(&adev->vm_manager.pasid_idr); | |||
| 3303 | ||||
| 3304 | amdgpu_vmid_mgr_fini(adev); | |||
| 3305 | } | |||
| 3306 | ||||
| 3307 | /** | |||
| 3308 | * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. | |||
| 3309 | * | |||
| 3310 | * @dev: drm device pointer | |||
| 3311 | * @data: drm_amdgpu_vm | |||
| 3312 | * @filp: drm file pointer | |||
| 3313 | * | |||
| 3314 | * Returns: | |||
| 3315 | * 0 for success, -errno for errors. | |||
| 3316 | */ | |||
| 3317 | int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) | |||
| 3318 | { | |||
| 3319 | union drm_amdgpu_vm *args = data; | |||
| 3320 | struct amdgpu_device *adev = drm_to_adev(dev); | |||
| 3321 | struct amdgpu_fpriv *fpriv = filp->driver_priv; | |||
| 3322 | long timeout = msecs_to_jiffies(2000); | |||
| 3323 | int r; | |||
| 3324 | ||||
| 3325 | switch (args->in.op) { | |||
| 3326 | case AMDGPU_VM_OP_RESERVE_VMID: | |||
| 3327 | /* We only have requirement to reserve vmid from gfxhub */ | |||
| 3328 | r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, | |||
| 3329 | AMDGPU_GFXHUB_0); | |||
| 3330 | if (r) | |||
| 3331 | return r; | |||
| 3332 | break; | |||
| 3333 | case AMDGPU_VM_OP_UNRESERVE_VMID: | |||
| 3334 | if (amdgpu_sriov_runtime(adev)) | |||
| 3335 | timeout = 8 * timeout; | |||
| 3336 | ||||
| 3337 | /* Wait vm idle to make sure the vmid set in SPM_VMID is | |||
| 3338 | * not referenced anymore. | |||
| 3339 | */ | |||
| 3340 | r = amdgpu_bo_reserve(fpriv->vm.root.base.bo, true); | |||
| 3341 | if (r) | |||
| 3342 | return r; | |||
| 3343 | ||||
| 3344 | r = amdgpu_vm_wait_idle(&fpriv->vm, timeout); | |||
| 3345 | if (r < 0) | |||
| 3346 | return r; | |||
| 3347 | ||||
| 3348 | amdgpu_bo_unreserve(fpriv->vm.root.base.bo); | |||
| 3349 | amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); | |||
| 3350 | break; | |||
| 3351 | default: | |||
| 3352 | return -EINVAL; | |||
| 3353 | } | |||
| 3354 | ||||
| 3355 | return 0; | |||
| 3356 | } | |||
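| | /* | |||
| | * Userspace-side sketch (assumes libdrm; field names come from the | |||
| | * drm_amdgpu_vm union in amdgpu_drm.h): reserving and later unreserving a | |||
| | * VMID through this ioctl. | |||
| | */ | |||
| | #if 0 | |||
| | union drm_amdgpu_vm vm_ioc = { 0 }; | |||
| | vm_ioc.in.op = AMDGPU_VM_OP_RESERVE_VMID; | |||
| | r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &vm_ioc, sizeof(vm_ioc)); | |||
| | /* ... use the reserved VMID ... */ | |||
| | vm_ioc.in.op = AMDGPU_VM_OP_UNRESERVE_VMID; | |||
| | r = drmCommandWriteRead(fd, DRM_AMDGPU_VM, &vm_ioc, sizeof(vm_ioc)); | |||
| | #endif | |||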
| 3357 | ||||
| 3358 | /** | |||
| 3359 | * amdgpu_vm_get_task_info - Extracts task info for a PASID. | |||
| 3360 | * | |||
| 3361 | * @adev: drm device pointer | |||
| 3362 | * @pasid: PASID identifier for VM | |||
| 3363 | * @task_info: task_info to fill. | |||
| 3364 | */ | |||
| 3365 | void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, | |||
| 3366 | struct amdgpu_task_info *task_info) | |||
| 3367 | { | |||
| 3368 | struct amdgpu_vm *vm; | |||
| 3369 | unsigned long flags; | |||
| 3370 | ||||
| 3371 | spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); | |||
| 3372 | ||||
| 3373 | vm = idr_find(&adev->vm_manager.pasid_idr, pasid); | |||
| 3374 | if (vm) | |||
| 3375 | *task_info = vm->task_info; | |||
| 3376 | ||||
| 3377 | spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); | |||
| 3378 | } | |||
| 3379 | ||||
| 3380 | /** | |||
| 3381 | * amdgpu_vm_set_task_info - Sets VMs task info. | |||
| 3382 | * | |||
| 3383 | * @vm: vm for which to set the info | |||
| 3384 | */ | |||
| 3385 | void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) | |||
| 3386 | { | |||
| 3387 | if (vm->task_info.pid) | |||
| 3388 | return; | |||
| 3389 | ||||
| 3390 | #ifdef __linux__ | |||
| 3391 | vm->task_info.pid = current->pid; | |||
| 3392 | get_task_comm(vm->task_info.task_name, current); | |||
| 3393 | ||||
| 3394 | if (current->group_leader->mm != current->mm) | |||
| 3395 | return; | |||
| 3396 | ||||
| 3397 | vm->task_info.tgid = current->group_leader->pid; | |||
| 3398 | get_task_comm(vm->task_info.process_name, current->group_leader); | |||
| 3399 | #else | |||
| 3400 | vm->task_info.pid = curproc->p_p->ps_pid; | |||
| 3401 | strlcpy(vm->task_info.task_name, curproc->p_p->ps_comm, | |||
| 3402 | sizeof(vm->task_info.task_name)); | |||
| 3403 | #endif | |||
| 3404 | } | |||
| 3405 | ||||
| 3406 | /** | |||
| 3407 | * amdgpu_vm_handle_fault - graceful handling of VM faults. | |||
| 3408 | * @adev: amdgpu device pointer | |||
| 3409 | * @pasid: PASID of the VM | |||
| 3410 | * @addr: Address of the fault | |||
| 3411 | * | |||
| 3412 | * Try to gracefully handle a VM fault. Return true if the fault was handled and | |||
| 3413 | * shouldn't be reported any more. | |||
| 3414 | */ | |||
| 3415 | bool_Bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, | |||
| 3416 | uint64_t addr) | |||
| 3417 | { | |||
| 3418 | struct amdgpu_bo *root; | |||
| 3419 | uint64_t value, flags; | |||
| 3420 | struct amdgpu_vm *vm; | |||
| 3421 | long r; | |||
| 3422 | ||||
| 3423 | spin_lock(&adev->vm_manager.pasid_lock); | |||
| 3424 | vm = idr_find(&adev->vm_manager.pasid_idr, pasid); | |||
| 3425 | if (vm) | |||
| 3426 | root = amdgpu_bo_ref(vm->root.base.bo); | |||
| 3427 | else | |||
| 3428 | root = NULL; | |||
| 3429 | spin_unlock(&adev->vm_manager.pasid_lock); | |||
| 3430 | ||||
| 3431 | if (!root) | |||
| 3432 | return false; | |||
| 3433 | ||||
| 3434 | r = amdgpu_bo_reserve(root, true); | |||
| 3435 | if (r) | |||
| 3436 | goto error_unref; | |||
| 3437 | ||||
| 3438 | /* Double check that the VM still exists */ | |||
| 3439 | spin_lock(&adev->vm_manager.pasid_lock); | |||
| 3440 | vm = idr_find(&adev->vm_manager.pasid_idr, pasid); | |||
| 3441 | if (vm && vm->root.base.bo != root) | |||
| 3442 | vm = NULL; | |||
| 3443 | spin_unlock(&adev->vm_manager.pasid_lock); | |||
| 3444 | if (!vm) | |||
| 3445 | goto error_unlock; | |||
| 3446 | ||||
| 3447 | addr /= AMDGPU_GPU_PAGE_SIZE; | |||
| 3448 | flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | | |||
| 3449 | AMDGPU_PTE_SYSTEM; | |||
| 3450 | ||||
| 3451 | if (vm->is_compute_context) { | |||
| 3452 | /* Intentionally setting invalid PTE flag | |||
| 3453 | * combination to force a no-retry-fault | |||
| 3454 | */ | |||
| 3455 | flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | | |||
| 3456 | AMDGPU_PTE_TF; | |||
| 3457 | value = 0; | |||
| 3458 | ||||
| 3459 | } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { | |||
| 3460 | /* Redirect the access to the dummy page */ | |||
| 3461 | value = adev->dummy_page_addr; | |||
| 3462 | flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | | |||
| 3463 | AMDGPU_PTE_WRITEABLE; | |||
| 3464 | ||||
| 3465 | } else { | |||
| 3466 | /* Let the hw retry silently on the PTE */ | |||
| 3467 | value = 0; | |||
| 3468 | } | |||
| 3469 | ||||
| 3470 | r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr, | |||
| 3471 | addr + 1, flags, value, NULL, NULL); | |||
| 3472 | if (r) | |||
| 3473 | goto error_unlock; | |||
| 3474 | ||||
| 3475 | r = amdgpu_vm_update_pdes(adev, vm, true); | |||
| 3476 | ||||
| 3477 | error_unlock: | |||
| 3478 | amdgpu_bo_unreserve(root); | |||
| 3479 | if (r < 0) | |||
| 3480 | DRM_ERROR("Can't handle page fault (%ld)\n", r)__drm_err("Can't handle page fault (%ld)\n", r); | |||
| 3481 | ||||
| 3482 | error_unref: | |||
| 3483 | amdgpu_bo_unref(&root); | |||
| 3484 | ||||
| 3485 | return false; | |||
| 3486 | } |
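| | /* | |||
| | * Caller-side sketch (assumed, modeled on a GMC interrupt handler; not code | |||
| | * from this file): a retry fault is first offered to the graceful handler and | |||
| | * only reported if it could not be resolved. | |||
| | */ | |||
| | #if 0 | |||
| | if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) | |||
| | return 1; /* fault handled, suppress the report */ | |||
| | dev_err(adev->dev, "unhandled page fault at 0x%llx\n", addr); | |||
| | #endif | |||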