File: | dev/pci/drm/radeon/radeon_vm.c |
Warning: | line 474, column 13 Although the value stored to 'last_pfn' is used in the enclosing expression, the value is never actually read from 'last_pfn' |
1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. |
3 | * Copyright 2008 Red Hat Inc. |
4 | * Copyright 2009 Jerome Glisse. |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
7 | * copy of this software and associated documentation files (the "Software"), |
8 | * to deal in the Software without restriction, including without limitation |
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
10 | * and/or sell copies of the Software, and to permit persons to whom the |
11 | * Software is furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
22 | * OTHER DEALINGS IN THE SOFTWARE. |
23 | * |
24 | * Authors: Dave Airlie |
25 | * Alex Deucher |
26 | * Jerome Glisse |
27 | */ |
28 | |
29 | #include <drm/radeon_drm.h> |
30 | #include "radeon.h" |
31 | #include "radeon_trace.h" |
32 | |
33 | /* |
34 | * GPUVM |
35 | * GPUVM is similar to the legacy gart on older asics, however |
36 | * rather than there being a single global gart table |
37 | * for the entire GPU, there are multiple VM page tables active |
38 | * at any given time. The VM page tables can contain a mix of |
39 | * vram pages and system memory pages, and system memory pages |
40 | * can be mapped as snooped (cached system pages) or unsnooped |
41 | * (uncached system pages). |
42 | * Each VM has an ID associated with it and there is a page table |
43 | * associated with each VMID. When executing a command buffer, |
44 | * the kernel tells the ring what VMID to use for that command |
45 | * buffer. VMIDs are allocated dynamically as commands are submitted. |
46 | * The userspace drivers maintain their own address space and the kernel |
47 | * sets up their page tables accordingly when they submit their |
48 | * command buffers and a VMID is assigned. |
49 | * Cayman/Trinity support up to 8 active VMs at any given time; |
50 | * SI supports 16. |
51 | */ |
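Reader's note (not part of the listed source): the helpers below implement a two-level translation. A GPU page frame number is split into a page directory index (the bits above radeon_vm_block_size, see radeon_vm_num_pdes()) and a page table index (the low radeon_vm_block_size bits, see the mask in radeon_vm_update_ptes()); each PDE and PTE is 8 bytes, which is where the "* 8" factors in this file come from. A minimal sketch, with the names gpu_va, pde_idx and pte_idx invented for illustration:

    uint64_t pfn     = gpu_va >> RADEON_GPU_PAGE_SHIFT;    /* 4KB GPU pages */
    uint64_t pde_idx = pfn >> radeon_vm_block_size;        /* selects the page table BO */
    uint64_t pte_idx = pfn & (RADEON_VM_PTE_COUNT - 1);    /* entry within that table */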
52 | |
53 | /** |
54 | * radeon_vm_num_pdes - return the number of page directory entries |
55 | * |
56 | * @rdev: radeon_device pointer |
57 | * |
58 | * Calculate the number of page directory entries (cayman+). |
59 | */ |
60 | static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) |
61 | { |
62 | return rdev->vm_manager.max_pfn >> radeon_vm_block_size; |
63 | } |
64 | |
65 | /** |
66 | * radeon_vm_directory_size - returns the size of the page directory in bytes |
67 | * |
68 | * @rdev: radeon_device pointer |
69 | * |
70 | * Calculate the size of the page directory in bytes (cayman+). |
71 | */ |
72 | static unsigned radeon_vm_directory_size(struct radeon_device *rdev) |
73 | { |
74 | return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); |
75 | } |
76 | |
77 | /** |
78 | * radeon_vm_manager_init - init the vm manager |
79 | * |
80 | * @rdev: radeon_device pointer |
81 | * |
82 | * Init the vm manager (cayman+). |
83 | * Returns 0 for success, error for failure. |
84 | */ |
85 | int radeon_vm_manager_init(struct radeon_device *rdev) |
86 | { |
87 | int r; |
88 | |
89 | if (!rdev->vm_manager.enabled) { |
90 | r = radeon_asic_vm_init(rdev); |
91 | if (r) |
92 | return r; |
93 | |
94 | rdev->vm_manager.enabled = true; |
95 | } |
96 | return 0; |
97 | } |
98 | |
99 | /** |
100 | * radeon_vm_manager_fini - tear down the vm manager |
101 | * |
102 | * @rdev: radeon_device pointer |
103 | * |
104 | * Tear down the VM manager (cayman+). |
105 | */ |
106 | void radeon_vm_manager_fini(struct radeon_device *rdev) |
107 | { |
108 | int i; |
109 | |
110 | if (!rdev->vm_manager.enabled) |
111 | return; |
112 | |
113 | for (i = 0; i < RADEON_NUM_VM; ++i) |
114 | radeon_fence_unref(&rdev->vm_manager.active[i]); |
115 | radeon_asic_vm_fini(rdev); |
116 | rdev->vm_manager.enabled = false; |
117 | } |
118 | |
119 | /** |
120 | * radeon_vm_get_bos - add the vm BOs to a validation list |
121 | * |
122 | * @vm: vm providing the BOs |
123 | * @head: head of validation list |
124 | * |
125 | * Add the page directory to the list of BOs to |
126 | * validate for command submission (cayman+). |
127 | */ |
128 | struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, |
129 | struct radeon_vm *vm, |
130 | struct list_head *head) |
131 | { |
132 | struct radeon_bo_list *list; |
133 | unsigned i, idx; |
134 | |
135 | list = kvmalloc_array(vm->max_pde_used + 2, |
136 | sizeof(struct radeon_bo_list), GFP_KERNEL); |
137 | if (!list) |
138 | return NULL; |
139 | |
140 | /* add the vm page table to the list */ |
141 | list[0].robj = vm->page_directory; |
142 | list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM; |
143 | list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; |
144 | list[0].tv.bo = &vm->page_directory->tbo; |
145 | list[0].tv.num_shared = 1; |
146 | list[0].tiling_flags = 0; |
147 | list_add(&list[0].tv.head, head); |
148 | |
149 | for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { |
150 | if (!vm->page_tables[i].bo) |
151 | continue; |
152 | |
153 | list[idx].robj = vm->page_tables[i].bo; |
154 | list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM; |
155 | list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; |
156 | list[idx].tv.bo = &list[idx].robj->tbo; |
157 | list[idx].tv.num_shared = 1; |
158 | list[idx].tiling_flags = 0; |
159 | list_add(&list[idx++].tv.head, head); |
160 | } |
161 | |
162 | return list; |
163 | } |
164 | |
165 | /** |
166 | * radeon_vm_grab_id - allocate the next free VMID |
167 | * |
168 | * @rdev: radeon_device pointer |
169 | * @vm: vm to allocate id for |
170 | * @ring: ring we want to submit job to |
171 | * |
172 | * Allocate an id for the vm (cayman+). |
173 | * Returns the fence we need to sync to (if any). |
174 | * |
175 | * Global and local mutex must be locked! |
176 | */ |
177 | struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, |
178 | struct radeon_vm *vm, int ring) |
179 | { |
180 | struct radeon_fence *best[RADEON_NUM_RINGS] = {}; |
181 | struct radeon_vm_id *vm_id = &vm->ids[ring]; |
182 | |
183 | unsigned choices[2] = {}; |
184 | unsigned i; |
185 | |
186 | /* check if the id is still valid */ |
187 | if (vm_id->id && vm_id->last_id_use && |
188 | vm_id->last_id_use == rdev->vm_manager.active[vm_id->id]) |
189 | return NULL; |
190 | |
191 | /* we definitely need to flush */ |
192 | vm_id->pd_gpu_addr = ~0ll; |
193 | |
194 | /* skip over VMID 0, since it is the system VM */ |
195 | for (i = 1; i < rdev->vm_manager.nvm; ++i) { |
196 | struct radeon_fence *fence = rdev->vm_manager.active[i]; |
197 | |
198 | if (fence == NULL) { |
199 | /* found a free one */ |
200 | vm_id->id = i; |
201 | trace_radeon_vm_grab_id(i, ring); |
202 | return NULL; |
203 | } |
204 | |
205 | if (radeon_fence_is_earlier(fence, best[fence->ring])) { |
206 | best[fence->ring] = fence; |
207 | choices[fence->ring == ring ? 0 : 1] = i; |
208 | } |
209 | } |
210 | |
211 | for (i = 0; i < 2; ++i) { |
212 | if (choices[i]) { |
213 | vm_id->id = choices[i]; |
214 | trace_radeon_vm_grab_id(choices[i], ring); |
215 | return rdev->vm_manager.active[choices[i]]; |
216 | } |
217 | } |
218 | |
219 | /* should never happen */ |
220 | BUG(); |
221 | return NULL; |
222 | } |
223 | |
224 | /** |
225 | * radeon_vm_flush - hardware flush the vm |
226 | * |
227 | * @rdev: radeon_device pointer |
228 | * @vm: vm we want to flush |
229 | * @ring: ring to use for flush |
230 | * @updates: last vm update that is waited for |
231 | * |
232 | * Flush the vm (cayman+). |
233 | * |
234 | * Global and local mutex must be locked! |
235 | */ |
236 | void radeon_vm_flush(struct radeon_device *rdev, |
237 | struct radeon_vm *vm, |
238 | int ring, struct radeon_fence *updates) |
239 | { |
240 | uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); |
241 | struct radeon_vm_id *vm_id = &vm->ids[ring]; |
242 | |
243 | if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || |
244 | radeon_fence_is_earlier(vm_id->flushed_updates, updates)) { |
245 | |
246 | trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); |
247 | radeon_fence_unref(&vm_id->flushed_updates); |
248 | vm_id->flushed_updates = radeon_fence_ref(updates); |
249 | vm_id->pd_gpu_addr = pd_addr; |
250 | radeon_ring_vm_flush(rdev, &rdev->ring[ring], |
251 | vm_id->id, vm_id->pd_gpu_addr); |
252 | |
253 | } |
254 | } |
255 | |
256 | /** |
257 | * radeon_vm_fence - remember fence for vm |
258 | * |
259 | * @rdev: radeon_device pointer |
260 | * @vm: vm we want to fence |
261 | * @fence: fence to remember |
262 | * |
263 | * Fence the vm (cayman+). |
264 | * Set the fence used to protect page table and id. |
265 | * |
266 | * Global and local mutex must be locked! |
267 | */ |
268 | void radeon_vm_fence(struct radeon_device *rdev, |
269 | struct radeon_vm *vm, |
270 | struct radeon_fence *fence) |
271 | { |
272 | unsigned vm_id = vm->ids[fence->ring].id; |
273 | |
274 | radeon_fence_unref(&rdev->vm_manager.active[vm_id]); |
275 | rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence); |
276 | |
277 | radeon_fence_unref(&vm->ids[fence->ring].last_id_use); |
278 | vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); |
279 | } |
280 | |
281 | /** |
282 | * radeon_vm_bo_find - find the bo_va for a specific vm & bo |
283 | * |
284 | * @vm: requested vm |
285 | * @bo: requested buffer object |
286 | * |
287 | * Find @bo inside the requested vm (cayman+). |
288 | * Search inside the @bos vm list for the requested vm |
289 | * Returns the found bo_va or NULL if none is found |
290 | * |
291 | * Object has to be reserved! |
292 | */ |
293 | struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, |
294 | struct radeon_bo *bo) |
295 | { |
296 | struct radeon_bo_va *bo_va; |
297 | |
298 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
299 | if (bo_va->vm == vm) |
300 | return bo_va; |
301 | |
302 | } |
303 | return NULL; |
304 | } |
305 | |
306 | /** |
307 | * radeon_vm_bo_add - add a bo to a specific vm |
308 | * |
309 | * @rdev: radeon_device pointer |
310 | * @vm: requested vm |
311 | * @bo: radeon buffer object |
312 | * |
313 | * Add @bo into the requested vm (cayman+). |
314 | * Add @bo to the list of bos associated with the vm |
315 | * Returns newly added bo_va or NULL for failure |
316 | * |
317 | * Object has to be reserved! |
318 | */ |
319 | struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, |
320 | struct radeon_vm *vm, |
321 | struct radeon_bo *bo) |
322 | { |
323 | struct radeon_bo_va *bo_va; |
324 | |
325 | bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
326 | if (bo_va == NULL) |
327 | return NULL; |
328 | |
329 | bo_va->vm = vm; |
330 | bo_va->bo = bo; |
331 | bo_va->it.start = 0; |
332 | bo_va->it.last = 0; |
333 | bo_va->flags = 0; |
334 | bo_va->ref_count = 1; |
335 | INIT_LIST_HEAD(&bo_va->bo_list); |
336 | INIT_LIST_HEAD(&bo_va->vm_status); |
337 | |
338 | mutex_lock(&vm->mutex); |
339 | list_add_tail(&bo_va->bo_list, &bo->va); |
340 | mutex_unlock(&vm->mutex); |
341 | |
342 | return bo_va; |
343 | } |
344 | |
345 | /** |
346 | * radeon_vm_set_pages - helper to call the right asic function |
347 | * |
348 | * @rdev: radeon_device pointer |
349 | * @ib: indirect buffer to fill with commands |
350 | * @pe: addr of the page entry |
351 | * @addr: dst addr to write into pe |
352 | * @count: number of page entries to update |
353 | * @incr: increase next addr by incr bytes |
354 | * @flags: hw access flags |
355 | * |
356 | * Traces the parameters and calls the right asic functions |
357 | * to setup the page table using the DMA. |
358 | */ |
359 | static void radeon_vm_set_pages(struct radeon_device *rdev, |
360 | struct radeon_ib *ib, |
361 | uint64_t pe, |
362 | uint64_t addr, unsigned count, |
363 | uint32_t incr, uint32_t flags) |
364 | { |
365 | trace_radeon_vm_set_page(pe, addr, count, incr, flags); |
366 | |
367 | if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { |
368 | uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; |
369 | radeon_asic_vm_copy_pages(rdev, ib, pe, src, count); |
370 | |
371 | } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) { |
372 | radeon_asic_vm_write_pages(rdev, ib, pe, addr, |
373 | count, incr, flags); |
374 | |
375 | } else { |
376 | radeon_asic_vm_set_pages(rdev, ib, pe, addr, |
377 | count, incr, flags); |
378 | } |
379 | } |
380 | |
381 | /** |
382 | * radeon_vm_clear_bo - initially clear the page dir/table |
383 | * |
384 | * @rdev: radeon_device pointer |
385 | * @bo: bo to clear |
386 | */ |
387 | static int radeon_vm_clear_bo(struct radeon_device *rdev, |
388 | struct radeon_bo *bo) |
389 | { |
390 | struct ttm_operation_ctx ctx = { true, false }; |
391 | struct radeon_ib ib; |
392 | unsigned entries; |
393 | uint64_t addr; |
394 | int r; |
395 | |
396 | r = radeon_bo_reserve(bo, false); |
397 | if (r) |
398 | return r; |
399 | |
400 | r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); |
401 | if (r) |
402 | goto error_unreserve; |
403 | |
404 | addr = radeon_bo_gpu_offset(bo); |
405 | entries = radeon_bo_size(bo) / 8; |
406 | |
407 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); |
408 | if (r) |
409 | goto error_unreserve; |
410 | |
411 | ib.length_dw = 0; |
412 | |
413 | radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0); |
414 | radeon_asic_vm_pad_ib(rdev, &ib); |
415 | WARN_ON(ib.length_dw > 64); |
416 | |
417 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
418 | if (r) |
419 | goto error_free; |
420 | |
421 | ib.fence->is_vm_update = true; |
422 | radeon_bo_fence(bo, ib.fence, false); |
423 | |
424 | error_free: |
425 | radeon_ib_free(rdev, &ib); |
426 | |
427 | error_unreserve: |
428 | radeon_bo_unreserve(bo); |
429 | return r; |
430 | } |
431 | |
432 | /** |
433 | * radeon_vm_bo_set_addr - set bos virtual address inside a vm |
434 | * |
435 | * @rdev: radeon_device pointer |
436 | * @bo_va: bo_va to store the address |
437 | * @soffset: requested offset of the buffer in the VM address space |
438 | * @flags: attributes of pages (read/write/valid/etc.) |
439 | * |
440 | * Set offset of @bo_va (cayman+). |
441 | * Validate and set the offset requested within the vm address space. |
442 | * Returns 0 for success, error for failure. |
443 | * |
444 | * Object has to be reserved and gets unreserved by this function! |
445 | */ |
446 | int radeon_vm_bo_set_addr(struct radeon_device *rdev, |
447 | struct radeon_bo_va *bo_va, |
448 | uint64_t soffset, |
449 | uint32_t flags) |
450 | { |
451 | uint64_t size = radeon_bo_size(bo_va->bo); |
452 | struct radeon_vm *vm = bo_va->vm; |
453 | unsigned last_pfn, pt_idx; |
454 | uint64_t eoffset; |
455 | int r; |
456 | |
457 | if (soffset) { |
458 | /* make sure object fit at this offset */ |
459 | eoffset = soffset + size - 1; |
460 | if (soffset >= eoffset) { |
461 | r = -EINVAL; |
462 | goto error_unreserve; |
463 | } |
464 | |
465 | last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; |
466 | if (last_pfn >= rdev->vm_manager.max_pfn) { |
467 | dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n", |
468 | last_pfn, rdev->vm_manager.max_pfn); |
469 | r = -EINVAL; |
470 | goto error_unreserve; |
471 | } |
472 | |
473 | } else { |
474 | eoffset = last_pfn = 0; |
Although the value stored to 'last_pfn' is used in the enclosing expression, the value is never actually read from 'last_pfn' | |
475 | } |
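Analyzer note, expanded (not part of the listed source): in this else branch 'last_pfn' is written by the chained assignment on line 474 but never read afterwards; it is only consumed inside the preceding "if (soffset)" branch (lines 465-468), so the store is dead. A minimal sketch of one possible cleanup, purely hypothetical and not necessarily what upstream chose:

    } else {
            eoffset = 0;    /* last_pfn is only needed in the if-branch above */
    }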
476 | |
478 | mutex_lock(&vm->mutex); |
479 | soffset /= RADEON_GPU_PAGE_SIZE; |
480 | eoffset /= RADEON_GPU_PAGE_SIZE; |
480 | if (soffset || eoffset) { |
481 | struct interval_tree_node *it; |
482 | it = interval_tree_iter_first(&vm->va, soffset, eoffset); |
483 | if (it && it != &bo_va->it) { |
484 | struct radeon_bo_va *tmp; |
485 | tmp = container_of(it, struct radeon_bo_va, it); |
486 | /* bo and tmp overlap, invalid offset */ |
487 | dev_err(rdev->dev, "bo %p va 0x%010llx conflict with " |
488 | "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, |
489 | soffset, tmp->bo, tmp->it.start, tmp->it.last); |
490 | mutex_unlock(&vm->mutex); |
491 | r = -EINVAL; |
492 | goto error_unreserve; |
493 | } |
494 | } |
495 | |
496 | if (bo_va->it.start || bo_va->it.last) { |
497 | /* add a clone of the bo_va to clear the old address */ |
498 | struct radeon_bo_va *tmp; |
499 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
500 | if (!tmp) { |
501 | mutex_unlock(&vm->mutex); |
502 | r = -ENOMEM; |
503 | goto error_unreserve; |
504 | } |
505 | tmp->it.start = bo_va->it.start; |
506 | tmp->it.last = bo_va->it.last; |
507 | tmp->vm = vm; |
508 | tmp->bo = radeon_bo_ref(bo_va->bo); |
509 | |
510 | interval_tree_remove(&bo_va->it, &vm->va); |
511 | spin_lock(&vm->status_lock); |
512 | bo_va->it.start = 0; |
513 | bo_va->it.last = 0; |
514 | list_del_init(&bo_va->vm_status); |
515 | list_add(&tmp->vm_status, &vm->freed); |
516 | spin_unlock(&vm->status_lock); |
517 | } |
518 | |
519 | if (soffset || eoffset) { |
520 | spin_lock(&vm->status_lock); |
521 | bo_va->it.start = soffset; |
522 | bo_va->it.last = eoffset; |
523 | list_add(&bo_va->vm_status, &vm->cleared); |
524 | spin_unlock(&vm->status_lock); |
525 | interval_tree_insert(&bo_va->it, &vm->va); |
526 | } |
527 | |
528 | bo_va->flags = flags; |
529 | |
530 | soffset >>= radeon_vm_block_size; |
531 | eoffset >>= radeon_vm_block_size; |
532 | |
533 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); |
534 | |
535 | if (eoffset > vm->max_pde_used) |
536 | vm->max_pde_used = eoffset; |
537 | |
538 | radeon_bo_unreserve(bo_va->bo); |
539 | |
540 | /* walk over the address space and allocate the page tables */ |
541 | for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { |
542 | struct radeon_bo *pt; |
543 | |
544 | if (vm->page_tables[pt_idx].bo) |
545 | continue; |
546 | |
547 | /* drop mutex to allocate and clear page table */ |
548 | mutex_unlock(&vm->mutex); |
549 | |
550 | r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, |
551 | RADEON_GPU_PAGE_SIZE, true, |
552 | RADEON_GEM_DOMAIN_VRAM, 0, |
553 | NULL, NULL, &pt); |
554 | if (r) |
555 | return r; |
556 | |
557 | r = radeon_vm_clear_bo(rdev, pt); |
558 | if (r) { |
559 | radeon_bo_unref(&pt); |
560 | return r; |
561 | } |
562 | |
563 | /* acquire mutex again */ |
564 | mutex_lock(&vm->mutex); |
565 | if (vm->page_tables[pt_idx].bo) { |
566 | /* someone else allocated the pt in the meantime */ |
567 | mutex_unlock(&vm->mutex); |
568 | radeon_bo_unref(&pt); |
569 | mutex_lock(&vm->mutex); |
570 | continue; |
571 | } |
572 | |
573 | vm->page_tables[pt_idx].addr = 0; |
574 | vm->page_tables[pt_idx].bo = pt; |
575 | } |
576 | |
577 | mutex_unlock(&vm->mutex); |
578 | return 0; |
579 | |
580 | error_unreserve: |
581 | radeon_bo_unreserve(bo_va->bo); |
582 | return r; |
583 | } |
584 | |
585 | /** |
586 | * radeon_vm_map_gart - get the physical address of a gart page |
587 | * |
588 | * @rdev: radeon_device pointer |
589 | * @addr: the unmapped addr |
590 | * |
591 | * Look up the physical address of the page that the pte resolves |
592 | * to (cayman+). |
593 | * Returns the physical address of the page. |
594 | */ |
595 | uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) |
596 | { |
597 | uint64_t result; |
598 | |
599 | /* page table offset */ |
600 | result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT]; |
601 | result &= ~RADEON_GPU_PAGE_MASK; |
602 | |
603 | return result; |
604 | } |
605 | |
606 | /** |
607 | * radeon_vm_page_flags - translate page flags to what the hw uses |
608 | * |
609 | * @flags: flags coming from userspace |
610 | * |
611 | * Translate the flags the userspace ABI uses to hw flags. |
612 | */ |
613 | static uint32_t radeon_vm_page_flags(uint32_t flags) |
614 | { |
615 | uint32_t hw_flags = 0; |
616 | |
617 | hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; |
618 | hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; |
619 | hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; |
620 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
621 | hw_flags |= R600_PTE_SYSTEM; |
622 | hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; |
623 | } |
624 | return hw_flags; |
625 | } |
626 | |
627 | /** |
628 | * radeon_vm_update_page_directory - make sure that page directory is valid |
629 | * |
630 | * @rdev: radeon_device pointer |
631 | * @vm: requested vm |
632 | * @start: start of GPU address range |
633 | * @end: end of GPU address range |
634 | * |
635 | * Allocates new page tables if necessary |
636 | * and updates the page directory (cayman+). |
637 | * Returns 0 for success, error for failure. |
638 | * |
639 | * Global and local mutex must be locked! |
640 | */ |
641 | int radeon_vm_update_page_directory(struct radeon_device *rdev, |
642 | struct radeon_vm *vm) |
643 | { |
644 | struct radeon_bo *pd = vm->page_directory; |
645 | uint64_t pd_addr = radeon_bo_gpu_offset(pd); |
646 | uint32_t incr = RADEON_VM_PTE_COUNT * 8; |
647 | uint64_t last_pde = ~0, last_pt = ~0; |
648 | unsigned count = 0, pt_idx, ndw; |
649 | struct radeon_ib ib; |
650 | int r; |
651 | |
652 | /* padding, etc. */ |
653 | ndw = 64; |
654 | |
655 | /* assume the worst case */ |
656 | ndw += vm->max_pde_used * 6; |
657 | |
658 | /* update too big for an IB */ |
659 | if (ndw > 0xfffff) |
660 | return -ENOMEM; |
661 | |
662 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
663 | if (r) |
664 | return r; |
665 | ib.length_dw = 0; |
666 | |
667 | /* walk over the address space and update the page directory */ |
668 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { |
669 | struct radeon_bo *bo = vm->page_tables[pt_idx].bo; |
670 | uint64_t pde, pt; |
671 | |
672 | if (bo == NULL) |
673 | continue; |
674 | |
675 | pt = radeon_bo_gpu_offset(bo); |
676 | if (vm->page_tables[pt_idx].addr == pt) |
677 | continue; |
678 | vm->page_tables[pt_idx].addr = pt; |
679 | |
680 | pde = pd_addr + pt_idx * 8; |
681 | if (((last_pde + 8 * count) != pde) || |
682 | ((last_pt + incr * count) != pt)) { |
683 | |
684 | if (count) { |
685 | radeon_vm_set_pages(rdev, &ib, last_pde, |
686 | last_pt, count, incr, |
687 | R600_PTE_VALID); |
688 | } |
689 | |
690 | count = 1; |
691 | last_pde = pde; |
692 | last_pt = pt; |
693 | } else { |
694 | ++count; |
695 | } |
696 | } |
697 | |
698 | if (count) |
699 | radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count, |
700 | incr, R600_PTE_VALID); |
701 | |
702 | if (ib.length_dw != 0) { |
703 | radeon_asic_vm_pad_ib(rdev, &ib); |
704 | |
705 | radeon_sync_resv(rdev, &ib.sync, pd->tbo.base.resv, true); |
706 | WARN_ON(ib.length_dw > ndw); |
707 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
708 | if (r) { |
709 | radeon_ib_free(rdev, &ib); |
710 | return r; |
711 | } |
712 | ib.fence->is_vm_update = true; |
713 | radeon_bo_fence(pd, ib.fence, false); |
714 | } |
715 | radeon_ib_free(rdev, &ib); |
716 | |
717 | return 0; |
718 | } |
719 | |
720 | /** |
721 | * radeon_vm_frag_ptes - add fragment information to PTEs |
722 | * |
723 | * @rdev: radeon_device pointer |
724 | * @ib: IB for the update |
725 | * @pe_start: first PTE to handle |
726 | * @pe_end: last PTE to handle |
727 | * @addr: addr those PTEs should point to |
728 | * @flags: hw mapping flags |
729 | * |
730 | * Global and local mutex must be locked! |
731 | */ |
732 | static void radeon_vm_frag_ptes(struct radeon_device *rdev, |
733 | struct radeon_ib *ib, |
734 | uint64_t pe_start, uint64_t pe_end, |
735 | uint64_t addr, uint32_t flags) |
736 | { |
737 | /** |
738 | * The MC L1 TLB supports variable sized pages, based on a fragment |
739 | * field in the PTE. When this field is set to a non-zero value, page |
740 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE |
741 | * flags are considered valid for all PTEs within the fragment range |
742 | * and corresponding mappings are assumed to be physically contiguous. |
743 | * |
744 | * The L1 TLB can store a single PTE for the whole fragment, |
745 | * significantly increasing the space available for translation |
746 | * caching. This leads to large improvements in throughput when the |
747 | * TLB is under pressure. |
748 | * |
749 | * The L2 TLB distributes small and large fragments into two |
750 | * asymmetric partitions. The large fragment cache is significantly |
751 | * larger. Thus, we try to use large fragments wherever possible. |
752 | * Userspace can support this by aligning virtual base address and |
753 | * allocation size to the fragment size. |
754 | */ |
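Reader's note (not part of the listed source): the constants chosen below line up with the comment above. pe_start/pe_end are byte addresses of PTEs and each PTE is 8 bytes, so a fragment alignment of 0x200 bytes on Cayman/Aruba covers 64 PTEs, i.e. 64 * 4KB = 256KB of address space (R600_PTE_FRAG_256KB), while 0x80 bytes on SI and newer covers 16 PTEs = 64KB (R600_PTE_FRAG_64KB).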
755 | |
756 | /* NI is optimized for 256KB fragments, SI and newer for 64KB */ |
757 | uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) || |
758 | (rdev->family == CHIP_ARUBA)) ? |
759 | R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB; |
760 | uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) || |
761 | (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80; |
762 | |
763 | uint64_t frag_start = roundup2(pe_start, frag_align); |
764 | uint64_t frag_end = pe_end & ~(frag_align - 1); |
765 | |
766 | unsigned count; |
767 | |
768 | /* system pages are non-contiguous */ |
769 | if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) || |
770 | (frag_start >= frag_end)) { |
771 | |
772 | count = (pe_end - pe_start) / 8; |
773 | radeon_vm_set_pages(rdev, ib, pe_start, addr, count, |
774 | RADEON_GPU_PAGE_SIZE, flags); |
775 | return; |
776 | } |
777 | |
778 | /* handle the 4K area at the beginning */ |
779 | if (pe_start != frag_start) { |
780 | count = (frag_start - pe_start) / 8; |
781 | radeon_vm_set_pages(rdev, ib, pe_start, addr, count, |
782 | RADEON_GPU_PAGE_SIZE, flags); |
783 | addr += RADEON_GPU_PAGE_SIZE * count; |
784 | } |
785 | |
786 | /* handle the area in the middle */ |
787 | count = (frag_end - frag_start) / 8; |
788 | radeon_vm_set_pages(rdev, ib, frag_start, addr, count, |
789 | RADEON_GPU_PAGE_SIZE, flags | frag_flags); |
790 | |
791 | /* handle the 4K area at the end */ |
792 | if (frag_end != pe_end) { |
793 | addr += RADEON_GPU_PAGE_SIZE * count; |
794 | count = (pe_end - frag_end) / 8; |
795 | radeon_vm_set_pages(rdev, ib, frag_end, addr, count, |
796 | RADEON_GPU_PAGE_SIZE, flags); |
797 | } |
798 | } |
799 | |
800 | /** |
801 | * radeon_vm_update_ptes - make sure that page tables are valid |
802 | * |
803 | * @rdev: radeon_device pointer |
804 | * @vm: requested vm |
805 | * @start: start of GPU address range |
806 | * @end: end of GPU address range |
807 | * @dst: destination address to map to |
808 | * @flags: mapping flags |
809 | * |
810 | * Update the page tables in the range @start - @end (cayman+). |
811 | * |
812 | * Global and local mutex must be locked! |
813 | */ |
814 | static int radeon_vm_update_ptes(struct radeon_device *rdev, |
815 | struct radeon_vm *vm, |
816 | struct radeon_ib *ib, |
817 | uint64_t start, uint64_t end, |
818 | uint64_t dst, uint32_t flags) |
819 | { |
820 | uint64_t mask = RADEON_VM_PTE_COUNT - 1; |
821 | uint64_t last_pte = ~0, last_dst = ~0; |
822 | unsigned count = 0; |
823 | uint64_t addr; |
824 | |
825 | /* walk over the address space and update the page tables */ |
826 | for (addr = start; addr < end; ) { |
827 | uint64_t pt_idx = addr >> radeon_vm_block_size; |
828 | struct radeon_bo *pt = vm->page_tables[pt_idx].bo; |
829 | unsigned nptes; |
830 | uint64_t pte; |
831 | int r; |
832 | |
833 | radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true); |
834 | r = dma_resv_reserve_shared(pt->tbo.base.resv, 1); |
835 | if (r) |
836 | return r; |
837 | |
838 | if ((addr & ~mask) == (end & ~mask)) |
839 | nptes = end - addr; |
840 | else |
841 | nptes = RADEON_VM_PTE_COUNT - (addr & mask); |
842 | |
843 | pte = radeon_bo_gpu_offset(pt); |
844 | pte += (addr & mask) * 8; |
845 | |
846 | if ((last_pte + 8 * count) != pte) { |
847 | |
848 | if (count) { |
849 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
850 | last_pte + 8 * count, |
851 | last_dst, flags); |
852 | } |
853 | |
854 | count = nptes; |
855 | last_pte = pte; |
856 | last_dst = dst; |
857 | } else { |
858 | count += nptes; |
859 | } |
860 | |
861 | addr += nptes; |
862 | dst += nptes * RADEON_GPU_PAGE_SIZE; |
863 | } |
864 | |
865 | if (count) { |
866 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
867 | last_pte + 8 * count, |
868 | last_dst, flags); |
869 | } |
870 | |
871 | return 0; |
872 | } |
873 | |
874 | /** |
875 | * radeon_vm_fence_pts - fence page tables after an update |
876 | * |
877 | * @vm: requested vm |
878 | * @start: start of GPU address range |
879 | * @end: end of GPU address range |
880 | * @fence: fence to use |
881 | * |
882 | * Fence the page tables in the range @start - @end (cayman+). |
883 | * |
884 | * Global and local mutex must be locked! |
885 | */ |
886 | static void radeon_vm_fence_pts(struct radeon_vm *vm, |
887 | uint64_t start, uint64_t end, |
888 | struct radeon_fence *fence) |
889 | { |
890 | unsigned i; |
891 | |
892 | start >>= radeon_vm_block_size; |
893 | end = (end - 1) >> radeon_vm_block_size; |
894 | |
895 | for (i = start; i <= end; ++i) |
896 | radeon_bo_fence(vm->page_tables[i].bo, fence, true); |
897 | } |
898 | |
899 | /** |
900 | * radeon_vm_bo_update - map a bo into the vm page table |
901 | * |
902 | * @rdev: radeon_device pointer |
903 | * @vm: requested vm |
904 | * @bo: radeon buffer object |
905 | * @mem: ttm mem |
906 | * |
907 | * Fill in the page table entries for @bo (cayman+). |
908 | * Returns 0 for success, -EINVAL for failure. |
909 | * |
911 | * Object has to be reserved and mutex must be locked! |
911 | */ |
912 | int radeon_vm_bo_update(struct radeon_device *rdev, |
913 | struct radeon_bo_va *bo_va, |
914 | struct ttm_resource *mem) |
915 | { |
916 | struct radeon_vm *vm = bo_va->vm; |
917 | struct radeon_ib ib; |
918 | unsigned nptes, ncmds, ndw; |
919 | uint64_t addr; |
920 | uint32_t flags; |
921 | int r; |
922 | |
923 | if (!bo_va->it.start) { |
924 | dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", |
925 | bo_va->bo, vm); |
926 | return -EINVAL; |
927 | } |
928 | |
929 | spin_lock(&vm->status_lock); |
930 | if (mem) { |
931 | if (list_empty(&bo_va->vm_status)) { |
932 | spin_unlock(&vm->status_lock); |
933 | return 0; |
934 | } |
935 | list_del_init(&bo_va->vm_status); |
936 | } else { |
937 | list_del(&bo_va->vm_status); |
938 | list_add(&bo_va->vm_status, &vm->cleared); |
939 | } |
940 | spin_unlock(&vm->status_lock); |
941 | |
942 | bo_va->flags &= ~RADEON_VM_PAGE_VALID; |
943 | bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; |
944 | bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; |
945 | if (bo_va->bo && radeon_ttm_tt_is_readonly(rdev, bo_va->bo->tbo.ttm)) |
946 | bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; |
947 | |
948 | if (mem) { |
949 | addr = (u64)mem->start << PAGE_SHIFT; |
950 | if (mem->mem_type != TTM_PL_SYSTEM) |
951 | bo_va->flags |= RADEON_VM_PAGE_VALID; |
952 | |
953 | if (mem->mem_type == TTM_PL_TT) { |
954 | bo_va->flags |= RADEON_VM_PAGE_SYSTEM; |
955 | if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC))) |
956 | bo_va->flags |= RADEON_VM_PAGE_SNOOPED; |
957 | |
958 | } else { |
959 | addr += rdev->vm_manager.vram_base_offset; |
960 | } |
961 | } else { |
962 | addr = 0; |
963 | } |
964 | |
965 | trace_radeon_vm_bo_update(bo_va); |
966 | |
967 | nptes = bo_va->it.last - bo_va->it.start + 1; |
968 | |
969 | /* reserve space for one command every (1 << BLOCK_SIZE) entries |
970 | or 2k dwords (whatever is smaller) */ |
971 | ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1; |
972 | |
973 | /* padding, etc. */ |
974 | ndw = 64; |
975 | |
976 | flags = radeon_vm_page_flags(bo_va->flags); |
977 | if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { |
978 | /* only copy commands needed */ |
979 | ndw += ncmds * 7; |
980 | |
981 | } else if (flags & R600_PTE_SYSTEM) { |
982 | /* header for write data commands */ |
983 | ndw += ncmds * 4; |
984 | |
985 | /* body of write data command */ |
986 | ndw += nptes * 2; |
987 | |
988 | } else { |
989 | /* set page commands needed */ |
990 | ndw += ncmds * 10; |
991 | |
992 | /* two extra commands for begin/end of fragment */ |
993 | ndw += 2 * 10; |
994 | } |
995 | |
996 | /* update too big for an IB */ |
997 | if (ndw > 0xfffff) |
998 | return -ENOMEM; |
999 | |
1000 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
1001 | if (r) |
1002 | return r; |
1003 | ib.length_dw = 0; |
1004 | |
1005 | if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) { |
1006 | unsigned i; |
1007 | |
1008 | for (i = 0; i < RADEON_NUM_RINGS; ++i) |
1009 | radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use); |
1010 | } |
1011 | |
1012 | r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, |
1013 | bo_va->it.last + 1, addr, |
1014 | radeon_vm_page_flags(bo_va->flags)); |
1015 | if (r) { |
1016 | radeon_ib_free(rdev, &ib); |
1017 | return r; |
1018 | } |
1019 | |
1020 | radeon_asic_vm_pad_ib(rdev, &ib); |
1021 | WARN_ON(ib.length_dw > ndw); |
1022 | |
1023 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
1024 | if (r) { |
1025 | radeon_ib_free(rdev, &ib); |
1026 | return r; |
1027 | } |
1028 | ib.fence->is_vm_update = true; |
1029 | radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); |
1030 | radeon_fence_unref(&bo_va->last_pt_update); |
1031 | bo_va->last_pt_update = radeon_fence_ref(ib.fence); |
1032 | radeon_ib_free(rdev, &ib); |
1033 | |
1034 | return 0; |
1035 | } |
1036 | |
1037 | /** |
1038 | * radeon_vm_clear_freed - clear freed BOs in the PT |
1039 | * |
1040 | * @rdev: radeon_device pointer |
1041 | * @vm: requested vm |
1042 | * |
1043 | * Make sure all freed BOs are cleared in the PT. |
1044 | * Returns 0 for success. |
1045 | * |
1046 | * PTs have to be reserved and mutex must be locked! |
1047 | */ |
1048 | int radeon_vm_clear_freed(struct radeon_device *rdev, |
1049 | struct radeon_vm *vm) |
1050 | { |
1051 | struct radeon_bo_va *bo_va; |
1052 | int r = 0; |
1053 | |
1054 | spin_lock(&vm->status_lock); |
1055 | while (!list_empty(&vm->freed)) { |
1056 | bo_va = list_first_entry(&vm->freed, |
1057 | struct radeon_bo_va, vm_status); |
1058 | spin_unlock(&vm->status_lock); |
1059 | |
1060 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1061 | radeon_bo_unref(&bo_va->bo); |
1062 | radeon_fence_unref(&bo_va->last_pt_update); |
1063 | spin_lock(&vm->status_lock); |
1064 | list_del(&bo_va->vm_status); |
1065 | kfree(bo_va); |
1066 | if (r) |
1067 | break; |
1068 | |
1069 | } |
1070 | spin_unlock(&vm->status_lock); |
1071 | return r; |
1072 | |
1073 | } |
1074 | |
1075 | /** |
1076 | * radeon_vm_clear_invalids - clear invalidated BOs in the PT |
1077 | * |
1078 | * @rdev: radeon_device pointer |
1079 | * @vm: requested vm |
1080 | * |
1081 | * Make sure all invalidated BOs are cleared in the PT. |
1082 | * Returns 0 for success. |
1083 | * |
1084 | * PTs have to be reserved and mutex must be locked! |
1085 | */ |
1086 | int radeon_vm_clear_invalids(struct radeon_device *rdev, |
1087 | struct radeon_vm *vm) |
1088 | { |
1089 | struct radeon_bo_va *bo_va; |
1090 | int r; |
1091 | |
1092 | spin_lock(&vm->status_lock); |
1093 | while (!list_empty(&vm->invalidated)) { |
1094 | bo_va = list_first_entry(&vm->invalidated, |
1095 | struct radeon_bo_va, vm_status); |
1096 | spin_unlock(&vm->status_lock); |
1097 | |
1098 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1099 | if (r) |
1100 | return r; |
1101 | |
1102 | spin_lock(&vm->status_lock); |
1103 | } |
1104 | spin_unlock(&vm->status_lock); |
1105 | |
1106 | return 0; |
1107 | } |
1108 | |
1109 | /** |
1110 | * radeon_vm_bo_rmv - remove a bo from a specific vm |
1111 | * |
1112 | * @rdev: radeon_device pointer |
1113 | * @bo_va: requested bo_va |
1114 | * |
1115 | * Remove @bo_va->bo from the requested vm (cayman+). |
1116 | * |
1117 | * Object has to be reserved! |
1118 | */ |
1119 | void radeon_vm_bo_rmv(struct radeon_device *rdev, |
1120 | struct radeon_bo_va *bo_va) |
1121 | { |
1122 | struct radeon_vm *vm = bo_va->vm; |
1123 | |
1124 | list_del(&bo_va->bo_list); |
1125 | |
1126 | mutex_lock(&vm->mutex); |
1127 | if (bo_va->it.start || bo_va->it.last) |
1128 | interval_tree_remove(&bo_va->it, &vm->va); |
1129 | |
1130 | spin_lock(&vm->status_lock); |
1131 | list_del(&bo_va->vm_status); |
1132 | if (bo_va->it.start || bo_va->it.last) { |
1133 | bo_va->bo = radeon_bo_ref(bo_va->bo); |
1134 | list_add(&bo_va->vm_status, &vm->freed); |
1135 | } else { |
1136 | radeon_fence_unref(&bo_va->last_pt_update); |
1137 | kfree(bo_va); |
1138 | } |
1139 | spin_unlock(&vm->status_lock); |
1140 | |
1141 | mutex_unlock(&vm->mutex); |
1142 | } |
1143 | |
1144 | /** |
1145 | * radeon_vm_bo_invalidate - mark the bo as invalid |
1146 | * |
1147 | * @rdev: radeon_device pointer |
1148 | * @vm: requested vm |
1149 | * @bo: radeon buffer object |
1150 | * |
1151 | * Mark @bo as invalid (cayman+). |
1152 | */ |
1153 | void radeon_vm_bo_invalidate(struct radeon_device *rdev, |
1154 | struct radeon_bo *bo) |
1155 | { |
1156 | struct radeon_bo_va *bo_va; |
1157 | |
1158 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
1159 | spin_lock(&bo_va->vm->status_lock); |
1160 | if (list_empty(&bo_va->vm_status) && |
1161 | (bo_va->it.start || bo_va->it.last)) |
1162 | list_add(&bo_va->vm_status, &bo_va->vm->invalidated); |
1163 | spin_unlock(&bo_va->vm->status_lock); |
1164 | } |
1165 | } |
1166 | |
1167 | /** |
1168 | * radeon_vm_init - initialize a vm instance |
1169 | * |
1170 | * @rdev: radeon_device pointer |
1171 | * @vm: requested vm |
1172 | * |
1173 | * Init @vm fields (cayman+). |
1174 | */ |
1175 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
1176 | { |
1177 | const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, |
1178 | RADEON_VM_PTE_COUNT * 8); |
1179 | unsigned pd_size, pd_entries, pts_size; |
1180 | int i, r; |
1181 | |
1182 | vm->ib_bo_va = NULL; |
1183 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1184 | vm->ids[i].id = 0; |
1185 | vm->ids[i].flushed_updates = NULL; |
1186 | vm->ids[i].last_id_use = NULL; |
1187 | } |
1188 | rw_init(&vm->mutex, "vmlk"); |
1189 | vm->va = RB_ROOT_CACHED; |
1190 | mtx_init(&vm->status_lock, IPL_TTY); |
1191 | INIT_LIST_HEAD(&vm->invalidated); |
1192 | INIT_LIST_HEAD(&vm->freed); |
1193 | INIT_LIST_HEAD(&vm->cleared); |
1194 | |
1195 | pd_size = radeon_vm_directory_size(rdev); |
1196 | pd_entries = radeon_vm_num_pdes(rdev); |
1197 | |
1198 | /* allocate page table array */ |
1199 | pts_size = pd_entries * sizeof(struct radeon_vm_pt); |
1200 | vm->page_tables = kzalloc(pts_size, GFP_KERNEL); |
1201 | if (vm->page_tables == NULL) { |
1202 | DRM_ERROR("Cannot allocate memory for page table array\n"); |
1203 | return -ENOMEM; |
1204 | } |
1205 | |
1206 | r = radeon_bo_create(rdev, pd_size, align, true, |
1207 | RADEON_GEM_DOMAIN_VRAM, 0, NULL, |
1208 | NULL, &vm->page_directory); |
1209 | if (r) |
1210 | return r; |
1211 | |
1212 | r = radeon_vm_clear_bo(rdev, vm->page_directory); |
1213 | if (r) { |
1214 | radeon_bo_unref(&vm->page_directory); |
1215 | vm->page_directory = NULL; |
1216 | return r; |
1217 | } |
1218 | |
1219 | return 0; |
1220 | } |
1221 | |
1222 | /** |
1223 | * radeon_vm_fini - tear down a vm instance |
1224 | * |
1225 | * @rdev: radeon_device pointer |
1226 | * @vm: requested vm |
1227 | * |
1228 | * Tear down @vm (cayman+). |
1229 | * Unbind the VM and remove all bos from the vm bo list |
1230 | */ |
1231 | void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) |
1232 | { |
1233 | struct radeon_bo_va *bo_va, *tmp; |
1234 | int i, r; |
1235 | |
1236 | if (!RB_EMPTY_ROOT(&vm->va.rb_root)) |
1237 | dev_err(rdev->dev, "still active bo inside vm\n"); |
1238 | |
1239 | rbtree_postorder_for_each_entry_safe(bo_va, tmp, |
1240 | &vm->va.rb_root, it.rb) { |
1241 | interval_tree_remove(&bo_va->it, &vm->va); |
1242 | r = radeon_bo_reserve(bo_va->bo, false); |
1243 | if (!r) { |
1244 | list_del_init(&bo_va->bo_list); |
1245 | radeon_bo_unreserve(bo_va->bo); |
1246 | radeon_fence_unref(&bo_va->last_pt_update); |
1247 | kfree(bo_va); |
1248 | } |
1249 | } |
1250 | list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { |
1251 | radeon_bo_unref(&bo_va->bo); |
1252 | radeon_fence_unref(&bo_va->last_pt_update); |
1253 | kfree(bo_va); |
1254 | } |
1255 | |
1256 | for (i = 0; i < radeon_vm_num_pdes(rdev); i++) |
1257 | radeon_bo_unref(&vm->page_tables[i].bo); |
1258 | kfree(vm->page_tables); |
1259 | |
1260 | radeon_bo_unref(&vm->page_directory); |
1261 | |
1262 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1263 | radeon_fence_unref(&vm->ids[i].flushed_updates); |
1264 | radeon_fence_unref(&vm->ids[i].last_id_use); |
1265 | } |
1266 | |
1267 | mutex_destroy(&vm->mutex); |
1268 | } |