File: dev/pci/drm/i915/gem/i915_gem_ttm_move.c
Warning: line 331, column 2: 1st function call argument is an uninitialized value
1 | // SPDX-License-Identifier: MIT
2 | /*
3 |  * Copyright © 2021 Intel Corporation
4 |  */
5 |
6 | #include <drm/ttm/ttm_bo_driver.h>
7 |
8 | #include "i915_deps.h"
9 | #include "i915_drv.h"
10 | #include "intel_memory_region.h"
11 | #include "intel_region_ttm.h"
12 |
13 | #include "gem/i915_gem_object.h"
14 | #include "gem/i915_gem_region.h"
15 | #include "gem/i915_gem_ttm.h"
16 | #include "gem/i915_gem_ttm_move.h"
17 |
18 | #include "gt/intel_engine_pm.h"
19 | #include "gt/intel_gt.h"
20 | #include "gt/intel_migrate.h"
21 |
22 | /**
23 |  * DOC: Selftest failure modes for failsafe migration:
24 |  *
25 |  * For fail_gpu_migration, the scheduled gpu blit is always a clear blit
26 |  * rather than a copy blit, and we then force the failure paths as if
27 |  * the blit fence had returned an error.
28 |  *
29 |  * For fail_work_allocation we fail the kmalloc of the async worker and
30 |  * instead sync the gpu blit. If that then fails, or fail_gpu_migration
31 |  * is set to true, a memcpy operation is performed synchronously.
32 |  */
33 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
34 | static bool fail_gpu_migration;
35 | static bool fail_work_allocation;
36 | static bool ban_memcpy;
37 |
38 | void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
39 | 					bool work_allocation)
40 | {
41 | 	fail_gpu_migration = gpu_migration;
42 | 	fail_work_allocation = work_allocation;
43 | }
44 |
45 | void i915_ttm_migrate_set_ban_memcpy(bool ban)
46 | {
47 | 	ban_memcpy = ban;
48 | }
49 | #endif
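 | /*
 |  * Usage sketch (hypothetical caller, not part of this file): a live
 |  * selftest exercising the failsafe paths would bracket a migration with
 |  * the failure-mode setters, e.g.
 |  *
 |  *	i915_ttm_migrate_set_failure_modes(true, false);
 |  *	err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
 |  *	i915_ttm_migrate_set_failure_modes(false, false);
 |  *
 |  * forcing the scheduled blit down the error path so that the async
 |  * memcpy worker has to complete the move.
 |  */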
50 |
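 | /*
 |  * Pick the GEM cache level for a TTM resource: I915_CACHE_LLC only when
 |  * the platform can cache coherently (HAS_LLC or HAS_SNOOP), the
 |  * resource is not GPU-bound to lmem and the ttm_tt is CPU-cached;
 |  * I915_CACHE_NONE otherwise.
 |  */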
51 | static enum i915_cache_level
52 | i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
53 | 		     struct ttm_tt *ttm)
54 | {
55 | 	return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
56 | 		!i915_ttm_gtt_binds_lmem(res) &&
57 | 		ttm->caching == ttm_cached) ? I915_CACHE_LLC :
58 | 		I915_CACHE_NONE;
59 | }
60 |
61 | static struct intel_memory_region *
62 | i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
63 | {
64 | 	struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
65 |
66 | 	/* There's some room for optimization here... */
67 | 	GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
68 | 		   ttm_mem_type < I915_PL_LMEM0);
69 | 	if (ttm_mem_type == I915_PL_SYSTEM)
70 | 		return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
71 | 						  0);
72 |
73 | 	return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
74 | 					  ttm_mem_type - I915_PL_LMEM0);
75 | }
76 |
77 | /**
78 |  * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
79 |  * TTM move
80 |  * @obj: The gem object
81 |  */
82 | void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
83 | {
84 | 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
85 |
86 | 	if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
87 | 		obj->write_domain = I915_GEM_DOMAIN_WC;
88 | 		obj->read_domains = I915_GEM_DOMAIN_WC;
89 | 	} else {
90 | 		obj->write_domain = I915_GEM_DOMAIN_CPU;
91 | 		obj->read_domains = I915_GEM_DOMAIN_CPU;
92 | 	}
93 | }
94 |
95 | /**
96 |  * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
97 |  * @obj: The gem object
98 |  *
99 |  * Adjusts the GEM object's region, mem_flags and cache coherency after a
100 |  * TTM move.
101 |  */
102 | void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
103 | {
104 | 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
105 | 	unsigned int cache_level;
106 | 	unsigned int i;
107 |
108 | 	/*
109 | 	 * If object was moved to an allowable region, update the object
110 | 	 * region to consider it migrated. Note that if it's currently not
111 | 	 * in an allowable region, it's evicted and we don't update the
112 | 	 * object region.
113 | 	 */
114 | 	if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
115 | 		for (i = 0; i < obj->mm.n_placements; ++i) {
116 | 			struct intel_memory_region *mr = obj->mm.placements[i];
117 |
118 | 			if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
119 | 			    mr != obj->mm.region) {
120 | 				i915_gem_object_release_memory_region(obj);
121 | 				i915_gem_object_init_memory_region(obj, mr);
122 | 				break;
123 | 			}
124 | 		}
125 | 	}
126 |
127 | 	obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
128 |
129 | 	obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
130 | 		I915_BO_FLAG_STRUCT_PAGE;
131 |
132 | 	cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
133 | 					   bo->ttm);
134 | 	i915_gem_object_set_cache_coherency(obj, cache_level);
135 | }
136 |
137 | /**
138 |  * i915_ttm_move_notify - Prepare an object for move
139 |  * @bo: The ttm buffer object.
140 |  *
141 |  * This function prepares an object for move by removing all GPU bindings,
142 |  * removing all CPU mappings and finally releasing the pages sg-table.
143 |  *
144 |  * Return: 0 if successful, negative error code on error.
145 |  */
146 | int i915_ttm_move_notify(struct ttm_buffer_object *bo)
147 | {
148 | 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
149 | 	int ret;
150 |
151 | 	/*
152 | 	 * Note: The async unbinding here will actually transform the
153 | 	 * blocking wait for unbind into a wait before finally submitting
154 | 	 * evict / migration blit and thus stall the migration timeline
155 | 	 * which may not be good for overall throughput. We should make
156 | 	 * sure we await the unbind fences *after* the migration blit
157 | 	 * instead of *before* as we currently do.
158 | 	 */
159 | 	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
160 | 				     I915_GEM_OBJECT_UNBIND_ASYNC);
161 | 	if (ret)
162 | 		return ret;
163 |
164 | 	ret = __i915_gem_object_put_pages(obj);
165 | 	if (ret)
166 | 		return ret;
167 |
168 | 	return 0;
169 | }
170 |
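 | /*
 |  * Schedule a migration blit through the GT migrate context: a clear of
 |  * the destination when @clear is set, otherwise a copy from the current
 |  * resource.  Returns the request's fence on success, or an ERR_PTR when
 |  * no usable migrate context exists or the blit could not be scheduled;
 |  * if scheduling fails after a request was created, the request is
 |  * waited upon and released before returning.
 |  */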
171 | static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
172 | 					     bool clear,
173 | 					     struct ttm_resource *dst_mem,
174 | 					     struct ttm_tt *dst_ttm,
175 | 					     struct sg_table *dst_st,
176 | 					     const struct i915_deps *deps)
177 | {
178 | 	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
179 | 						     bdev);
180 | 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
181 | 	struct i915_request *rq;
182 | 	struct ttm_tt *src_ttm = bo->ttm;
183 | 	enum i915_cache_level src_level, dst_level;
184 | 	int ret;
185 |
186 | 	if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
187 | 		return ERR_PTR(-EINVAL);
188 |
189 | 	/* With fail_gpu_migration, we always perform a GPU clear. */
190 | 	if (I915_SELFTEST_ONLY(fail_gpu_migration))
191 | 		clear = true;
192 |
193 | 	dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
194 | 	if (clear) {
195 | 		if (bo->type == ttm_bo_type_kernel &&
196 | 		    !I915_SELFTEST_ONLY(fail_gpu_migration))
197 | 			return ERR_PTR(-EINVAL);
198 |
199 | 		intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
200 | 		ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
201 | 						  dst_st->sgl, dst_level,
202 | 						  i915_ttm_gtt_binds_lmem(dst_mem),
203 | 						  0, &rq);
204 | 	} else {
205 | 		struct i915_refct_sgt *src_rsgt =
206 | 			i915_ttm_resource_get_st(obj, bo->resource);
207 |
208 | 		if (IS_ERR(src_rsgt))
209 | 			return ERR_CAST(src_rsgt);
210 |
211 | 		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
212 | 		intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
213 | 		ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
214 | 						 deps, src_rsgt->table.sgl,
215 | 						 src_level,
216 | 						 i915_ttm_gtt_binds_lmem(bo->resource),
217 | 						 dst_st->sgl, dst_level,
218 | 						 i915_ttm_gtt_binds_lmem(dst_mem),
219 | 						 &rq);
220 |
221 | 		i915_refct_sgt_put(src_rsgt);
222 | 	}
223 |
224 | 	intel_engine_pm_put(to_gt(i915)->migrate.context->engine);
225 |
226 | 	if (ret && rq) {
227 | 		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
228 | 		i915_request_put(rq);
229 | 	}
230 |
231 | 	return ret ? ERR_PTR(ret) : &rq->fence;
232 | }
233 |
234 | /**
235 |  * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
236 |  * @_dst_iter: Storage space for the destination kmap iterator.
237 |  * @_src_iter: Storage space for the source kmap iterator.
238 |  * @dst_iter: Pointer to the destination kmap iterator.
239 |  * @src_iter: Pointer to the source kmap iterator.
 |  * @num_pages: Number of pages to copy or clear.
240 |  * @clear: Whether to clear instead of copy.
241 |  * @src_rsgt: Refcounted scatter-gather list of source memory.
242 |  * @dst_rsgt: Refcounted scatter-gather list of destination memory.
243 |  */
244 | struct i915_ttm_memcpy_arg {
245 | 	union {
246 | 		struct ttm_kmap_iter_tt tt;
247 | 		struct ttm_kmap_iter_iomap io;
248 | 	} _dst_iter,
249 | 	_src_iter;
250 | 	struct ttm_kmap_iter *dst_iter;
251 | 	struct ttm_kmap_iter *src_iter;
252 | 	unsigned long num_pages;
253 | 	bool clear;
254 | 	struct i915_refct_sgt *src_rsgt;
255 | 	struct i915_refct_sgt *dst_rsgt;
256 | };
257 |
258 | /**
259 |  * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
260 |  * @fence: The dma-fence.
261 |  * @work: The work struct used for the memcpy work.
262 |  * @lock: The fence lock. Not used to protect anything else ATM.
263 |  * @irq_work: Low latency worker to signal the fence since it can't be done
264 |  * from the callback for lockdep reasons.
265 |  * @cb: Callback for the accelerated migration fence.
266 |  * @arg: The argument for the memcpy functionality.
267 |  * @i915: The i915 pointer.
268 |  * @obj: The GEM object.
269 |  * @memcpy_allowed: If false then, instead of processing the @arg and falling
270 |  * back to memcpy or memset, we wedge the device and set the @obj
271 |  * unknown_state, to prevent further access to the object with the CPU or
272 |  * GPU. On some devices we might only be permitted to use the blitter engine
273 |  * for such operations.
274 |  */
275 | struct i915_ttm_memcpy_work {
276 | 	struct dma_fence fence;
277 | 	struct work_struct work;
278 | 	spinlock_t lock;
279 | 	struct irq_work irq_work;
280 | 	struct dma_fence_cb cb;
281 | 	struct i915_ttm_memcpy_arg arg;
282 | 	struct drm_i915_private *i915;
283 | 	struct drm_i915_gem_object *obj;
284 | 	bool memcpy_allowed;
285 | };
285 |
286 | static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
287 | {
288 | 	STUB();
289 | #ifdef notyet
290 | 	ttm_move_memcpy(arg->clear, arg->num_pages,
291 | 			arg->dst_iter, arg->src_iter);
292 | #endif
293 | }
294 |
295 | static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
296 | 				 struct ttm_buffer_object *bo, bool clear,
297 | 				 struct ttm_resource *dst_mem,
298 | 				 struct ttm_tt *dst_ttm,
299 | 				 struct i915_refct_sgt *dst_rsgt)
300 | {
301 | 	STUB();
302 | #ifdef notyet
303 | 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
304 | 	struct intel_memory_region *dst_reg, *src_reg;
305 |
306 | 	dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
307 | 	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
308 | 	GEM_BUG_ON(!dst_reg || !src_reg);
309 |
310 | 	arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
311 | 		ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
312 | 		ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
313 | 					 &dst_rsgt->table, dst_reg->region.start);
314 |
315 | 	arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
316 | 		ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
317 | 		ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
318 | 					 &obj->ttm.cached_io_rsgt->table,
319 | 					 src_reg->region.start);
320 | 	arg->clear = clear;
321 | 	arg->num_pages = bo->base.size >> PAGE_SHIFT;
322 |
323 | 	arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
324 | 	arg->src_rsgt = clear ? NULL :
325 | 		i915_ttm_resource_get_st(obj, bo->resource);
326 | #endif
327 | }
328 |
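 | /*
 |  * Note on the analyzer warning (line 331, see the header): in this port
 |  * i915_ttm_memcpy_init() and i915_ttm_move_memcpy() are stubbed out, so
 |  * the i915_ttm_memcpy_arg they were supposed to fill in is never
 |  * written.  A stack-allocated arg (_arg in __i915_ttm_move() below) can
 |  * therefore reach i915_ttm_memcpy_release() with src_rsgt and dst_rsgt
 |  * uninitialized, which is exactly what the report points at.
 |  */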
329 | static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
330 | {
331 | 	i915_refct_sgt_put(arg->src_rsgt);
332 | 	i915_refct_sgt_put(arg->dst_rsgt);
333 | }
334 |
335 | static void __memcpy_work(struct work_struct *work)
336 | {
337 | 	struct i915_ttm_memcpy_work *copy_work =
338 | 		container_of(work, typeof(*copy_work), work);
339 | 	struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
340 | 	bool cookie;
341 |
342 | 	/*
343 | 	 * FIXME: We need to take a closer look here. We should be able to plonk
344 | 	 * this into the fence critical section.
345 | 	 */
346 | 	if (!copy_work->memcpy_allowed) {
347 | 		struct intel_gt *gt;
348 | 		unsigned int id;
349 |
350 | 		for_each_gt(gt, copy_work->i915, id)
351 | 			intel_gt_set_wedged(gt);
352 | 	}
353 |
354 | 	cookie = dma_fence_begin_signalling();
355 |
356 | 	if (copy_work->memcpy_allowed) {
357 | 		i915_ttm_move_memcpy(arg);
358 | 	} else {
359 | 		/*
360 | 		 * Prevent further use of the object. Any future GTT binding or
361 | 		 * CPU access is not allowed once we signal the fence. Outside
362 | 		 * of the fence critical section, we then also wedge the gpu to
363 | 		 * indicate the device is not functional.
364 | 		 *
365 | 		 * The below dma_fence_signal() is our write-memory-barrier.
366 | 		 */
367 | 		copy_work->obj->mm.unknown_state = true;
368 | 	}
369 |
370 | 	dma_fence_end_signalling(cookie);
371 |
372 | 	dma_fence_signal(&copy_work->fence);
373 |
374 | 	i915_ttm_memcpy_release(arg);
375 | 	i915_gem_object_put(copy_work->obj);
376 | 	dma_fence_put(&copy_work->fence);
377 | }
378 |
379 | static void __memcpy_irq_work(struct irq_work *irq_work)
380 | {
381 | 	struct i915_ttm_memcpy_work *copy_work =
382 | 		container_of(irq_work, typeof(*copy_work), irq_work);
383 | 	struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
384 |
385 | 	dma_fence_signal(&copy_work->fence);
386 | 	i915_ttm_memcpy_release(arg);
387 | 	i915_gem_object_put(copy_work->obj);
388 | 	dma_fence_put(&copy_work->fence);
389 | }
390 |
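 | /*
 |  * Fence callback for the accelerated migration fence.  On error (or a
 |  * forced selftest failure) the memcpy fallback must run in process
 |  * context, so it is punted to system_unbound_wq; otherwise the memcpy
 |  * fence is signalled directly from low-latency irq_work.
 |  */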
391 | static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
392 | {
393 | 	struct i915_ttm_memcpy_work *copy_work =
394 | 		container_of(cb, typeof(*copy_work), cb);
395 |
396 | 	if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
397 | 		INIT_WORK(&copy_work->work, __memcpy_work);
398 | 		queue_work(system_unbound_wq, &copy_work->work);
399 | 	} else {
400 | 		init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
401 | 		irq_work_queue(&copy_work->irq_work);
402 | 	}
403 | }
404 |
405 | static const char *get_driver_name(struct dma_fence *fence)
406 | {
407 | 	return "i915_ttm_memcpy_work";
408 | }
409 |
410 | static const char *get_timeline_name(struct dma_fence *fence)
411 | {
412 | 	return "unbound";
413 | }
414 |
415 | static const struct dma_fence_ops dma_fence_memcpy_ops = {
416 | 	.get_driver_name = get_driver_name,
417 | 	.get_timeline_name = get_timeline_name,
418 | };
419 |
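 | /*
 |  * Arm @work to run when @dep signals.  On success the memcpy work fence
 |  * is returned.  If @dep has already signalled, no callback is installed
 |  * and an ERR_PTR encoding dep->error is returned instead; note that
 |  * this is NULL when @dep completed without error, which the caller
 |  * handles under its out: label.
 |  */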
420 | static struct dma_fence *
421 | i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
422 | 			 struct dma_fence *dep)
423 | {
424 | 	int ret;
425 |
426 | 	mtx_init(&work->lock, IPL_TTY);
427 | 	dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
428 | 	dma_fence_get(&work->fence);
429 | 	ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
430 | 	if (ret) {
431 | 		if (ret != -ENOENT)
432 | 			dma_fence_wait(dep, false);
433 |
434 | 		return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
435 | 			       dep->error);
436 | 	}
437 |
438 | 	return &work->fence;
439 | }
440 |
441 | static bool i915_ttm_memcpy_allowed(struct ttm_buffer_object *bo,
442 | 				    struct ttm_resource *dst_mem)
443 | {
444 | 	if (i915_gem_object_needs_ccs_pages(i915_ttm_to_gem(bo)))
445 | 		return false;
446 |
447 | 	if (!(i915_ttm_resource_mappable(bo->resource) &&
448 | 	      i915_ttm_resource_mappable(dst_mem)))
449 | 		return false;
450 |
451 | 	return I915_SELFTEST_ONLY(ban_memcpy) ? false : true;
452 | }
453 |
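 | /*
 |  * Helper implementing the move strategy: try an accelerated blit first
 |  * (when @allow_accel), then arm the memcpy error intercept on top of
 |  * the blit fence, and otherwise fall back to a synchronous memcpy.
 |  * Returns a fence to wait on, NULL when the move already completed
 |  * synchronously, or an ERR_PTR (-EIO when the contents could not be
 |  * preserved by memcpy either).
 |  */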
454 | static struct dma_fence *
455 | __i915_ttm_move(struct ttm_buffer_object *bo,
456 | 		const struct ttm_operation_ctx *ctx, bool clear,
457 | 		struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
458 | 		struct i915_refct_sgt *dst_rsgt, bool allow_accel,
459 | 		const struct i915_deps *move_deps)
460 | {
461 | 	const bool memcpy_allowed = i915_ttm_memcpy_allowed(bo, dst_mem);
462 | 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
463 | 	struct drm_i915_private *i915 = to_i915(bo->base.dev);
464 | 	struct i915_ttm_memcpy_work *copy_work = NULL;
465 | 	struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
466 | 	struct dma_fence *fence = ERR_PTR(-EINVAL);
467 |
468 | 	if (allow_accel) {
469 | 		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
470 | 					    &dst_rsgt->table, move_deps);
471 |
472 | 		/*
473 | 		 * We only need to intercept the error when moving to lmem.
474 | 		 * When moving to system, TTM or shmem will provide us with
475 | 		 * cleared pages.
476 | 		 */
477 | 		if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
478 | 		    !I915_SELFTEST_ONLY(fail_gpu_migration ||
479 | 					fail_work_allocation))
480 | 			goto out;
481 | 	}
482 |
483 | 	/* If we've scheduled gpu migration, try to arm the error intercept. */
484 | 	if (!IS_ERR(fence)) {
485 | 		struct dma_fence *dep = fence;
486 |
487 | 		if (!I915_SELFTEST_ONLY(fail_work_allocation))
488 | 			copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
489 |
490 | 		if (copy_work) {
491 | 			copy_work->i915 = i915;
492 | 			copy_work->memcpy_allowed = memcpy_allowed;
493 | 			copy_work->obj = i915_gem_object_get(obj);
494 | 			arg = &copy_work->arg;
495 | 			if (memcpy_allowed)
496 | 				i915_ttm_memcpy_init(arg, bo, clear, dst_mem,
497 | 						     dst_ttm, dst_rsgt);
498 |
499 | 			fence = i915_ttm_memcpy_work_arm(copy_work, dep);
500 | 		} else {
501 | 			dma_fence_wait(dep, false);
502 | 			fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
503 | 					-EINVAL : fence->error);
504 | 		}
505 | 		dma_fence_put(dep);
506 |
507 | 		if (!IS_ERR(fence))
508 | 			goto out;
509 | 	} else {
510 | 		int err = PTR_ERR(fence);
511 |
512 | 		if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN)
513 | 			return fence;
514 |
515 | 		if (move_deps) {
516 | 			err = i915_deps_sync(move_deps, ctx);
517 | 			if (err)
518 | 				return ERR_PTR(err);
519 | 		}
520 | 	}
521 |
522 | 	/* Error intercept failed or no accelerated migration to start with */
523 |
524 | 	if (memcpy_allowed) {
525 | 		if (!copy_work)
526 | 			i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
527 | 					     dst_rsgt);
528 | 		i915_ttm_move_memcpy(arg);
529 | 		i915_ttm_memcpy_release(arg);
530 | 	}
531 | 	if (copy_work)
532 | 		i915_gem_object_put(copy_work->obj);
533 | 	kfree(copy_work);
534 |
535 | 	return memcpy_allowed ? NULL : ERR_PTR(-EIO);
536 | out:
537 | 	if (!fence && copy_work) {
538 | 		i915_ttm_memcpy_release(arg);
539 | 		i915_gem_object_put(copy_work->obj);
540 | 		kfree(copy_work);
541 | 	}
542 |
543 | 	return fence;
544 | }
545 |
546 | /**
547 |  * i915_ttm_move - The TTM move callback used by i915.
548 |  * @bo: The buffer object.
549 |  * @evict: Whether this is an eviction.
 |  * @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits
 |  * should be done.
550 |  * @dst_mem: The destination ttm resource.
551 |  * @hop: If we need multihop, what temporary memory type to move to.
552 |  *
553 |  * Return: 0 if successful, negative error code otherwise.
554 |  */
555 | int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
556 | 		  struct ttm_operation_ctx *ctx,
557 | 		  struct ttm_resource *dst_mem,
558 | 		  struct ttm_place *hop)
559 | {
560 | 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
561 | 	struct ttm_resource_manager *dst_man =
562 | 		ttm_manager_type(bo->bdev, dst_mem->mem_type);
563 | 	struct dma_fence *migration_fence = NULL;
564 | 	struct ttm_tt *ttm = bo->ttm;
565 | 	struct i915_refct_sgt *dst_rsgt;
566 | 	bool clear;
567 | 	int ret;
568 |
569 | 	if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
570 | 		ttm_bo_move_null(bo, dst_mem);
571 | 		return 0;
572 | 	}
573 |
574 | 	ret = i915_ttm_move_notify(bo);
575 | 	if (ret)
576 | 		return ret;
577 |
578 | 	if (obj->mm.madv != I915_MADV_WILLNEED) {
579 | 		i915_ttm_purge(obj);
580 | 		ttm_resource_free(bo, &dst_mem);
581 | 		return 0;
582 | 	}
583 |
584 | 	/* Populate ttm with pages if needed. Typically system memory. */
585 | 	if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
586 | 		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
587 | 		if (ret)
588 | 			return ret;
589 | 	}
590 |
591 | 	dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
592 | 	if (IS_ERR(dst_rsgt))
593 | 		return PTR_ERR(dst_rsgt);
594 |
595 | 	clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
596 | 	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
597 | 		struct i915_deps deps;
598 |
599 | 		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
600 | 		ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
601 | 		if (ret) {
602 | 			i915_refct_sgt_put(dst_rsgt);
603 | 			return ret;
604 | 		}
605 |
606 | 		migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, ttm,
607 | 						  dst_rsgt, true, &deps);
608 | 		i915_deps_fini(&deps);
609 | 	}
610 |
611 | 	/* We can possibly get an -ERESTARTSYS here */
612 | 	if (IS_ERR(migration_fence)) {
613 | 		i915_refct_sgt_put(dst_rsgt);
614 | 		return PTR_ERR(migration_fence);
615 | 	}
616 |
617 | 	if (migration_fence) {
618 | 		if (I915_SELFTEST_ONLY(evict && fail_gpu_migration))
619 | 			ret = -EIO; /* never feed non-migrate fences into ttm */
620 | 		else
621 | 			ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
622 | 							true, dst_mem);
623 | 		if (ret) {
624 | 			dma_fence_wait(migration_fence, false);
625 | 			ttm_bo_move_sync_cleanup(bo, dst_mem);
626 | 		}
627 | 		dma_fence_put(migration_fence);
628 | 	} else {
629 | 		ttm_bo_move_sync_cleanup(bo, dst_mem);
630 | 	}
631 |
632 | 	i915_ttm_adjust_domains_after_move(obj);
633 | 	i915_ttm_free_cached_io_rsgt(obj);
634 |
635 | 	if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
636 | 		obj->ttm.cached_io_rsgt = dst_rsgt;
637 | 		obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
638 | 		obj->ttm.get_io_page.sg_idx = 0;
639 | 	} else {
640 | 		i915_refct_sgt_put(dst_rsgt);
641 | 	}
642 |
643 | 	i915_ttm_adjust_lru(obj);
644 | 	i915_ttm_adjust_gem_after_move(obj);
645 | 	return 0;
646 | }
647 |
648 | /**
649 |  * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
650 |  * another
651 |  * @dst: The destination object
652 |  * @src: The source object
653 |  * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
654 |  * @intr: Whether to perform waits interruptibly.
655 |  *
656 |  * Note: The caller is responsible for ensuring that the underlying
657 |  * TTM objects are populated if needed and locked.
658 |  *
659 |  * Return: Zero on success. Negative error code on error. If @intr == true,
660 |  * then it may return -ERESTARTSYS or -EINTR.
661 |  */
662 | int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
663 | 			  struct drm_i915_gem_object *src,
664 | 			  bool allow_accel, bool intr)
665 | {
666 | 	struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
667 | 	struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
668 | 	struct ttm_operation_ctx ctx = {
669 | 		.interruptible = intr,
670 | 	};
671 | 	struct i915_refct_sgt *dst_rsgt;
672 | 	struct dma_fence *copy_fence;
673 | 	struct i915_deps deps;
674 | 	int ret;
675 |
676 | 	assert_object_held(dst);
677 | 	assert_object_held(src);
678 | 	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
679 |
680 | 	ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
681 | 	if (ret)
682 | 		return ret;
683 |
684 | 	ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);
685 | 	if (ret)
686 | 		return ret;
687 |
688 | 	ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
689 | 	if (ret)
690 | 		return ret;
691 |
692 | 	ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
693 | 	if (ret)
694 | 		return ret;
695 |
696 | 	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
697 | 	copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
698 | 				     dst_bo->ttm, dst_rsgt, allow_accel,
699 | 				     &deps);
700 |
701 | 	i915_deps_fini(&deps);
702 | 	i915_refct_sgt_put(dst_rsgt);
703 | 	if (IS_ERR_OR_NULL(copy_fence))
704 | 		return PTR_ERR_OR_ZERO(copy_fence);
705 |
706 | 	dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
707 | 	dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ);
708 | 	dma_fence_put(copy_fence);
709 |
710 | 	return 0;
711 | }
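 | /*
 |  * Usage sketch (hypothetical, loosely mirroring the suspend/resume
 |  * backup path): both objects must be locked, and their pages populated,
 |  * before the copy, e.g. with a ww context declared as
 |  * struct i915_gem_ww_ctx ww; int err;
 |  *
 |  *	for_i915_gem_ww(&ww, err, true) {
 |  *		err = i915_gem_object_lock(dst, &ww);
 |  *		if (err)
 |  *			continue;
 |  *		err = i915_gem_object_lock(src, &ww);
 |  *		if (err)
 |  *			continue;
 |  *		(populate or pin pages on both objects as required)
 |  *		err = i915_gem_obj_copy_ttm(dst, src, true, true);
 |  *	}
 |  */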