Bug Summary

File: dev/pci/drm/i915/gem/i915_gem_execbuffer.c
Warning: line 3056, column 3
Value stored to 'syncobj' is never read
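
The message comes from the analyzer's dead-store checker (deadcode.DeadStores, enabled through -analyzer-checker=deadcode in the invocation below): the value assigned to 'syncobj' at line 3056 is never read on any later path, so the store has no effect. Line 3056 itself lies outside this excerpt; the sketch below, with hypothetical names, only illustrates the pattern the checker reports. The usual fix is either to drop the redundant assignment or to actually consume the stored value.

/* Hypothetical sketch of a dead store, not the driver code at line 3056. */
static int pick_handle(const int *handles, int n)
{
	int syncobj;

	syncobj = handles[0];	/* "Value stored to 'syncobj' is never read": */
	if (n < 2)		/* this path returns without using it, and    */
		return -1;	/* the path below overwrites it before any    */
	syncobj = handles[1];	/* read, so the first store is dead.          */
	return syncobj;
}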

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name i915_gem_execbuffer.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
1/*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2008,2010 Intel Corporation
5 */
6
7#include <linux/dma-resv.h>
8#include <linux/highmem.h>
9#include <linux/sync_file.h>
10#include <linux/uaccess.h>
11
12#include <drm/drm_syncobj.h>
13
14#include <dev/pci/pcivar.h>
15#include <dev/pci/agpvar.h>
16
17#include "display/intel_frontbuffer.h"
18
19#include "gem/i915_gem_ioctls.h"
20#include "gt/intel_context.h"
21#include "gt/intel_gpu_commands.h"
22#include "gt/intel_gt.h"
23#include "gt/intel_gt_buffer_pool.h"
24#include "gt/intel_gt_pm.h"
25#include "gt/intel_ring.h"
26
27#include "pxp/intel_pxp.h"
28
29#include "i915_cmd_parser.h"
30#include "i915_drv.h"
31#include "i915_file_private.h"
32#include "i915_gem_clflush.h"
33#include "i915_gem_context.h"
34#include "i915_gem_evict.h"
35#include "i915_gem_ioctls.h"
36#include "i915_trace.h"
37#include "i915_user_extensions.h"
38
39struct eb_vma {
40 struct i915_vma *vma;
41 unsigned int flags;
42
43 /** This vma's place in the execbuf reservation list */
44 struct drm_i915_gem_exec_object2 *exec;
45 struct list_head bind_link;
46 struct list_head reloc_link;
47
48 struct hlist_node node;
49 u32 handle;
50};
51
52enum {
53 FORCE_CPU_RELOC = 1,
54 FORCE_GTT_RELOC,
55 FORCE_GPU_RELOC,
56#define DBG_FORCE_RELOC0 0 /* choose one of the above! */
57};
58
59/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
60#define __EXEC_OBJECT_HAS_PIN BIT(30)
61#define __EXEC_OBJECT_HAS_FENCE BIT(29)
62#define __EXEC_OBJECT_USERPTR_INIT BIT(28)
63#define __EXEC_OBJECT_NEEDS_MAP BIT(27)
64#define __EXEC_OBJECT_NEEDS_BIAS BIT(26)
65#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 26) /* all of the above + */
66#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
67
68#define __EXEC_HAS_RELOC BIT(31)
69#define __EXEC_ENGINE_PINNED BIT(30)
70#define __EXEC_USERPTR_USED BIT(29)
71#define __EXEC_INTERNAL_FLAGS (~0u << 29)
72#define UPDATE PIN_OFFSET_FIXED
73
74#define BATCH_OFFSET_BIAS (256*1024)
75
76#define __I915_EXEC_ILLEGAL_FLAGS \
77 (__I915_EXEC_UNKNOWN_FLAGS | \
78 I915_EXEC_CONSTANTS_MASK | \
79 I915_EXEC_RESOURCE_STREAMER)
80
81/* Catch emission of unexpected errors for CI! */
82#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
83#undef EINVAL
84#define EINVAL ({ \
85 DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
86 22; \
87})
88#endif
89
90/**
91 * DOC: User command execution
92 *
93 * Userspace submits commands to be executed on the GPU as an instruction
94 * stream within a GEM object we call a batchbuffer. These instructions may
95 * refer to other GEM objects containing auxiliary state such as kernels,
96 * samplers, render targets and even secondary batchbuffers. Userspace does
97 * not know where in the GPU memory these objects reside and so before the
98 * batchbuffer is passed to the GPU for execution, those addresses in the
99 * batchbuffer and auxiliary objects are updated. This is known as relocation,
100 * or patching. To try and avoid having to relocate each object on the next
101 * execution, userspace is told the location of those objects in this pass,
102 * but this remains just a hint as the kernel may choose a new location for
103 * any object in the future.
104 *
105 * At the level of talking to the hardware, submitting a batchbuffer for the
106 * GPU to execute is to add content to a buffer from which the HW
107 * command streamer is reading.
108 *
109 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
110 * Execlists, this command is not placed on the same buffer as the
111 * remaining items.
112 *
113 * 2. Add a command to invalidate caches to the buffer.
114 *
115 * 3. Add a batchbuffer start command to the buffer; the start command is
116 * essentially a token together with the GPU address of the batchbuffer
117 * to be executed.
118 *
119 * 4. Add a pipeline flush to the buffer.
120 *
121 * 5. Add a memory write command to the buffer to record when the GPU
122 * is done executing the batchbuffer. The memory write writes the
123 * global sequence number of the request, ``i915_request::global_seqno``;
124 * the i915 driver uses the current value in the register to determine
125 * if the GPU has completed the batchbuffer.
126 *
127 * 6. Add a user interrupt command to the buffer. This command instructs
128 * the GPU to issue an interrupt when the command, pipeline flush and
129 * memory write are completed.
130 *
131 * 7. Inform the hardware of the additional commands added to the buffer
132 * (by updating the tail pointer).
133 *
134 * Processing an execbuf ioctl is conceptually split up into a few phases.
135 *
136 * 1. Validation - Ensure all the pointers, handles and flags are valid.
137 * 2. Reservation - Assign GPU address space for every object
138 * 3. Relocation - Update any addresses to point to the final locations
139 * 4. Serialisation - Order the request with respect to its dependencies
140 * 5. Construction - Construct a request to execute the batchbuffer
141 * 6. Submission (at some point in the future execution)
142 *
143 * Reserving resources for the execbuf is the most complicated phase. We
144 * neither want to have to migrate the object in the address space, nor do
145 * we want to have to update any relocations pointing to this object. Ideally,
146 * we want to leave the object where it is and for all the existing relocations
147 * to match. If the object is given a new address, or if userspace thinks the
148 * object is elsewhere, we have to parse all the relocation entries and update
149 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
150 * all the target addresses in all of its objects match the value in the
151 * relocation entries and that they all match the presumed offsets given by the
152 * list of execbuffer objects. Using this knowledge, we know that if we haven't
153 * moved any buffers, all the relocation entries are valid and we can skip
154 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
155 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
156 *
157 * The addresses written in the objects must match the corresponding
158 * reloc.presumed_offset which in turn must match the corresponding
159 * execobject.offset.
160 *
161 * Any render targets written to in the batch must be flagged with
162 * EXEC_OBJECT_WRITE.
163 *
164 * To avoid stalling, execobject.offset should match the current
165 * address of that object within the active context.
166 *
167 * The reservation is done in multiple phases. First we try and keep any
168 * object already bound in its current location - so long as it meets the
169 * constraints imposed by the new execbuffer. Any object left unbound after the
170 * first pass is then fitted into any available idle space. If an object does
171 * not fit, all objects are removed from the reservation and the process rerun
172 * after sorting the objects into a priority order (more difficult to fit
173 * objects are tried first). Failing that, the entire VM is cleared and we try
174 * to fit the execbuf one last time before concluding that it simply will not
175 * fit.
176 *
177 * A small complication to all of this is that we allow userspace not only to
178 * specify an alignment and a size for the object in the address space, but
179 * we also allow userspace to specify the exact offset. These objects are
180 * simpler to place (the location is known a priori); all we have to do is make
181 * sure the space is available.
182 *
183 * Once all the objects are in place, patching up the buried pointers to point
184 * to the final locations is a fairly simple job of walking over the relocation
185 * entry arrays, looking up the right address and rewriting the value into
186 * the object. Simple! ... The relocation entries are stored in user memory
187 * and so to access them we have to copy them into a local buffer. That copy
188 * has to avoid taking any pagefaults as they may lead back to a GEM object
189 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
190 * the relocation into multiple passes. First we try to do everything within an
191 * atomic context (avoid the pagefaults) which requires that we never wait. If
192 * we detect that we may wait, or if we need to fault, then we have to fallback
193 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
194 * bells yet?) Dropping the mutex means that we lose all the state we have
195 * built up so far for the execbuf and we must reset any global data. However,
196 * we do leave the objects pinned in their final locations - which is a
197 * potential issue for concurrent execbufs. Once we have left the mutex, we can
198 * allocate and copy all the relocation entries into a large array at our
199 * leisure, reacquire the mutex, reclaim all the objects and other state and
200 * then proceed to update any incorrect addresses with the objects.
201 *
202 * As we process the relocation entries, we maintain a record of whether the
203 * object is being written to. Using NO_RELOC, we expect userspace to provide
204 * this information instead. We also check whether we can skip the relocation
205 * by comparing the expected value inside the relocation entry with the target's
206 * final address. If they differ, we have to map the current object and rewrite
207 * the 4 or 8 byte pointer within.
208 *
209 * Serialising an execbuf is quite simple according to the rules of the GEM
210 * ABI. Execution within each context is ordered by the order of submission.
211 * Writes to any GEM object are in order of submission and are exclusive. Reads
212 * from a GEM object are unordered with respect to other reads, but ordered by
213 * writes. A write submitted after a read cannot occur before the read, and
214 * similarly any read submitted after a write cannot occur before the write.
215 * Writes are ordered between engines such that only one write occurs at any
216 * time (completing any reads beforehand) - using semaphores where available
217 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
218 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
219 * reads before starting, and any read (either using set-domain or pread) must
220 * flush all GPU writes before starting. (Note we only employ a barrier before,
221 * we currently rely on userspace not concurrently starting a new execution
222 * whilst reading or writing to an object. This may be an advantage or not
223 * depending on how much you trust userspace not to shoot themselves in the
224 * foot.) Serialisation may just result in the request being inserted into
225 * a DAG awaiting its turn, but the simplest approach is to wait on the CPU until
226 * all dependencies are resolved.
227 *
228 * After all of that, it is just a matter of closing the request and handing it to
229 * the hardware (well, leaving it in a queue to be executed). However, we also
230 * offer the ability for batchbuffers to be run with elevated privileges so
231 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
232 * Before any batch is given extra privileges we first must check that it
233 * contains no nefarious instructions, we check that each instruction is from
234 * our whitelist and all registers are also from an allowed list. We first
235 * copy the user's batchbuffer to a shadow (so that the user doesn't have
236 * access to it, either by the CPU or GPU as we scan it) and then parse each
237 * instruction. If everything is ok, we set a flag telling the hardware to run
238 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
239 */
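
As a rough illustration of the uapi contract spelled out above (this is not part of i915_gem_execbuffer.c; the handles, offsets, batch length, the plain ioctl() wrapper and the <drm/i915_drm.h> include path are assumptions that vary by setup), a userspace submission relying on I915_EXEC_NO_RELOC could look like the sketch below: every execobject carries its presumed offset, written targets are flagged EXEC_OBJECT_WRITE, and the batch is the last buffer since I915_EXEC_BATCH_FIRST is not set.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_no_reloc(int fd, uint32_t batch_handle, uint64_t batch_offset,
			   uint32_t target_handle, uint64_t target_offset,
			   uint32_t batch_len)
{
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(obj, 0, sizeof(obj));
	obj[0].handle = target_handle;
	obj[0].offset = target_offset;		/* must match the address baked into the batch */
	obj[0].flags = EXEC_OBJECT_WRITE;	/* render target written by the batch */
	obj[1].handle = batch_handle;		/* batch buffer goes last */
	obj[1].offset = batch_offset;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = 2;
	execbuf.batch_len = batch_len;
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC;

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}

If the kernel does move a buffer, it writes the new address back into execobject.offset, so userspace can refresh its presumed offsets before the next submission.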
240
241struct eb_fence {
242 struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
243 struct dma_fence *dma_fence;
244 u64 value;
245 struct dma_fence_chain *chain_fence;
246};
247
248struct i915_execbuffer {
249 struct drm_i915_privateinteldrm_softc *i915; /** i915 backpointer */
250 struct drm_file *file; /** per-file lookup tables and limits */
251 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
252 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
253 struct eb_vma *vma;
254
255 struct intel_gt *gt; /* gt for the execbuf */
256 struct intel_context *context; /* logical state for the request */
257 struct i915_gem_context *gem_context; /** caller's context */
258
259 /** our requests to build */
260 struct i915_request *requests[MAX_ENGINE_INSTANCE8 + 1];
261 /** identity of the batch obj/vma */
262 struct eb_vma *batches[MAX_ENGINE_INSTANCE8 + 1];
263 struct i915_vma *trampoline; /** trampoline used for chaining */
264
265 /** used for excl fence in dma_resv objects when > 1 BB submitted */
266 struct dma_fence *composite_fence;
267
268 /** actual size of execobj[] as we may extend it for the cmdparser */
269 unsigned int buffer_count;
270
271 /* number of batches in execbuf IOCTL */
272 unsigned int num_batches;
273
274 /** list of vma not yet bound during reservation phase */
275 struct list_head unbound;
276
277 /** list of vma that have execobj.relocation_count */
278 struct list_head relocs;
279
280 struct i915_gem_ww_ctx ww;
281
282 /**
283 * Track the most recently used object for relocations, as we
284 * frequently have to perform multiple relocations within the same
285 * obj/page
286 */
287 struct reloc_cache {
288 struct drm_mm_node node; /** temporary GTT binding */
289 unsigned long vaddr; /** Current kmap address */
290 unsigned long page; /** Currently mapped page index */
291 unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
292 bool_Bool use_64bit_reloc : 1;
293 bool_Bool has_llc : 1;
294 bool_Bool has_fence : 1;
295 bool_Bool needs_unfenced : 1;
296
297 struct agp_map *map;
298 bus_space_tag_t iot;
299 bus_space_handle_t ioh;
300 } reloc_cache;
301
302 u64 invalid_flags; /** Set of execobj.flags that are invalid */
303
304 /** Length of batch within object */
305 u64 batch_len[MAX_ENGINE_INSTANCE8 + 1];
306 u32 batch_start_offset; /** Location within object of batch */
307 u32 batch_flags; /** Flags composed for emit_bb_start() */
308 struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
309
310 /**
311 * Indicate either the size of the hashtable used to resolve
312 * relocation handles, or if negative that we are using a direct
313 * index into the execobj[].
314 */
315 int lut_size;
316 struct hlist_head *buckets; /** ht for relocation handles */
317
318 struct eb_fence *fences;
319 unsigned long num_fences;
320#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1
321 struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE8 + 1];
322#endif
323};
324
325static int eb_parse(struct i915_execbuffer *eb);
326static int eb_pin_engine(struct i915_execbuffer *eb, bool_Bool throttle);
327static void eb_unpin_engine(struct i915_execbuffer *eb);
328static void eb_capture_release(struct i915_execbuffer *eb);
329
330static inline bool_Bool eb_use_cmdparser(const struct i915_execbuffer *eb)
331{
332 return intel_engine_requires_cmd_parser(eb->context->engine) ||
333 (intel_engine_using_cmd_parser(eb->context->engine) &&
334 eb->args->batch_len);
335}
336
337static int eb_create(struct i915_execbuffer *eb)
338{
339 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT(1<<12))) {
340 unsigned int size = 1 + ilog2(eb->buffer_count);
341
342 /*
343 * Without a 1:1 association between relocation handles and
344 * the execobject[] index, we instead create a hashtable.
345 * We size it dynamically based on available memory, starting
346 * first with a 1:1 associative hash and scaling back until
347 * the allocation succeeds.
348 *
349 * Later on we use a positive lut_size to indicate we are
350 * using this hashtable, and a negative value to indicate a
351 * direct lookup.
352 */
353 do {
354 gfp_t flags;
355
356 /* While we can still reduce the allocation size, don't
357 * raise a warning and allow the allocation to fail.
358 * On the last pass though, we want to try as hard
359 * as possible to perform the allocation and warn
360 * if it fails.
361 */
362 flags = GFP_KERNEL(0x0001 | 0x0004);
363 if (size > 1)
364 flags |= __GFP_NORETRY0 | __GFP_NOWARN0;
365
366 eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
367 flags);
368 if (eb->buckets)
369 break;
370 } while (--size);
371
372 if (unlikely(!size)__builtin_expect(!!(!size), 0))
373 return -ENOMEM12;
374
375 eb->lut_size = size;
376 } else {
377 eb->lut_size = -eb->buffer_count;
378 }
379
380 return 0;
381}
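
A short illustrative aside on the two lookup modes selected above (a sketch under assumptions, not part of this file): with I915_EXEC_HANDLE_LUT userspace promises that each relocation's target_handle is an index into its execobject[] array, which is what makes the direct, negative-lut_size path possible; without the flag, target_handle is an ordinary GEM handle resolved through the hashtable built here. The helper name and the patch offset below are hypothetical.

#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

/* Hypothetical helper showing how target_handle changes meaning. */
static struct drm_i915_gem_relocation_entry
make_reloc(uint32_t gem_handle, int use_handle_lut)
{
	struct drm_i915_gem_relocation_entry reloc;

	memset(&reloc, 0, sizeof(reloc));
	reloc.offset = 0x40;			/* byte in the batch to patch */
	reloc.read_domains = I915_GEM_DOMAIN_RENDER;

	if (use_handle_lut)
		reloc.target_handle = 0;	  /* index into execobj[], direct lookup */
	else
		reloc.target_handle = gem_handle; /* GEM handle, hashtable lookup */

	return reloc;
}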
382
383static bool_Bool
384eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
385 const struct i915_vma *vma,
386 unsigned int flags)
387{
388 if (vma->node.size < entry->pad_to_size)
389 return true1;
390
391 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
392 return true1;
393
394 if (flags & EXEC_OBJECT_PINNED(1<<4) &&
395 vma->node.start != entry->offset)
396 return true1;
397
398 if (flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (26)) &&
399 vma->node.start < BATCH_OFFSET_BIAS(256*1024))
400 return true1;
401
402 if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)) &&
403 (vma->node.start + vma->node.size + 4095) >> 32)
404 return true1;
405
406 if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (27)) &&
407 !i915_vma_is_map_and_fenceable(vma))
408 return true1;
409
410 return false0;
411}
412
413static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
414 unsigned int exec_flags)
415{
416 u64 pin_flags = 0;
417
418 if (exec_flags & EXEC_OBJECT_NEEDS_GTT(1<<1))
419 pin_flags |= PIN_GLOBAL(1ULL << (10));
420
421 /*
422 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
423 * limit address to the first 4GBs for unflagged objects.
424 */
425 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
426 pin_flags |= PIN_ZONE_4G(1ULL << (4));
427
428 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (27)))
429 pin_flags |= PIN_MAPPABLE(1ULL << (3));
430
431 if (exec_flags & EXEC_OBJECT_PINNED(1<<4))
432 pin_flags |= entry->offset | PIN_OFFSET_FIXED(1ULL << (7));
433 else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (26)))
434 pin_flags |= BATCH_OFFSET_BIAS(256*1024) | PIN_OFFSET_BIAS(1ULL << (6));
435
436 return pin_flags;
437}
438
439static inline int
440eb_pin_vma(struct i915_execbuffer *eb,
441 const struct drm_i915_gem_exec_object2 *entry,
442 struct eb_vma *ev)
443{
444 struct i915_vma *vma = ev->vma;
445 u64 pin_flags;
446 int err;
447
448 if (vma->node.size)
449 pin_flags = vma->node.start;
450 else
451 pin_flags = entry->offset & PIN_OFFSET_MASK-(1ULL << (12));
452
453 pin_flags |= PIN_USER(1ULL << (11)) | PIN_NOEVICT(1ULL << (0)) | PIN_OFFSET_FIXED(1ULL << (7)) | PIN_VALIDATE(1ULL << (8));
454 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)__builtin_expect(!!(ev->flags & (1<<1)), 0))
455 pin_flags |= PIN_GLOBAL(1ULL << (10));
456
457 /* Attempt to reuse the current location if available */
458 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
459 if (err == -EDEADLK11)
460 return err;
461
462 if (unlikely(err)__builtin_expect(!!(err), 0)) {
463 if (entry->flags & EXEC_OBJECT_PINNED(1<<4))
464 return err;
465
466 /* Failing that pick any _free_ space if suitable */
467 err = i915_vma_pin_ww(vma, &eb->ww,
468 entry->pad_to_size,
469 entry->alignment,
470 eb_pin_flags(entry, ev->flags) |
471 PIN_USER(1ULL << (11)) | PIN_NOEVICT(1ULL << (0)) | PIN_VALIDATE(1ULL << (8)));
472 if (unlikely(err)__builtin_expect(!!(err), 0))
473 return err;
474 }
475
476 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
477 err = i915_vma_pin_fence(vma);
478 if (unlikely(err)__builtin_expect(!!(err), 0))
479 return err;
480
481 if (vma->fence)
482 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (29));
483 }
484
485 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (30));
486 if (eb_vma_misplaced(entry, vma, ev->flags))
487 return -EBADSLT22;
488
489 return 0;
490}
491
492static inline void
493eb_unreserve_vma(struct eb_vma *ev)
494{
495 if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
496 __i915_vma_unpin_fence(ev->vma);
497
498 ev->flags &= ~__EXEC_OBJECT_RESERVED((1UL << (30)) | (1UL << (29)));
499}
500
501static int
502eb_validate_vma(struct i915_execbuffer *eb,
503 struct drm_i915_gem_exec_object2 *entry,
504 struct i915_vma *vma)
505{
506 /* Relocations are disallowed for all platforms after TGL-LP. This
507 * also covers all platforms with local memory.
508 */
509 if (entry->relocation_count &&
510 GRAPHICS_VER(eb->i915)((&(eb->i915)->__runtime)->graphics.ip.ver) >= 12 && !IS_TIGERLAKE(eb->i915)IS_PLATFORM(eb->i915, INTEL_TIGERLAKE))
511 return -EINVAL22;
512
513 if (unlikely(entry->flags & eb->invalid_flags))
514 return -EINVAL22;
515
516 if (unlikely(entry->alignment &&
517 !is_power_of_2_u64(entry->alignment)))
518 return -EINVAL22;
519
520 /*
521 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
522 * any non-page-aligned or non-canonical addresses.
523 */
524 if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
525 entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
526 return -EINVAL22;
527
528 /* pad_to_size was once a reserved field, so sanitize it */
529 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE(1<<5)) {
530 if (unlikely(offset_in_page(entry->pad_to_size)))
531 return -EINVAL22;
532 } else {
533 entry->pad_to_size = 0;
534 }
535 /*
536 * From drm_mm perspective address space is continuous,
537 * so from this point we're always using non-canonical
538 * form internally.
539 */
540 entry->offset = gen8_noncanonical_addr(entry->offset);
541
542 if (!eb->reloc_cache.has_fence) {
543 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE(1<<0);
544 } else {
545 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE(1<<0) ||
546 eb->reloc_cache.needs_unfenced) &&
547 i915_gem_object_is_tiled(vma->obj))
548 entry->flags |= EXEC_OBJECT_NEEDS_GTT(1<<1) | __EXEC_OBJECT_NEEDS_MAP(1UL << (27));
549 }
550
551 return 0;
552}
553
554static inline bool_Bool
555is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
556{
557 return eb->args->flags & I915_EXEC_BATCH_FIRST(1<<18) ?
558 buffer_idx < eb->num_batches :
559 buffer_idx >= eb->args->buffer_count - eb->num_batches;
560}
561
562static int
563eb_add_vma(struct i915_execbuffer *eb,
564 unsigned int *current_batch,
565 unsigned int i,
566 struct i915_vma *vma)
567{
568 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
569 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
570 struct eb_vma *ev = &eb->vma[i];
571
572 ev->vma = vma;
573 ev->exec = entry;
574 ev->flags = entry->flags;
575
576 if (eb->lut_size > 0) {
577 ev->handle = entry->handle;
578 hlist_add_head(&ev->node,
579 &eb->buckets[hash_32(entry->handle,
580 eb->lut_size)]);
581 }
582
583 if (entry->relocation_count)
584 list_add_tail(&ev->reloc_link, &eb->relocs);
585
586 /*
587 * SNA is doing fancy tricks with compressing batch buffers, which leads
588 * to negative relocation deltas. Usually that works out ok since the
589 * relocate address is still positive, except when the batch is placed
590 * very low in the GTT. Ensure this doesn't happen.
591 *
592 * Note that actual hangs have only been observed on gen7, but for
593 * paranoia do it everywhere.
594 */
595 if (is_batch_buffer(eb, i)) {
596 if (entry->relocation_count &&
597 !(ev->flags & EXEC_OBJECT_PINNED(1<<4)))
598 ev->flags |= __EXEC_OBJECT_NEEDS_BIAS(1UL << (26));
599 if (eb->reloc_cache.has_fence)
600 ev->flags |= EXEC_OBJECT_NEEDS_FENCE(1<<0);
601
602 eb->batches[*current_batch] = ev;
603
604 if (unlikely(ev->flags & EXEC_OBJECT_WRITE)__builtin_expect(!!(ev->flags & (1<<2)), 0)) {
605 drm_dbg(&i915->drm,
606 "Attempting to use self-modifying batch buffer\n");
607 return -EINVAL22;
608 }
609
610 if (range_overflows_t(u64,
611 eb->batch_start_offset,
612 eb->args->batch_len,
613 ev->vma->size)) {
614 drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
615 return -EINVAL22;
616 }
617
618 if (eb->args->batch_len == 0)
619 eb->batch_len[*current_batch] = ev->vma->size -
620 eb->batch_start_offset;
621 else
622 eb->batch_len[*current_batch] = eb->args->batch_len;
623 if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */
624 drm_dbg(&i915->drm, "Invalid batch length\n");
625 return -EINVAL22;
626 }
627
628 ++*current_batch;
629 }
630
631 return 0;
632}
633
634static inline int use_cpu_reloc(const struct reloc_cache *cache,
635 const struct drm_i915_gem_object *obj)
636{
637 if (!i915_gem_object_has_struct_page(obj))
638 return false0;
639
640 if (DBG_FORCE_RELOC0 == FORCE_CPU_RELOC)
641 return true1;
642
643 if (DBG_FORCE_RELOC0 == FORCE_GTT_RELOC)
644 return false0;
645
646 return (cache->has_llc ||
647 obj->cache_dirty ||
648 obj->cache_level != I915_CACHE_NONE);
649}
650
651static int eb_reserve_vma(struct i915_execbuffer *eb,
652 struct eb_vma *ev,
653 u64 pin_flags)
654{
655 struct drm_i915_gem_exec_object2 *entry = ev->exec;
656 struct i915_vma *vma = ev->vma;
657 int err;
658
659 if (drm_mm_node_allocated(&vma->node) &&
660 eb_vma_misplaced(entry, vma, ev->flags)) {
661 err = i915_vma_unbind(vma);
662 if (err)
663 return err;
664 }
665
666 err = i915_vma_pin_ww(vma, &eb->ww,
667 entry->pad_to_size, entry->alignment,
668 eb_pin_flags(entry, ev->flags) | pin_flags);
669 if (err)
670 return err;
671
672 if (entry->offset != vma->node.start) {
673 entry->offset = vma->node.start | UPDATE(1ULL << (7));
674 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
675 }
676
677 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
678 err = i915_vma_pin_fence(vma);
679 if (unlikely(err)__builtin_expect(!!(err), 0))
680 return err;
681
682 if (vma->fence)
683 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (29));
684 }
685
686 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (30));
687 GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags))((void)0);
688
689 return 0;
690}
691
692static bool_Bool eb_unbind(struct i915_execbuffer *eb, bool_Bool force)
693{
694 const unsigned int count = eb->buffer_count;
695 unsigned int i;
696 struct list_head last;
697 bool_Bool unpinned = false0;
698
699 /* Resort *all* the objects into priority order */
700 INIT_LIST_HEAD(&eb->unbound);
701 INIT_LIST_HEAD(&last);
702
703 for (i = 0; i < count; i++) {
704 struct eb_vma *ev = &eb->vma[i];
705 unsigned int flags = ev->flags;
706
707 if (!force && flags & EXEC_OBJECT_PINNED(1<<4) &&
708 flags & __EXEC_OBJECT_HAS_PIN(1UL << (30)))
709 continue;
710
711 unpinned = true1;
712 eb_unreserve_vma(ev);
713
714 if (flags & EXEC_OBJECT_PINNED(1<<4))
715 /* Pinned must have their slot */
716 list_add(&ev->bind_link, &eb->unbound);
717 else if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (27)))
718 /* Mappable objects require the lowest 256MiB (aperture) */
719 list_add_tail(&ev->bind_link, &eb->unbound);
720 else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
721 /* Prioritise 4GiB region for restricted bo */
722 list_add(&ev->bind_link, &last);
723 else
724 list_add_tail(&ev->bind_link, &last);
725 }
726
727 list_splice_tail(&last, &eb->unbound);
728 return unpinned;
729}
730
731static int eb_reserve(struct i915_execbuffer *eb)
732{
733 struct eb_vma *ev;
734 unsigned int pass;
735 int err = 0;
736 bool_Bool unpinned;
737
738 /*
739 * We have one or more buffers that we couldn't bind, which could be due to
740 * various reasons. To resolve this we have 4 passes, with every next
741 * level turning the screws tighter:
742 *
743 * 0. Unbind all objects that do not match the GTT constraints for the
744 * execbuffer (fenceable, mappable, alignment etc). Bind all new
745 * objects. This avoids unnecessary unbinding of later objects in order
746 * to make room for the earlier objects *unless* we need to defragment.
747 *
748 * 1. Reorder the buffers, where objects with the most restrictive
749 * placement requirements go first (ignoring fixed location buffers for
750 * now). For example, objects needing the mappable aperture (the first
751 * 256M of GTT), should go first vs objects that can be placed just
752 * about anywhere. Repeat the previous pass.
753 *
754 * 2. Consider buffers that are pinned at a fixed location. Also try to
755 * evict the entire VM this time, leaving only objects that we were
756 * unable to lock. Try again to bind the buffers. (still using the new
757 * buffer order).
758 *
759 * 3. We likely have object lock contention for one or more stubborn
760 * objects in the VM, for which we need to evict to make forward
761 * progress (perhaps we are fighting the shrinker?). When evicting the
762 * VM this time around, anything that we can't lock we now track using
763 * the busy_bo, using the full lock (after dropping the vm->mutex to
764 * prevent deadlocks), instead of trylock. We then continue to evict the
765 * VM, this time with the stubborn object locked, which we can now
766 * hopefully unbind (if still bound in the VM). Repeat until the VM is
767 * evicted. Finally we should be able bind everything.
768 */
769 for (pass = 0; pass <= 3; pass++) {
770 int pin_flags = PIN_USER(1ULL << (11)) | PIN_VALIDATE(1ULL << (8));
771
772 if (pass == 0)
773 pin_flags |= PIN_NONBLOCK(1ULL << (2));
774
775 if (pass >= 1)
776 unpinned = eb_unbind(eb, pass >= 2);
777
778 if (pass == 2) {
779 err = mutex_lock_interruptible(&eb->context->vm->mutex);
780 if (!err) {
781 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL((void *)0));
782 mutex_unlock(&eb->context->vm->mutex)rw_exit_write(&eb->context->vm->mutex);
783 }
784 if (err)
785 return err;
786 }
787
788 if (pass == 3) {
789retry:
790 err = mutex_lock_interruptible(&eb->context->vm->mutex);
791 if (!err) {
792 struct drm_i915_gem_object *busy_bo = NULL((void *)0);
793
794 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
795 mutex_unlock(&eb->context->vm->mutex)rw_exit_write(&eb->context->vm->mutex);
796 if (err && busy_bo) {
797 err = i915_gem_object_lock(busy_bo, &eb->ww);
798 i915_gem_object_put(busy_bo);
799 if (!err)
800 goto retry;
801 }
802 }
803 if (err)
804 return err;
805 }
806
807 list_for_each_entry(ev, &eb->unbound, bind_link) {
808 err = eb_reserve_vma(eb, ev, pin_flags);
809 if (err)
810 break;
811 }
812
813 if (err != -ENOSPC28)
814 break;
815 }
816
817 return err;
818}
819
820static int eb_select_context(struct i915_execbuffer *eb)
821{
822 struct i915_gem_context *ctx;
823
824 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
825 if (unlikely(IS_ERR(ctx))__builtin_expect(!!(IS_ERR(ctx)), 0))
826 return PTR_ERR(ctx);
827
828 eb->gem_context = ctx;
829 if (i915_gem_context_has_full_ppgtt(ctx))
830 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT(1<<1);
831
832 return 0;
833}
834
835static int __eb_add_lut(struct i915_execbuffer *eb,
836 u32 handle, struct i915_vma *vma)
837{
838 struct i915_gem_context *ctx = eb->gem_context;
839 struct i915_lut_handle *lut;
840 int err;
841
842 lut = i915_lut_handle_alloc();
843 if (unlikely(!lut)__builtin_expect(!!(!lut), 0))
844 return -ENOMEM12;
845
846 i915_vma_get(vma);
847 if (!atomic_fetch_inc(&vma->open_count)__sync_fetch_and_add(&vma->open_count, 1))
848 i915_vma_reopen(vma);
849 lut->handle = handle;
850 lut->ctx = ctx;
851
852 /* Check that the context hasn't been closed in the meantime */
853 err = -EINTR4;
854 if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
855 if (likely(!i915_gem_context_is_closed(ctx))__builtin_expect(!!(!i915_gem_context_is_closed(ctx)), 1))
856 err = radix_tree_insert(&ctx->handles_vma, handle, vma);
857 else
858 err = -ENOENT2;
859 if (err == 0) { /* And nor has this handle */
860 struct drm_i915_gem_object *obj = vma->obj;
861
862 spin_lock(&obj->lut_lock)mtx_enter(&obj->lut_lock);
863 if (idr_find(&eb->file->object_idr, handle) == obj) {
864 list_add(&lut->obj_link, &obj->lut_list);
865 } else {
866 radix_tree_delete(&ctx->handles_vma, handle);
867 err = -ENOENT2;
868 }
869 spin_unlock(&obj->lut_lock)mtx_leave(&obj->lut_lock);
870 }
871 mutex_unlock(&ctx->lut_mutex)rw_exit_write(&ctx->lut_mutex);
872 }
873 if (unlikely(err)__builtin_expect(!!(err), 0))
874 goto err;
875
876 return 0;
877
878err:
879 i915_vma_close(vma);
880 i915_vma_put(vma);
881 i915_lut_handle_free(lut);
882 return err;
883}
884
885static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
886{
887 struct i915_address_space *vm = eb->context->vm;
888
889 do {
890 struct drm_i915_gem_object *obj;
891 struct i915_vma *vma;
892 int err;
893
894 rcu_read_lock();
895 vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
896 if (likely(vma && vma->vm == vm)__builtin_expect(!!(vma && vma->vm == vm), 1))
897 vma = i915_vma_tryget(vma);
898 rcu_read_unlock();
899 if (likely(vma)__builtin_expect(!!(vma), 1))
900 return vma;
901
902 obj = i915_gem_object_lookup(eb->file, handle);
903 if (unlikely(!obj)__builtin_expect(!!(!obj), 0))
904 return ERR_PTR(-ENOENT2);
905
906 /*
907 * If the user has opted-in for protected-object tracking, make
908 * sure the object encryption can be used.
909 * We only need to do this when the object is first used with
910 * this context, because the context itself will be banned when
911 * the protected objects become invalid.
912 */
913 if (i915_gem_context_uses_protected_content(eb->gem_context) &&
914 i915_gem_object_is_protected(obj)) {
915 err = intel_pxp_key_check(&vm->gt->pxp, obj, true1);
916 if (err) {
917 i915_gem_object_put(obj);
918 return ERR_PTR(err);
919 }
920 }
921
922 vma = i915_vma_instance(obj, vm, NULL((void *)0));
923 if (IS_ERR(vma)) {
924 i915_gem_object_put(obj);
925 return vma;
926 }
927
928 err = __eb_add_lut(eb, handle, vma);
929 if (likely(!err)__builtin_expect(!!(!err), 1))
930 return vma;
931
932 i915_gem_object_put(obj);
933 if (err != -EEXIST17)
934 return ERR_PTR(err);
935 } while (1);
936}
937
938static int eb_lookup_vmas(struct i915_execbuffer *eb)
939{
940 unsigned int i, current_batch = 0;
941 int err = 0;
942
943 INIT_LIST_HEAD(&eb->relocs);
944
945 for (i = 0; i < eb->buffer_count; i++) {
946 struct i915_vma *vma;
947
948 vma = eb_lookup_vma(eb, eb->exec[i].handle);
949 if (IS_ERR(vma)) {
950 err = PTR_ERR(vma);
951 goto err;
952 }
953
954 err = eb_validate_vma(eb, &eb->exec[i], vma);
955 if (unlikely(err)__builtin_expect(!!(err), 0)) {
956 i915_vma_put(vma);
957 goto err;
958 }
959
960 err = eb_add_vma(eb, &current_batch, i, vma);
961 if (err)
962 return err;
963
964 if (i915_gem_object_is_userptr(vma->obj)) {
965 err = i915_gem_object_userptr_submit_init(vma->obj);
966 if (err) {
967 if (i + 1 < eb->buffer_count) {
968 /*
969 * Execbuffer code expects last vma entry to be NULL,
970 * since we already initialized this entry,
971 * set the next value to NULL or we mess up
972 * cleanup handling.
973 */
974 eb->vma[i + 1].vma = NULL((void *)0);
975 }
976
977 return err;
978 }
979
980 eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT(1UL << (28));
981 eb->args->flags |= __EXEC_USERPTR_USED(1UL << (29));
982 }
983 }
984
985 return 0;
986
987err:
988 eb->vma[i].vma = NULL((void *)0);
989 return err;
990}
991
992static int eb_lock_vmas(struct i915_execbuffer *eb)
993{
994 unsigned int i;
995 int err;
996
997 for (i = 0; i < eb->buffer_count; i++) {
998 struct eb_vma *ev = &eb->vma[i];
999 struct i915_vma *vma = ev->vma;
1000
1001 err = i915_gem_object_lock(vma->obj, &eb->ww);
1002 if (err)
1003 return err;
1004 }
1005
1006 return 0;
1007}
1008
1009static int eb_validate_vmas(struct i915_execbuffer *eb)
1010{
1011 unsigned int i;
1012 int err;
1013
1014 INIT_LIST_HEAD(&eb->unbound);
1015
1016 err = eb_lock_vmas(eb);
1017 if (err)
1018 return err;
1019
1020 for (i = 0; i < eb->buffer_count; i++) {
1021 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
1022 struct eb_vma *ev = &eb->vma[i];
1023 struct i915_vma *vma = ev->vma;
1024
1025 err = eb_pin_vma(eb, entry, ev);
1026 if (err == -EDEADLK11)
1027 return err;
1028
1029 if (!err) {
1030 if (entry->offset != vma->node.start) {
1031 entry->offset = vma->node.start | UPDATE(1ULL << (7));
1032 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
1033 }
1034 } else {
1035 eb_unreserve_vma(ev);
1036
1037 list_add_tail(&ev->bind_link, &eb->unbound);
1038 if (drm_mm_node_allocated(&vma->node)) {
1039 err = i915_vma_unbind(vma);
1040 if (err)
1041 return err;
1042 }
1043 }
1044
1045 /* Reserve enough slots to accommodate composite fences */
1046 err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
1047 if (err)
1048 return err;
1049
1050 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&((void)0)
1051 eb_vma_misplaced(&eb->exec[i], vma, ev->flags))((void)0);
1052 }
1053
1054 if (!list_empty(&eb->unbound))
1055 return eb_reserve(eb);
1056
1057 return 0;
1058}
1059
1060static struct eb_vma *
1061eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
1062{
1063 if (eb->lut_size < 0) {
1064 if (handle >= -eb->lut_size)
1065 return NULL((void *)0);
1066 return &eb->vma[handle];
1067 } else {
1068 struct hlist_head *head;
1069 struct eb_vma *ev;
1070
1071 head = &eb->buckets[hash_32(handle, eb->lut_size)];
1072 hlist_for_each_entry(ev, head, node) {
1073 if (ev->handle == handle)
1074 return ev;
1075 }
1076 return NULL((void *)0);
1077 }
1078}
1079
1080static void eb_release_vmas(struct i915_execbuffer *eb, bool_Bool final)
1081{
1082 const unsigned int count = eb->buffer_count;
1083 unsigned int i;
1084
1085 for (i = 0; i < count; i++) {
1086 struct eb_vma *ev = &eb->vma[i];
1087 struct i915_vma *vma = ev->vma;
1088
1089 if (!vma)
1090 break;
1091
1092 eb_unreserve_vma(ev);
1093
1094 if (final)
1095 i915_vma_put(vma);
1096 }
1097
1098 eb_capture_release(eb);
1099 eb_unpin_engine(eb);
1100}
1101
1102static void eb_destroy(const struct i915_execbuffer *eb)
1103{
1104 if (eb->lut_size > 0)
1105 kfree(eb->buckets);
1106}
1107
1108static inline u64
1109relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
1110 const struct i915_vma *target)
1111{
1112 return gen8_canonical_addr((int)reloc->delta + target->node.start);
1113}
1114
1115static void reloc_cache_init(struct reloc_cache *cache,
1116 struct drm_i915_privateinteldrm_softc *i915)
1117{
1118 cache->page = -1;
1119 cache->vaddr = 0;
1120 /* Must be a variable in the struct to allow GCC to unroll. */
1121 cache->graphics_ver = GRAPHICS_VER(i915)((&(i915)->__runtime)->graphics.ip.ver);
1122 cache->has_llc = HAS_LLC(i915)((&(i915)->__info)->has_llc);
1123 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915)((&(i915)->__info)->has_64bit_reloc);
1124 cache->has_fence = cache->graphics_ver < 4;
1125 cache->needs_unfenced = INTEL_INFO(i915)(&(i915)->__info)->unfenced_needs_alignment;
1126 cache->node.flags = 0;
1127
1128 cache->map = i915->agph;
1129 cache->iot = i915->bst;
1130}
1131
1132static inline void *unmask_page(unsigned long p)
1133{
1134 return (void *)(uintptr_t)(p & LINUX_PAGE_MASK(~((1 << 12) - 1)));
1135}
1136
1137static inline unsigned int unmask_flags(unsigned long p)
1138{
1139 return p & ~LINUX_PAGE_MASK(~((1 << 12) - 1));
1140}
1141
1142#define KMAP0x4 0x4 /* after CLFLUSH_FLAGS */
1143
1144static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
1145{
1146 struct drm_i915_privateinteldrm_softc *i915 =
1147 container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
1148 return to_gt(i915)->ggtt;
1149}
1150
1151static void reloc_cache_unmap(struct reloc_cache *cache)
1152{
1153 void *vaddr;
1154
1155 if (!cache->vaddr)
1156 return;
1157
1158 vaddr = unmask_page(cache->vaddr);
1159 if (cache->vaddr & KMAP0x4)
1160 kunmap_atomic(vaddr);
1161 else
1162#ifdef __linux__
1163 io_mapping_unmap_atomic((void __iomem *)vaddr);
1164#else
1165 agp_unmap_atomic(cache->map, cache->ioh);
1166#endif
1167}
1168
1169static void reloc_cache_remap(struct reloc_cache *cache,
1170 struct drm_i915_gem_object *obj)
1171{
1172 void *vaddr;
1173
1174 if (!cache->vaddr)
1175 return;
1176
1177 if (cache->vaddr & KMAP0x4) {
1178 struct vm_page *page = i915_gem_object_get_page(obj, cache->page);
1179
1180 vaddr = kmap_atomic(page);
1181 cache->vaddr = unmask_flags(cache->vaddr) |
1182 (unsigned long)vaddr;
1183 } else {
1184 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1185 unsigned long offset;
1186
1187 offset = cache->node.start;
1188 if (!drm_mm_node_allocated(&cache->node))
1189 offset += cache->page << PAGE_SHIFT12;
1190
1191#ifdef __linux__
1192 cache->vaddr = (unsigned long)
1193 io_mapping_map_atomic_wc(&ggtt->iomap, offset);
1194#else
1195 agp_map_atomic(cache->map, offset, &cache->ioh);
1196 cache->vaddr = (unsigned long)
1197 bus_space_vaddr(cache->iot, cache->ioh)((cache->iot)->vaddr((cache->ioh)));
1198#endif
1199 }
1200}
1201
1202static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
1203{
1204 void *vaddr;
1205
1206 if (!cache->vaddr)
1207 return;
1208
1209 vaddr = unmask_page(cache->vaddr);
1210 if (cache->vaddr & KMAP0x4) {
1211 struct drm_i915_gem_object *obj =
1212 (struct drm_i915_gem_object *)cache->node.mm;
1213 if (cache->vaddr & CLFLUSH_AFTER(1UL << (1)))
1214 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1215
1216 kunmap_atomic(vaddr);
1217 i915_gem_object_finish_access(obj);
1218 } else {
1219 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1220
1221 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1222#ifdef __linux__
1223 io_mapping_unmap_atomic((void __iomem *)vaddr);
1224#else
1225 agp_unmap_atomic(cache->map, cache->ioh);
1226#endif
1227
1228 if (drm_mm_node_allocated(&cache->node)) {
1229 ggtt->vm.clear_range(&ggtt->vm,
1230 cache->node.start,
1231 cache->node.size);
1232 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1233 drm_mm_remove_node(&cache->node);
1234 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1235 } else {
1236 i915_vma_unpin((struct i915_vma *)cache->node.mm);
1237 }
1238 }
1239
1240 cache->vaddr = 0;
1241 cache->page = -1;
1242}
1243
1244static void *reloc_kmap(struct drm_i915_gem_object *obj,
1245 struct reloc_cache *cache,
1246 unsigned long pageno)
1247{
1248 void *vaddr;
1249 struct vm_page *page;
1250
1251 if (cache->vaddr) {
1252 kunmap_atomic(unmask_page(cache->vaddr));
1253 } else {
1254 unsigned int flushes;
1255 int err;
1256
1257 err = i915_gem_object_prepare_write(obj, &flushes);
1258 if (err)
1259 return ERR_PTR(err);
1260
1261 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1262 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);
1263
1264 cache->vaddr = flushes | KMAP0x4;
1265 cache->node.mm = (void *)obj;
1266 if (flushes)
1267 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1268 }
1269
1270 page = i915_gem_object_get_page(obj, pageno);
1271 if (!obj->mm.dirty)
1272 set_page_dirty(page)x86_atomic_clearbits_u32(&page->pg_flags, 0x00000008);
1273
1274 vaddr = kmap_atomic(page);
1275 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1276 cache->page = pageno;
1277
1278 return vaddr;
1279}
1280
1281static void *reloc_iomap(struct i915_vma *batch,
1282 struct i915_execbuffer *eb,
1283 unsigned long page)
1284{
1285 struct drm_i915_gem_object *obj = batch->obj;
1286 struct reloc_cache *cache = &eb->reloc_cache;
1287 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1288 unsigned long offset;
1289 void *vaddr;
1290
1291 if (cache->vaddr) {
1292 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1293#ifdef __linux__
1294 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1295#else
1296 agp_unmap_atomic(cache->map, cache->ioh);
1297#endif
1298 } else {
1299 struct i915_vma *vma = ERR_PTR(-ENODEV19);
1300 int err;
1301
1302 if (i915_gem_object_is_tiled(obj))
1303 return ERR_PTR(-EINVAL22);
1304
1305 if (use_cpu_reloc(cache, obj))
1306 return NULL((void *)0);
1307
1308 err = i915_gem_object_set_to_gtt_domain(obj, true1);
1309 if (err)
1310 return ERR_PTR(err);
1311
1312 /*
1313 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
1314 * VMA from the object list because we no longer pin.
1315 *
1316 * Only attempt to pin the batch buffer to ggtt if the current batch
1317 * is not inside ggtt, or the batch buffer is not misplaced.
1318 */
1319 if (!i915_is_ggtt(batch->vm)((batch->vm)->is_ggtt) ||
1320 !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE(1ULL << (3)))) {
1321 vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL((void *)0), 0, 0,
1322 PIN_MAPPABLE(1ULL << (3)) |
1323 PIN_NONBLOCK(1ULL << (2)) /* NOWARN */ |
1324 PIN_NOEVICT(1ULL << (0)));
1325 }
1326
1327 if (vma == ERR_PTR(-EDEADLK11))
1328 return vma;
1329
1330 if (IS_ERR(vma)) {
1331 memset(&cache->node, 0, sizeof(cache->node));
1332 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1333 err = drm_mm_insert_node_in_range
1334 (&ggtt->vm.mm, &cache->node,
1335 PAGE_SIZE(1 << 12), 0, I915_COLOR_UNEVICTABLE(-1),
1336 0, ggtt->mappable_end,
1337 DRM_MM_INSERT_LOW);
1338 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1339 if (err) /* no inactive aperture space, use cpu reloc */
1340 return NULL((void *)0);
1341 } else {
1342 cache->node.start = vma->node.start;
1343 cache->node.mm = (void *)vma;
1344 }
1345 }
1346
1347 offset = cache->node.start;
1348 if (drm_mm_node_allocated(&cache->node)) {
1349 ggtt->vm.insert_page(&ggtt->vm,
1350 i915_gem_object_get_dma_address(obj, page),
1351 offset, I915_CACHE_NONE, 0);
1352 } else {
1353 offset += page << PAGE_SHIFT12;
1354 }
1355
1356#ifdef __linux__
1357 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1358 offset);
1359#else
1360 agp_map_atomic(cache->map, offset, &cache->ioh);
1361 vaddr = bus_space_vaddr(cache->iot, cache->ioh)((cache->iot)->vaddr((cache->ioh)));
1362#endif
1363 cache->page = page;
1364 cache->vaddr = (unsigned long)vaddr;
1365
1366 return vaddr;
1367}
1368
1369static void *reloc_vaddr(struct i915_vma *vma,
1370 struct i915_execbuffer *eb,
1371 unsigned long page)
1372{
1373 struct reloc_cache *cache = &eb->reloc_cache;
1374 void *vaddr;
1375
1376 if (cache->page == page) {
1377 vaddr = unmask_page(cache->vaddr);
1378 } else {
1379 vaddr = NULL((void *)0);
1380 if ((cache->vaddr & KMAP0x4) == 0)
1381 vaddr = reloc_iomap(vma, eb, page);
1382 if (!vaddr)
1383 vaddr = reloc_kmap(vma->obj, cache, page);
1384 }
1385
1386 return vaddr;
1387}
1388
1389static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1390{
1391 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1392 if (flushes & CLFLUSH_BEFORE(1UL << (0)))
1393 drm_clflush_virt_range(addr, sizeof(*addr));
1394
1395 *addr = value;
1396
1397 /*
1398 * Writes to the same cacheline are serialised by the CPU
1399 * (including clflush). On the write path, we only require
1400 * that it hits memory in an orderly fashion and place
1401 * mb barriers at the start and end of the relocation phase
1402 * to ensure ordering of clflush wrt to the system.
1403 */
1404 if (flushes & CLFLUSH_AFTER(1UL << (1)))
1405 drm_clflush_virt_range(addr, sizeof(*addr));
1406 } else
1407 *addr = value;
1408}
1409
1410static u64
1411relocate_entry(struct i915_vma *vma,
1412 const struct drm_i915_gem_relocation_entry *reloc,
1413 struct i915_execbuffer *eb,
1414 const struct i915_vma *target)
1415{
1416 u64 target_addr = relocation_target(reloc, target);
1417 u64 offset = reloc->offset;
1418 bool_Bool wide = eb->reloc_cache.use_64bit_reloc;
1419 void *vaddr;
1420
1421repeat:
1422 vaddr = reloc_vaddr(vma, eb,
1423 offset >> PAGE_SHIFT12);
1424 if (IS_ERR(vaddr))
1425 return PTR_ERR(vaddr);
1426
1427 GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)))((void)0);
1428 clflush_write32(vaddr + offset_in_page(offset)((vaddr_t)(offset) & ((1 << 12) - 1)),
1429 lower_32_bits(target_addr)((u32)(target_addr)),
1430 eb->reloc_cache.vaddr);
1431
1432 if (wide) {
1433 offset += sizeof(u32);
1434 target_addr >>= 32;
1435 wide = false0;
1436 goto repeat;
1437 }
1438
1439 return target->node.start | UPDATE(1ULL << (7));
1440}
1441
1442static u64
1443eb_relocate_entry(struct i915_execbuffer *eb,
1444 struct eb_vma *ev,
1445 const struct drm_i915_gem_relocation_entry *reloc)
1446{
1447 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
1448 struct eb_vma *target;
1449 int err;
1450
1451 /* we already hold a reference to all valid objects */
1452 target = eb_get_vma(eb, reloc->target_handle);
1453 if (unlikely(!target)__builtin_expect(!!(!target), 0))
1454 return -ENOENT2;
1455
1456 /* Validate that the target is in a valid r/w GPU domain */
1457 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1458 drm_dbg(&i915->drm, "reloc with multiple write domains: "
1459 "target %d offset %d "
1460 "read %08x write %08x",
1461 reloc->target_handle,
1462 (int) reloc->offset,
1463 reloc->read_domains,
1464 reloc->write_domain);
1465 return -EINVAL22;
1466 }
1467 if (unlikely((reloc->write_domain | reloc->read_domains)
1468 & ~I915_GEM_GPU_DOMAINS)) {
1469 drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
1470 "target %d offset %d "
1471 "read %08x write %08x",
1472 reloc->target_handle,
1473 (int) reloc->offset,
1474 reloc->read_domains,
1475 reloc->write_domain);
1476 return -EINVAL22;
1477 }
1478
1479 if (reloc->write_domain) {
1480 target->flags |= EXEC_OBJECT_WRITE(1<<2);
1481
1482 /*
1483 * Sandybridge PPGTT errata: We need a global gtt mapping
1484 * for MI and pipe_control writes because the gpu doesn't
1485 * properly redirect them through the ppgtt for non_secure
1486 * batchbuffers.
1487 */
1488 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION0x00000010 &&
1489 GRAPHICS_VER(eb->i915)((&(eb->i915)->__runtime)->graphics.ip.ver) == 6 &&
1490 !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND((int)(1UL << (10))))) {
1491 struct i915_vma *vma = target->vma;
1492
1493 reloc_cache_unmap(&eb->reloc_cache);
1494 mutex_lock(&vma->vm->mutex)rw_enter_write(&vma->vm->mutex);
1495 err = i915_vma_bind(target->vma,
1496 target->vma->obj->cache_level,
1497 PIN_GLOBAL(1ULL << (10)), NULL((void *)0), NULL((void *)0));
1498 mutex_unlock(&vma->vm->mutex)rw_exit_write(&vma->vm->mutex);
1499 reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
1500 if (err)
1501 return err;
1502 }
1503 }
1504
1505 /*
1506 * If the relocation already has the right value in it, no
1507 * more work needs to be done.
1508 */
1509 if (!DBG_FORCE_RELOC0 &&
1510 gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
1511 return 0;
1512
1513 /* Check that the relocation address is valid... */
1514 if (unlikely(reloc->offset >
1515 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1516 drm_dbg(&i915->drm, "Relocation beyond object bounds: "
1517 "target %d offset %d size %d.\n",
1518 reloc->target_handle,
1519 (int)reloc->offset,
1520 (int)ev->vma->size);
1521 return -EINVAL22;
1522 }
1523 if (unlikely(reloc->offset & 3)) {
1524 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "
1525 "target %d offset %d.\n",
1526 reloc->target_handle,
1527 (int)reloc->offset);
1528 return -EINVAL22;
1529 }
1530
1531 /*
1532 * If we write into the object, we need to force the synchronisation
1533 * barrier, either with an asynchronous clflush or if we executed the
1534 * patching using the GPU (though that should be serialised by the
1535 * timeline). To be completely sure, and since we are required to
1536 * do relocations we are already stalling, disable the user's opt
1537 * out of our synchronisation.
1538 */
1539 ev->flags &= ~EXEC_OBJECT_ASYNC(1<<6);
1540
1541 /* and update the user's relocation entry */
1542 return relocate_entry(ev->vma, reloc, eb, target->vma);
1543}
1544
1545static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
1546{
1547#define N_RELOC(x)((x) / sizeof(struct drm_i915_gem_relocation_entry)) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1548 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)((512) / sizeof(struct drm_i915_gem_relocation_entry))];
1549 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1550 struct drm_i915_gem_relocation_entry __user *urelocs =
1551 u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1552 unsigned long remain = entry->relocation_count;
1553
1554 if (unlikely(remain > N_RELOC(ULONG_MAX)))
1555 return -EINVAL22;
1556
1557 /*
1558 * We must check that the entire relocation array is safe
1559 * to read. However, if the array is not writable the user loses
1560 * the updated relocation values.
1561 */
1562 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
1563 return -EFAULT14;
1564
1565 do {
1566 struct drm_i915_gem_relocation_entry *r = stack;
1567 unsigned int count =
1568 min_t(unsigned long, remain, ARRAY_SIZE(stack));
1569 unsigned int copied;
1570
1571 /*
1572 * This is the fast path and we cannot handle a pagefault
1573 * whilst holding the struct mutex lest the user pass in the
1574 * relocations contained within a mmapped bo. For in such a case,
1575 * the page fault handler would call i915_gem_fault() and
1576 * we would try to acquire the struct mutex again. Obviously
1577 * this is bad and so lockdep complains vehemently.
1578 */
1579 pagefault_disable();
1580 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1581 pagefault_enable();
1582 if (unlikely(copied)__builtin_expect(!!(copied), 0)) {
1583 remain = -EFAULT14;
1584 goto out;
1585 }
1586
1587 remain -= count;
1588 do {
1589 u64 offset = eb_relocate_entry(eb, ev, r);
1590
1591 if (likely(offset == 0)__builtin_expect(!!(offset == 0), 1)) {
1592 } else if ((s64)offset < 0) {
1593 remain = (int)offset;
1594 goto out;
1595 } else {
1596 /*
1597 * Note that reporting an error now
1598 * leaves everything in an inconsistent
1599 * state as we have *already* changed
1600 * the relocation value inside the
1601 * object. As we have not changed the
1602 * reloc.presumed_offset or will not
1603 * change the execobject.offset, on the
1604 * call we may not rewrite the value
1605 * inside the object, leaving it
1606 * dangling and causing a GPU hang. Unless
1607 * userspace dynamically rebuilds the
1608 * relocations on each execbuf rather than
1609 * presume a static tree.
1610 *
1611 * We did previously check if the relocations
1612 * were writable (access_ok), an error now
1613 * would be a strange race with mprotect,
1614 * having already demonstrated that we
1615 * can read from this userspace address.
1616 */
1617 offset = gen8_canonical_addr(offset & ~UPDATE(1ULL << (7)));
1618 __put_user(offset,
1619 &urelocs[r - stack].presumed_offset);
1620 }
1621 } while (r++, --count);
1622 urelocs += ARRAY_SIZE(stack)(sizeof((stack)) / sizeof((stack)[0]));
1623 } while (remain);
1624out:
1625 reloc_cache_reset(&eb->reloc_cache, eb);
1626 return remain;
1627}
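
The copy above follows the usual "no pagefaults while atomically copying" pattern described in the comment; a hedged, self-contained sketch of the same idea (ex_ helper name is hypothetical):

    /* Sketch only: copy user data without allowing a recursive page fault. */
    static int ex_copy_user_atomic(void *dst, const void __user *src, size_t len)
    {
            unsigned long uncopied;

            pagefault_disable();
            uncopied = __copy_from_user_inatomic(dst, src, len);
            pagefault_enable();

            /* A short copy means the pages were not resident; the caller is
             * expected to drop its locks and retry via the faulting slow path. */
            return uncopied ? -EFAULT : 0;
    }
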
1628
1629static int
1630eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
1631{
1632 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1633 struct drm_i915_gem_relocation_entry *relocs =
1634 u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1635 unsigned int i;
1636 int err;
1637
1638 for (i = 0; i < entry->relocation_count; i++) {
1639 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
1640
1641 if ((s64)offset < 0) {
1642 err = (int)offset;
1643 goto err;
1644 }
1645 }
1646 err = 0;
1647err:
1648 reloc_cache_reset(&eb->reloc_cache, eb);
1649 return err;
1650}
1651
1652static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1653{
1654 const char __user *addr, *end;
1655 unsigned long size;
1656 char __maybe_unused__attribute__((__unused__)) c;
1657
1658 size = entry->relocation_count;
1659 if (size == 0)
1660 return 0;
1661
1662 if (size > N_RELOC(ULONG_MAX))
1663 return -EINVAL22;
1664
1665 addr = u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1666 size *= sizeof(struct drm_i915_gem_relocation_entry);
1667 if (!access_ok(addr, size))
1668 return -EFAULT14;
1669
1670 end = addr + size;
1671 for (; addr < end; addr += PAGE_SIZE(1 << 12)) {
1672 int err = __get_user(c, addr)-copyin((addr), &((c)), sizeof((c)));
1673 if (err)
1674 return err;
1675 }
1676 return __get_user(c, end - 1)-copyin((end - 1), &((c)), sizeof((c)));
1677}
1678
1679static int eb_copy_relocations(const struct i915_execbuffer *eb)
1680{
1681 struct drm_i915_gem_relocation_entry *relocs;
1682 const unsigned int count = eb->buffer_count;
1683 unsigned int i;
1684 int err;
1685
1686 for (i = 0; i < count; i++) {
1687 const unsigned int nreloc = eb->exec[i].relocation_count;
1688 struct drm_i915_gem_relocation_entry __user *urelocs;
1689 unsigned long size;
1690 unsigned long copied;
1691
1692 if (nreloc == 0)
1693 continue;
1694
1695 err = check_relocations(&eb->exec[i]);
1696 if (err)
1697 goto err;
1698
1699 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr)((void *)(uintptr_t)(eb->exec[i].relocs_ptr));
1700 size = nreloc * sizeof(*relocs);
1701
1702 relocs = kvmalloc_array(size, 1, GFP_KERNEL(0x0001 | 0x0004));
1703 if (!relocs) {
1704 err = -ENOMEM12;
1705 goto err;
1706 }
1707
1708 /* copy_from_user is limited to < 4GiB */
1709 copied = 0;
1710 do {
1711 unsigned int len =
1712 min_t(u64, BIT_ULL(31), size - copied);
1713
1714 if (__copy_from_user((char *)relocs + copied,
1715 (char __user *)urelocs + copied,
1716 len))
1717 goto end;
1718
1719 copied += len;
1720 } while (copied < size);
1721
1722 /*
1723 * As we do not update the known relocation offsets after
1724 * relocating (due to the complexities in lock handling),
1725 * we need to mark them as invalid now so that we force the
1726 * relocation processing next time. Just in case the target
1727 * object is evicted and then rebound into its old
1728 * presumed_offset before the next execbuffer - if that
1729 * happened we would make the mistake of assuming that the
1730 * relocations were valid.
1731 */
1732 if (!user_access_begin(urelocs, size)access_ok(urelocs, size))
1733 goto end;
1734
1735 for (copied = 0; copied < nreloc; copied++)
1736 unsafe_put_user(-1,
1737 &urelocs[copied].presumed_offset,
1738 end_user);
1739 user_access_end();
1740
1741 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1742 }
1743
1744 return 0;
1745
1746end_user:
1747 user_access_end();
1748end:
1749 kvfree(relocs);
1750 err = -EFAULT14;
1751err:
1752 while (i--) {
1753 relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1754 if (eb->exec[i].relocation_count)
1755 kvfree(relocs);
1756 }
1757 return err;
1758}
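
The inner do/while above honours the "copy_from_user is limited to < 4GiB" note by copying in BIT_ULL(31) chunks; a standalone sketch of that chunking (hypothetical helper, assumptions in comments):

    /* Sketch only: copy a potentially >4GiB user buffer in 2GiB chunks. */
    static int ex_copy_user_large(void *dst, const void __user *src, u64 size)
    {
            u64 copied = 0;

            while (copied < size) {
                    u64 len = min_t(u64, BIT_ULL(31), size - copied);

                    if (__copy_from_user((char *)dst + copied,
                                         (const char __user *)src + copied, len))
                            return -EFAULT;
                    copied += len;
            }
            return 0;
    }
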
1759
1760static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1761{
1762 const unsigned int count = eb->buffer_count;
1763 unsigned int i;
1764
1765 for (i = 0; i < count; i++) {
1766 int err;
1767
1768 err = check_relocations(&eb->exec[i]);
1769 if (err)
1770 return err;
1771 }
1772
1773 return 0;
1774}
1775
1776static int eb_reinit_userptr(struct i915_execbuffer *eb)
1777{
1778 const unsigned int count = eb->buffer_count;
1779 unsigned int i;
1780 int ret;
1781
1782 if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
1783 return 0;
1784
1785 for (i = 0; i < count; i++) {
1786 struct eb_vma *ev = &eb->vma[i];
1787
1788 if (!i915_gem_object_is_userptr(ev->vma->obj))
1789 continue;
1790
1791 ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
1792 if (ret)
1793 return ret;
1794
1795 ev->flags |= __EXEC_OBJECT_USERPTR_INIT(1UL << (28));
1796 }
1797
1798 return 0;
1799}
1800
1801static noinline__attribute__((__noinline__)) int eb_relocate_parse_slow(struct i915_execbuffer *eb)
1802{
1803 bool_Bool have_copy = false0;
1804 struct eb_vma *ev;
1805 int err = 0;
1806
1807repeat:
1808 if (signal_pending(current)) {
1809 err = -ERESTARTSYS4;
1810 goto out;
1811 }
1812
1813 /* We may process another execbuffer during the unlock... */
1814 eb_release_vmas(eb, false0);
1815 i915_gem_ww_ctx_fini(&eb->ww);
1816
1817 /*
1818 * We take 3 passes through the slowpath.
1819 *
1820 * 1 - we try to just prefault all the user relocation entries and
1821 * then attempt to reuse the atomic pagefault disabled fast path again.
1822 *
1823 * 2 - we copy the user entries to a local buffer here outside of the
1824 * lock and allow ourselves to wait upon any rendering before
1825 * relocations
1826 *
1827 * 3 - we already have a local copy of the relocation entries, but
1828 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1829 */
1830 if (!err) {
1831 err = eb_prefault_relocations(eb);
1832 } else if (!have_copy) {
1833 err = eb_copy_relocations(eb);
1834 have_copy = err == 0;
1835 } else {
1836 cond_resched();
1837 err = 0;
1838 }
1839
1840 if (!err)
1841 err = eb_reinit_userptr(eb);
1842
1843 i915_gem_ww_ctx_init(&eb->ww, true1);
1844 if (err)
1845 goto out;
1846
1847 /* reacquire the objects */
1848repeat_validate:
1849 err = eb_pin_engine(eb, false0);
1850 if (err)
1851 goto err;
1852
1853 err = eb_validate_vmas(eb);
1854 if (err)
1855 goto err;
1856
1857 GEM_BUG_ON(!eb->batches[0])((void)0);
1858
1859 list_for_each_entry(ev, &eb->relocs, reloc_link) {
1860 if (!have_copy) {
1861 err = eb_relocate_vma(eb, ev);
1862 if (err)
1863 break;
1864 } else {
1865 err = eb_relocate_vma_slow(eb, ev);
1866 if (err)
1867 break;
1868 }
1869 }
1870
1871 if (err == -EDEADLK11)
1872 goto err;
1873
1874 if (err && !have_copy)
1875 goto repeat;
1876
1877 if (err)
1878 goto err;
1879
1880 /* as last step, parse the command buffer */
1881 err = eb_parse(eb);
1882 if (err)
1883 goto err;
1884
1885 /*
1886 * Leave the user relocations as are, this is the painfully slow path,
1887 * and we want to avoid the complication of dropping the lock whilst
1888 * having buffers reserved in the aperture and so causing spurious
1889 * ENOSPC for random operations.
1890 */
1891
1892err:
1893 if (err == -EDEADLK11) {
1894 eb_release_vmas(eb, false0);
1895 err = i915_gem_ww_ctx_backoff(&eb->ww);
1896 if (!err)
1897 goto repeat_validate;
1898 }
1899
1900 if (err == -EAGAIN35)
1901 goto repeat;
1902
1903out:
1904 if (have_copy) {
1905 const unsigned int count = eb->buffer_count;
1906 unsigned int i;
1907
1908 for (i = 0; i < count; i++) {
1909 const struct drm_i915_gem_exec_object2 *entry =
1910 &eb->exec[i];
1911 struct drm_i915_gem_relocation_entry *relocs;
1912
1913 if (!entry->relocation_count)
1914 continue;
1915
1916 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1917 kvfree(relocs);
1918 }
1919 }
1920
1921 return err;
1922}
1923
1924static int eb_relocate_parse(struct i915_execbuffer *eb)
1925{
1926 int err;
1927 bool_Bool throttle = true1;
1928
1929retry:
1930 err = eb_pin_engine(eb, throttle);
1931 if (err) {
1932 if (err != -EDEADLK11)
1933 return err;
1934
1935 goto err;
1936 }
1937
1938 /* only throttle once, even if we didn't need to throttle */
1939 throttle = false0;
1940
1941 err = eb_validate_vmas(eb);
1942 if (err == -EAGAIN35)
1943 goto slow;
1944 else if (err)
1945 goto err;
1946
1947 /* The objects are in their final locations, apply the relocations. */
1948 if (eb->args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
1949 struct eb_vma *ev;
1950
1951 list_for_each_entry(ev, &eb->relocs, reloc_link) {
1952 err = eb_relocate_vma(eb, ev);
1953 if (err)
1954 break;
1955 }
1956
1957 if (err == -EDEADLK11)
1958 goto err;
1959 else if (err)
1960 goto slow;
1961 }
1962
1963 if (!err)
1964 err = eb_parse(eb);
1965
1966err:
1967 if (err == -EDEADLK11) {
1968 eb_release_vmas(eb, false0);
1969 err = i915_gem_ww_ctx_backoff(&eb->ww);
1970 if (!err)
1971 goto retry;
1972 }
1973
1974 return err;
1975
1976slow:
1977 err = eb_relocate_parse_slow(eb);
1978 if (err)
1979 /*
1980 * If the user expects the execobject.offset and
1981 * reloc.presumed_offset to be an exact match,
1982 * as for using NO_RELOC, then we cannot update
1983 * the execobject.offset until we have completed
1984 * relocation.
1985 */
1986 eb->args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
1987
1988 return err;
1989}
1990
1991/*
1992 * Using two helper loops for the order of which requests / batches are created
1993 * and added the to backend. Requests are created in order from the parent to
1994 * the last child. Requests are added in the reverse order, from the last child
1995 * to parent. This is done for locking reasons as the timeline lock is acquired
1996 * during request creation and released when the request is added to the
1997 * backend. To make lockdep happy (see intel_context_timeline_lock) this must be
1998 * the ordering.
1999 */
2000#define for_each_batch_create_order(_eb, _i) \
2001 for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i))
2002#define for_each_batch_add_order(_eb, _i) \
2003 BUILD_BUG_ON(!typecheck(int, _i)); \
2004 for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
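
A usage sketch of the two iteration orders (ex_ helpers are hypothetical): requests are created parent-first but added to the backend child-first, so the timeline locks taken during creation are released in the order the comment above requires.

    /* Sketch only: N batches, created 0..N-1, added N-1..0. */
    for_each_batch_create_order(eb, i)
            eb->requests[i] = ex_create_request(eb, i);

    for_each_batch_add_order(eb, i)
            ex_add_request_to_backend(eb->requests[i]);
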
2005
2006static struct i915_request *
2007eb_find_first_request_added(struct i915_execbuffer *eb)
2008{
2009 int i;
2010
2011 for_each_batch_add_order(eb, i)
2012 if (eb->requests[i])
2013 return eb->requests[i];
2014
2015 GEM_BUG_ON("Request not found")((void)0);
2016
2017 return NULL((void *)0);
2018}
2019
2020#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1
2021
2022/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
2023static int eb_capture_stage(struct i915_execbuffer *eb)
2024{
2025 const unsigned int count = eb->buffer_count;
2026 unsigned int i = count, j;
2027
2028 while (i--) {
2029 struct eb_vma *ev = &eb->vma[i];
2030 struct i915_vma *vma = ev->vma;
2031 unsigned int flags = ev->flags;
2032
2033 if (!(flags & EXEC_OBJECT_CAPTURE(1<<7)))
2034 continue;
2035
2036 if (i915_gem_context_is_recoverable(eb->gem_context) &&
2037 (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0)))
2038 return -EINVAL22;
2039
2040 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2041 struct i915_capture_list *capture;
2042
2043 capture = kmalloc(sizeof(*capture), GFP_KERNEL(0x0001 | 0x0004));
2044 if (!capture)
2045 continue;
2046
2047 capture->next = eb->capture_lists[j];
2048 capture->vma_res = i915_vma_resource_get(vma->resource);
2049 eb->capture_lists[j] = capture;
2050 }
2051 }
2052
2053 return 0;
2054}
2055
2056/* Commit once we're in the critical path */
2057static void eb_capture_commit(struct i915_execbuffer *eb)
2058{
2059 unsigned int j;
2060
2061 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2062 struct i915_request *rq = eb->requests[j];
2063
2064 if (!rq)
2065 break;
2066
2067 rq->capture_list = eb->capture_lists[j];
2068 eb->capture_lists[j] = NULL((void *)0);
2069 }
2070}
2071
2072/*
2073 * Release anything that didn't get committed due to errors.
2074 * The capture_list will otherwise be freed at request retire.
2075 */
2076static void eb_capture_release(struct i915_execbuffer *eb)
2077{
2078 unsigned int j;
2079
2080 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2081 if (eb->capture_lists[j]) {
2082 i915_request_free_capture_list(eb->capture_lists[j]);
2083 eb->capture_lists[j] = NULL((void *)0);
2084 }
2085 }
2086}
2087
2088static void eb_capture_list_clear(struct i915_execbuffer *eb)
2089{
2090 memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
2091}
2092
2093#else
2094
2095static int eb_capture_stage(struct i915_execbuffer *eb)
2096{
2097 return 0;
2098}
2099
2100static void eb_capture_commit(struct i915_execbuffer *eb)
2101{
2102}
2103
2104static void eb_capture_release(struct i915_execbuffer *eb)
2105{
2106}
2107
2108static void eb_capture_list_clear(struct i915_execbuffer *eb)
2109{
2110}
2111
2112#endif
2113
2114static int eb_move_to_gpu(struct i915_execbuffer *eb)
2115{
2116 const unsigned int count = eb->buffer_count;
2117 unsigned int i = count;
2118 int err = 0, j;
2119
2120 while (i--) {
2121 struct eb_vma *ev = &eb->vma[i];
2122 struct i915_vma *vma = ev->vma;
2123 unsigned int flags = ev->flags;
2124 struct drm_i915_gem_object *obj = vma->obj;
2125
2126 assert_vma_held(vma);
2127
2128 /*
2129 * If the GPU is not _reading_ through the CPU cache, we need
2130 * to make sure that any writes (both previous GPU writes from
2131 * before a change in snooping levels and normal CPU writes)
2132 * caught in that cache are flushed to main memory.
2133 *
2134 * We want to say
2135 * obj->cache_dirty &&
2136 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
2137 * but gcc's optimiser doesn't handle that as well and emits
2138 * two jumps instead of one. Maybe one day...
2139 *
2140 * FIXME: There is also sync flushing in set_pages(), which
2141 * serves a different purpose(some of the time at least).
2142 *
2143 * We should consider:
2144 *
2145 * 1. Rip out the async flush code.
2146 *
2147 * 2. Or make the sync flushing use the async clflush path
2148 * using mandatory fences underneath. Currently the below
2149 * async flush happens after we bind the object.
2150 */
2151 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
2152 if (i915_gem_clflush_object(obj, 0))
2153 flags &= ~EXEC_OBJECT_ASYNC(1<<6);
2154 }
2155
2156 /* We only need to await on the first request */
2157 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC(1<<6))) {
2158 err = i915_request_await_object
2159 (eb_find_first_request_added(eb), obj,
2160 flags & EXEC_OBJECT_WRITE(1<<2));
2161 }
2162
2163 for_each_batch_add_order(eb, j) {
2164 if (err)
2165 break;
2166 if (!eb->requests[j])
2167 continue;
2168
2169 err = _i915_vma_move_to_active(vma, eb->requests[j],
2170 j ? NULL((void *)0) :
2171 eb->composite_fence ?
2172 eb->composite_fence :
2173 &eb->requests[j]->fence,
2174 flags | __EXEC_OBJECT_NO_RESERVE(1UL << (31)));
2175 }
2176 }
2177
2178#ifdef CONFIG_MMU_NOTIFIER
2179 if (!err && (eb->args->flags & __EXEC_USERPTR_USED(1UL << (29)))) {
2180 read_lock(&eb->i915->mm.notifier_lock)mtx_enter(&eb->i915->mm.notifier_lock);
2181
2182 /*
2183 * count is always at least 1, otherwise __EXEC_USERPTR_USED
2184 * could not have been set
2185 */
2186 for (i = 0; i < count; i++) {
2187 struct eb_vma *ev = &eb->vma[i];
2188 struct drm_i915_gem_object *obj = ev->vma->obj;
2189
2190 if (!i915_gem_object_is_userptr(obj))
2191 continue;
2192
2193 err = i915_gem_object_userptr_submit_done(obj);
2194 if (err)
2195 break;
2196 }
2197
2198 read_unlock(&eb->i915->mm.notifier_lock)mtx_leave(&eb->i915->mm.notifier_lock);
2199 }
2200#endif
2201
2202 if (unlikely(err)__builtin_expect(!!(err), 0))
2203 goto err_skip;
2204
2205 /* Unconditionally flush any chipset caches (for streaming writes). */
2206 intel_gt_chipset_flush(eb->gt);
2207 eb_capture_commit(eb);
2208
2209 return 0;
2210
2211err_skip:
2212 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2213 if (!eb->requests[j])
2214 break;
2215
2216 i915_request_set_error_once(eb->requests[j], err);
2217 }
2218 return err;
2219}
2220
2221static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
2222{
2223 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
2224 return -EINVAL22;
2225
2226 /* Kernel clipping was a DRI1 misfeature */
2227 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY(1<<19) |
2228 I915_EXEC_USE_EXTENSIONS(1 << 21)))) {
2229 if (exec->num_cliprects || exec->cliprects_ptr)
2230 return -EINVAL22;
2231 }
2232
2233 if (exec->DR4 == 0xffffffff) {
2234 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n")___drm_dbg(((void *)0), DRM_UT_CORE, "UXA submitting garbage DR4, fixing up\n"
)
;
2235 exec->DR4 = 0;
2236 }
2237 if (exec->DR1 || exec->DR4)
2238 return -EINVAL22;
2239
2240 if ((exec->batch_start_offset | exec->batch_len) & 0x7)
2241 return -EINVAL22;
2242
2243 return 0;
2244}
2245
2246static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
2247{
2248 u32 *cs;
2249 int i;
2250
2251 if (GRAPHICS_VER(rq->engine->i915) != 7 || rq->engine->id != RCS0) {
2252 drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n");
2253 return -EINVAL22;
2254 }
2255
2256 cs = intel_ring_begin(rq, 4 * 2 + 2);
2257 if (IS_ERR(cs))
2258 return PTR_ERR(cs);
2259
2260 *cs++ = MI_LOAD_REGISTER_IMM(4)(((0x0) << 29) | (0x22) << 23 | (2*(4)-1));
2261 for (i = 0; i < 4; i++) {
2262 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)((const i915_reg_t){ .reg = (0x5280 + (i) * 4) }));
2263 *cs++ = 0;
2264 }
2265 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
2266 intel_ring_advance(rq, cs);
2267
2268 return 0;
2269}
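
The ring reservation above is sized exactly for what is emitted; a dword accounting sketch (the alignment motivation for the trailing MI_NOOP is an assumption):

    /* Dword budget for the emission above:
     *    1   MI_LOAD_REGISTER_IMM(4) header
     *    8   four (GEN7_SO_WRITE_OFFSET(i), 0) pairs
     *    1   MI_NOOP padding (keeps the emission an even number of dwords)
     *   --
     *   10 == 4 * 2 + 2 passed to intel_ring_begin()
     */
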
2270
2271static struct i915_vma *
2272shadow_batch_pin(struct i915_execbuffer *eb,
2273 struct drm_i915_gem_object *obj,
2274 struct i915_address_space *vm,
2275 unsigned int flags)
2276{
2277 struct i915_vma *vma;
2278 int err;
2279
2280 vma = i915_vma_instance(obj, vm, NULL((void *)0));
2281 if (IS_ERR(vma))
2282 return vma;
2283
2284 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE(1ULL << (8)));
2285 if (err)
2286 return ERR_PTR(err);
2287
2288 return vma;
2289}
2290
2291static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
2292{
2293 /*
2294 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2295 * batch" bit. Hence we need to pin secure batches into the global gtt.
2296 * hsw should have this fixed, but bdw mucks it up again. */
2297 if (eb->batch_flags & I915_DISPATCH_SECURE(1UL << (0)))
2298 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL((void *)0), 0, 0, PIN_VALIDATE(1ULL << (8)));
2299
2300 return NULL((void *)0);
2301}
2302
2303static int eb_parse(struct i915_execbuffer *eb)
2304{
2305 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2306 struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
2307 struct i915_vma *shadow, *trampoline, *batch;
2308 unsigned long len;
2309 int err;
2310
2311 if (!eb_use_cmdparser(eb)) {
2312 batch = eb_dispatch_secure(eb, eb->batches[0]->vma);
2313 if (IS_ERR(batch))
2314 return PTR_ERR(batch);
2315
2316 goto secure_batch;
2317 }
2318
2319 if (intel_context_is_parallel(eb->context))
2320 return -EINVAL22;
2321
2322 len = eb->batch_len[0];
2323 if (!CMDPARSER_USES_GGTT(eb->i915)) {
2324 /*
2325 * ppGTT backed shadow buffers must be mapped RO, to prevent
2326 * post-scan tampering
2327 */
2328 if (!eb->context->vm->has_read_only) {
2329 drm_dbg(&i915->drm,
2330 "Cannot prevent post-scan tampering without RO capable vm\n");
2331 return -EINVAL22;
2332 }
2333 } else {
2334 len += I915_CMD_PARSER_TRAMPOLINE_SIZE8;
2335 }
2336 if (unlikely(len < eb->batch_len[0])__builtin_expect(!!(len < eb->batch_len[0]), 0)) /* last paranoid check of overflow */
2337 return -EINVAL22;
2338
2339 if (!pool) {
2340 pool = intel_gt_get_buffer_pool(eb->gt, len,
2341 I915_MAP_WB);
2342 if (IS_ERR(pool))
2343 return PTR_ERR(pool);
2344 eb->batch_pool = pool;
2345 }
2346
2347 err = i915_gem_object_lock(pool->obj, &eb->ww);
2348 if (err)
2349 return err;
2350
2351 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER(1ULL << (11)));
2352 if (IS_ERR(shadow))
2353 return PTR_ERR(shadow);
2354
2355 intel_gt_buffer_pool_mark_used(pool);
2356 i915_gem_object_set_readonly(shadow->obj);
2357 shadow->private = pool;
2358
2359 trampoline = NULL((void *)0);
2360 if (CMDPARSER_USES_GGTT(eb->i915)) {
2361 trampoline = shadow;
2362
2363 shadow = shadow_batch_pin(eb, pool->obj,
2364 &eb->gt->ggtt->vm,
2365 PIN_GLOBAL(1ULL << (10)));
2366 if (IS_ERR(shadow))
2367 return PTR_ERR(shadow);
2368
2369 shadow->private = pool;
2370
2371 eb->batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
2372 }
2373
2374 batch = eb_dispatch_secure(eb, shadow);
2375 if (IS_ERR(batch))
2376 return PTR_ERR(batch);
2377
2378 err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
2379 if (err)
2380 return err;
2381
2382 err = intel_engine_cmd_parser(eb->context->engine,
2383 eb->batches[0]->vma,
2384 eb->batch_start_offset,
2385 eb->batch_len[0],
2386 shadow, trampoline);
2387 if (err)
2388 return err;
2389
2390 eb->batches[0] = &eb->vma[eb->buffer_count++];
2391 eb->batches[0]->vma = i915_vma_get(shadow);
2392 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN(1UL << (30));
2393
2394 eb->trampoline = trampoline;
2395 eb->batch_start_offset = 0;
2396
2397secure_batch:
2398 if (batch) {
2399 if (intel_context_is_parallel(eb->context))
2400 return -EINVAL22;
2401
2402 eb->batches[0] = &eb->vma[eb->buffer_count++];
2403 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN(1UL << (30));
2404 eb->batches[0]->vma = i915_vma_get(batch);
2405 }
2406 return 0;
2407}
2408
2409static int eb_request_submit(struct i915_execbuffer *eb,
2410 struct i915_request *rq,
2411 struct i915_vma *batch,
2412 u64 batch_len)
2413{
2414 int err;
2415
2416 if (intel_context_nopreempt(rq->context))
2417 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
2418
2419 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET(1<<8)) {
2420 err = i915_reset_gen7_sol_offsets(rq);
2421 if (err)
2422 return err;
2423 }
2424
2425 /*
2426 * After we completed waiting for other engines (using HW semaphores)
2427 * then we can signal that this request/batch is ready to run. This
2428 * allows us to determine if the batch is still waiting on the GPU
2429 * or actually running by checking the breadcrumb.
2430 */
2431 if (rq->context->engine->emit_init_breadcrumb) {
2432 err = rq->context->engine->emit_init_breadcrumb(rq);
2433 if (err)
2434 return err;
2435 }
2436
2437 err = rq->context->engine->emit_bb_start(rq,
2438 batch->node.start +
2439 eb->batch_start_offset,
2440 batch_len,
2441 eb->batch_flags);
2442 if (err)
2443 return err;
2444
2445 if (eb->trampoline) {
2446 GEM_BUG_ON(intel_context_is_parallel(rq->context))((void)0);
2447 GEM_BUG_ON(eb->batch_start_offset)((void)0);
2448 err = rq->context->engine->emit_bb_start(rq,
2449 eb->trampoline->node.start +
2450 batch_len, 0, 0);
2451 if (err)
2452 return err;
2453 }
2454
2455 return 0;
2456}
2457
2458static int eb_submit(struct i915_execbuffer *eb)
2459{
2460 unsigned int i;
2461 int err;
2462
2463 err = eb_move_to_gpu(eb);
2464
2465 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i)) {
2466 if (!eb->requests[i])
2467 break;
2468
2469 trace_i915_request_queue(eb->requests[i], eb->batch_flags);
2470 if (!err)
2471 err = eb_request_submit(eb, eb->requests[i],
2472 eb->batches[i]->vma,
2473 eb->batch_len[i]);
2474 }
2475
2476 return err;
2477}
2478
2479static int num_vcs_engines(struct drm_i915_privateinteldrm_softc *i915)
2480{
2481 return hweight_long(VDBOX_MASK(to_gt(i915)));
2482}
2483
2484/*
2485 * Find one BSD ring to dispatch the corresponding BSD command.
2486 * The engine index is returned.
2487 */
2488static unsigned int
2489gen8_dispatch_bsd_engine(struct drm_i915_privateinteldrm_softc *dev_priv,
2490 struct drm_file *file)
2491{
2492 struct drm_i915_file_private *file_priv = file->driver_priv;
2493
2494 /* Check whether the file_priv has already selected one ring. */
2495 if ((int)file_priv->bsd_engine < 0)
2496 file_priv->bsd_engine =
2497 prandom_u32_max(num_vcs_engines(dev_priv));
2498
2499 return file_priv->bsd_engine;
2500}
2501
2502static const enum intel_engine_id user_ring_map[] = {
2503 [I915_EXEC_DEFAULT(0<<0)] = RCS0,
2504 [I915_EXEC_RENDER(1<<0)] = RCS0,
2505 [I915_EXEC_BLT(3<<0)] = BCS0,
2506 [I915_EXEC_BSD(2<<0)] = VCS0,
2507 [I915_EXEC_VEBOX(4<<0)] = VECS0
2508};
2509
2510static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
2511{
2512 struct intel_ring *ring = ce->ring;
2513 struct intel_timeline *tl = ce->timeline;
2514 struct i915_request *rq;
2515
2516 /*
2517 * Completely unscientific finger-in-the-air estimates for suitable
2518 * maximum user request size (to avoid blocking) and then backoff.
2519 */
2520 if (intel_ring_update_space(ring) >= PAGE_SIZE(1 << 12))
2521 return NULL((void *)0);
2522
2523 /*
2524 * Find a request that after waiting upon, there will be at least half
2525 * the ring available. The hysteresis allows us to compete for the
2526 * shared ring and should mean that we sleep less often prior to
2527 * claiming our resources, but not so long that the ring completely
2528 * drains before we can submit our next request.
2529 */
2530 list_for_each_entry(rq, &tl->requests, link) {
2531 if (rq->ring != ring)
2532 continue;
2533
2534 if (__intel_ring_space(rq->postfix,
2535 ring->emit, ring->size) > ring->size / 2)
2536 break;
2537 }
2538 if (&rq->link == &tl->requests)
2539 return NULL((void *)0); /* weird, we will check again later for real */
2540
2541 return i915_request_get(rq);
2542}
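
Illustrative numbers for the hysteresis above (assuming a 16 KiB legacy ring, which is not stated in this file): with at least PAGE_SIZE of free space the submission proceeds immediately; otherwise the oldest request whose completion leaves more than half the ring free is waited on, so back-to-back small submissions sleep roughly once per half-ring of emitted commands.

    /* Sketch with assumed numbers (16 KiB ring):
     *   intel_ring_update_space(ring) >= 4096  -> no throttle
     *   else wait for the oldest rq where
     *   __intel_ring_space(rq->postfix, ring->emit, ring->size) > 8192
     */
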
2543
2544static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
2545 bool_Bool throttle)
2546{
2547 struct intel_timeline *tl;
2548 struct i915_request *rq = NULL((void *)0);
2549
2550 /*
2551 * Take a local wakeref for preparing to dispatch the execbuf as
2552 * we expect to access the hardware fairly frequently in the
2553 * process, and require the engine to be kept awake between accesses.
2554 * Upon dispatch, we acquire another prolonged wakeref that we hold
2555 * until the timeline is idle, which in turn releases the wakeref
2556 * taken on the engine, and the parent device.
2557 */
2558 tl = intel_context_timeline_lock(ce);
2559 if (IS_ERR(tl))
2560 return PTR_ERR(tl);
2561
2562 intel_context_enter(ce);
2563 if (throttle)
2564 rq = eb_throttle(eb, ce);
2565 intel_context_timeline_unlock(tl);
2566
2567 if (rq) {
2568#ifdef __linux__
2569 bool_Bool nonblock = eb->file->filp->f_flags & O_NONBLOCK0x0004;
2570#else
2571 bool_Bool nonblock = eb->file->filp->f_flag & FNONBLOCK0x0004;
2572#endif
2573 long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT(0x7fffffff);
2574
2575 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE(1UL << (0)),
2576 timeout) < 0) {
2577 i915_request_put(rq);
2578
2579 /*
2580 * Error path, cannot use intel_context_timeline_lock as
2581 * that is user interruptible and this clean up step
2582 * must be done.
2583 */
2584 mutex_lock(&ce->timeline->mutex)rw_enter_write(&ce->timeline->mutex);
2585 intel_context_exit(ce);
2586 mutex_unlock(&ce->timeline->mutex)rw_exit_write(&ce->timeline->mutex);
2587
2588 if (nonblock)
2589 return -EWOULDBLOCK35;
2590 else
2591 return -EINTR4;
2592 }
2593 i915_request_put(rq);
2594 }
2595
2596 return 0;
2597}
2598
2599static int eb_pin_engine(struct i915_execbuffer *eb, bool_Bool throttle)
2600{
2601 struct intel_context *ce = eb->context, *child;
2602 int err;
2603 int i = 0, j = 0;
2604
2605 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED)((void)0);
2606
2607 if (unlikely(intel_context_is_banned(ce))__builtin_expect(!!(intel_context_is_banned(ce)), 0))
2608 return -EIO5;
2609
2610 /*
2611 * Pinning the contexts may generate requests in order to acquire
2612 * GGTT space, so do this first before we reserve a seqno for
2613 * ourselves.
2614 */
2615 err = intel_context_pin_ww(ce, &eb->ww);
2616 if (err)
2617 return err;
2618 for_each_child(ce, child) {
2619 err = intel_context_pin_ww(child, &eb->ww);
2620 GEM_BUG_ON(err)((void)0); /* perma-pinned should incr a counter */
2621 }
2622
2623 for_each_child(ce, child) {
2624 err = eb_pin_timeline(eb, child, throttle);
2625 if (err)
2626 goto unwind;
2627 ++i;
2628 }
2629 err = eb_pin_timeline(eb, ce, throttle);
2630 if (err)
2631 goto unwind;
2632
2633 eb->args->flags |= __EXEC_ENGINE_PINNED(1UL << (30));
2634 return 0;
2635
2636unwind:
2637 for_each_child(ce, child) {
2638 if (j++ < i) {
2639 mutex_lock(&child->timeline->mutex)rw_enter_write(&child->timeline->mutex);
2640 intel_context_exit(child);
2641 mutex_unlock(&child->timeline->mutex)rw_exit_write(&child->timeline->mutex);
2642 }
2643 }
2644 for_each_child(ce, child)
2645 intel_context_unpin(child);
2646 intel_context_unpin(ce);
2647 return err;
2648}
2649
2650static void eb_unpin_engine(struct i915_execbuffer *eb)
2651{
2652 struct intel_context *ce = eb->context, *child;
2653
2654 if (!(eb->args->flags & __EXEC_ENGINE_PINNED(1UL << (30))))
2655 return;
2656
2657 eb->args->flags &= ~__EXEC_ENGINE_PINNED(1UL << (30));
2658
2659 for_each_child(ce, child) {
2660 mutex_lock(&child->timeline->mutex)rw_enter_write(&child->timeline->mutex);
2661 intel_context_exit(child);
2662 mutex_unlock(&child->timeline->mutex)rw_exit_write(&child->timeline->mutex);
2663
2664 intel_context_unpin(child);
2665 }
2666
2667 mutex_lock(&ce->timeline->mutex)rw_enter_write(&ce->timeline->mutex);
2668 intel_context_exit(ce);
2669 mutex_unlock(&ce->timeline->mutex)rw_exit_write(&ce->timeline->mutex);
2670
2671 intel_context_unpin(ce);
2672}
2673
2674static unsigned int
2675eb_select_legacy_ring(struct i915_execbuffer *eb)
2676{
2677 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2678 struct drm_i915_gem_execbuffer2 *args = eb->args;
2679 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK(0x3f);
2680
2681 if (user_ring_id != I915_EXEC_BSD(2<<0) &&
2682 (args->flags & I915_EXEC_BSD_MASK(3 << (13)))) {
2683 drm_dbg(&i915->drm,
2684 "execbuf with non bsd ring but with invalid "
2685 "bsd dispatch flags: %d\n", (int)(args->flags));
2686 return -1;
2687 }
2688
2689 if (user_ring_id == I915_EXEC_BSD(2<<0) && num_vcs_engines(i915) > 1) {
2690 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK(3 << (13));
2691
2692 if (bsd_idx == I915_EXEC_BSD_DEFAULT(0 << (13))) {
2693 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
2694 } else if (bsd_idx >= I915_EXEC_BSD_RING1(1 << (13)) &&
2695 bsd_idx <= I915_EXEC_BSD_RING2(2 << (13))) {
2696 bsd_idx >>= I915_EXEC_BSD_SHIFT(13);
2697 bsd_idx--;
2698 } else {
2699 drm_dbg(&i915->drm,
2700 "execbuf with unknown bsd ring: %u\n",
2701 bsd_idx);
2702 return -1;
2703 }
2704
2705 return _VCS(bsd_idx)(VCS0 + (bsd_idx));
2706 }
2707
2708 if (user_ring_id >= ARRAY_SIZE(user_ring_map)(sizeof((user_ring_map)) / sizeof((user_ring_map)[0]))) {
2709 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
2710 user_ring_id)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
;
2711 return -1;
2712 }
2713
2714 return user_ring_map[user_ring_id];
2715}
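
Decode sketch for the legacy BSD selector above (values taken from the uAPI flags visible in this listing):

    /*   I915_EXEC_BSD_DEFAULT = 0 << 13 -> gen8_dispatch_bsd_engine() (sticky, randomly chosen per file)
     *   I915_EXEC_BSD_RING1   = 1 << 13 -> bsd_idx = 1 - 1 = 0 -> _VCS(0) = VCS0
     *   I915_EXEC_BSD_RING2   = 2 << 13 -> bsd_idx = 2 - 1 = 1 -> _VCS(1) = VCS1
     */
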
2716
2717static int
2718eb_select_engine(struct i915_execbuffer *eb)
2719{
2720 struct intel_context *ce, *child;
2721 unsigned int idx;
2722 int err;
2723
2724 if (i915_gem_context_user_engines(eb->gem_context))
2725 idx = eb->args->flags & I915_EXEC_RING_MASK(0x3f);
2726 else
2727 idx = eb_select_legacy_ring(eb);
2728
2729 ce = i915_gem_context_get_engine(eb->gem_context, idx);
2730 if (IS_ERR(ce))
2731 return PTR_ERR(ce);
2732
2733 if (intel_context_is_parallel(ce)) {
2734 if (eb->buffer_count < ce->parallel.number_children + 1) {
2735 intel_context_put(ce);
2736 return -EINVAL22;
2737 }
2738 if (eb->batch_start_offset || eb->args->batch_len) {
2739 intel_context_put(ce);
2740 return -EINVAL22;
2741 }
2742 }
2743 eb->num_batches = ce->parallel.number_children + 1;
2744
2745 for_each_child(ce, child)
2746 intel_context_get(child);
2747 intel_gt_pm_get(ce->engine->gt);
2748
2749 if (!test_bit(CONTEXT_ALLOC_BIT1, &ce->flags)) {
2750 err = intel_context_alloc_state(ce);
2751 if (err)
2752 goto err;
2753 }
2754 for_each_child(ce, child) {
2755 if (!test_bit(CONTEXT_ALLOC_BIT1, &child->flags)) {
2756 err = intel_context_alloc_state(child);
2757 if (err)
2758 goto err;
2759 }
2760 }
2761
2762 /*
2763 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2764 * EIO if the GPU is already wedged.
2765 */
2766 err = intel_gt_terminally_wedged(ce->engine->gt);
2767 if (err)
2768 goto err;
2769
2770 if (!i915_vm_tryget(ce->vm)) {
2771 err = -ENOENT2;
2772 goto err;
2773 }
2774
2775 eb->context = ce;
2776 eb->gt = ce->engine->gt;
2777
2778 /*
2779 * Make sure engine pool stays alive even if we call intel_context_put
2780 * during ww handling. The pool is destroyed when last pm reference
2781 * is dropped, which breaks our -EDEADLK handling.
2782 */
2783 return err;
2784
2785err:
2786 intel_gt_pm_put(ce->engine->gt);
2787 for_each_child(ce, child)
2788 intel_context_put(child);
2789 intel_context_put(ce);
2790 return err;
2791}
2792
2793static void
2794eb_put_engine(struct i915_execbuffer *eb)
2795{
2796 struct intel_context *child;
2797
2798 i915_vm_put(eb->context->vm);
2799 intel_gt_pm_put(eb->gt);
2800 for_each_child(eb->context, child)
2802 intel_context_put(eb->context);
2803}
2804
2805static void
2806__free_fence_array(struct eb_fence *fences, unsigned int n)
2807{
2808 while (n--) {
2809 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
2810 dma_fence_put(fences[n].dma_fence);
2811 dma_fence_chain_free(fences[n].chain_fence);
2812 }
2813 kvfree(fences);
2814}
2815
2816static int
2817add_timeline_fence_array(struct i915_execbuffer *eb,
2818 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
2819{
2820 struct drm_i915_gem_exec_fence __user *user_fences;
2821 u64 __user *user_values;
2822 struct eb_fence *f;
2823 u64 nfences;
2824 int err = 0;
2825
2826 nfences = timeline_fences->fence_count;
2827 if (!nfences)
2828 return 0;
2829
2830 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2831 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2832 if (nfences > min_t(unsigned long,
2833 ULONG_MAX / sizeof(*user_fences),
2834 SIZE_MAX / sizeof(*f)) - eb->num_fences)
2835 return -EINVAL22;
2836
2837 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr)((void *)(uintptr_t)(timeline_fences->handles_ptr));
2838 if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
2839 return -EFAULT14;
2840
2841 user_values = u64_to_user_ptr(timeline_fences->values_ptr)((void *)(uintptr_t)(timeline_fences->values_ptr));
2842 if (!access_ok(user_values, nfences * sizeof(*user_values)))
2843 return -EFAULT14;
2844
2845#ifdef __linux__
2846 f = krealloc(eb->fences,
2847 (eb->num_fences + nfences) * sizeof(*f),
2848 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2849 if (!f)
2850 return -ENOMEM12;
2851#else
2852 f = kmalloc((eb->num_fences + nfences) * sizeof(*f),
2853 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2854 if (!f)
2855 return -ENOMEM12;
2856 memcpy(f, eb->fences, eb->num_fences * sizeof(*f));
2857 kfree(eb->fences);
2858#endif
2859
2860 eb->fences = f;
2861 f += eb->num_fences;
2862
2863#ifdef notyet
2864 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2865 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2866#endif
2867
2868 while (nfences--) {
2869 struct drm_i915_gem_exec_fence user_fence;
2870 struct drm_syncobj *syncobj;
2871 struct dma_fence *fence = NULL((void *)0);
2872 u64 point;
2873
2874 if (__copy_from_user(&user_fence,
2875 user_fences++,
2876 sizeof(user_fence)))
2877 return -EFAULT14;
2878
2879 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS(-((1<<1) << 1)))
2880 return -EINVAL22;
2881
2882 if (__get_user(point, user_values++)-copyin((user_values++), &((point)), sizeof((point))))
2883 return -EFAULT14;
2884
2885 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2886 if (!syncobj) {
2887 DRM_DEBUG("Invalid syncobj handle provided\n");
2888 return -ENOENT2;
2889 }
2890
2891 fence = drm_syncobj_fence_get(syncobj);
2892
2893 if (!fence && user_fence.flags &&
2894 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2895 DRM_DEBUG("Syncobj handle has no fence\n");
2896 drm_syncobj_put(syncobj);
2897 return -EINVAL22;
2898 }
2899
2900 if (fence)
2901 err = dma_fence_chain_find_seqno(&fence, point);
2902
2903 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2904 DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
2905 dma_fence_put(fence);
2906 drm_syncobj_put(syncobj);
2907 return err;
2908 }
2909
2910 /*
2911 * A point might have been signaled already and
2912 * garbage collected from the timeline. In this case
2913 * just ignore the point and carry on.
2914 */
2915 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2916 drm_syncobj_put(syncobj);
2917 continue;
2918 }
2919
2920 /*
2921 * For timeline syncobjs we need to preallocate chains for
2922 * later signaling.
2923 */
2924 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1)) {
2925 /*
2926 * Waiting and signaling the same point (when point !=
2927 * 0) would break the timeline.
2928 */
2929 if (user_fence.flags & I915_EXEC_FENCE_WAIT(1<<0)) {
2930 DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
2931 dma_fence_put(fence);
2932 drm_syncobj_put(syncobj);
2933 return -EINVAL22;
2934 }
2935
2936 f->chain_fence = dma_fence_chain_alloc();
2937 if (!f->chain_fence) {
2938 drm_syncobj_put(syncobj);
2939 dma_fence_put(fence);
2940 return -ENOMEM12;
2941 }
2942 } else {
2943 f->chain_fence = NULL((void *)0);
2944 }
2945
2946 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
2947 f->dma_fence = fence;
2948 f->value = point;
2949 f++;
2950 eb->num_fences++;
2951 }
2952
2953 return 0;
2954}
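
Both fence paths check that (eb->num_fences + n) * sizeof(*f) cannot overflow before calling access_ok() or the allocator. A standalone sketch of the same overflow-guarded array growth, with illustrative names in place of the kernel's krealloc()/kvmalloc helpers:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fence_slot { void *syncobj; uint64_t value; };

/* Grow an array by 'extra' slots.  As in the driver, the size computation is
 * checked against SIZE_MAX before any allocation is attempted. */
static int grow_fence_array(struct fence_slot **arr, size_t *count, uint64_t extra)
{
	struct fence_slot *tmp;

	if (extra > SIZE_MAX / sizeof(**arr) - *count)
		return -EINVAL;		/* (count + extra) * sizeof would overflow */

	tmp = realloc(*arr, (*count + extra) * sizeof(**arr));
	if (!tmp)
		return -ENOMEM;

	memset(tmp + *count, 0, extra * sizeof(*tmp));	/* zero the new slots */
	*arr = tmp;
	*count += extra;
	return 0;
}

int main(void)
{
	struct fence_slot *fences = NULL;
	size_t num = 0;

	if (grow_fence_array(&fences, &num, 4) == 0)
		printf("now holding %zu slots\n", num);
	free(fences);
	return 0;
}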
2955
2956static int add_fence_array(struct i915_execbuffer *eb)
2957{
2958 struct drm_i915_gem_execbuffer2 *args = eb->args;
2959 struct drm_i915_gem_exec_fence __user *user;
2960 unsigned long num_fences = args->num_cliprects;
2961 struct eb_fence *f;
2962
2963 if (!(args->flags & I915_EXEC_FENCE_ARRAY(1<<19)))
2964 return 0;
2965
2966 if (!num_fences)
2967 return 0;
2968
2969 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2970 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2971 if (num_fences > min_t(unsigned long,
2972 ULONG_MAX / sizeof(*user),
2973 SIZE_MAX / sizeof(*f) - eb->num_fences))
2974 return -EINVAL22;
2975
2976 user = u64_to_user_ptr(args->cliprects_ptr)((void *)(uintptr_t)(args->cliprects_ptr));
2977 if (!access_ok(user, num_fences * sizeof(*user)))
2978 return -EFAULT14;
2979
2980#ifdef __linux__
2981 f = krealloc(eb->fences,
2982 (eb->num_fences + num_fences) * sizeof(*f),
2983 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2984 if (!f)
2985 return -ENOMEM12;
2986#else
2987 f = kmalloc((eb->num_fences + num_fences) * sizeof(*f),
2988 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2989 if (!f)
2990 return -ENOMEM12;
2991 memcpy(f, eb->fences, eb->num_fences * sizeof(*f));
2992 kfree(eb->fences);
2993#endif
2994
2995 eb->fences = f;
2996 f += eb->num_fences;
2997 while (num_fences--) {
2998 struct drm_i915_gem_exec_fence user_fence;
2999 struct drm_syncobj *syncobj;
3000 struct dma_fence *fence = NULL((void *)0);
3001
3002 if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
3003 return -EFAULT14;
3004
3005 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS(-((1<<1) << 1)))
3006 return -EINVAL22;
3007
3008 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
3009 if (!syncobj) {
3010 DRM_DEBUG("Invalid syncobj handle provided\n");
3011 return -ENOENT2;
3012 }
3013
3014 if (user_fence.flags & I915_EXEC_FENCE_WAIT(1<<0)) {
3015 fence = drm_syncobj_fence_get(syncobj);
3016 if (!fence) {
3017 DRM_DEBUG("Syncobj handle has no fence\n");
3018 drm_syncobj_put(syncobj);
3019 return -EINVAL22;
3020 }
3021 }
3022
3023#ifdef notyet
3024 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
3025 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
3026#endif
3027
3028 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
3029 f->dma_fence = fence;
3030 f->value = 0;
3031 f->chain_fence = NULL((void *)0);
3032 f++;
3033 eb->num_fences++;
3034 }
3035
3036 return 0;
3037}
3038
3039static void put_fence_array(struct eb_fence *fences, int num_fences)
3040{
3041 if (fences)
3042 __free_fence_array(fences, num_fences);
3043}
3044
3045static int
3046await_fence_array(struct i915_execbuffer *eb,
3047 struct i915_request *rq)
3048{
3049 unsigned int n;
3050 int err;
3051
3052 for (n = 0; n < eb->num_fences; n++) {
3053 struct drm_syncobj *syncobj;
3054 unsigned int flags;
3055
3056 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
Value stored to 'syncobj' is never read
3057
3058 if (!eb->fences[n].dma_fence)
3059 continue;
3060
3061 err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence);
3062 if (err < 0)
3063 return err;
3064 }
3065
3066 return 0;
3067}
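
The warning on line 3056 comes from clang's deadcode.DeadStores checker: the loop unpacks eb->fences[n].syncobj into a local that is never read afterwards, since only the stashed dma_fence is used to set up the await. The pattern can be reproduced standalone as below (hypothetical names, not the driver code); the usual remedy is to drop the unused assignment or actually consume the value.

#include <stdio.h>

struct item { int *payload; unsigned long packed; };

static unsigned long unpack(unsigned long packed, unsigned int *flags)
{
	*flags = packed & 0x3;
	return packed & ~0x3UL;
}

/* Mirrors the flagged pattern: 'obj' is written but never read afterwards,
 * so the dead-store checker reports the assignment, just as it does for
 * 'syncobj' in await_fence_array(). */
static int sum_payloads(const struct item *items, unsigned int n)
{
	int total = 0;

	for (unsigned int i = 0; i < n; i++) {
		unsigned int flags;
		unsigned long obj;

		obj = unpack(items[i].packed, &flags);	/* value stored, never read */

		if (!items[i].payload)
			continue;
		total += *items[i].payload;
	}
	return total;
}

int main(void)
{
	int a = 1, b = 2;
	struct item items[] = { { &a, 0x1 }, { &b, 0x2 }, { NULL, 0x0 } };

	printf("%d\n", sum_payloads(items, 3));
	return 0;
}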
3068
3069static void signal_fence_array(const struct i915_execbuffer *eb,
3070 struct dma_fence * const fence)
3071{
3072 unsigned int n;
3073
3074 for (n = 0; n < eb->num_fences; n++) {
3075 struct drm_syncobj *syncobj;
3076 unsigned int flags;
3077
3078 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
3079 if (!(flags & I915_EXEC_FENCE_SIGNAL(1<<1)))
3080 continue;
3081
3082 if (eb->fences[n].chain_fence) {
3083 drm_syncobj_add_point(syncobj,
3084 eb->fences[n].chain_fence,
3085 fence,
3086 eb->fences[n].value);
3087 /*
3088 * The chain's ownership is transferred to the
3089 * timeline.
3090 */
3091 eb->fences[n].chain_fence = NULL((void *)0);
3092 } else {
3093 drm_syncobj_replace_fence(syncobj, fence);
3094 }
3095 }
3096}
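
Clearing chain_fence after drm_syncobj_add_point() matters because __free_fence_array() later frees whatever is still stored in that slot. A small standalone sketch of that hand-off-then-clear pattern, with illustrative types rather than the DRM objects:

#include <stdlib.h>

struct chain { int reserved; };
struct slot { struct chain *chain; };

/* Consumer takes ownership of the chain (here it simply frees it). */
static void timeline_add_point(struct chain *c)
{
	free(c);
}

/* Once ownership moves to the timeline, the stored reference is cleared so
 * the common cleanup path does not release the same object twice. */
static void signal_slot(struct slot *s)
{
	if (s->chain) {
		timeline_add_point(s->chain);
		s->chain = NULL;	/* ownership transferred */
	}
}

static void cleanup_slot(struct slot *s)
{
	free(s->chain);			/* no-op when ownership was handed off */
	s->chain = NULL;
}

int main(void)
{
	struct slot s = { malloc(sizeof(struct chain)) };

	signal_slot(&s);
	cleanup_slot(&s);
	return 0;
}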
3097
3098static int
3099parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
3100{
3101 struct i915_execbuffer *eb = data;
3102 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
3103
3104 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
3105 return -EFAULT14;
3106
3107 return add_timeline_fence_array(eb, &timeline_fences);
3108}
3109
3110static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
3111{
3112 struct i915_request *rq, *rn;
3113
3114 list_for_each_entry_safe(rq, rn, &tl->requests, link)
3115 if (rq == end || !i915_request_retire(rq))
3116 break;
3117}
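
The list_for_each_entry_safe() iteration above captures the next pointer before each step because i915_request_retire() may unlink the current request. The same safe-iteration idea in standalone form, on a plain singly linked list instead of the kernel's list_head:

#include <stdio.h>
#include <stdlib.h>

struct node { int seqno; struct node *next; };

/* Keep a copy of the next pointer so the current entry may be unlinked and
 * freed without breaking the walk; stop at 'end', like retire_requests(). */
static struct node *retire_upto(struct node *head, int end_seqno)
{
	struct node *rq = head, *rn;

	while (rq) {
		rn = rq->next;			/* grab next before freeing rq */
		if (rq->seqno >= end_seqno)
			break;
		free(rq);			/* "retire" the request */
		rq = rn;
	}
	return rq;				/* first non-retired entry */
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 3; i >= 1; i--) {
		struct node *n = malloc(sizeof(*n));
		n->seqno = i;
		n->next = head;
		head = n;
	}
	head = retire_upto(head, 3);
	printf("head seqno now %d\n", head ? head->seqno : -1);
	while (head) { struct node *n = head->next; free(head); head = n; }
	return 0;
}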
3118
3119static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
3120 int err, bool_Bool last_parallel)
3121{
3122 struct intel_timeline * const tl = i915_request_timeline(rq);
3123 struct i915_sched_attr attr = {};
3124 struct i915_request *prev;
3125
3126 lockdep_assert_held(&tl->mutex)do { (void)(&tl->mutex); } while(0);
3127 lockdep_unpin_lock(&tl->mutex, rq->cookie);
3128
3129 trace_i915_request_add(rq);
3130
3131 prev = __i915_request_commit(rq);
3132
3133 /* Check that the context wasn't destroyed before submission */
3134 if (likely(!intel_context_is_closed(eb->context))) {
3135 attr = eb->gem_context->sched;
3136 } else {
3137 /* Serialise with context_close via the add_to_timeline */
3138 i915_request_set_error_once(rq, -ENOENT2);
3139 __i915_request_skip(rq);
3140 err = -ENOENT2; /* override any transient errors */
3141 }
3142
3143 if (intel_context_is_parallel(eb->context)) {
3144 if (err) {
3145 __i915_request_skip(rq);
3146 set_bit(I915_FENCE_FLAG_SKIP_PARALLEL,
3147 &rq->fence.flags);
3148 }
3149 if (last_parallel)
3150 set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
3151 &rq->fence.flags);
3152 }
3153
3154 __i915_request_queue(rq, &attr);
3155
3156 /* Try to clean up the client's timeline after submitting the request */
3157 if (prev)
3158 retire_requests(tl, prev);
3159
3160 mutex_unlock(&tl->mutex)rw_exit_write(&tl->mutex);
3161
3162 return err;
3163}
3164
3165static int eb_requests_add(struct i915_execbuffer *eb, int err)
3166{
3167 int i;
3168
3169 /*
3170 * We iterate in reverse order of creation to release timeline mutexes in
3171 * same order.
3172 */
3173 for_each_batch_add_order(eb, i) {
3174 struct i915_request *rq = eb->requests[i];
3175
3176 if (!rq)
3177 continue;
3178 err |= eb_request_add(eb, rq, err, i == 0);
3179 }
3180
3181 return err;
3182}
3183
3184static const i915_user_extension_fn execbuf_extensions[] = {
3185 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES0] = parse_timeline_fences,
3186};
3187
3188static int
3189parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
3190 struct i915_execbuffer *eb)
3191{
3192 if (!(args->flags & I915_EXEC_USE_EXTENSIONS(1 << 21)))
3193 return 0;
3194
3195 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
3196 * have another flag also using it at the same time.
3197 */
3198 if (eb->args->flags & I915_EXEC_FENCE_ARRAY(1<<19))
3199 return -EINVAL22;
3200
3201 if (args->num_cliprects != 0)
3202 return -EINVAL22;
3203
3204 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr)((void *)(uintptr_t)(args->cliprects_ptr)),
3205 execbuf_extensions,
3206 ARRAY_SIZE(execbuf_extensions),
3207 eb);
3208}
3209
3210static void eb_requests_get(struct i915_execbuffer *eb)
3211{
3212 unsigned int i;
3213
3214 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i)) {
3215 if (!eb->requests[i])
3216 break;
3217
3218 i915_request_get(eb->requests[i]);
3219 }
3220}
3221
3222static void eb_requests_put(struct i915_execbuffer *eb)
3223{
3224 unsigned int i;
3225
3226 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i)) {
3227 if (!eb->requests[i])
3228 break;
3229
3230 i915_request_put(eb->requests[i]);
3231 }
3232}
3233
3234static struct sync_file *
3235eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
3236{
3237 struct sync_file *out_fence = NULL((void *)0);
3238 struct dma_fence_array *fence_array;
3239 struct dma_fence **fences;
3240 unsigned int i;
3241
3242 GEM_BUG_ON(!intel_context_is_parent(eb->context))((void)0);
3243
3244 fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL(0x0001 | 0x0004));
3245 if (!fences)
3246 return ERR_PTR(-ENOMEM12);
3247
3248 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i)) {
3249 fences[i] = &eb->requests[i]->fence;
3250 __set_bit(I915_FENCE_FLAG_COMPOSITE,
3251 &eb->requests[i]->fence.flags);
3252 }
3253
3254 fence_array = dma_fence_array_create(eb->num_batches,
3255 fences,
3256 eb->context->parallel.fence_context,
3257 eb->context->parallel.seqno++,
3258 false0);
3259 if (!fence_array) {
3260 kfree(fences);
3261 return ERR_PTR(-ENOMEM12);
3262 }
3263
3264 /* Move ownership to the dma_fence_array created above */
3265 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i))
3266 dma_fence_get(fences[i]);
3267
3268 if (out_fence_fd != -1) {
3269 out_fence = sync_file_create(&fence_array->base);
3270 /* sync_file now owns fence_array, drop creation ref */
3271 dma_fence_put(&fence_array->base);
3272 if (!out_fence)
3273 return ERR_PTR(-ENOMEM12);
3274 }
3275
3276 eb->composite_fence = &fence_array->base;
3277
3278 return out_fence;
3279}
3280
3281static struct sync_file *
3282eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq,
3283 struct dma_fence *in_fence, int out_fence_fd)
3284{
3285 struct sync_file *out_fence = NULL((void *)0);
3286 int err;
3287
3288 if (unlikely(eb->gem_context->syncobj)__builtin_expect(!!(eb->gem_context->syncobj), 0)) {
3289 struct dma_fence *fence;
3290
3291 fence = drm_syncobj_fence_get(eb->gem_context->syncobj);
3292 err = i915_request_await_dma_fence(rq, fence);
3293 dma_fence_put(fence);
3294 if (err)
3295 return ERR_PTR(err);
3296 }
3297
3298 if (in_fence) {
3299 if (eb->args->flags & I915_EXEC_FENCE_SUBMIT(1 << 20))
3300 err = i915_request_await_execution(rq, in_fence);
3301 else
3302 err = i915_request_await_dma_fence(rq, in_fence);
3303 if (err < 0)
3304 return ERR_PTR(err);
3305 }
3306
3307 if (eb->fences) {
3308 err = await_fence_array(eb, rq);
3309 if (err)
3310 return ERR_PTR(err);
3311 }
3312
3313 if (intel_context_is_parallel(eb->context)) {
3314 out_fence = eb_composite_fence_create(eb, out_fence_fd);
3315 if (IS_ERR(out_fence))
3316 return ERR_PTR(-ENOMEM12);
3317 } else if (out_fence_fd != -1) {
3318 out_fence = sync_file_create(&rq->fence);
3319 if (!out_fence)
3320 return ERR_PTR(-ENOMEM12);
3321 }
3322
3323 return out_fence;
3324}
3325
3326static struct intel_context *
3327eb_find_context(struct i915_execbuffer *eb, unsigned int context_number)
3328{
3329 struct intel_context *child;
3330
3331 if (likely(context_number == 0)__builtin_expect(!!(context_number == 0), 1))
3332 return eb->context;
3333
3334 for_each_child(eb->context, child)
3335 if (!--context_number)
3336 return child;
3337
3338 GEM_BUG_ON("Context not found")((void)0);
3339
3340 return NULL((void *)0);
3341}
3342
3343static struct sync_file *
3344eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
3345 int out_fence_fd)
3346{
3347 struct sync_file *out_fence = NULL((void *)0);
3348 unsigned int i;
3349
3350 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i)) {
3351 /* Allocate a request for this batch buffer nice and early. */
3352 eb->requests[i] = i915_request_create(eb_find_context(eb, i));
3353 if (IS_ERR(eb->requests[i])) {
3354 out_fence = ERR_CAST(eb->requests[i]);
3355 eb->requests[i] = NULL((void *)0);
3356 return out_fence;
3357 }
3358
3359 /*
3360 * Only the first request added (committed to backend) has to
3361 * take the in fences into account as all subsequent requests
3362 * will have fences inserted inbetween them.
3363 */
3364 if (i + 1 == eb->num_batches) {
3365 out_fence = eb_fences_add(eb, eb->requests[i],
3366 in_fence, out_fence_fd);
3367 if (IS_ERR(out_fence))
3368 return out_fence;
3369 }
3370
3371 /*
3372 * Not really on stack, but we don't want to call
3373 * kfree on the batch_snapshot when we put it, so use the
3374 * _onstack interface.
3375 */
3376 if (eb->batches[i]->vma)
3377 eb->requests[i]->batch_res =
3378 i915_vma_resource_get(eb->batches[i]->vma->resource);
3379 if (eb->batch_pool) {
3380 GEM_BUG_ON(intel_context_is_parallel(eb->context))((void)0);
3381 intel_gt_buffer_pool_mark_active(eb->batch_pool,
3382 eb->requests[i]);
3383 }
3384 }
3385
3386 return out_fence;
3387}
3388
3389static int
3390i915_gem_do_execbuffer(struct drm_device *dev,
3391 struct drm_file *file,
3392 struct drm_i915_gem_execbuffer2 *args,
3393 struct drm_i915_gem_exec_object2 *exec)
3394{
3395 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3396 struct i915_execbuffer eb;
3397 struct dma_fence *in_fence = NULL((void *)0);
3398 struct sync_file *out_fence = NULL((void *)0);
3399 int out_fence_fd = -1;
3400 int err;
3401
3402 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
3403 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
3404 ~__EXEC_OBJECT_UNKNOWN_FLAGS);
3405
3406 eb.i915 = i915;
3407 eb.file = file;
3408 eb.args = args;
3409 if (DBG_FORCE_RELOC0 || !(args->flags & I915_EXEC_NO_RELOC(1<<11)))
3410 args->flags |= __EXEC_HAS_RELOC(1UL << (31));
3411
3412 eb.exec = exec;
3413 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
3414 eb.vma[0].vma = NULL((void *)0);
3415 eb.batch_pool = NULL((void *)0);
3416
3417 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS-((1<<7)<<1);
3418 reloc_cache_init(&eb.reloc_cache, eb.i915);
3419
3420 eb.buffer_count = args->buffer_count;
3421 eb.batch_start_offset = args->batch_start_offset;
3422 eb.trampoline = NULL((void *)0);
3423
3424 eb.fences = NULL((void *)0);
3425 eb.num_fences = 0;
3426
3427 eb_capture_list_clear(&eb);
3428
3429 memset(eb.requests, 0, sizeof(struct i915_request *) *
3430 ARRAY_SIZE(eb.requests));
3431 eb.composite_fence = NULL((void *)0);
3432
3433 eb.batch_flags = 0;
3434 if (args->flags & I915_EXEC_SECURE(1<<9)) {
3435 if (GRAPHICS_VER(i915)((&(i915)->__runtime)->graphics.ip.ver) >= 11)
3436 return -ENODEV19;
3437
3438 /* Return -EPERM to trigger fallback code on old binaries. */
3439 if (!HAS_SECURE_BATCHES(i915)(((&(i915)->__runtime)->graphics.ip.ver) < 6))
3440 return -EPERM1;
3441
3442 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN0x1))
3443 return -EPERM1;
3444
3445 eb.batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
3446 }
3447 if (args->flags & I915_EXEC_IS_PINNED(1<<10))
3448 eb.batch_flags |= I915_DISPATCH_PINNED(1UL << (1));
3449
3450 err = parse_execbuf2_extensions(args, &eb);
3451 if (err)
3452 goto err_ext;
3453
3454 err = add_fence_array(&eb);
3455 if (err)
3456 goto err_ext;
3457
3458#define IN_FENCES (I915_EXEC_FENCE_IN(1<<16) | I915_EXEC_FENCE_SUBMIT(1 << 20))
3459 if (args->flags & IN_FENCES) {
3460 if ((args->flags & IN_FENCES) == IN_FENCES)
3461 return -EINVAL22;
3462
3463 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)((u32)(args->rsvd2)));
3464 if (!in_fence) {
3465 err = -EINVAL22;
3466 goto err_ext;
3467 }
3468 }
3469#undef IN_FENCES
3470
3471 if (args->flags & I915_EXEC_FENCE_OUT(1<<17)) {
3472 out_fence_fd = get_unused_fd_flags(O_CLOEXEC0x10000);
3473 if (out_fence_fd < 0) {
3474 err = out_fence_fd;
3475 goto err_in_fence;
3476 }
3477 }
3478
3479 err = eb_create(&eb);
3480 if (err)
3481 goto err_out_fence;
3482
3483 GEM_BUG_ON(!eb.lut_size)((void)0);
3484
3485 err = eb_select_context(&eb);
3486 if (unlikely(err)__builtin_expect(!!(err), 0))
3487 goto err_destroy;
3488
3489 err = eb_select_engine(&eb);
3490 if (unlikely(err)__builtin_expect(!!(err), 0))
3491 goto err_context;
3492
3493 err = eb_lookup_vmas(&eb);
3494 if (err) {
3495 eb_release_vmas(&eb, true1);
3496 goto err_engine;
3497 }
3498
3499 i915_gem_ww_ctx_init(&eb.ww, true1);
3500
3501 err = eb_relocate_parse(&eb);
3502 if (err) {
3503 /*
3504 * If the user expects the execobject.offset and
3505 * reloc.presumed_offset to be an exact match,
3506 * as for using NO_RELOC, then we cannot update
3507 * the execobject.offset until we have completed
3508 * relocation.
3509 */
3510 args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
3511 goto err_vma;
3512 }
3513
3514 ww_acquire_done(&eb.ww.ctx);
3515 err = eb_capture_stage(&eb);
3516 if (err)
3517 goto err_vma;
3518
3519 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
3520 if (IS_ERR(out_fence)) {
3521 err = PTR_ERR(out_fence);
3522 out_fence = NULL((void *)0);
3523 if (eb.requests[0])
3524 goto err_request;
3525 else
3526 goto err_vma;
3527 }
3528
3529 err = eb_submit(&eb);
3530
3531err_request:
3532 eb_requests_get(&eb);
3533 err = eb_requests_add(&eb, err);
3534
3535 if (eb.fences)
3536 signal_fence_array(&eb, eb.composite_fence ?
3537 eb.composite_fence :
3538 &eb.requests[0]->fence);
3539
3540 if (unlikely(eb.gem_context->syncobj)__builtin_expect(!!(eb.gem_context->syncobj), 0)) {
3541 drm_syncobj_replace_fence(eb.gem_context->syncobj,
3542 eb.composite_fence ?
3543 eb.composite_fence :
3544 &eb.requests[0]->fence);
3545 }
3546
3547 if (out_fence) {
3548 if (err == 0) {
3549 fd_install(out_fence_fd, out_fence->file);
3550 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
3551 args->rsvd2 |= (u64)out_fence_fd << 32;
3552 out_fence_fd = -1;
3553 } else {
3554 fput(out_fence->file);
3555 }
3556 }
3557
3558 if (!out_fence && eb.composite_fence)
3559 dma_fence_put(eb.composite_fence);
3560
3561 eb_requests_put(&eb);
3562
3563err_vma:
3564 eb_release_vmas(&eb, true1);
3565 WARN_ON(err == -EDEADLK);
3566 i915_gem_ww_ctx_fini(&eb.ww);
3567
3568 if (eb.batch_pool)
3569 intel_gt_buffer_pool_put(eb.batch_pool);
3570err_engine:
3571 eb_put_engine(&eb);
3572err_context:
3573 i915_gem_context_put(eb.gem_context);
3574err_destroy:
3575 eb_destroy(&eb);
3576err_out_fence:
3577 if (out_fence_fd != -1)
3578 put_unused_fd(out_fence_fd);
3579err_in_fence:
3580 dma_fence_put(in_fence);
3581err_ext:
3582 put_fence_array(eb.fences, eb.num_fences);
3583 return err;
3584}
3585
3586static size_t eb_element_size(void)
3587{
3588 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
3589}
3590
3591static bool_Bool check_buffer_count(size_t count)
3592{
3593 const size_t sz = eb_element_size();
3594
3595 /*
3596 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
3597 * array size (see eb_create()). Otherwise, we can accept an array as
3598 * large as can be addressed (though use large arrays at your peril)!
3599 */
3600
3601 return !(count < 1 || count > INT_MAX0x7fffffff || count > SIZE_MAX0xffffffffffffffffUL / sz - 1);
3602}
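
check_buffer_count() rejects a zero count, caps it at INT_MAX for the lookup table, and leaves headroom in the SIZE_MAX division before the array is sized. A standalone sketch of the same style of bound check, with an illustrative element size:

#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ELEMENT_SIZE (sizeof(uint64_t) * 8)	/* illustrative stand-in for eb_element_size() */

/* Same shape as check_buffer_count(): non-zero, bounded by INT_MAX for the
 * lookup table, and headroom in the overflow check before allocation. */
static bool buffer_count_ok(size_t count)
{
	return !(count < 1 || count > INT_MAX ||
		 count > SIZE_MAX / ELEMENT_SIZE - 1);
}

int main(void)
{
	printf("%d %d %d\n",
	       buffer_count_ok(0),			  /* 0: too few */
	       buffer_count_ok(1024),			  /* 1: fine */
	       buffer_count_ok(SIZE_MAX / ELEMENT_SIZE)); /* 0: would overflow */
	return 0;
}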
3603
3604int
3605i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
3606 struct drm_file *file)
3607{
3608 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3609 struct drm_i915_gem_execbuffer2 *args = data;
3610 struct drm_i915_gem_exec_object2 *exec2_list;
3611 const size_t count = args->buffer_count;
3612 int err;
3613
3614 if (!check_buffer_count(count)) {
3615 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
3616 return -EINVAL22;
3617 }
3618
3619 err = i915_gem_check_execbuffer(args);
3620 if (err)
3621 return err;
3622
3623 /* Allocate extra slots for use by the command parser */
3624 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3625 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3626 if (exec2_list == NULL((void *)0)) {
3627 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
3628 count);
3629 return -ENOMEM12;
3630 }
3631 if (copy_from_user(exec2_list,
3632 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr)),
3633 sizeof(*exec2_list) * count)) {
3634 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count);
3635 kvfree(exec2_list);
3636 return -EFAULT14;
3637 }
3638
3639 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
3640
3641 /*
3642 * Now that we have begun execution of the batchbuffer, we ignore
3643 * any new error after this point. Also given that we have already
3644 * updated the associated relocations, we try to write out the current
3645 * object locations irrespective of any error.
3646 */
3647 if (args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
3648 struct drm_i915_gem_exec_object2 __user *user_exec_list =
3649 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr));
3650 unsigned int i;
3651
3652 /* Copy the new buffer offsets back to the user's exec list. */
3653 /*
3654 * Note: count * sizeof(*user_exec_list) does not overflow,
3655 * because we checked 'count' in check_buffer_count().
3656 *
3657 * And this range already got effectively checked earlier
3658 * when we did the "copy_from_user()" above.
3659 */
3660 if (!user_write_access_begin(user_exec_list,
3661 count * sizeof(*user_exec_list)))
3662 goto end;
3663
3664 for (i = 0; i < args->buffer_count; i++) {
3665 if (!(exec2_list[i].offset & UPDATE(1ULL << (7))))
3666 continue;
3667
3668 exec2_list[i].offset =
3669 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK-(1ULL << (12)));
3670 unsafe_put_user(exec2_list[i].offset,
3671 &user_exec_list[i].offset,
3672 end_user);
3673 }
3674end_user:
3675 user_write_access_end();
3676end:;
3677 }
3678
3679 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS(-((1 << 21) << 1));
3680 kvfree(exec2_list);
3681 return err;
3682}
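
The final write-back loop copies an offset to userspace only when its internal UPDATE marker is set, and strips the marker and other low bits first. A standalone sketch of that selective write-back, with hypothetical constants standing in for UPDATE and PIN_OFFSET_MASK:

#include <stdint.h>
#include <stdio.h>

#define UPDATE_BIT	(1ULL << 7)		/* hypothetical marker bit */
#define OFFSET_MASK	(~((1ULL << 12) - 1))	/* offsets assumed page-aligned */

/* Copy back only the entries that were actually relocated, stripping the
 * internal marker so userspace never sees it. */
static void write_back_offsets(const uint64_t *kernel_offsets,
			       uint64_t *user_offsets, unsigned int count)
{
	for (unsigned int i = 0; i < count; i++) {
		if (!(kernel_offsets[i] & UPDATE_BIT))
			continue;		/* untouched entry, skip */
		user_offsets[i] = kernel_offsets[i] & OFFSET_MASK;
	}
}

int main(void)
{
	uint64_t kern[2] = { 0x10000 | UPDATE_BIT, 0x20000 };
	uint64_t user[2] = { 0, 0 };

	write_back_offsets(kern, user, 2);
	printf("%#llx %#llx\n", (unsigned long long)user[0],
	       (unsigned long long)user[1]);
	return 0;
}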