Bug Summary

File: dev/pci/drm/i915/gem/i915_gem_execbuffer.c
Warning: line 366, column 52
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long'
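
For reference, the rule being enforced: in C, a shift count that is greater than or equal to the bit width of the (promoted) left operand is undefined behaviour, regardless of the value being shifted. A minimal, self-contained illustration of the condition named in the diagnostic (hypothetical code, not taken from the driver; on amd64 unsigned long is 64 bits wide):

#include <limits.h>
#include <stddef.h>

/*
 * Hypothetical illustration only.  Shifting by a count >= the width of
 * the promoted left operand is undefined (C11 6.5.7p3).  4294967295 is
 * UINT_MAX, i.e. an unsigned counter that has wrapped past zero.
 */
size_t scaled_size(size_t elem, unsigned int shift)
{
	/* undefined whenever shift >= CHAR_BIT * sizeof(size_t) (64 here) */
	return elem << shift;
}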

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name i915_gem_execbuffer.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
1/*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2008,2010 Intel Corporation
5 */
6
7#include <linux/dma-resv.h>
8#include <linux/highmem.h>
9#include <linux/sync_file.h>
10#include <linux/uaccess.h>
11
12#include <drm/drm_syncobj.h>
13
14#include <dev/pci/pcivar.h>
15#include <dev/pci/agpvar.h>
16
17#include "display/intel_frontbuffer.h"
18
19#include "gem/i915_gem_ioctls.h"
20#include "gt/intel_context.h"
21#include "gt/intel_gpu_commands.h"
22#include "gt/intel_gt.h"
23#include "gt/intel_gt_buffer_pool.h"
24#include "gt/intel_gt_pm.h"
25#include "gt/intel_ring.h"
26
27#include "pxp/intel_pxp.h"
28
29#include "i915_cmd_parser.h"
30#include "i915_drv.h"
31#include "i915_file_private.h"
32#include "i915_gem_clflush.h"
33#include "i915_gem_context.h"
34#include "i915_gem_evict.h"
35#include "i915_gem_ioctls.h"
36#include "i915_trace.h"
37#include "i915_user_extensions.h"
38
39struct eb_vma {
40 struct i915_vma *vma;
41 unsigned int flags;
42
43 /** This vma's place in the execbuf reservation list */
44 struct drm_i915_gem_exec_object2 *exec;
45 struct list_head bind_link;
46 struct list_head reloc_link;
47
48 struct hlist_node node;
49 u32 handle;
50};
51
52enum {
53 FORCE_CPU_RELOC = 1,
54 FORCE_GTT_RELOC,
55 FORCE_GPU_RELOC,
56#define DBG_FORCE_RELOC 0 /* choose one of the above! */
57};
58
59/* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
60#define __EXEC_OBJECT_HAS_PIN		BIT(30)
61#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
62#define __EXEC_OBJECT_USERPTR_INIT	BIT(28)
63#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
64#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
65#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above + */
66#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
67
68#define __EXEC_HAS_RELOC	BIT(31)
69#define __EXEC_ENGINE_PINNED	BIT(30)
70#define __EXEC_USERPTR_USED	BIT(29)
71#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
72#define UPDATE			PIN_OFFSET_FIXED
73
74#define BATCH_OFFSET_BIAS (256*1024)
75
76#define __I915_EXEC_ILLEGAL_FLAGS \
77	(__I915_EXEC_UNKNOWN_FLAGS | \
78	 I915_EXEC_CONSTANTS_MASK | \
79	 I915_EXEC_RESOURCE_STREAMER)
80
81/* Catch emission of unexpected errors for CI! */
82#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
83#undef EINVAL
84#define EINVAL ({ \
85	DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
86	22; \
87})
88#endif
89
90/**
91 * DOC: User command execution
92 *
93 * Userspace submits commands to be executed on the GPU as an instruction
94 * stream within a GEM object we call a batchbuffer. These instructions may
95 * refer to other GEM objects containing auxiliary state such as kernels,
96 * samplers, render targets and even secondary batchbuffers. Userspace does
97 * not know where in the GPU memory these objects reside and so before the
98 * batchbuffer is passed to the GPU for execution, those addresses in the
99 * batchbuffer and auxiliary objects are updated. This is known as relocation,
100 * or patching. To try and avoid having to relocate each object on the next
101 * execution, userspace is told the location of those objects in this pass,
102 * but this remains just a hint as the kernel may choose a new location for
103 * any object in the future.
104 *
105 * At the level of talking to the hardware, submitting a batchbuffer for the
106 * GPU to execute is to add content to a buffer from which the HW
107 * command streamer is reading.
108 *
109 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
110 * Execlists, this command is not placed on the same buffer as the
111 * remaining items.
112 *
113 * 2. Add a command to invalidate caches to the buffer.
114 *
115 * 3. Add a batchbuffer start command to the buffer; the start command is
116 * essentially a token together with the GPU address of the batchbuffer
117 * to be executed.
118 *
119 * 4. Add a pipeline flush to the buffer.
120 *
121 * 5. Add a memory write command to the buffer to record when the GPU
122 * is done executing the batchbuffer. The memory write writes the
123 * global sequence number of the request, ``i915_request::global_seqno``;
124 * the i915 driver uses the current value in the register to determine
125 * if the GPU has completed the batchbuffer.
126 *
127 * 6. Add a user interrupt command to the buffer. This command instructs
128 * the GPU to issue an interrupt when the command, pipeline flush and
129 * memory write are completed.
130 *
131 * 7. Inform the hardware of the additional commands added to the buffer
132 * (by updating the tail pointer).
133 *
134 * Processing an execbuf ioctl is conceptually split up into a few phases.
135 *
136 * 1. Validation - Ensure all the pointers, handles and flags are valid.
137 * 2. Reservation - Assign GPU address space for every object
138 * 3. Relocation - Update any addresses to point to the final locations
139 * 4. Serialisation - Order the request with respect to its dependencies
140 * 5. Construction - Construct a request to execute the batchbuffer
141 * 6. Submission (at some point in the future execution)
142 *
143 * Reserving resources for the execbuf is the most complicated phase. We
144 * neither want to have to migrate the object in the address space, nor do
145 * we want to have to update any relocations pointing to this object. Ideally,
146 * we want to leave the object where it is and for all the existing relocations
147 * to match. If the object is given a new address, or if userspace thinks the
148 * object is elsewhere, we have to parse all the relocation entries and update
149 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
150 * all the target addresses in all of its objects match the value in the
151 * relocation entries and that they all match the presumed offsets given by the
152 * list of execbuffer objects. Using this knowledge, we know that if we haven't
153 * moved any buffers, all the relocation entries are valid and we can skip
154 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
155 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
156 *
157 * The addresses written in the objects must match the corresponding
158 * reloc.presumed_offset which in turn must match the corresponding
159 * execobject.offset.
160 *
161 * Any render targets written to in the batch must be flagged with
162 * EXEC_OBJECT_WRITE.
163 *
164 * To avoid stalling, execobject.offset should match the current
165 * address of that object within the active context.
166 *
167 * The reservation is done in multiple phases. First we try and keep any
168 * object already bound in its current location - so long as it meets the
169 * constraints imposed by the new execbuffer. Any object left unbound after the
170 * first pass is then fitted into any available idle space. If an object does
171 * not fit, all objects are removed from the reservation and the process rerun
172 * after sorting the objects into a priority order (more difficult to fit
173 * objects are tried first). Failing that, the entire VM is cleared and we try
174 * to fit the execbuf one last time before concluding that it simply will not
175 * fit.
176 *
177 * A small complication to all of this is that we allow userspace not only to
178 * specify an alignment and a size for the object in the address space, but
179 * we also allow userspace to specify the exact offset. These objects are
180 * simpler to place (the location is known a priori) all we have to do is make
181 * sure the space is available.
182 *
183 * Once all the objects are in place, patching up the buried pointers to point
184 * to the final locations is a fairly simple job of walking over the relocation
185 * entry arrays, looking up the right address and rewriting the value into
186 * the object. Simple! ... The relocation entries are stored in user memory
187 * and so to access them we have to copy them into a local buffer. That copy
188 * has to avoid taking any pagefaults as they may lead back to a GEM object
189 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
190 * the relocation into multiple passes. First we try to do everything within an
191 * atomic context (avoid the pagefaults) which requires that we never wait. If
192 * we detect that we may wait, or if we need to fault, then we have to fallback
193 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
194 * bells yet?) Dropping the mutex means that we lose all the state we have
195 * built up so far for the execbuf and we must reset any global data. However,
196 * we do leave the objects pinned in their final locations - which is a
197 * potential issue for concurrent execbufs. Once we have left the mutex, we can
198 * allocate and copy all the relocation entries into a large array at our
199 * leisure, reacquire the mutex, reclaim all the objects and other state and
200 * then proceed to update any incorrect addresses with the objects.
201 *
202 * As we process the relocation entries, we maintain a record of whether the
203 * object is being written to. Using NORELOC, we expect userspace to provide
204 * this information instead. We also check whether we can skip the relocation
205 * by comparing the expected value inside the relocation entry with the target's
206 * final address. If they differ, we have to map the current object and rewrite
207 * the 4 or 8 byte pointer within.
208 *
209 * Serialising an execbuf is quite simple according to the rules of the GEM
210 * ABI. Execution within each context is ordered by the order of submission.
211 * Writes to any GEM object are in order of submission and are exclusive. Reads
212 * from a GEM object are unordered with respect to other reads, but ordered by
213 * writes. A write submitted after a read cannot occur before the read, and
214 * similarly any read submitted after a write cannot occur before the write.
215 * Writes are ordered between engines such that only one write occurs at any
216 * time (completing any reads beforehand) - using semaphores where available
217 * and CPU serialisation otherwise. Other GEM access obey the same rules, any
218 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
219 * reads before starting, and any read (either using set-domain or pread) must
220 * flush all GPU writes before starting. (Note we only employ a barrier before,
221 * we currently rely on userspace not concurrently starting a new execution
222 * whilst reading or writing to an object. This may be an advantage or not
223 * depending on how much you trust userspace not to shoot themselves in the
224 * foot.) Serialisation may just result in the request being inserted into
225 * a DAG awaiting its turn, but most simple is to wait on the CPU until
226 * all dependencies are resolved.
227 *
228 * After all of that, it is just a matter of closing the request and handing it to
229 * the hardware (well, leaving it in a queue to be executed). However, we also
230 * offer the ability for batchbuffers to be run with elevated privileges so
231 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
232 * Before any batch is given extra privileges we first must check that it
233 * contains no nefarious instructions, we check that each instruction is from
234 * our whitelist and all registers are also from an allowed list. We first
235 * copy the user's batchbuffer to a shadow (so that the user doesn't have
236 * access to it, either by the CPU or GPU as we scan it) and then parse each
237 * instruction. If everything is ok, we set a flag telling the hardware to run
238 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
239 */
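
The DOC block above describes what arrives through the execbuffer2 ioctl. As a rough sketch of the userspace side it refers to (field names taken from the i915 uapi headers; the handles, offsets and flags below are placeholder values, and error handling is omitted):

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Hypothetical submission: one batch buffer plus one auxiliary object. */
static int submit_batch(int fd, uint32_t batch_handle, uint32_t target_handle,
			uint32_t batch_len)
{
	struct drm_i915_gem_relocation_entry reloc = {
		.target_handle	 = target_handle,
		.offset		 = 0,	/* location in the batch to patch */
		.delta		 = 0,
		.presumed_offset = 0,	/* hint only; the kernel may relocate */
		.read_domains	 = I915_GEM_DOMAIN_RENDER,
		.write_domain	 = 0,
	};
	struct drm_i915_gem_exec_object2 objects[2] = {
		{ .handle = target_handle },
		{			/* batch is last unless I915_EXEC_BATCH_FIRST */
			.handle		  = batch_handle,
			.relocation_count = 1,
			.relocs_ptr	  = (uintptr_t)&reloc,
		},
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr  = (uintptr_t)objects,
		.buffer_count = 2,	/* eb_create() sizes its lookup table from this */
		.batch_len    = batch_len,
		.flags	      = I915_EXEC_RENDER,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}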
240
241struct eb_fence {
242 struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
243 struct dma_fence *dma_fence;
244 u64 value;
245 struct dma_fence_chain *chain_fence;
246};
247
248struct i915_execbuffer {
249 struct drm_i915_privateinteldrm_softc *i915; /** i915 backpointer */
250 struct drm_file *file; /** per-file lookup tables and limits */
251 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
252 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
253 struct eb_vma *vma;
254
255 struct intel_gt *gt; /* gt for the execbuf */
256 struct intel_context *context; /* logical state for the request */
257 struct i915_gem_context *gem_context; /** caller's context */
258
259 /** our requests to build */
260 struct i915_request *requests[MAX_ENGINE_INSTANCE8 + 1];
261 /** identity of the batch obj/vma */
262 struct eb_vma *batches[MAX_ENGINE_INSTANCE8 + 1];
263 struct i915_vma *trampoline; /** trampoline used for chaining */
264
265 /** used for excl fence in dma_resv objects when > 1 BB submitted */
266 struct dma_fence *composite_fence;
267
268 /** actual size of execobj[] as we may extend it for the cmdparser */
269 unsigned int buffer_count;
270
271 /* number of batches in execbuf IOCTL */
272 unsigned int num_batches;
273
274 /** list of vma not yet bound during reservation phase */
275 struct list_head unbound;
276
277 /** list of vma that have execobj.relocation_count */
278 struct list_head relocs;
279
280 struct i915_gem_ww_ctx ww;
281
282 /**
283 * Track the most recently used object for relocations, as we
284 * frequently have to perform multiple relocations within the same
285 * obj/page
286 */
287 struct reloc_cache {
288 struct drm_mm_node node; /** temporary GTT binding */
289 unsigned long vaddr; /** Current kmap address */
290 unsigned long page; /** Currently mapped page index */
291 unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
292 bool_Bool use_64bit_reloc : 1;
293 bool_Bool has_llc : 1;
294 bool_Bool has_fence : 1;
295 bool_Bool needs_unfenced : 1;
296
297 struct agp_map *map;
298 bus_space_tag_t iot;
299 bus_space_handle_t ioh;
300 } reloc_cache;
301
302 u64 invalid_flags; /** Set of execobj.flags that are invalid */
303
304 /** Length of batch within object */
305 u64 batch_len[MAX_ENGINE_INSTANCE8 + 1];
306 u32 batch_start_offset; /** Location within object of batch */
307 u32 batch_flags; /** Flags composed for emit_bb_start() */
308 struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
309
310 /**
311	 * Indicate either the size of the hashtable used to resolve
312 * relocation handles, or if negative that we are using a direct
313 * index into the execobj[].
314 */
315 int lut_size;
316 struct hlist_head *buckets; /** ht for relocation handles */
317
318 struct eb_fence *fences;
319 unsigned long num_fences;
320#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1
321 struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE8 + 1];
322#endif
323};
324
325static int eb_parse(struct i915_execbuffer *eb);
326static int eb_pin_engine(struct i915_execbuffer *eb, bool_Bool throttle);
327static void eb_unpin_engine(struct i915_execbuffer *eb);
328static void eb_capture_release(struct i915_execbuffer *eb);
329
330static inline bool_Bool eb_use_cmdparser(const struct i915_execbuffer *eb)
331{
332 return intel_engine_requires_cmd_parser(eb->context->engine) ||
333 (intel_engine_using_cmd_parser(eb->context->engine) &&
334 eb->args->batch_len);
335}
336
337static int eb_create(struct i915_execbuffer *eb)
338{
339	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
	[39] Assuming the condition is true
	[40] Taking true branch
340		unsigned int size = 1 + ilog2(eb->buffer_count);
	[41] '?' condition is true (ilog2() expands to: (sizeof(eb->buffer_count) <= 4) ? (fls(eb->buffer_count) - 1) : (flsl(eb->buffer_count) - 1))
341
342 /*
343 * Without a 1:1 association between relocation handles and
344 * the execobject[] index, we instead create a hashtable.
345 * We size it dynamically based on available memory, starting
346		 * first with a 1:1 associative hash and scaling back until
347 * the allocation succeeds.
348 *
349 * Later on we use a positive lut_size to indicate we are
350 * using this hashtable, and a negative value to indicate a
351 * direct lookup.
352 */
353 do {
	[47] Loop condition is true. Execution continues on line 354
354 gfp_t flags;
355
356 /* While we can still reduce the allocation size, don't
357 * raise a warning and allow the allocation to fail.
358 * On the last pass though, we want to try as hard
359 * as possible to perform the allocation and warn
360 * if it fails.
361 */
362			flags = GFP_KERNEL;
363			if (size > 1)
	[47.1] 'size' is > 1
	[42] Assuming 'size' is <= 1
	[43] Taking false branch
	[48] Taking true branch
364				flags |= __GFP_NORETRY | __GFP_NOWARN;
365
366			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
	[49] The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long'
367					      flags);
368			if (eb->buckets)
	[44] Assuming field 'buckets' is null
	[45] Taking false branch
369				break;
370		} while (--size);
	[46] Value assigned to 'size'
371
372		if (unlikely(!size))
373			return -ENOMEM;
374
375 eb->lut_size = size;
376 } else {
377 eb->lut_size = -eb->buffer_count;
378 }
379
380 return 0;
381}
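
Putting the trace together: the ilog2() expansion quoted in the report reduces to fls(eb->buffer_count) - 1, and fls(0) is 0, so size enters the do/while loop as 0 only when eb->buffer_count is 0. The first kzalloc() then shifts by 0, the allocation is assumed to fail (step [44]), and --size wraps the unsigned counter to 4294967295, which becomes the shift count flagged at line 366. A minimal sketch of that arithmetic, assuming buffer_count == 0 and every allocation failing; fls() is modelled with __builtin_clz(), and the demo only prints the shift counts rather than performing the undefined shift:

#include <stdio.h>

int main(void)
{
	unsigned int buffer_count = 0;	/* the only value that makes size start at 0 */
	int fls_val = buffer_count ? 32 - __builtin_clz(buffer_count) : 0;
	unsigned int size = 1 + (fls_val - 1);	/* 1 + ilog2(0) == 0 */
	unsigned int pass = 0;

	do {
		/* pass 1: shift count 0 (fine); pass 2: shift count 4294967295 (UB) */
		printf("pass %u: kzalloc(sizeof(struct hlist_head) << %u)\n",
		       ++pass, size);
		/* allocation assumed to fail, so no break */
	} while (--size && pass < 2);	/* --size wraps 0 -> UINT_MAX */

	return 0;
}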
382
383static bool_Bool
384eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
385 const struct i915_vma *vma,
386 unsigned int flags)
387{
388 if (vma->node.size < entry->pad_to_size)
389 return true1;
390
391	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
392		return true;
393
394 if (flags & EXEC_OBJECT_PINNED(1<<4) &&
395 vma->node.start != entry->offset)
396 return true1;
397
398 if (flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (26)) &&
399 vma->node.start < BATCH_OFFSET_BIAS(256*1024))
400 return true1;
401
402 if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)) &&
403 (vma->node.start + vma->node.size + 4095) >> 32)
404 return true1;
405
406 if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (27)) &&
407 !i915_vma_is_map_and_fenceable(vma))
408 return true1;
409
410 return false0;
411}
412
413static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
414 unsigned int exec_flags)
415{
416 u64 pin_flags = 0;
417
418 if (exec_flags & EXEC_OBJECT_NEEDS_GTT(1<<1))
419 pin_flags |= PIN_GLOBAL(1ULL << (10));
420
421 /*
422 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
423 * limit address to the first 4GBs for unflagged objects.
424 */
425 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
426 pin_flags |= PIN_ZONE_4G(1ULL << (4));
427
428 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (27)))
429 pin_flags |= PIN_MAPPABLE(1ULL << (3));
430
431 if (exec_flags & EXEC_OBJECT_PINNED(1<<4))
432 pin_flags |= entry->offset | PIN_OFFSET_FIXED(1ULL << (7));
433 else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (26)))
434 pin_flags |= BATCH_OFFSET_BIAS(256*1024) | PIN_OFFSET_BIAS(1ULL << (6));
435
436 return pin_flags;
437}
438
439static inline int
440eb_pin_vma(struct i915_execbuffer *eb,
441 const struct drm_i915_gem_exec_object2 *entry,
442 struct eb_vma *ev)
443{
444 struct i915_vma *vma = ev->vma;
445 u64 pin_flags;
446 int err;
447
448 if (vma->node.size)
449 pin_flags = vma->node.start;
450 else
451 pin_flags = entry->offset & PIN_OFFSET_MASK-(1ULL << (12));
452
453 pin_flags |= PIN_USER(1ULL << (11)) | PIN_NOEVICT(1ULL << (0)) | PIN_OFFSET_FIXED(1ULL << (7)) | PIN_VALIDATE(1ULL << (8));
454 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)__builtin_expect(!!(ev->flags & (1<<1)), 0))
455 pin_flags |= PIN_GLOBAL(1ULL << (10));
456
457 /* Attempt to reuse the current location if available */
458 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
459 if (err == -EDEADLK11)
460 return err;
461
462 if (unlikely(err)__builtin_expect(!!(err), 0)) {
463 if (entry->flags & EXEC_OBJECT_PINNED(1<<4))
464 return err;
465
466 /* Failing that pick any _free_ space if suitable */
467 err = i915_vma_pin_ww(vma, &eb->ww,
468 entry->pad_to_size,
469 entry->alignment,
470 eb_pin_flags(entry, ev->flags) |
471 PIN_USER(1ULL << (11)) | PIN_NOEVICT(1ULL << (0)) | PIN_VALIDATE(1ULL << (8)));
472 if (unlikely(err)__builtin_expect(!!(err), 0))
473 return err;
474 }
475
476 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
477 err = i915_vma_pin_fence(vma);
478 if (unlikely(err)__builtin_expect(!!(err), 0))
479 return err;
480
481 if (vma->fence)
482 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (29));
483 }
484
485 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (30));
486 if (eb_vma_misplaced(entry, vma, ev->flags))
487 return -EBADSLT22;
488
489 return 0;
490}
491
492static inline void
493eb_unreserve_vma(struct eb_vma *ev)
494{
495	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
496 __i915_vma_unpin_fence(ev->vma);
497
498 ev->flags &= ~__EXEC_OBJECT_RESERVED((1UL << (30)) | (1UL << (29)));
499}
500
501static int
502eb_validate_vma(struct i915_execbuffer *eb,
503 struct drm_i915_gem_exec_object2 *entry,
504 struct i915_vma *vma)
505{
506 /* Relocations are disallowed for all platforms after TGL-LP. This
507 * also covers all platforms with local memory.
508 */
509 if (entry->relocation_count &&
510 GRAPHICS_VER(eb->i915)((&(eb->i915)->__runtime)->graphics.ip.ver) >= 12 && !IS_TIGERLAKE(eb->i915)IS_PLATFORM(eb->i915, INTEL_TIGERLAKE))
511 return -EINVAL22;
512
513	if (unlikely(entry->flags & eb->invalid_flags))
514		return -EINVAL;
515
516	if (unlikely(entry->alignment &&
517		     !is_power_of_2_u64(entry->alignment)))
518		return -EINVAL;
519
520 /*
521 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
522 * any non-page-aligned or non-canonical addresses.
523 */
524	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
525		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
526		return -EINVAL;
527
528 /* pad_to_size was once a reserved field, so sanitize it */
529 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE(1<<5)) {
530		if (unlikely(offset_in_page(entry->pad_to_size)))
531			return -EINVAL;
532 } else {
533 entry->pad_to_size = 0;
534 }
535 /*
536 * From drm_mm perspective address space is continuous,
537 * so from this point we're always using non-canonical
538 * form internally.
539 */
540 entry->offset = gen8_noncanonical_addr(entry->offset);
541
542 if (!eb->reloc_cache.has_fence) {
543 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE(1<<0);
544 } else {
545 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE(1<<0) ||
546 eb->reloc_cache.needs_unfenced) &&
547 i915_gem_object_is_tiled(vma->obj))
548 entry->flags |= EXEC_OBJECT_NEEDS_GTT(1<<1) | __EXEC_OBJECT_NEEDS_MAP(1UL << (27));
549 }
550
551 return 0;
552}
553
554static inline bool_Bool
555is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
556{
557 return eb->args->flags & I915_EXEC_BATCH_FIRST(1<<18) ?
558 buffer_idx < eb->num_batches :
559 buffer_idx >= eb->args->buffer_count - eb->num_batches;
560}
561
562static int
563eb_add_vma(struct i915_execbuffer *eb,
564 unsigned int *current_batch,
565 unsigned int i,
566 struct i915_vma *vma)
567{
568 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
569 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
570 struct eb_vma *ev = &eb->vma[i];
571
572 ev->vma = vma;
573 ev->exec = entry;
574 ev->flags = entry->flags;
575
576 if (eb->lut_size > 0) {
577 ev->handle = entry->handle;
578 hlist_add_head(&ev->node,
579 &eb->buckets[hash_32(entry->handle,
580 eb->lut_size)]);
581 }
582
583 if (entry->relocation_count)
584 list_add_tail(&ev->reloc_link, &eb->relocs);
585
586 /*
587 * SNA is doing fancy tricks with compressing batch buffers, which leads
588 * to negative relocation deltas. Usually that works out ok since the
589 * relocate address is still positive, except when the batch is placed
590 * very low in the GTT. Ensure this doesn't happen.
591 *
592 * Note that actual hangs have only been observed on gen7, but for
593 * paranoia do it everywhere.
594 */
595 if (is_batch_buffer(eb, i)) {
596 if (entry->relocation_count &&
597 !(ev->flags & EXEC_OBJECT_PINNED(1<<4)))
598 ev->flags |= __EXEC_OBJECT_NEEDS_BIAS(1UL << (26));
599 if (eb->reloc_cache.has_fence)
600 ev->flags |= EXEC_OBJECT_NEEDS_FENCE(1<<0);
601
602 eb->batches[*current_batch] = ev;
603
604		if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) {
605			drm_dbg(&i915->drm,
606				"Attempting to use self-modifying batch buffer\n");
607			return -EINVAL;
608 }
609
610		if (range_overflows_t(u64,
611				      eb->batch_start_offset,
612				      eb->args->batch_len,
613				      ev->vma->size)) {
614			drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
615			return -EINVAL;
616 }
617
618 if (eb->args->batch_len == 0)
619 eb->batch_len[*current_batch] = ev->vma->size -
620 eb->batch_start_offset;
621 else
622 eb->batch_len[*current_batch] = eb->args->batch_len;
623		if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */
624			drm_dbg(&i915->drm, "Invalid batch length\n");
625			return -EINVAL;
626 }
627
628 ++*current_batch;
629 }
630
631 return 0;
632}
633
634static inline int use_cpu_reloc(const struct reloc_cache *cache,
635 const struct drm_i915_gem_object *obj)
636{
637 if (!i915_gem_object_has_struct_page(obj))
638 return false0;
639
640 if (DBG_FORCE_RELOC0 == FORCE_CPU_RELOC)
641 return true1;
642
643 if (DBG_FORCE_RELOC0 == FORCE_GTT_RELOC)
644 return false0;
645
646 return (cache->has_llc ||
647 obj->cache_dirty ||
648 obj->cache_level != I915_CACHE_NONE);
649}
650
651static int eb_reserve_vma(struct i915_execbuffer *eb,
652 struct eb_vma *ev,
653 u64 pin_flags)
654{
655 struct drm_i915_gem_exec_object2 *entry = ev->exec;
656 struct i915_vma *vma = ev->vma;
657 int err;
658
659 if (drm_mm_node_allocated(&vma->node) &&
660 eb_vma_misplaced(entry, vma, ev->flags)) {
661 err = i915_vma_unbind(vma);
662 if (err)
663 return err;
664 }
665
666 err = i915_vma_pin_ww(vma, &eb->ww,
667 entry->pad_to_size, entry->alignment,
668 eb_pin_flags(entry, ev->flags) | pin_flags);
669 if (err)
670 return err;
671
672 if (entry->offset != vma->node.start) {
673 entry->offset = vma->node.start | UPDATE(1ULL << (7));
674 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
675 }
676
677 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
678 err = i915_vma_pin_fence(vma);
679 if (unlikely(err)__builtin_expect(!!(err), 0))
680 return err;
681
682 if (vma->fence)
683 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (29));
684 }
685
686 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (30));
687 GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags))((void)0);
688
689 return 0;
690}
691
692static bool_Bool eb_unbind(struct i915_execbuffer *eb, bool_Bool force)
693{
694 const unsigned int count = eb->buffer_count;
695 unsigned int i;
696 struct list_head last;
697 bool_Bool unpinned = false0;
698
699 /* Resort *all* the objects into priority order */
700 INIT_LIST_HEAD(&eb->unbound);
701 INIT_LIST_HEAD(&last);
702
703 for (i = 0; i < count; i++) {
704 struct eb_vma *ev = &eb->vma[i];
705 unsigned int flags = ev->flags;
706
707 if (!force && flags & EXEC_OBJECT_PINNED(1<<4) &&
708 flags & __EXEC_OBJECT_HAS_PIN(1UL << (30)))
709 continue;
710
711 unpinned = true1;
712 eb_unreserve_vma(ev);
713
714 if (flags & EXEC_OBJECT_PINNED(1<<4))
715 /* Pinned must have their slot */
716 list_add(&ev->bind_link, &eb->unbound);
717 else if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (27)))
718 /* Map require the lowest 256MiB (aperture) */
719 list_add_tail(&ev->bind_link, &eb->unbound);
720 else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
721 /* Prioritise 4GiB region for restricted bo */
722 list_add(&ev->bind_link, &last);
723 else
724 list_add_tail(&ev->bind_link, &last);
725 }
726
727 list_splice_tail(&last, &eb->unbound);
728 return unpinned;
729}
730
731static int eb_reserve(struct i915_execbuffer *eb)
732{
733 struct eb_vma *ev;
734 unsigned int pass;
735 int err = 0;
736 bool_Bool unpinned;
737
738 /*
739	 * We have one or more buffers that we couldn't bind, which could be due to
740 * various reasons. To resolve this we have 4 passes, with every next
741 * level turning the screws tighter:
742 *
743 * 0. Unbind all objects that do not match the GTT constraints for the
744 * execbuffer (fenceable, mappable, alignment etc). Bind all new
745 * objects. This avoids unnecessary unbinding of later objects in order
746 * to make room for the earlier objects *unless* we need to defragment.
747 *
748 * 1. Reorder the buffers, where objects with the most restrictive
749 * placement requirements go first (ignoring fixed location buffers for
750 * now). For example, objects needing the mappable aperture (the first
751 * 256M of GTT), should go first vs objects that can be placed just
752 * about anywhere. Repeat the previous pass.
753 *
754 * 2. Consider buffers that are pinned at a fixed location. Also try to
755 * evict the entire VM this time, leaving only objects that we were
756 * unable to lock. Try again to bind the buffers. (still using the new
757 * buffer order).
758 *
759 * 3. We likely have object lock contention for one or more stubborn
760 * objects in the VM, for which we need to evict to make forward
761 * progress (perhaps we are fighting the shrinker?). When evicting the
762 * VM this time around, anything that we can't lock we now track using
763 * the busy_bo, using the full lock (after dropping the vm->mutex to
764 * prevent deadlocks), instead of trylock. We then continue to evict the
765 * VM, this time with the stubborn object locked, which we can now
766 * hopefully unbind (if still bound in the VM). Repeat until the VM is
767	 * evicted. Finally we should be able to bind everything.
768 */
769 for (pass = 0; pass <= 3; pass++) {
770 int pin_flags = PIN_USER(1ULL << (11)) | PIN_VALIDATE(1ULL << (8));
771
772 if (pass == 0)
773 pin_flags |= PIN_NONBLOCK(1ULL << (2));
774
775 if (pass >= 1)
776 unpinned = eb_unbind(eb, pass >= 2);
777
778 if (pass == 2) {
779 err = mutex_lock_interruptible(&eb->context->vm->mutex);
780 if (!err) {
781 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL((void *)0));
782 mutex_unlock(&eb->context->vm->mutex)rw_exit_write(&eb->context->vm->mutex);
783 }
784 if (err)
785 return err;
786 }
787
788 if (pass == 3) {
789retry:
790 err = mutex_lock_interruptible(&eb->context->vm->mutex);
791 if (!err) {
792 struct drm_i915_gem_object *busy_bo = NULL((void *)0);
793
794 err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
795 mutex_unlock(&eb->context->vm->mutex)rw_exit_write(&eb->context->vm->mutex);
796 if (err && busy_bo) {
797 err = i915_gem_object_lock(busy_bo, &eb->ww);
798 i915_gem_object_put(busy_bo);
799 if (!err)
800 goto retry;
801 }
802 }
803 if (err)
804 return err;
805 }
806
807		list_for_each_entry(ev, &eb->unbound, bind_link) {
808 err = eb_reserve_vma(eb, ev, pin_flags);
809 if (err)
810 break;
811 }
812
813 if (err != -ENOSPC28)
814 break;
815 }
816
817 return err;
818}
819
820static int eb_select_context(struct i915_execbuffer *eb)
821{
822 struct i915_gem_context *ctx;
823
824 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
825 if (unlikely(IS_ERR(ctx))__builtin_expect(!!(IS_ERR(ctx)), 0))
826 return PTR_ERR(ctx);
827
828 eb->gem_context = ctx;
829 if (i915_gem_context_has_full_ppgtt(ctx))
830 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT(1<<1);
831
832 return 0;
833}
834
835static int __eb_add_lut(struct i915_execbuffer *eb,
836 u32 handle, struct i915_vma *vma)
837{
838 struct i915_gem_context *ctx = eb->gem_context;
839 struct i915_lut_handle *lut;
840 int err;
841
842 lut = i915_lut_handle_alloc();
843 if (unlikely(!lut)__builtin_expect(!!(!lut), 0))
844 return -ENOMEM12;
845
846 i915_vma_get(vma);
847 if (!atomic_fetch_inc(&vma->open_count)__sync_fetch_and_add(&vma->open_count, 1))
848 i915_vma_reopen(vma);
849 lut->handle = handle;
850 lut->ctx = ctx;
851
852 /* Check that the context hasn't been closed in the meantime */
853 err = -EINTR4;
854 if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
855 if (likely(!i915_gem_context_is_closed(ctx))__builtin_expect(!!(!i915_gem_context_is_closed(ctx)), 1))
856 err = radix_tree_insert(&ctx->handles_vma, handle, vma);
857 else
858 err = -ENOENT2;
859 if (err == 0) { /* And nor has this handle */
860 struct drm_i915_gem_object *obj = vma->obj;
861
862 spin_lock(&obj->lut_lock)mtx_enter(&obj->lut_lock);
863 if (idr_find(&eb->file->object_idr, handle) == obj) {
864 list_add(&lut->obj_link, &obj->lut_list);
865 } else {
866 radix_tree_delete(&ctx->handles_vma, handle);
867 err = -ENOENT2;
868 }
869 spin_unlock(&obj->lut_lock)mtx_leave(&obj->lut_lock);
870 }
871 mutex_unlock(&ctx->lut_mutex)rw_exit_write(&ctx->lut_mutex);
872 }
873 if (unlikely(err)__builtin_expect(!!(err), 0))
874 goto err;
875
876 return 0;
877
878err:
879 i915_vma_close(vma);
880 i915_vma_put(vma);
881 i915_lut_handle_free(lut);
882 return err;
883}
884
885static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
886{
887 struct i915_address_space *vm = eb->context->vm;
888
889 do {
890 struct drm_i915_gem_object *obj;
891 struct i915_vma *vma;
892 int err;
893
894 rcu_read_lock();
895 vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
896 if (likely(vma && vma->vm == vm)__builtin_expect(!!(vma && vma->vm == vm), 1))
897 vma = i915_vma_tryget(vma);
898 rcu_read_unlock();
899 if (likely(vma)__builtin_expect(!!(vma), 1))
900 return vma;
901
902 obj = i915_gem_object_lookup(eb->file, handle);
903 if (unlikely(!obj)__builtin_expect(!!(!obj), 0))
904 return ERR_PTR(-ENOENT2);
905
906 /*
907 * If the user has opted-in for protected-object tracking, make
908 * sure the object encryption can be used.
909 * We only need to do this when the object is first used with
910 * this context, because the context itself will be banned when
911 * the protected objects become invalid.
912 */
913 if (i915_gem_context_uses_protected_content(eb->gem_context) &&
914 i915_gem_object_is_protected(obj)) {
915 err = intel_pxp_key_check(&vm->gt->pxp, obj, true1);
916 if (err) {
917 i915_gem_object_put(obj);
918 return ERR_PTR(err);
919 }
920 }
921
922 vma = i915_vma_instance(obj, vm, NULL((void *)0));
923 if (IS_ERR(vma)) {
924 i915_gem_object_put(obj);
925 return vma;
926 }
927
928 err = __eb_add_lut(eb, handle, vma);
929 if (likely(!err)__builtin_expect(!!(!err), 1))
930 return vma;
931
932 i915_gem_object_put(obj);
933 if (err != -EEXIST17)
934 return ERR_PTR(err);
935 } while (1);
936}
937
938static int eb_lookup_vmas(struct i915_execbuffer *eb)
939{
940 unsigned int i, current_batch = 0;
941 int err = 0;
942
943 INIT_LIST_HEAD(&eb->relocs);
944
945 for (i = 0; i < eb->buffer_count; i++) {
946 struct i915_vma *vma;
947
948 vma = eb_lookup_vma(eb, eb->exec[i].handle);
949 if (IS_ERR(vma)) {
950 err = PTR_ERR(vma);
951 goto err;
952 }
953
954 err = eb_validate_vma(eb, &eb->exec[i], vma);
955 if (unlikely(err)__builtin_expect(!!(err), 0)) {
956 i915_vma_put(vma);
957 goto err;
958 }
959
960 err = eb_add_vma(eb, &current_batch, i, vma);
961 if (err)
962 return err;
963
964 if (i915_gem_object_is_userptr(vma->obj)) {
965 err = i915_gem_object_userptr_submit_init(vma->obj);
966 if (err) {
967 if (i + 1 < eb->buffer_count) {
968 /*
969 * Execbuffer code expects last vma entry to be NULL,
970 * since we already initialized this entry,
971 * set the next value to NULL or we mess up
972 * cleanup handling.
973 */
974 eb->vma[i + 1].vma = NULL((void *)0);
975 }
976
977 return err;
978 }
979
980 eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT(1UL << (28));
981 eb->args->flags |= __EXEC_USERPTR_USED(1UL << (29));
982 }
983 }
984
985 return 0;
986
987err:
988 eb->vma[i].vma = NULL((void *)0);
989 return err;
990}
991
992static int eb_lock_vmas(struct i915_execbuffer *eb)
993{
994 unsigned int i;
995 int err;
996
997 for (i = 0; i < eb->buffer_count; i++) {
998 struct eb_vma *ev = &eb->vma[i];
999 struct i915_vma *vma = ev->vma;
1000
1001 err = i915_gem_object_lock(vma->obj, &eb->ww);
1002 if (err)
1003 return err;
1004 }
1005
1006 return 0;
1007}
1008
1009static int eb_validate_vmas(struct i915_execbuffer *eb)
1010{
1011 unsigned int i;
1012 int err;
1013
1014 INIT_LIST_HEAD(&eb->unbound);
1015
1016 err = eb_lock_vmas(eb);
1017 if (err)
1018 return err;
1019
1020 for (i = 0; i < eb->buffer_count; i++) {
1021 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
1022 struct eb_vma *ev = &eb->vma[i];
1023 struct i915_vma *vma = ev->vma;
1024
1025 err = eb_pin_vma(eb, entry, ev);
1026 if (err == -EDEADLK11)
1027 return err;
1028
1029 if (!err) {
1030 if (entry->offset != vma->node.start) {
1031 entry->offset = vma->node.start | UPDATE(1ULL << (7));
1032 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
1033 }
1034 } else {
1035 eb_unreserve_vma(ev);
1036
1037 list_add_tail(&ev->bind_link, &eb->unbound);
1038 if (drm_mm_node_allocated(&vma->node)) {
1039 err = i915_vma_unbind(vma);
1040 if (err)
1041 return err;
1042 }
1043 }
1044
1045 /* Reserve enough slots to accommodate composite fences */
1046 err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
1047 if (err)
1048 return err;
1049
1050 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&((void)0)
1051 eb_vma_misplaced(&eb->exec[i], vma, ev->flags))((void)0);
1052 }
1053
1054 if (!list_empty(&eb->unbound))
1055 return eb_reserve(eb);
1056
1057 return 0;
1058}
1059
1060static struct eb_vma *
1061eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
1062{
1063 if (eb->lut_size < 0) {
1064 if (handle >= -eb->lut_size)
1065 return NULL((void *)0);
1066 return &eb->vma[handle];
1067 } else {
1068 struct hlist_head *head;
1069 struct eb_vma *ev;
1070
1071 head = &eb->buckets[hash_32(handle, eb->lut_size)];
1072		hlist_for_each_entry(ev, head, node) {
1073 if (ev->handle == handle)
1074 return ev;
1075 }
1076 return NULL((void *)0);
1077 }
1078}
1079
1080static void eb_release_vmas(struct i915_execbuffer *eb, bool_Bool final)
1081{
1082 const unsigned int count = eb->buffer_count;
1083 unsigned int i;
1084
1085 for (i = 0; i < count; i++) {
1086 struct eb_vma *ev = &eb->vma[i];
1087 struct i915_vma *vma = ev->vma;
1088
1089 if (!vma)
1090 break;
1091
1092 eb_unreserve_vma(ev);
1093
1094 if (final)
1095 i915_vma_put(vma);
1096 }
1097
1098 eb_capture_release(eb);
1099 eb_unpin_engine(eb);
1100}
1101
1102static void eb_destroy(const struct i915_execbuffer *eb)
1103{
1104 if (eb->lut_size > 0)
1105 kfree(eb->buckets);
1106}
1107
1108static inline u64
1109relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
1110 const struct i915_vma *target)
1111{
1112 return gen8_canonical_addr((int)reloc->delta + target->node.start);
1113}
1114
1115static void reloc_cache_init(struct reloc_cache *cache,
1116 struct drm_i915_privateinteldrm_softc *i915)
1117{
1118 cache->page = -1;
1119 cache->vaddr = 0;
1120 /* Must be a variable in the struct to allow GCC to unroll. */
1121 cache->graphics_ver = GRAPHICS_VER(i915)((&(i915)->__runtime)->graphics.ip.ver);
1122 cache->has_llc = HAS_LLC(i915)((&(i915)->__info)->has_llc);
1123 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915)((&(i915)->__info)->has_64bit_reloc);
1124 cache->has_fence = cache->graphics_ver < 4;
1125 cache->needs_unfenced = INTEL_INFO(i915)(&(i915)->__info)->unfenced_needs_alignment;
1126 cache->node.flags = 0;
1127
1128 cache->map = i915->agph;
1129 cache->iot = i915->bst;
1130}
1131
1132static inline void *unmask_page(unsigned long p)
1133{
1134 return (void *)(uintptr_t)(p & LINUX_PAGE_MASK(~((1 << 12) - 1)));
1135}
1136
1137static inline unsigned int unmask_flags(unsigned long p)
1138{
1139 return p & ~LINUX_PAGE_MASK(~((1 << 12) - 1));
1140}
1141
1142#define KMAP0x4 0x4 /* after CLFLUSH_FLAGS */
1143
1144static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
1145{
1146 struct drm_i915_privateinteldrm_softc *i915 =
1147		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
1148 return to_gt(i915)->ggtt;
1149}
1150
1151static void reloc_cache_unmap(struct reloc_cache *cache)
1152{
1153 void *vaddr;
1154
1155 if (!cache->vaddr)
1156 return;
1157
1158 vaddr = unmask_page(cache->vaddr);
1159 if (cache->vaddr & KMAP0x4)
1160 kunmap_atomic(vaddr);
1161 else
1162#ifdef __linux__
1163 io_mapping_unmap_atomic((void __iomem *)vaddr);
1164#else
1165 agp_unmap_atomic(cache->map, cache->ioh);
1166#endif
1167}
1168
1169static void reloc_cache_remap(struct reloc_cache *cache,
1170 struct drm_i915_gem_object *obj)
1171{
1172 void *vaddr;
1173
1174 if (!cache->vaddr)
1175 return;
1176
1177 if (cache->vaddr & KMAP0x4) {
1178 struct vm_page *page = i915_gem_object_get_page(obj, cache->page);
1179
1180 vaddr = kmap_atomic(page);
1181 cache->vaddr = unmask_flags(cache->vaddr) |
1182 (unsigned long)vaddr;
1183 } else {
1184 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1185 unsigned long offset;
1186
1187 offset = cache->node.start;
1188 if (!drm_mm_node_allocated(&cache->node))
1189 offset += cache->page << PAGE_SHIFT12;
1190
1191#ifdef __linux__
1192 cache->vaddr = (unsigned long)
1193 io_mapping_map_atomic_wc(&ggtt->iomap, offset);
1194#else
1195 agp_map_atomic(cache->map, offset, &cache->ioh);
1196 cache->vaddr = (unsigned long)
1197 bus_space_vaddr(cache->iot, cache->ioh)((cache->iot)->vaddr((cache->ioh)));
1198#endif
1199 }
1200}
1201
1202static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
1203{
1204 void *vaddr;
1205
1206 if (!cache->vaddr)
1207 return;
1208
1209 vaddr = unmask_page(cache->vaddr);
1210 if (cache->vaddr & KMAP0x4) {
1211 struct drm_i915_gem_object *obj =
1212 (struct drm_i915_gem_object *)cache->node.mm;
1213 if (cache->vaddr & CLFLUSH_AFTER(1UL << (1)))
1214 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1215
1216 kunmap_atomic(vaddr);
1217 i915_gem_object_finish_access(obj);
1218 } else {
1219 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1220
1221 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1222#ifdef __linux__
1223 io_mapping_unmap_atomic((void __iomem *)vaddr);
1224#else
1225 agp_unmap_atomic(cache->map, cache->ioh);
1226#endif
1227
1228 if (drm_mm_node_allocated(&cache->node)) {
1229 ggtt->vm.clear_range(&ggtt->vm,
1230 cache->node.start,
1231 cache->node.size);
1232 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1233 drm_mm_remove_node(&cache->node);
1234 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1235 } else {
1236 i915_vma_unpin((struct i915_vma *)cache->node.mm);
1237 }
1238 }
1239
1240 cache->vaddr = 0;
1241 cache->page = -1;
1242}
1243
1244static void *reloc_kmap(struct drm_i915_gem_object *obj,
1245 struct reloc_cache *cache,
1246 unsigned long pageno)
1247{
1248 void *vaddr;
1249 struct vm_page *page;
1250
1251 if (cache->vaddr) {
1252 kunmap_atomic(unmask_page(cache->vaddr));
1253 } else {
1254 unsigned int flushes;
1255 int err;
1256
1257 err = i915_gem_object_prepare_write(obj, &flushes);
1258 if (err)
1259 return ERR_PTR(err);
1260
1261		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1262		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);
1263
1264 cache->vaddr = flushes | KMAP0x4;
1265 cache->node.mm = (void *)obj;
1266 if (flushes)
1267 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1268 }
1269
1270 page = i915_gem_object_get_page(obj, pageno);
1271 if (!obj->mm.dirty)
1272 set_page_dirty(page)x86_atomic_clearbits_u32(&page->pg_flags, 0x00000008);
1273
1274 vaddr = kmap_atomic(page);
1275 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1276 cache->page = pageno;
1277
1278 return vaddr;
1279}
1280
1281static void *reloc_iomap(struct i915_vma *batch,
1282 struct i915_execbuffer *eb,
1283 unsigned long page)
1284{
1285 struct drm_i915_gem_object *obj = batch->obj;
1286 struct reloc_cache *cache = &eb->reloc_cache;
1287 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1288 unsigned long offset;
1289 void *vaddr;
1290
1291 if (cache->vaddr) {
1292 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1293#ifdef __linux__
1294 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1295#else
1296 agp_unmap_atomic(cache->map, cache->ioh);
1297#endif
1298 } else {
1299 struct i915_vma *vma = ERR_PTR(-ENODEV19);
1300 int err;
1301
1302 if (i915_gem_object_is_tiled(obj))
1303 return ERR_PTR(-EINVAL22);
1304
1305 if (use_cpu_reloc(cache, obj))
1306 return NULL((void *)0);
1307
1308 err = i915_gem_object_set_to_gtt_domain(obj, true1);
1309 if (err)
1310 return ERR_PTR(err);
1311
1312 /*
1313 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
1314 * VMA from the object list because we no longer pin.
1315 *
1316 * Only attempt to pin the batch buffer to ggtt if the current batch
1317 * is not inside ggtt, or the batch buffer is not misplaced.
1318 */
1319 if (!i915_is_ggtt(batch->vm)((batch->vm)->is_ggtt) ||
1320 !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE(1ULL << (3)))) {
1321 vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL((void *)0), 0, 0,
1322 PIN_MAPPABLE(1ULL << (3)) |
1323 PIN_NONBLOCK(1ULL << (2)) /* NOWARN */ |
1324 PIN_NOEVICT(1ULL << (0)));
1325 }
1326
1327 if (vma == ERR_PTR(-EDEADLK11))
1328 return vma;
1329
1330 if (IS_ERR(vma)) {
1331			memset(&cache->node, 0, sizeof(cache->node));
1332 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1333 err = drm_mm_insert_node_in_range
1334 (&ggtt->vm.mm, &cache->node,
1335 PAGE_SIZE(1 << 12), 0, I915_COLOR_UNEVICTABLE(-1),
1336 0, ggtt->mappable_end,
1337 DRM_MM_INSERT_LOW);
1338 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1339 if (err) /* no inactive aperture space, use cpu reloc */
1340 return NULL((void *)0);
1341 } else {
1342 cache->node.start = vma->node.start;
1343 cache->node.mm = (void *)vma;
1344 }
1345 }
1346
1347 offset = cache->node.start;
1348 if (drm_mm_node_allocated(&cache->node)) {
1349 ggtt->vm.insert_page(&ggtt->vm,
1350 i915_gem_object_get_dma_address(obj, page),
1351 offset, I915_CACHE_NONE, 0);
1352 } else {
1353 offset += page << PAGE_SHIFT12;
1354 }
1355
1356#ifdef __linux__
1357 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1358 offset);
1359#else
1360 agp_map_atomic(cache->map, offset, &cache->ioh);
1361 vaddr = bus_space_vaddr(cache->iot, cache->ioh)((cache->iot)->vaddr((cache->ioh)));
1362#endif
1363 cache->page = page;
1364 cache->vaddr = (unsigned long)vaddr;
1365
1366 return vaddr;
1367}
1368
1369static void *reloc_vaddr(struct i915_vma *vma,
1370 struct i915_execbuffer *eb,
1371 unsigned long page)
1372{
1373 struct reloc_cache *cache = &eb->reloc_cache;
1374 void *vaddr;
1375
1376 if (cache->page == page) {
1377 vaddr = unmask_page(cache->vaddr);
1378 } else {
1379 vaddr = NULL((void *)0);
1380 if ((cache->vaddr & KMAP0x4) == 0)
1381 vaddr = reloc_iomap(vma, eb, page);
1382 if (!vaddr)
1383 vaddr = reloc_kmap(vma->obj, cache, page);
1384 }
1385
1386 return vaddr;
1387}
1388
1389static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1390{
1391	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1392 if (flushes & CLFLUSH_BEFORE(1UL << (0)))
1393 drm_clflush_virt_range(addr, sizeof(*addr));
1394
1395 *addr = value;
1396
1397 /*
1398 * Writes to the same cacheline are serialised by the CPU
1399 * (including clflush). On the write path, we only require
1400 * that it hits memory in an orderly fashion and place
1401 * mb barriers at the start and end of the relocation phase
1402 * to ensure ordering of clflush wrt to the system.
1403 */
1404 if (flushes & CLFLUSH_AFTER(1UL << (1)))
1405 drm_clflush_virt_range(addr, sizeof(*addr));
1406 } else
1407 *addr = value;
1408}
1409
1410static u64
1411relocate_entry(struct i915_vma *vma,
1412 const struct drm_i915_gem_relocation_entry *reloc,
1413 struct i915_execbuffer *eb,
1414 const struct i915_vma *target)
1415{
1416 u64 target_addr = relocation_target(reloc, target);
1417 u64 offset = reloc->offset;
1418 bool_Bool wide = eb->reloc_cache.use_64bit_reloc;
1419 void *vaddr;
1420
1421repeat:
1422 vaddr = reloc_vaddr(vma, eb,
1423 offset >> PAGE_SHIFT12);
1424 if (IS_ERR(vaddr))
1425 return PTR_ERR(vaddr);
1426
1427 GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)))((void)0);
1428 clflush_write32(vaddr + offset_in_page(offset)((vaddr_t)(offset) & ((1 << 12) - 1)),
1429 lower_32_bits(target_addr)((u32)(target_addr)),
1430 eb->reloc_cache.vaddr);
1431
1432 if (wide) {
1433 offset += sizeof(u32);
1434 target_addr >>= 32;
1435 wide = false0;
1436 goto repeat;
1437 }
1438
1439 return target->node.start | UPDATE(1ULL << (7));
1440}
1441
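relocate_entry() above emits a 64-bit relocation as two 32-bit writes by looping once more with `wide` cleared and the offset advanced by four bytes. A minimal standalone sketch of that split, with write_dword() as a hypothetical stand-in for the flushed dword write performed by clflush_write32():

#include <stdint.h>
#include <string.h>

/* Store one little-endian dword into a mapped page at the given offset. */
static void write_dword(uint8_t *page, size_t offset, uint32_t v)
{
	memcpy(page + offset, &v, sizeof(v));
}

/* Low half at the relocation offset, high half four bytes later. */
static void write_reloc64(uint8_t *page, size_t offset, uint64_t target)
{
	write_dword(page, offset, (uint32_t)target);
	write_dword(page, offset + sizeof(uint32_t), (uint32_t)(target >> 32));
}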
1442static u64
1443eb_relocate_entry(struct i915_execbuffer *eb,
1444 struct eb_vma *ev,
1445 const struct drm_i915_gem_relocation_entry *reloc)
1446{
1447 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
1448 struct eb_vma *target;
1449 int err;
1450
1451 /* we already hold a reference to all valid objects */
1452 target = eb_get_vma(eb, reloc->target_handle);
1453 if (unlikely(!target)__builtin_expect(!!(!target), 0))
1454 return -ENOENT2;
1455
1456 /* Validate that the target is in a valid r/w GPU domain */
1457 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))__builtin_expect(!!(reloc->write_domain & (reloc->write_domain
- 1)), 0)
) {
1458 drm_dbg(&i915->drm, "reloc with multiple write domains: "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1459 "target %d offset %d "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1460 "read %08x write %08x",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1461 reloc->target_handle,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1462 (int) reloc->offset,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1463 reloc->read_domains,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1464 reloc->write_domain)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
;
1465 return -EINVAL22;
1466 }
1467 if (unlikely((reloc->write_domain | reloc->read_domains)__builtin_expect(!!((reloc->write_domain | reloc->read_domains
) & ~(0x00000002 | 0x00000004 | 0x00000008 | 0x00000010 |
0x00000020)), 0)
1468 & ~I915_GEM_GPU_DOMAINS)__builtin_expect(!!((reloc->write_domain | reloc->read_domains
) & ~(0x00000002 | 0x00000004 | 0x00000008 | 0x00000010 |
0x00000020)), 0)
) {
1469 drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1470 "target %d offset %d "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1471 "read %08x write %08x",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1472 reloc->target_handle,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1473 (int) reloc->offset,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1474 reloc->read_domains,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1475 reloc->write_domain)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
;
1476 return -EINVAL22;
1477 }
1478
1479 if (reloc->write_domain) {
1480 target->flags |= EXEC_OBJECT_WRITE(1<<2);
1481
1482 /*
1483 * Sandybridge PPGTT errata: We need a global gtt mapping
1484 * for MI and pipe_control writes because the gpu doesn't
1485 * properly redirect them through the ppgtt for non_secure
1486 * batchbuffers.
1487 */
1488 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION0x00000010 &&
1489 GRAPHICS_VER(eb->i915)((&(eb->i915)->__runtime)->graphics.ip.ver) == 6 &&
1490 !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND((int)(1UL << (10))))) {
1491 struct i915_vma *vma = target->vma;
1492
1493 reloc_cache_unmap(&eb->reloc_cache);
1494 mutex_lock(&vma->vm->mutex)rw_enter_write(&vma->vm->mutex);
1495 err = i915_vma_bind(target->vma,
1496 target->vma->obj->cache_level,
1497 PIN_GLOBAL(1ULL << (10)), NULL((void *)0), NULL((void *)0));
1498 mutex_unlock(&vma->vm->mutex)rw_exit_write(&vma->vm->mutex);
1499 reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
1500 if (err)
1501 return err;
1502 }
1503 }
1504
1505 /*
1506 * If the relocation already has the right value in it, no
1507 * more work needs to be done.
1508 */
1509 if (!DBG_FORCE_RELOC0 &&
1510 gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
1511 return 0;
1512
1513 /* Check that the relocation address is valid... */
1514 if (unlikely(reloc->offset >__builtin_expect(!!(reloc->offset > ev->vma->size
- (eb->reloc_cache.use_64bit_reloc ? 8 : 4)), 0)
1515 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))__builtin_expect(!!(reloc->offset > ev->vma->size
- (eb->reloc_cache.use_64bit_reloc ? 8 : 4)), 0)
) {
1516 drm_dbg(&i915->drm, "Relocation beyond object bounds: "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1517 "target %d offset %d size %d.\n",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1518 reloc->target_handle,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1519 (int)reloc->offset,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1520 (int)ev->vma->size)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
;
1521 return -EINVAL22;
1522 }
1523 if (unlikely(reloc->offset & 3)__builtin_expect(!!(reloc->offset & 3), 0)) {
1524 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1525 "target %d offset %d.\n",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1526 reloc->target_handle,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1527 (int)reloc->offset)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
;
1528 return -EINVAL22;
1529 }
1530
1531 /*
1532 * If we write into the object, we need to force the synchronisation
1533 * barrier, either with an asynchronous clflush or if we executed the
1534 * patching using the GPU (though that should be serialised by the
1535 * timeline). To be completely sure, and since we are required to
1536 * do relocations we are already stalling, disable the user's opt
1537 * out of our synchronisation.
1538 */
1539 ev->flags &= ~EXEC_OBJECT_ASYNC(1<<6);
1540
1541 /* and update the user's relocation entry */
1542 return relocate_entry(ev->vma, reloc, eb, target->vma);
1543}
1544
1545static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
1546{
1547#define N_RELOC(x)((x) / sizeof(struct drm_i915_gem_relocation_entry)) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1548 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)((512) / sizeof(struct drm_i915_gem_relocation_entry))];
1549 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1550 struct drm_i915_gem_relocation_entry __user *urelocs =
1551 u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1552 unsigned long remain = entry->relocation_count;
1553
1554 if (unlikely(remain > N_RELOC(ULONG_MAX))__builtin_expect(!!(remain > ((0xffffffffffffffffUL) / sizeof
(struct drm_i915_gem_relocation_entry))), 0)
)
1555 return -EINVAL22;
1556
1557 /*
1558 * We must check that the entire relocation array is safe
1559 * to read. However, if the array is not writable the user loses
1560 * the updated relocation values.
1561 */
1562 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))__builtin_expect(!!(!access_ok(urelocs, remain * sizeof(*urelocs
))), 0)
)
1563 return -EFAULT14;
1564
1565 do {
1566 struct drm_i915_gem_relocation_entry *r = stack;
1567 unsigned int count =
1568 min_t(unsigned long, remain, ARRAY_SIZE(stack))({ unsigned long __min_a = (remain); unsigned long __min_b = (
(sizeof((stack)) / sizeof((stack)[0]))); __min_a < __min_b
? __min_a : __min_b; })
;
1569 unsigned int copied;
1570
1571 /*
1572 * This is the fast path and we cannot handle a pagefault
1573 * whilst holding the struct mutex lest the user pass in the
1574 * relocations contained within a mmaped bo. In such a case
1575 * the page fault handler would call i915_gem_fault() and
1576 * we would try to acquire the struct mutex again. Obviously
1577 * this is bad and so lockdep complains vehemently.
1578 */
1579 pagefault_disable();
1580 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1581 pagefault_enable();
1582 if (unlikely(copied)__builtin_expect(!!(copied), 0)) {
1583 remain = -EFAULT14;
1584 goto out;
1585 }
1586
1587 remain -= count;
1588 do {
1589 u64 offset = eb_relocate_entry(eb, ev, r);
1590
1591 if (likely(offset == 0)__builtin_expect(!!(offset == 0), 1)) {
1592 } else if ((s64)offset < 0) {
1593 remain = (int)offset;
1594 goto out;
1595 } else {
1596 /*
1597 * Note that reporting an error now
1598 * leaves everything in an inconsistent
1599 * state as we have *already* changed
1600 * the relocation value inside the
1601 * object. As we have not changed the
1602 * reloc.presumed_offset or will not
1603 * change the execobject.offset, on the
1604 * call we may not rewrite the value
1605 * inside the object, leaving it
1606 * dangling and causing a GPU hang. Unless
1607 * userspace dynamically rebuilds the
1608 * relocations on each execbuf rather than
1609 * presume a static tree.
1610 *
1611 * We did previously check if the relocations
1612 * were writable (access_ok), an error now
1613 * would be a strange race with mprotect,
1614 * having already demonstrated that we
1615 * can read from this userspace address.
1616 */
1617 offset = gen8_canonical_addr(offset & ~UPDATE(1ULL << (7)));
1618 __put_user(offset,({ __typeof(((offset))) __tmp = ((offset)); -copyout(&(__tmp
), (&urelocs[r - stack].presumed_offset), sizeof(__tmp));
})
1619 &urelocs[r - stack].presumed_offset)({ __typeof(((offset))) __tmp = ((offset)); -copyout(&(__tmp
), (&urelocs[r - stack].presumed_offset), sizeof(__tmp));
})
;
1620 }
1621 } while (r++, --count);
1622 urelocs += ARRAY_SIZE(stack)(sizeof((stack)) / sizeof((stack)[0]));
1623 } while (remain);
1624out:
1625 reloc_cache_reset(&eb->reloc_cache, eb);
1626 return remain;
1627}
1628
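eb_relocate_vma() above streams an arbitrarily large user array through a 512-byte stack buffer so that each batch can be copied with page faults disabled. The following is a simplified userspace sketch of the same batching shape, where process_one() and a plain memcpy() are placeholders for eb_relocate_entry() and the inatomic user copy:

#include <stddef.h>
#include <string.h>

struct reloc_entry {
	unsigned long long offset;
	unsigned long long presumed_offset;
};

/* Placeholder for per-entry processing (eb_relocate_entry() in the listing). */
static int process_one(const struct reloc_entry *r)
{
	(void)r;
	return 0;
}

/* Walk a large array in fixed-size batches staged through a small stack buffer. */
static int process_all(const struct reloc_entry *src, size_t remain)
{
	struct reloc_entry stack[512 / sizeof(struct reloc_entry)];
	const size_t batch = sizeof(stack) / sizeof(stack[0]);

	while (remain) {
		size_t count = remain < batch ? remain : batch;
		size_t i;

		memcpy(stack, src, count * sizeof(stack[0]));
		for (i = 0; i < count; i++) {
			int err = process_one(&stack[i]);

			if (err)
				return err;
		}
		src += count;
		remain -= count;
	}
	return 0;
}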
1629static int
1630eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
1631{
1632 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1633 struct drm_i915_gem_relocation_entry *relocs =
1634 u64_to_ptr(typeof(*relocs), entry->relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(entry->relocs_ptr); }
)
;
1635 unsigned int i;
1636 int err;
1637
1638 for (i = 0; i < entry->relocation_count; i++) {
1639 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
1640
1641 if ((s64)offset < 0) {
1642 err = (int)offset;
1643 goto err;
1644 }
1645 }
1646 err = 0;
1647err:
1648 reloc_cache_reset(&eb->reloc_cache, eb);
1649 return err;
1650}
1651
1652static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1653{
1654 const char __user *addr, *end;
1655 unsigned long size;
1656 char __maybe_unused__attribute__((__unused__)) c;
1657
1658 size = entry->relocation_count;
1659 if (size == 0)
1660 return 0;
1661
1662 if (size > N_RELOC(ULONG_MAX)((0xffffffffffffffffUL) / sizeof(struct drm_i915_gem_relocation_entry
))
)
1663 return -EINVAL22;
1664
1665 addr = u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1666 size *= sizeof(struct drm_i915_gem_relocation_entry);
1667 if (!access_ok(addr, size))
1668 return -EFAULT14;
1669
1670 end = addr + size;
1671 for (; addr < end; addr += PAGE_SIZE(1 << 12)) {
1672 int err = __get_user(c, addr)-copyin((addr), &((c)), sizeof((c)));
1673 if (err)
1674 return err;
1675 }
1676 return __get_user(c, end - 1)-copyin((end - 1), &((c)), sizeof((c)));
1677}
1678
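check_relocations() above prefaults the user range by reading one byte per page (plus the final byte) so that a later no-pagefault section can rely on the pages being resident. A minimal userspace sketch of the same idea, assuming a 4 KiB page size and a volatile read in place of __get_user(); a kernel version would also have to propagate a fault as an error:

#include <stddef.h>

#define PREFAULT_PAGE_SIZE 4096UL

static int prefault_range(const char *addr, size_t size)
{
	const char *end = addr + size;

	if (size == 0)
		return 0;

	/* Touch one byte per page, then the very last byte of the range. */
	for (; addr < end; addr += PREFAULT_PAGE_SIZE)
		(void)*(const volatile char *)addr;
	(void)*(const volatile char *)(end - 1);
	return 0;
}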
1679static int eb_copy_relocations(const struct i915_execbuffer *eb)
1680{
1681 struct drm_i915_gem_relocation_entry *relocs;
1682 const unsigned int count = eb->buffer_count;
1683 unsigned int i;
1684 int err;
1685
1686 for (i = 0; i < count; i++) {
1687 const unsigned int nreloc = eb->exec[i].relocation_count;
1688 struct drm_i915_gem_relocation_entry __user *urelocs;
1689 unsigned long size;
1690 unsigned long copied;
1691
1692 if (nreloc == 0)
1693 continue;
1694
1695 err = check_relocations(&eb->exec[i]);
1696 if (err)
1697 goto err;
1698
1699 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr)((void *)(uintptr_t)(eb->exec[i].relocs_ptr));
1700 size = nreloc * sizeof(*relocs);
1701
1702 relocs = kvmalloc_array(size, 1, GFP_KERNEL(0x0001 | 0x0004));
1703 if (!relocs) {
1704 err = -ENOMEM12;
1705 goto err;
1706 }
1707
1708 /* copy_from_user is limited to < 4GiB */
1709 copied = 0;
1710 do {
1711 unsigned int len =
1712 min_t(u64, BIT_ULL(31), size - copied)({ u64 __min_a = ((1ULL << (31))); u64 __min_b = (size -
copied); __min_a < __min_b ? __min_a : __min_b; })
;
1713
1714 if (__copy_from_user((char *)relocs + copied,
1715 (char __user *)urelocs + copied,
1716 len))
1717 goto end;
1718
1719 copied += len;
1720 } while (copied < size);
1721
1722 /*
1723 * As we do not update the known relocation offsets after
1724 * relocating (due to the complexities in lock handling),
1725 * we need to mark them as invalid now so that we force the
1726 * relocation processing next time. Just in case the target
1727 * object is evicted and then rebound into its old
1728 * presumed_offset before the next execbuffer - if that
1729 * happened we would make the mistake of assuming that the
1730 * relocations were valid.
1731 */
1732 if (!user_access_begin(urelocs, size)access_ok(urelocs, size))
1733 goto end;
1734
1735 for (copied = 0; copied < nreloc; copied++)
1736 unsafe_put_user(-1,({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
1737 &urelocs[copied].presumed_offset,({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
1738 end_user)({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
;
1739 user_access_end();
1740
1741 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1742 }
1743
1744 return 0;
1745
1746end_user:
1747 user_access_end();
1748end:
1749 kvfree(relocs);
1750 err = -EFAULT14;
1751err:
1752 while (i--) {
1753 relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(eb->exec[i].relocs_ptr
); })
;
1754 if (eb->exec[i].relocation_count)
1755 kvfree(relocs);
1756 }
1757 return err;
1758}
1759
1760static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1761{
1762 const unsigned int count = eb->buffer_count;
1763 unsigned int i;
1764
1765 for (i = 0; i < count; i++) {
1766 int err;
1767
1768 err = check_relocations(&eb->exec[i]);
1769 if (err)
1770 return err;
1771 }
1772
1773 return 0;
1774}
1775
1776static int eb_reinit_userptr(struct i915_execbuffer *eb)
1777{
1778 const unsigned int count = eb->buffer_count;
1779 unsigned int i;
1780 int ret;
1781
1782 if (likely(!(eb->args->flags & __EXEC_USERPTR_USED))__builtin_expect(!!(!(eb->args->flags & (1UL <<
(29)))), 1)
)
1783 return 0;
1784
1785 for (i = 0; i < count; i++) {
1786 struct eb_vma *ev = &eb->vma[i];
1787
1788 if (!i915_gem_object_is_userptr(ev->vma->obj))
1789 continue;
1790
1791 ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
1792 if (ret)
1793 return ret;
1794
1795 ev->flags |= __EXEC_OBJECT_USERPTR_INIT(1UL << (28));
1796 }
1797
1798 return 0;
1799}
1800
1801static noinline__attribute__((__noinline__)) int eb_relocate_parse_slow(struct i915_execbuffer *eb)
1802{
1803 bool_Bool have_copy = false0;
1804 struct eb_vma *ev;
1805 int err = 0;
1806
1807repeat:
1808 if (signal_pending(current)(((({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" :
"=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_curproc)->p_siglist | (({struct cpu_info
*__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof
(struct cpu_info, ci_self))); __ci;})->ci_curproc)->p_p
->ps_siglist) & ~(({struct cpu_info *__ci; asm volatile
("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct
cpu_info, ci_self))); __ci;})->ci_curproc)->p_sigmask)
) {
1809 err = -ERESTARTSYS4;
1810 goto out;
1811 }
1812
1813 /* We may process another execbuffer during the unlock... */
1814 eb_release_vmas(eb, false0);
1815 i915_gem_ww_ctx_fini(&eb->ww);
1816
1817 /*
1818 * We take 3 passes through the slowpath.
1819 *
1820 * 1 - we try to just prefault all the user relocation entries and
1821 * then attempt to reuse the atomic pagefault disabled fast path again.
1822 *
1823 * 2 - we copy the user entries to a local buffer here outside of the
1824 * lock and allow ourselves to wait upon any rendering before
1825 * relocations
1826 *
1827 * 3 - we already have a local copy of the relocation entries, but
1828 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1829 */
1830 if (!err) {
1831 err = eb_prefault_relocations(eb);
1832 } else if (!have_copy) {
1833 err = eb_copy_relocations(eb);
1834 have_copy = err == 0;
1835 } else {
1836 cond_resched()do { if (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0"
: "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_schedstate.spc_schedflags & 0x0002) yield
(); } while (0)
;
1837 err = 0;
1838 }
1839
1840 if (!err)
1841 err = eb_reinit_userptr(eb);
1842
1843 i915_gem_ww_ctx_init(&eb->ww, true1);
1844 if (err)
1845 goto out;
1846
1847 /* reacquire the objects */
1848repeat_validate:
1849 err = eb_pin_engine(eb, false0);
1850 if (err)
1851 goto err;
1852
1853 err = eb_validate_vmas(eb);
1854 if (err)
1855 goto err;
1856
1857 GEM_BUG_ON(!eb->batches[0])((void)0);
1858
1859 list_for_each_entry(ev, &eb->relocs, reloc_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->reloc_link
) *__mptr = ((&eb->relocs)->next); (__typeof(*ev) *
)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), reloc_link
) );}); &ev->reloc_link != (&eb->relocs); ev = (
{ const __typeof( ((__typeof(*ev) *)0)->reloc_link ) *__mptr
= (ev->reloc_link.next); (__typeof(*ev) *)( (char *)__mptr
- __builtin_offsetof(__typeof(*ev), reloc_link) );}))
{
1860 if (!have_copy) {
1861 err = eb_relocate_vma(eb, ev);
1862 if (err)
1863 break;
1864 } else {
1865 err = eb_relocate_vma_slow(eb, ev);
1866 if (err)
1867 break;
1868 }
1869 }
1870
1871 if (err == -EDEADLK11)
1872 goto err;
1873
1874 if (err && !have_copy)
1875 goto repeat;
1876
1877 if (err)
1878 goto err;
1879
1880 /* as last step, parse the command buffer */
1881 err = eb_parse(eb);
1882 if (err)
1883 goto err;
1884
1885 /*
1886 * Leave the user relocations as they are, this is the painfully slow path,
1887 * and we want to avoid the complication of dropping the lock whilst
1888 * having buffers reserved in the aperture and so causing spurious
1889 * ENOSPC for random operations.
1890 */
1891
1892err:
1893 if (err == -EDEADLK11) {
1894 eb_release_vmas(eb, false0);
1895 err = i915_gem_ww_ctx_backoff(&eb->ww);
1896 if (!err)
1897 goto repeat_validate;
1898 }
1899
1900 if (err == -EAGAIN35)
1901 goto repeat;
1902
1903out:
1904 if (have_copy) {
1905 const unsigned int count = eb->buffer_count;
1906 unsigned int i;
1907
1908 for (i = 0; i < count; i++) {
1909 const struct drm_i915_gem_exec_object2 *entry =
1910 &eb->exec[i];
1911 struct drm_i915_gem_relocation_entry *relocs;
1912
1913 if (!entry->relocation_count)
1914 continue;
1915
1916 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(entry->relocs_ptr); }
)
;
1917 kvfree(relocs);
1918 }
1919 }
1920
1921 return err;
1922}
1923
1924static int eb_relocate_parse(struct i915_execbuffer *eb)
1925{
1926 int err;
1927 bool_Bool throttle = true1;
1928
1929retry:
1930 err = eb_pin_engine(eb, throttle);
1931 if (err) {
1932 if (err != -EDEADLK11)
1933 return err;
1934
1935 goto err;
1936 }
1937
1938 /* only throttle once, even if we didn't need to throttle */
1939 throttle = false0;
1940
1941 err = eb_validate_vmas(eb);
1942 if (err == -EAGAIN35)
1943 goto slow;
1944 else if (err)
1945 goto err;
1946
1947 /* The objects are in their final locations, apply the relocations. */
1948 if (eb->args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
1949 struct eb_vma *ev;
1950
1951 list_for_each_entry(ev, &eb->relocs, reloc_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->reloc_link
) *__mptr = ((&eb->relocs)->next); (__typeof(*ev) *
)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), reloc_link
) );}); &ev->reloc_link != (&eb->relocs); ev = (
{ const __typeof( ((__typeof(*ev) *)0)->reloc_link ) *__mptr
= (ev->reloc_link.next); (__typeof(*ev) *)( (char *)__mptr
- __builtin_offsetof(__typeof(*ev), reloc_link) );}))
{
1952 err = eb_relocate_vma(eb, ev);
1953 if (err)
1954 break;
1955 }
1956
1957 if (err == -EDEADLK11)
1958 goto err;
1959 else if (err)
1960 goto slow;
1961 }
1962
1963 if (!err)
1964 err = eb_parse(eb);
1965
1966err:
1967 if (err == -EDEADLK11) {
1968 eb_release_vmas(eb, false0);
1969 err = i915_gem_ww_ctx_backoff(&eb->ww);
1970 if (!err)
1971 goto retry;
1972 }
1973
1974 return err;
1975
1976slow:
1977 err = eb_relocate_parse_slow(eb);
1978 if (err)
1979 /*
1980 * If the user expects the execobject.offset and
1981 * reloc.presumed_offset to be an exact match,
1982 * as for using NO_RELOC, then we cannot update
1983 * the execobject.offset until we have completed
1984 * relocation.
1985 */
1986 eb->args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
1987
1988 return err;
1989}
1990
1991/*
1992 * Using two helper loops for the order in which requests / batches are created
1993 * and added to the backend. Requests are created in order from the parent to
1994 * the last child. Requests are added in the reverse order, from the last child
1995 * to parent. This is done for locking reasons as the timeline lock is acquired
1996 * during request creation and released when the request is added to the
1997 * backend. To make lockdep happy (see intel_context_timeline_lock) this must be
1998 * the ordering.
1999 */
2000#define for_each_batch_create_order(_eb, _i)for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i)) \
2001 for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i))
2002#define for_each_batch_add_order(_eb, _i)extern char _ctassert[(!(!1)) ? 1 : -1 ] __attribute__((__unused__
)); for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i
))
\
2003 BUILD_BUG_ON(!typecheck(int, _i))extern char _ctassert[(!(!1)) ? 1 : -1 ] __attribute__((__unused__
))
; \
2004 for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
2005
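A compact illustration of the ordering described in the comment above: batches are created front to back, but their requests are added to the backend back to front. The helpers below are hypothetical placeholders; only the loop directions matter.

static void create_request(int i) { (void)i; }
static void add_request(int i)    { (void)i; }

static void run_batches(int num_batches)
{
	int i;

	for (i = 0; i < num_batches; i++)	/* parent first, children after */
		create_request(i);
	for (i = num_batches - 1; i >= 0; i--)	/* last child first, parent last */
		add_request(i);
}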
2006static struct i915_request *
2007eb_find_first_request_added(struct i915_execbuffer *eb)
2008{
2009 int i;
2010
2011 for_each_batch_add_order(eb, i)extern char _ctassert[(!(!1)) ? 1 : -1 ] __attribute__((__unused__
)); for ((i) = (eb)->num_batches - 1; (i) >= 0; --(i))
2012 if (eb->requests[i])
2013 return eb->requests[i];
2014
2015 GEM_BUG_ON("Request not found")((void)0);
2016
2017 return NULL((void *)0);
2018}
2019
2020#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1
2021
2022/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
2023static int eb_capture_stage(struct i915_execbuffer *eb)
2024{
2025 const unsigned int count = eb->buffer_count;
2026 unsigned int i = count, j;
2027
2028 while (i--) {
2029 struct eb_vma *ev = &eb->vma[i];
2030 struct i915_vma *vma = ev->vma;
2031 unsigned int flags = ev->flags;
2032
2033 if (!(flags & EXEC_OBJECT_CAPTURE(1<<7)))
2034 continue;
2035
2036 if (i915_gem_context_is_recoverable(eb->gem_context) &&
2037 (IS_DGFX(eb->i915)((&(eb->i915)->__info)->is_dgfx) || GRAPHICS_VER_FULL(eb->i915)(((&(eb->i915)->__runtime)->graphics.ip.ver) <<
8 | ((&(eb->i915)->__runtime)->graphics.ip.rel)
)
> IP_VER(12, 0)((12) << 8 | (0))))
2038 return -EINVAL22;
2039
2040 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2041 struct i915_capture_list *capture;
2042
2043 capture = kmalloc(sizeof(*capture), GFP_KERNEL(0x0001 | 0x0004));
2044 if (!capture)
2045 continue;
2046
2047 capture->next = eb->capture_lists[j];
2048 capture->vma_res = i915_vma_resource_get(vma->resource);
2049 eb->capture_lists[j] = capture;
2050 }
2051 }
2052
2053 return 0;
2054}
2055
2056/* Commit once we're in the critical path */
2057static void eb_capture_commit(struct i915_execbuffer *eb)
2058{
2059 unsigned int j;
2060
2061 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2062 struct i915_request *rq = eb->requests[j];
2063
2064 if (!rq)
2065 break;
2066
2067 rq->capture_list = eb->capture_lists[j];
2068 eb->capture_lists[j] = NULL((void *)0);
2069 }
2070}
2071
2072/*
2073 * Release anything that didn't get committed due to errors.
2074 * The capture_list will otherwise be freed at request retire.
2075 */
2076static void eb_capture_release(struct i915_execbuffer *eb)
2077{
2078 unsigned int j;
2079
2080 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2081 if (eb->capture_lists[j]) {
2082 i915_request_free_capture_list(eb->capture_lists[j]);
2083 eb->capture_lists[j] = NULL((void *)0);
2084 }
2085 }
2086}
2087
2088static void eb_capture_list_clear(struct i915_execbuffer *eb)
2089{
2090 memset(eb->capture_lists, 0, sizeof(eb->capture_lists))__builtin_memset((eb->capture_lists), (0), (sizeof(eb->
capture_lists)))
;
2091}
10. Returning without writing to 'eb->buckets', which participates in a condition later
2092
2093#else
2094
2095static int eb_capture_stage(struct i915_execbuffer *eb)
2096{
2097 return 0;
2098}
2099
2100static void eb_capture_commit(struct i915_execbuffer *eb)
2101{
2102}
2103
2104static void eb_capture_release(struct i915_execbuffer *eb)
2105{
2106}
2107
2108static void eb_capture_list_clear(struct i915_execbuffer *eb)
2109{
2110}
2111
2112#endif
2113
2114static int eb_move_to_gpu(struct i915_execbuffer *eb)
2115{
2116 const unsigned int count = eb->buffer_count;
2117 unsigned int i = count;
2118 int err = 0, j;
2119
2120 while (i--) {
2121 struct eb_vma *ev = &eb->vma[i];
2122 struct i915_vma *vma = ev->vma;
2123 unsigned int flags = ev->flags;
2124 struct drm_i915_gem_object *obj = vma->obj;
2125
2126 assert_vma_held(vma)do { (void)(&((vma)->obj->base.resv)->lock.base)
; } while(0)
;
2127
2128 /*
2129 * If the GPU is not _reading_ through the CPU cache, we need
2130 * to make sure that any writes (both previous GPU writes from
2131 * before a change in snooping levels and normal CPU writes)
2132 * caught in that cache are flushed to main memory.
2133 *
2134 * We want to say
2135 * obj->cache_dirty &&
2136 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
2137 * but gcc's optimiser doesn't handle that as well and emits
2138 * two jumps instead of one. Maybe one day...
2139 *
2140 * FIXME: There is also sync flushing in set_pages(), which
2141 * serves a different purpose (some of the time at least).
2142 *
2143 * We should consider:
2144 *
2145 * 1. Rip out the async flush code.
2146 *
2147 * 2. Or make the sync flushing use the async clflush path
2148 * using mandatory fences underneath. Currently the below
2149 * async flush happens after we bind the object.
2150 */
2151 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)__builtin_expect(!!(obj->cache_dirty & ~obj->cache_coherent
), 0)
) {
2152 if (i915_gem_clflush_object(obj, 0))
2153 flags &= ~EXEC_OBJECT_ASYNC(1<<6);
2154 }
2155
2156 /* We only need to await on the first request */
2157 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC(1<<6))) {
2158 err = i915_request_await_object
2159 (eb_find_first_request_added(eb), obj,
2160 flags & EXEC_OBJECT_WRITE(1<<2));
2161 }
2162
2163 for_each_batch_add_order(eb, j)extern char _ctassert[(!(!1)) ? 1 : -1 ] __attribute__((__unused__
)); for ((j) = (eb)->num_batches - 1; (j) >= 0; --(j))
{
2164 if (err)
2165 break;
2166 if (!eb->requests[j])
2167 continue;
2168
2169 err = _i915_vma_move_to_active(vma, eb->requests[j],
2170 j ? NULL((void *)0) :
2171 eb->composite_fence ?
2172 eb->composite_fence :
2173 &eb->requests[j]->fence,
2174 flags | __EXEC_OBJECT_NO_RESERVE(1UL << (31)));
2175 }
2176 }
2177
2178#ifdef CONFIG_MMU_NOTIFIER
2179 if (!err && (eb->args->flags & __EXEC_USERPTR_USED(1UL << (29)))) {
2180 read_lock(&eb->i915->mm.notifier_lock)mtx_enter(&eb->i915->mm.notifier_lock);
2181
2182 /*
2183 * count is always at least 1, otherwise __EXEC_USERPTR_USED
2184 * could not have been set
2185 */
2186 for (i = 0; i < count; i++) {
2187 struct eb_vma *ev = &eb->vma[i];
2188 struct drm_i915_gem_object *obj = ev->vma->obj;
2189
2190 if (!i915_gem_object_is_userptr(obj))
2191 continue;
2192
2193 err = i915_gem_object_userptr_submit_done(obj);
2194 if (err)
2195 break;
2196 }
2197
2198 read_unlock(&eb->i915->mm.notifier_lock)mtx_leave(&eb->i915->mm.notifier_lock);
2199 }
2200#endif
2201
2202 if (unlikely(err)__builtin_expect(!!(err), 0))
2203 goto err_skip;
2204
2205 /* Unconditionally flush any chipset caches (for streaming writes). */
2206 intel_gt_chipset_flush(eb->gt);
2207 eb_capture_commit(eb);
2208
2209 return 0;
2210
2211err_skip:
2212 for_each_batch_create_order(eb, j)for ((j) = 0; (j) < (eb)->num_batches; ++(j)) {
2213 if (!eb->requests[j])
2214 break;
2215
2216 i915_request_set_error_once(eb->requests[j], err);
2217 }
2218 return err;
2219}
2220
2221static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
2222{
2223 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS((-((1 << 21) << 1)) | (3<<6) | (1<<15
))
)
2224 return -EINVAL22;
2225
2226 /* Kernel clipping was a DRI1 misfeature */
2227 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY(1<<19) |
2228 I915_EXEC_USE_EXTENSIONS(1 << 21)))) {
2229 if (exec->num_cliprects || exec->cliprects_ptr)
2230 return -EINVAL22;
2231 }
2232
2233 if (exec->DR4 == 0xffffffff) {
2234 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n")___drm_dbg(((void *)0), DRM_UT_CORE, "UXA submitting garbage DR4, fixing up\n"
)
;
2235 exec->DR4 = 0;
2236 }
2237 if (exec->DR1 || exec->DR4)
2238 return -EINVAL22;
2239
2240 if ((exec->batch_start_offset | exec->batch_len) & 0x7)
2241 return -EINVAL22;
2242
2243 return 0;
2244}
2245
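The final test in i915_gem_check_execbuffer() above relies on OR-folding: a set low bit in either value survives the OR, so both the batch start offset and the batch length must be 8-byte aligned for the mask to come out zero. A small sketch of that check in isolation:

#include <stdint.h>
#include <stdbool.h>

/* True only when both values are multiples of 8. */
static bool batch_aligned(uint32_t batch_start_offset, uint32_t batch_len)
{
	return ((batch_start_offset | batch_len) & 0x7) == 0;
}

For example, batch_aligned(64, 4096) passes while batch_aligned(4, 4096) does not.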
2246static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
2247{
2248 u32 *cs;
2249 int i;
2250
2251 if (GRAPHICS_VER(rq->engine->i915)((&(rq->engine->i915)->__runtime)->graphics.ip
.ver)
!= 7 || rq->engine->id != RCS0) {
2252 drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n")__drm_dev_dbg(((void *)0), (&rq->engine->i915->drm
) ? (&rq->engine->i915->drm)->dev : ((void *)
0), DRM_UT_DRIVER, "sol reset is gen7/rcs only\n")
;
2253 return -EINVAL22;
2254 }
2255
2256 cs = intel_ring_begin(rq, 4 * 2 + 2);
2257 if (IS_ERR(cs))
2258 return PTR_ERR(cs);
2259
2260 *cs++ = MI_LOAD_REGISTER_IMM(4)(((0x0) << 29) | (0x22) << 23 | (2*(4)-1));
2261 for (i = 0; i < 4; i++) {
2262 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)((const i915_reg_t){ .reg = (0x5280 + (i) * 4) }));
2263 *cs++ = 0;
2264 }
2265 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
2266 intel_ring_advance(rq, cs);
2267
2268 return 0;
2269}
2270
2271static struct i915_vma *
2272shadow_batch_pin(struct i915_execbuffer *eb,
2273 struct drm_i915_gem_object *obj,
2274 struct i915_address_space *vm,
2275 unsigned int flags)
2276{
2277 struct i915_vma *vma;
2278 int err;
2279
2280 vma = i915_vma_instance(obj, vm, NULL((void *)0));
2281 if (IS_ERR(vma))
2282 return vma;
2283
2284 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE(1ULL << (8)));
2285 if (err)
2286 return ERR_PTR(err);
2287
2288 return vma;
2289}
2290
2291static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
2292{
2293 /*
2294 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2295 * batch" bit. Hence we need to pin secure batches into the global gtt.
2296 * hsw should have this fixed, but bdw mucks it up again. */
2297 if (eb->batch_flags & I915_DISPATCH_SECURE(1UL << (0)))
2298 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL((void *)0), 0, 0, PIN_VALIDATE(1ULL << (8)));
2299
2300 return NULL((void *)0);
2301}
2302
2303static int eb_parse(struct i915_execbuffer *eb)
2304{
2305 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2306 struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
2307 struct i915_vma *shadow, *trampoline, *batch;
2308 unsigned long len;
2309 int err;
2310
2311 if (!eb_use_cmdparser(eb)) {
2312 batch = eb_dispatch_secure(eb, eb->batches[0]->vma);
2313 if (IS_ERR(batch))
2314 return PTR_ERR(batch);
2315
2316 goto secure_batch;
2317 }
2318
2319 if (intel_context_is_parallel(eb->context))
2320 return -EINVAL22;
2321
2322 len = eb->batch_len[0];
2323 if (!CMDPARSER_USES_GGTT(eb->i915)(((&(eb->i915)->__runtime)->graphics.ip.ver) == 7
)
) {
2324 /*
2325 * ppGTT backed shadow buffers must be mapped RO, to prevent
2326 * post-scan tampering
2327 */
2328 if (!eb->context->vm->has_read_only) {
2329 drm_dbg(&i915->drm,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Cannot prevent post-scan tampering without RO capable vm\n"
)
2330 "Cannot prevent post-scan tampering without RO capable vm\n")__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "Cannot prevent post-scan tampering without RO capable vm\n"
)
;
2331 return -EINVAL22;
2332 }
2333 } else {
2334 len += I915_CMD_PARSER_TRAMPOLINE_SIZE8;
2335 }
2336 if (unlikely(len < eb->batch_len[0])__builtin_expect(!!(len < eb->batch_len[0]), 0)) /* last paranoid check of overflow */
2337 return -EINVAL22;
2338
2339 if (!pool) {
2340 pool = intel_gt_get_buffer_pool(eb->gt, len,
2341 I915_MAP_WB);
2342 if (IS_ERR(pool))
2343 return PTR_ERR(pool);
2344 eb->batch_pool = pool;
2345 }
2346
2347 err = i915_gem_object_lock(pool->obj, &eb->ww);
2348 if (err)
2349 return err;
2350
2351 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER(1ULL << (11)));
2352 if (IS_ERR(shadow))
2353 return PTR_ERR(shadow);
2354
2355 intel_gt_buffer_pool_mark_used(pool);
2356 i915_gem_object_set_readonly(shadow->obj);
2357 shadow->private = pool;
2358
2359 trampoline = NULL((void *)0);
2360 if (CMDPARSER_USES_GGTT(eb->i915)(((&(eb->i915)->__runtime)->graphics.ip.ver) == 7
)
) {
2361 trampoline = shadow;
2362
2363 shadow = shadow_batch_pin(eb, pool->obj,
2364 &eb->gt->ggtt->vm,
2365 PIN_GLOBAL(1ULL << (10)));
2366 if (IS_ERR(shadow))
2367 return PTR_ERR(shadow);
2368
2369 shadow->private = pool;
2370
2371 eb->batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
2372 }
2373
2374 batch = eb_dispatch_secure(eb, shadow);
2375 if (IS_ERR(batch))
2376 return PTR_ERR(batch);
2377
2378 err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
2379 if (err)
2380 return err;
2381
2382 err = intel_engine_cmd_parser(eb->context->engine,
2383 eb->batches[0]->vma,
2384 eb->batch_start_offset,
2385 eb->batch_len[0],
2386 shadow, trampoline);
2387 if (err)
2388 return err;
2389
2390 eb->batches[0] = &eb->vma[eb->buffer_count++];
2391 eb->batches[0]->vma = i915_vma_get(shadow);
2392 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN(1UL << (30));
2393
2394 eb->trampoline = trampoline;
2395 eb->batch_start_offset = 0;
2396
2397secure_batch:
2398 if (batch) {
2399 if (intel_context_is_parallel(eb->context))
2400 return -EINVAL22;
2401
2402 eb->batches[0] = &eb->vma[eb->buffer_count++];
2403 eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN(1UL << (30));
2404 eb->batches[0]->vma = i915_vma_get(batch);
2405 }
2406 return 0;
2407}
2408
2409static int eb_request_submit(struct i915_execbuffer *eb,
2410 struct i915_request *rq,
2411 struct i915_vma *batch,
2412 u64 batch_len)
2413{
2414 int err;
2415
2416 if (intel_context_nopreempt(rq->context))
2417 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
2418
2419 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET(1<<8)) {
2420 err = i915_reset_gen7_sol_offsets(rq);
2421 if (err)
2422 return err;
2423 }
2424
2425 /*
2426 * After we completed waiting for other engines (using HW semaphores)
2427 * then we can signal that this request/batch is ready to run. This
2428 * allows us to determine if the batch is still waiting on the GPU
2429 * or actually running by checking the breadcrumb.
2430 */
2431 if (rq->context->engine->emit_init_breadcrumb) {
2432 err = rq->context->engine->emit_init_breadcrumb(rq);
2433 if (err)
2434 return err;
2435 }
2436
2437 err = rq->context->engine->emit_bb_start(rq,
2438 batch->node.start +
2439 eb->batch_start_offset,
2440 batch_len,
2441 eb->batch_flags);
2442 if (err)
2443 return err;
2444
2445 if (eb->trampoline) {
2446 GEM_BUG_ON(intel_context_is_parallel(rq->context))((void)0);
2447 GEM_BUG_ON(eb->batch_start_offset)((void)0);
2448 err = rq->context->engine->emit_bb_start(rq,
2449 eb->trampoline->node.start +
2450 batch_len, 0, 0);
2451 if (err)
2452 return err;
2453 }
2454
2455 return 0;
2456}
2457
2458static int eb_submit(struct i915_execbuffer *eb)
2459{
2460 unsigned int i;
2461 int err;
2462
2463 err = eb_move_to_gpu(eb);
2464
2465 for_each_batch_create_order(eb, i)for ((i) = 0; (i) < (eb)->num_batches; ++(i)) {
2466 if (!eb->requests[i])
2467 break;
2468
2469 trace_i915_request_queue(eb->requests[i], eb->batch_flags);
2470 if (!err)
2471 err = eb_request_submit(eb, eb->requests[i],
2472 eb->batches[i]->vma,
2473 eb->batch_len[i]);
2474 }
2475
2476 return err;
2477}
2478
2479static int num_vcs_engines(struct drm_i915_privateinteldrm_softc *i915)
2480{
2481 return hweight_long(VDBOX_MASK(to_gt(i915))({ unsigned int first__ = (VCS0); unsigned int count__ = (8);
((to_gt(i915))->info.engine_mask & (((~0UL) >> (
64 - (first__ + count__ - 1) - 1)) & ((~0UL) << (first__
)))) >> first__; })
);
2482}
2483
2484/*
2485 * Find one BSD ring to dispatch the corresponding BSD command.
2486 * The engine index is returned.
2487 */
2488static unsigned int
2489gen8_dispatch_bsd_engine(struct drm_i915_privateinteldrm_softc *dev_priv,
2490 struct drm_file *file)
2491{
2492 struct drm_i915_file_private *file_priv = file->driver_priv;
2493
2494 /* Check whether the file_priv has already selected one ring. */
2495 if ((int)file_priv->bsd_engine < 0)
2496 file_priv->bsd_engine =
2497 prandom_u32_max(num_vcs_engines(dev_priv));
2498
2499 return file_priv->bsd_engine;
2500}
2501
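gen8_dispatch_bsd_engine() above picks a VCS engine at random on the first submission from a file and then keeps reusing that choice. A userspace sketch of the sticky selection, with rand() standing in for prandom_u32_max() and a plain struct in place of the file private state:

#include <stdlib.h>

struct file_engine_state {
	int bsd_engine;		/* -1 until the first submission */
};

static int pick_bsd_engine(struct file_engine_state *fs, int num_vcs)
{
	if (fs->bsd_engine < 0)
		fs->bsd_engine = rand() % num_vcs;	/* choose once */
	return fs->bsd_engine;				/* reuse afterwards */
}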
2502static const enum intel_engine_id user_ring_map[] = {
2503 [I915_EXEC_DEFAULT(0<<0)] = RCS0,
2504 [I915_EXEC_RENDER(1<<0)] = RCS0,
2505 [I915_EXEC_BLT(3<<0)] = BCS0,
2506 [I915_EXEC_BSD(2<<0)] = VCS0,
2507 [I915_EXEC_VEBOX(4<<0)] = VECS0
2508};
2509
2510static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
2511{
2512 struct intel_ring *ring = ce->ring;
2513 struct intel_timeline *tl = ce->timeline;
2514 struct i915_request *rq;
2515
2516 /*
2517 * Completely unscientific finger-in-the-air estimates for suitable
2518 * maximum user request size (to avoid blocking) and then backoff.
2519 */
2520 if (intel_ring_update_space(ring) >= PAGE_SIZE(1 << 12))
2521 return NULL((void *)0);
2522
2523 /*
2524 * Find a request such that, after waiting upon it, there will be at
2525 * least half the ring available. The hysteresis allows us to compete for the
2526 * shared ring and should mean that we sleep less often prior to
2527 * claiming our resources, but not so long that the ring completely
2528 * drains before we can submit our next request.
2529 */
2530 list_for_each_entry(rq, &tl->requests, link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *
__mptr = ((&tl->requests)->next); (__typeof(*rq) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );
}); &rq->link != (&tl->requests); rq = ({ const
__typeof( ((__typeof(*rq) *)0)->link ) *__mptr = (rq->
link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), link) );}))
{
2531 if (rq->ring != ring)
2532 continue;
2533
2534 if (__intel_ring_space(rq->postfix,
2535 ring->emit, ring->size) > ring->size / 2)
2536 break;
2537 }
2538 if (&rq->link == &tl->requests)
2539 return NULL((void *)0); /* weird, we will check again later for real */
2540
2541 return i915_request_get(rq);
2542}
2543
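eb_throttle() above does not wait for just enough ring space; it waits on the oldest request whose completion would leave at least half the ring free. A simplified sketch of that hysteresis over an array of request postfix offsets, with ring_space() as a rough stand-in for __intel_ring_space() (assuming a power-of-two ring size):

#include <stddef.h>

/* Approximate free bytes between the emit pointer and a request's postfix. */
static size_t ring_space(size_t postfix, size_t emit, size_t size)
{
	return (postfix - emit - 1) & (size - 1);
}

/* Return the index of the first request whose completion frees more than
 * half the ring, or -1 if no pending request qualifies. */
static int pick_throttle_target(const size_t *postfix, size_t nreq,
    size_t emit, size_t ring_size)
{
	size_t i;

	for (i = 0; i < nreq; i++) {
		if (ring_space(postfix[i], emit, ring_size) > ring_size / 2)
			return (int)i;
	}
	return -1;
}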
2544static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
2545 bool_Bool throttle)
2546{
2547 struct intel_timeline *tl;
2548 struct i915_request *rq = NULL((void *)0);
2549
2550 /*
2551 * Take a local wakeref for preparing to dispatch the execbuf as
2552 * we expect to access the hardware fairly frequently in the
2553 * process, and require the engine to be kept awake between accesses.
2554 * Upon dispatch, we acquire another prolonged wakeref that we hold
2555 * until the timeline is idle, which in turn releases the wakeref
2556 * taken on the engine, and the parent device.
2557 */
2558 tl = intel_context_timeline_lock(ce);
2559 if (IS_ERR(tl))
2560 return PTR_ERR(tl);
2561
2562 intel_context_enter(ce);
2563 if (throttle)
2564 rq = eb_throttle(eb, ce);
2565 intel_context_timeline_unlock(tl);
2566
2567 if (rq) {
2568#ifdef __linux__
2569 bool_Bool nonblock = eb->file->filp->f_flags & O_NONBLOCK0x0004;
2570#else
2571 bool_Bool nonblock = eb->file->filp->f_flag & FNONBLOCK0x0004;
2572#endif
2573 long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT(0x7fffffff);
2574
2575 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE(1UL << (0)),
2576 timeout) < 0) {
2577 i915_request_put(rq);
2578
2579 /*
2580 * Error path, cannot use intel_context_timeline_lock as
2581 * that is user interruptible and this clean up step
2582 * must be done.
2583 */
2584 mutex_lock(&ce->timeline->mutex)rw_enter_write(&ce->timeline->mutex);
2585 intel_context_exit(ce);
2586 mutex_unlock(&ce->timeline->mutex)rw_exit_write(&ce->timeline->mutex);
2587
2588 if (nonblock)
2589 return -EWOULDBLOCK35;
2590 else
2591 return -EINTR4;
2592 }
2593 i915_request_put(rq);
2594 }
2595
2596 return 0;
2597}
2598
2599static int eb_pin_engine(struct i915_execbuffer *eb, bool_Bool throttle)
2600{
2601 struct intel_context *ce = eb->context, *child;
2602 int err;
2603 int i = 0, j = 0;
2604
2605 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED)((void)0);
2606
2607 if (unlikely(intel_context_is_banned(ce))__builtin_expect(!!(intel_context_is_banned(ce)), 0))
2608 return -EIO5;
2609
2610 /*
2611 * Pinning the contexts may generate requests in order to acquire
2612 * GGTT space, so do this first before we reserve a seqno for
2613 * ourselves.
2614 */
2615 err = intel_context_pin_ww(ce, &eb->ww);
2616 if (err)
2617 return err;
2618 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
{
2619 err = intel_context_pin_ww(child, &eb->ww);
2620 GEM_BUG_ON(err)((void)0); /* perma-pinned should incr a counter */
2621 }
2622
2623 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
{
2624 err = eb_pin_timeline(eb, child, throttle);
2625 if (err)
2626 goto unwind;
2627 ++i;
2628 }
2629 err = eb_pin_timeline(eb, ce, throttle);
2630 if (err)
2631 goto unwind;
2632
2633 eb->args->flags |= __EXEC_ENGINE_PINNED(1UL << (30));
2634 return 0;
2635
2636unwind:
2637 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
{
2638 if (j++ < i) {
2639 mutex_lock(&child->timeline->mutex)rw_enter_write(&child->timeline->mutex);
2640 intel_context_exit(child);
2641 mutex_unlock(&child->timeline->mutex)rw_exit_write(&child->timeline->mutex);
2642 }
2643 }
2644 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
2645 intel_context_unpin(child);
2646 intel_context_unpin(ce);
2647 return err;
2648}
2649
2650static void eb_unpin_engine(struct i915_execbuffer *eb)
2651{
2652 struct intel_context *ce = eb->context, *child;
2653
2654 if (!(eb->args->flags & __EXEC_ENGINE_PINNED(1UL << (30))))
2655 return;
2656
2657 eb->args->flags &= ~__EXEC_ENGINE_PINNED(1UL << (30));
2658
2659 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
{
2660 mutex_lock(&child->timeline->mutex)rw_enter_write(&child->timeline->mutex);
2661 intel_context_exit(child);
2662 mutex_unlock(&child->timeline->mutex)rw_exit_write(&child->timeline->mutex);
2663
2664 intel_context_unpin(child);
2665 }
2666
2667 mutex_lock(&ce->timeline->mutex)rw_enter_write(&ce->timeline->mutex);
2668 intel_context_exit(ce);
2669 mutex_unlock(&ce->timeline->mutex)rw_exit_write(&ce->timeline->mutex);
2670
2671 intel_context_unpin(ce);
2672}
2673
2674static unsigned int
2675eb_select_legacy_ring(struct i915_execbuffer *eb)
2676{
2677 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2678 struct drm_i915_gem_execbuffer2 *args = eb->args;
2679 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK(0x3f);
2680
2681 if (user_ring_id != I915_EXEC_BSD(2<<0) &&
2682 (args->flags & I915_EXEC_BSD_MASK(3 << (13)))) {
2683 drm_dbg(&i915->drm,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
2684 "execbuf with non bsd ring but with invalid "__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
2685 "bsd dispatch flags: %d\n", (int)(args->flags))__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
;
2686 return -1;
2687 }
2688
2689 if (user_ring_id == I915_EXEC_BSD(2<<0) && num_vcs_engines(i915) > 1) {
2690 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK(3 << (13));
2691
2692 if (bsd_idx == I915_EXEC_BSD_DEFAULT(0 << (13))) {
2693 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
2694 } else if (bsd_idx >= I915_EXEC_BSD_RING1(1 << (13)) &&
2695 bsd_idx <= I915_EXEC_BSD_RING2(2 << (13))) {
2696 bsd_idx >>= I915_EXEC_BSD_SHIFT(13);
2697 bsd_idx--;
2698 } else {
2699 drm_dbg(&i915->drm,__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
2700 "execbuf with unknown bsd ring: %u\n",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
2701 bsd_idx)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
;
2702 return -1;
2703 }
2704
2705 return _VCS(bsd_idx)(VCS0 + (bsd_idx));
2706 }
2707
2708 if (user_ring_id >= ARRAY_SIZE(user_ring_map)(sizeof((user_ring_map)) / sizeof((user_ring_map)[0]))) {
2709 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
2710 user_ring_id)__drm_dev_dbg(((void *)0), (&i915->drm) ? (&i915->
drm)->dev : ((void *)0), DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
;
2711 return -1;
2712 }
2713
2714 return user_ring_map[user_ring_id];
2715}
2716
2717static int
2718eb_select_engine(struct i915_execbuffer *eb)
2719{
2720 struct intel_context *ce, *child;
2721 unsigned int idx;
2722 int err;
2723
2724 if (i915_gem_context_user_engines(eb->gem_context))
2725 idx = eb->args->flags & I915_EXEC_RING_MASK(0x3f);
2726 else
2727 idx = eb_select_legacy_ring(eb);
2728
2729 ce = i915_gem_context_get_engine(eb->gem_context, idx);
2730 if (IS_ERR(ce))
2731 return PTR_ERR(ce);
2732
2733 if (intel_context_is_parallel(ce)) {
2734 if (eb->buffer_count < ce->parallel.number_children + 1) {
2735 intel_context_put(ce);
2736 return -EINVAL22;
2737 }
2738 if (eb->batch_start_offset || eb->args->batch_len) {
2739 intel_context_put(ce);
2740 return -EINVAL22;
2741 }
2742 }
2743 eb->num_batches = ce->parallel.number_children + 1;
2744
2745 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
2746 intel_context_get(child);
2747 intel_gt_pm_get(ce->engine->gt);
2748
2749 if (!test_bit(CONTEXT_ALLOC_BIT1, &ce->flags)) {
2750 err = intel_context_alloc_state(ce);
2751 if (err)
2752 goto err;
2753 }
2754 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
{
2755 if (!test_bit(CONTEXT_ALLOC_BIT1, &child->flags)) {
2756 err = intel_context_alloc_state(child);
2757 if (err)
2758 goto err;
2759 }
2760 }
2761
2762 /*
2763 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2764 * EIO if the GPU is already wedged.
2765 */
2766 err = intel_gt_terminally_wedged(ce->engine->gt);
2767 if (err)
2768 goto err;
2769
2770 if (!i915_vm_tryget(ce->vm)) {
2771 err = -ENOENT2;
2772 goto err;
2773 }
2774
2775 eb->context = ce;
2776 eb->gt = ce->engine->gt;
2777
2778 /*
2779 * Make sure engine pool stays alive even if we call intel_context_put
2780 * during ww handling. The pool is destroyed when the last pm reference
2781 * is dropped, which breaks our -EDEADLK handling.
2782 */
2783 return err;
2784
2785err:
2786 intel_gt_pm_put(ce->engine->gt);
2787 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
2788 intel_context_put(child);
2789 intel_context_put(ce);
2790 return err;
2791}
2792
2793 static void
2794 eb_put_engine(struct i915_execbuffer *eb)
2795 {
2796 struct intel_context *child;
2797
2798 i915_vm_put(eb->context->vm);
2799 intel_gt_pm_put(eb->gt);
2800 for_each_child(eb->context, child)
2801 intel_context_put(child);
2802 intel_context_put(eb->context);
2803 }
2804
2805 static void
2806 __free_fence_array(struct eb_fence *fences, unsigned int n)
2807 {
2808 while (n--) {
2809 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
2810 dma_fence_put(fences[n].dma_fence);
2811 dma_fence_chain_free(fences[n].chain_fence);
2812 }
2813 kvfree(fences);
2814 }
2815
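/*
 * Timeline-fence extension: copy each drm_i915_gem_exec_fence and its u64
 * timeline value from userspace, resolve the syncobj handles, and append
 * them to eb->fences for the later wait/signal phases.
 */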
2816 static int
2817 add_timeline_fence_array(struct i915_execbuffer *eb,
2818 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
2819 {
2820 struct drm_i915_gem_exec_fence __user *user_fences;
2821 u64 __user *user_values;
2822 struct eb_fence *f;
2823 u64 nfences;
2824 int err = 0;
2825
2826 nfences = timeline_fences->fence_count;
2827 if (!nfences)
2828 return 0;
2829
2830 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2831 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2832 if (nfences > min_t(unsigned long,
2833 ULONG_MAX / sizeof(*user_fences),
2834 SIZE_MAX / sizeof(*f)) - eb->num_fences)
2835 return -EINVAL;
2836
2837 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
2838 if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
2839 return -EFAULT;
2840
2841 user_values = u64_to_user_ptr(timeline_fences->values_ptr);
2842 if (!access_ok(user_values, nfences * sizeof(*user_values)))
2843 return -EFAULT;
2844
2845 #ifdef __linux__
2846 f = krealloc(eb->fences,
2847 (eb->num_fences + nfences) * sizeof(*f),
2848 __GFP_NOWARN | GFP_KERNEL);
2849 if (!f)
2850 return -ENOMEM;
2851 #else
2852 f = kmalloc((eb->num_fences + nfences) * sizeof(*f),
2853 __GFP_NOWARN | GFP_KERNEL);
2854 if (!f)
2855 return -ENOMEM;
2856 memcpy(f, eb->fences, eb->num_fences * sizeof(*f));
2857 kfree(eb->fences);
2858 #endif
2859
2860 eb->fences = f;
2861 f += eb->num_fences;
2862
2863 #ifdef notyet
2864 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2865 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2866 #endif
2867
2868 while (nfences--) {
2869 struct drm_i915_gem_exec_fence user_fence;
2870 struct drm_syncobj *syncobj;
2871 struct dma_fence *fence = NULL;
2872 u64 point;
2873
2874 if (__copy_from_user(&user_fence,
2875 user_fences++,
2876 sizeof(user_fence)))
2877 return -EFAULT;
2878
2879 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
2880 return -EINVAL;
2881
2882 if (__get_user(point, user_values++))
2883 return -EFAULT;
2884
2885 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2886 if (!syncobj) {
2887 DRM_DEBUG("Invalid syncobj handle provided\n");
2888 return -ENOENT;
2889 }
2890
2891 fence = drm_syncobj_fence_get(syncobj);
2892
2893 if (!fence && user_fence.flags &&
2894 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2895 DRM_DEBUG("Syncobj handle has no fence\n");
2896 drm_syncobj_put(syncobj);
2897 return -EINVAL;
2898 }
2899
2900 if (fence)
2901 err = dma_fence_chain_find_seqno(&fence, point);
2902
2903 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2904 DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
2905 dma_fence_put(fence);
2906 drm_syncobj_put(syncobj);
2907 return err;
2908 }
2909
2910 /*
2911 * A point might have been signaled already and
2912 * garbage collected from the timeline. In this case
2913 * just ignore the point and carry on.
2914 */
2915 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
2916 drm_syncobj_put(syncobj);
2917 continue;
2918 }
2919
2920 /*
2921 * For timeline syncobjs we need to preallocate chains for
2922 * later signaling.
2923 */
2924 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
2925 /*
2926 * Waiting and signaling the same point (when point !=
2927 * 0) would break the timeline.
2928 */
2929 if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
2930 DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
2931 dma_fence_put(fence);
2932 drm_syncobj_put(syncobj);
2933 return -EINVAL;
2934 }
2935
2936 f->chain_fence = dma_fence_chain_alloc();
2937 if (!f->chain_fence) {
2938 drm_syncobj_put(syncobj);
2939 dma_fence_put(fence);
2940 return -ENOMEM;
2941 }
2942 } else {
2943 f->chain_fence = NULL;
2944 }
2945
2946 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
2947 f->dma_fence = fence;
2948 f->value = point;
2949 f++;
2950 eb->num_fences++;
2951 }
2952
2953 return 0;
2954 }
2955
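/*
 * Legacy I915_EXEC_FENCE_ARRAY path: cliprects_ptr carries a plain array of
 * drm_i915_gem_exec_fence entries (no timeline points); they are appended
 * to eb->fences just like the timeline extension above.
 */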
2956 static int add_fence_array(struct i915_execbuffer *eb)
2957 {
2958 struct drm_i915_gem_execbuffer2 *args = eb->args;
2959 struct drm_i915_gem_exec_fence __user *user;
2960 unsigned long num_fences = args->num_cliprects;
2961 struct eb_fence *f;
2962
2963 if (!(args->flags & I915_EXEC_FENCE_ARRAY))
27: Assuming the condition is false
28: Taking false branch
2964 return 0;
2965
2966 if (!num_fences)
29: Assuming 'num_fences' is 0
30: Taking true branch
2967 return 0;
31: Returning without writing to 'eb->buckets', which participates in a condition later
2968
2969 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2970 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2971 if (num_fences > min_t(unsigned long,
2972 ULONG_MAX / sizeof(*user),
2973 SIZE_MAX / sizeof(*f) - eb->num_fences))
2974 return -EINVAL;
2975
2976 user = u64_to_user_ptr(args->cliprects_ptr);
2977 if (!access_ok(user, num_fences * sizeof(*user)))
2978 return -EFAULT;
2979
2980 #ifdef __linux__
2981 f = krealloc(eb->fences,
2982 (eb->num_fences + num_fences) * sizeof(*f),
2983 __GFP_NOWARN | GFP_KERNEL);
2984 if (!f)
2985 return -ENOMEM;
2986 #else
2987 f = kmalloc((eb->num_fences + num_fences) * sizeof(*f),
2988 __GFP_NOWARN | GFP_KERNEL);
2989 if (!f)
2990 return -ENOMEM;
2991 memcpy(f, eb->fences, eb->num_fences * sizeof(*f));
2992 kfree(eb->fences);
2993 #endif
2994
2995 eb->fences = f;
2996 f += eb->num_fences;
2997 while (num_fences--) {
2998 struct drm_i915_gem_exec_fence user_fence;
2999 struct drm_syncobj *syncobj;
3000 struct dma_fence *fence = NULL;
3001
3002 if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
3003 return -EFAULT;
3004
3005 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
3006 return -EINVAL;
3007
3008 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
3009 if (!syncobj) {
3010 DRM_DEBUG("Invalid syncobj handle provided\n");
3011 return -ENOENT;
3012 }
3013
3014 if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
3015 fence = drm_syncobj_fence_get(syncobj);
3016 if (!fence) {
3017 DRM_DEBUG("Syncobj handle has no fence\n");
3018 drm_syncobj_put(syncobj);
3019 return -EINVAL;
3020 }
3021 }
3022
3023 #ifdef notyet
3024 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
3025 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
3026 #endif
3027
3028 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
3029 f->dma_fence = fence;
3030 f->value = 0;
3031 f->chain_fence = NULL;
3032 f++;
3033 eb->num_fences++;
3034 }
3035
3036 return 0;
3037 }
3038
3039 static void put_fence_array(struct eb_fence *fences, int num_fences)
3040 {
3041 if (fences)
3042 __free_fence_array(fences, num_fences);
3043 }
3044
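/*
 * Pre-submission: make the request wait on every dma_fence collected in
 * eb->fences; entries without a dma_fence are skipped.
 */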
3045 static int
3046 await_fence_array(struct i915_execbuffer *eb,
3047 struct i915_request *rq)
3048 {
3049 unsigned int n;
3050 int err;
3051
3052 for (n = 0; n < eb->num_fences; n++) {
3053 struct drm_syncobj *syncobj;
3054 unsigned int flags;
3055
3056 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
3057
3058 if (!eb->fences[n].dma_fence)
3059 continue;
3060
3061 err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence);
3062 if (err < 0)
3063 return err;
3064 }
3065
3066 return 0;
3067 }
3068
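/*
 * Post-submission: for every syncobj marked I915_EXEC_FENCE_SIGNAL, either
 * add the preallocated chain node at the requested timeline point or
 * replace the syncobj's fence outright.
 */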
3069 static void signal_fence_array(const struct i915_execbuffer *eb,
3070 struct dma_fence * const fence)
3071 {
3072 unsigned int n;
3073
3074 for (n = 0; n < eb->num_fences; n++) {
3075 struct drm_syncobj *syncobj;
3076 unsigned int flags;
3077
3078 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
3079 if (!(flags & I915_EXEC_FENCE_SIGNAL))
3080 continue;
3081
3082 if (eb->fences[n].chain_fence) {
3083 drm_syncobj_add_point(syncobj,
3084 eb->fences[n].chain_fence,
3085 fence,
3086 eb->fences[n].value);
3087 /*
3088 * The chain's ownership is transferred to the
3089 * timeline.
3090 */
3091 eb->fences[n].chain_fence = NULL;
3092 } else {
3093 drm_syncobj_replace_fence(syncobj, fence);
3094 }
3095 }
3096 }
3097
3098 static int
3099 parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
3100 {
3101 struct i915_execbuffer *eb = data;
3102 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
3103
3104 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
3105 return -EFAULT;
3106
3107 return add_timeline_fence_array(eb, &timeline_fences);
3108 }
3109
3110 static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
3111 {
3112 struct i915_request *rq, *rn;
3113
3114 list_for_each_entry_safe(rq, rn, &tl->requests, link)
3115 if (rq == end || !i915_request_retire(rq))
3116 break;
3117 }
3118
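/*
 * Commit and queue a single request: adopt the context's scheduling
 * attributes (or skip the request with -ENOENT if the context was closed),
 * set the parallel-submission fence flags, retire completed requests on the
 * client's timeline, and release the timeline mutex.
 */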
3119 static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
3120 int err, bool last_parallel)
3121 {
3122 struct intel_timeline * const tl = i915_request_timeline(rq);
3123 struct i915_sched_attr attr = {};
3124 struct i915_request *prev;
3125
3126 lockdep_assert_held(&tl->mutex);
3127 lockdep_unpin_lock(&tl->mutex, rq->cookie);
3128
3129 trace_i915_request_add(rq);
3130
3131 prev = __i915_request_commit(rq);
3132
3133 /* Check that the context wasn't destroyed before submission */
3134 if (likely(!intel_context_is_closed(eb->context))) {
3135 attr = eb->gem_context->sched;
3136 } else {
3137 /* Serialise with context_close via the add_to_timeline */
3138 i915_request_set_error_once(rq, -ENOENT);
3139 __i915_request_skip(rq);
3140 err = -ENOENT; /* override any transient errors */
3141 }
3142
3143 if (intel_context_is_parallel(eb->context)) {
3144 if (err) {
3145 __i915_request_skip(rq);
3146 set_bit(I915_FENCE_FLAG_SKIP_PARALLEL,
3147 &rq->fence.flags);
3148 }
3149 if (last_parallel)
3150 set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
3151 &rq->fence.flags);
3152 }
3153
3154 __i915_request_queue(rq, &attr);
3155
3156 /* Try to clean up the client's timeline after submitting the request */
3157 if (prev)
3158 retire_requests(tl, prev);
3159
3160 mutex_unlock(&tl->mutex);
3161
3162 return err;
3163 }
3164
3165 static int eb_requests_add(struct i915_execbuffer *eb, int err)
3166 {
3167 int i;
3168
3169 /*
3170 * We iterate in reverse order of creation to release timeline mutexes in
3171 * same order.
3172 */
3173 for_each_batch_add_order(eb, i) {
3174 struct i915_request *rq = eb->requests[i];
3175
3176 if (!rq)
3177 continue;
3178 err |= eb_request_add(eb, rq, err, i == 0);
3179 }
3180
3181 return err;
3182 }
3183
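/*
 * Extension dispatch table: with I915_EXEC_USE_EXTENSIONS, cliprects_ptr
 * points at a chain of struct i915_user_extension nodes, and
 * i915_user_extensions() invokes the handler registered for each node's
 * name, e.g. DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES ->
 * parse_timeline_fences().
 */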
3184 static const i915_user_extension_fn execbuf_extensions[] = {
3185 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
3186 };
3187
3188 static int
3189 parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
3190 struct i915_execbuffer *eb)
3191 {
3192 if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
17: Assuming the condition is false
18: Taking false branch
3193 return 0;
3194
3195 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
3196 * have another flag also using it at the same time.
3197 */
3198 if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
19: Assuming the condition is false
20: Taking false branch
3199 return -EINVAL;
3200
3201 if (args->num_cliprects != 0)
21: Assuming field 'num_cliprects' is equal to 0
22: Taking false branch
3202 return -EINVAL;
3203
3204 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
3205 execbuf_extensions,
3206 ARRAY_SIZE(execbuf_extensions),
3207 eb);
3208 }
3209
3210 static void eb_requests_get(struct i915_execbuffer *eb)
3211 {
3212 unsigned int i;
3213
3214 for_each_batch_create_order(eb, i) {
3215 if (!eb->requests[i])
3216 break;
3217
3218 i915_request_get(eb->requests[i]);
3219 }
3220 }
3221
3222 static void eb_requests_put(struct i915_execbuffer *eb)
3223 {
3224 unsigned int i;
3225
3226 for_each_batch_create_order(eb, i) {
3227 if (!eb->requests[i])
3228 break;
3229
3230 i915_request_put(eb->requests[i]);
3231 }
3232 }
3233
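/*
 * Parallel submission: bundle the per-batch request fences into a
 * dma_fence_array so a single composite fence can back the out-fence fd
 * and the context syncobj.
 */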
3234 static struct sync_file *
3235 eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
3236 {
3237 struct sync_file *out_fence = NULL;
3238 struct dma_fence_array *fence_array;
3239 struct dma_fence **fences;
3240 unsigned int i;
3241
3242 GEM_BUG_ON(!intel_context_is_parent(eb->context));
3243
3244 fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL);
3245 if (!fences)
3246 return ERR_PTR(-ENOMEM);
3247
3248 for_each_batch_create_order(eb, i) {
3249 fences[i] = &eb->requests[i]->fence;
3250 __set_bit(I915_FENCE_FLAG_COMPOSITE,
3251 &eb->requests[i]->fence.flags);
3252 }
3253
3254 fence_array = dma_fence_array_create(eb->num_batches,
3255 fences,
3256 eb->context->parallel.fence_context,
3257 eb->context->parallel.seqno++,
3258 false);
3259 if (!fence_array) {
3260 kfree(fences);
3261 return ERR_PTR(-ENOMEM);
3262 }
3263
3264 /* Move ownership to the dma_fence_array created above */
3265 for_each_batch_create_order(eb, i)
3266 dma_fence_get(fences[i]);
3267
3268 if (out_fence_fd != -1) {
3269 out_fence = sync_file_create(&fence_array->base);
3270 /* sync_file now owns fence_arry, drop creation ref */
3271 dma_fence_put(&fence_array->base);
3272 if (!out_fence)
3273 return ERR_PTR(-ENOMEM);
3274 }
3275
3276 eb->composite_fence = &fence_array->base;
3277
3278 return out_fence;
3279 }
3280
3281 static struct sync_file *
3282 eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq,
3283 struct dma_fence *in_fence, int out_fence_fd)
3284 {
3285 struct sync_file *out_fence = NULL;
3286 int err;
3287
3288 if (unlikely(eb->gem_context->syncobj)) {
3289 struct dma_fence *fence;
3290
3291 fence = drm_syncobj_fence_get(eb->gem_context->syncobj);
3292 err = i915_request_await_dma_fence(rq, fence);
3293 dma_fence_put(fence);
3294 if (err)
3295 return ERR_PTR(err);
3296 }
3297
3298 if (in_fence) {
3299 if (eb->args->flags & I915_EXEC_FENCE_SUBMIT)
3300 err = i915_request_await_execution(rq, in_fence);
3301 else
3302 err = i915_request_await_dma_fence(rq, in_fence);
3303 if (err < 0)
3304 return ERR_PTR(err);
3305 }
3306
3307 if (eb->fences) {
3308 err = await_fence_array(eb, rq);
3309 if (err)
3310 return ERR_PTR(err);
3311 }
3312
3313 if (intel_context_is_parallel(eb->context)) {
3314 out_fence = eb_composite_fence_create(eb, out_fence_fd);
3315 if (IS_ERR(out_fence))
3316 return ERR_PTR(-ENOMEM);
3317 } else if (out_fence_fd != -1) {
3318 out_fence = sync_file_create(&rq->fence);
3319 if (!out_fence)
3320 return ERR_PTR(-ENOMEM);
3321 }
3322
3323 return out_fence;
3324 }
3325
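/*
 * Map a batch index to its context: index 0 is the parent context, higher
 * indices walk the parent's child list in order.
 */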
3326 static struct intel_context *
3327 eb_find_context(struct i915_execbuffer *eb, unsigned int context_number)
3328 {
3329 struct intel_context *child;
3330
3331 if (likely(context_number == 0))
3332 return eb->context;
3333
3334 for_each_child(eb->context, child)
3335 if (!--context_number)
3336 return child;
3337
3338 GEM_BUG_ON("Context not found");
3339
3340 return NULL;
3341 }
3342
3343 static struct sync_file *
3344 eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
3345 int out_fence_fd)
3346 {
3347 struct sync_file *out_fence = NULL;
3348 unsigned int i;
3349
3350 for_each_batch_create_order(eb, i) {
3351 /* Allocate a request for this batch buffer nice and early. */
3352 eb->requests[i] = i915_request_create(eb_find_context(eb, i));
3353 if (IS_ERR(eb->requests[i])) {
3354 out_fence = ERR_CAST(eb->requests[i]);
3355 eb->requests[i] = NULL;
3356 return out_fence;
3357 }
3358
3359 /*
3360 * Only the first request added (committed to backend) has to
3361 * take the in fences into account as all subsequent requests
3362 * will have fences inserted inbetween them.
3363 */
3364 if (i + 1 == eb->num_batches) {
3365 out_fence = eb_fences_add(eb, eb->requests[i],
3366 in_fence, out_fence_fd);
3367 if (IS_ERR(out_fence))
3368 return out_fence;
3369 }
3370
3371 /*
3372 * Not really on stack, but we don't want to call
3373 * kfree on the batch_snapshot when we put it, so use the
3374 * _onstack interface.
3375 */
3376 if (eb->batches[i]->vma)
3377 eb->requests[i]->batch_res =
3378 i915_vma_resource_get(eb->batches[i]->vma->resource);
3379 if (eb->batch_pool) {
3380 GEM_BUG_ON(intel_context_is_parallel(eb->context));
3381 intel_gt_buffer_pool_mark_active(eb->batch_pool,
3382 eb->requests[i]);
3383 }
3384 }
3385
3386 return out_fence;
3387 }
3388
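/*
 * Main execbuffer path: validate flags, parse extensions and fence arrays,
 * select the context and engine, look up and relocate the VMAs, create one
 * request per batch, submit, then signal syncobjs and install the
 * out-fence fd.
 */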
3389 static int
3390 i915_gem_do_execbuffer(struct drm_device *dev,
3391 struct drm_file *file,
3392 struct drm_i915_gem_execbuffer2 *args,
3393 struct drm_i915_gem_exec_object2 *exec)
3394 {
3395 struct drm_i915_private *i915 = to_i915(dev);
3396 struct i915_execbuffer eb;
3397 struct dma_fence *in_fence = NULL;
3398 struct sync_file *out_fence = NULL;
3399 int out_fence_fd = -1;
3400 int err;
3401
3402 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
3403 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
3404 ~__EXEC_OBJECT_UNKNOWN_FLAGS);
3405
3406 eb.i915 = i915;
3407 eb.file = file;
3408 eb.args = args;
3409 if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
7: Assuming the condition is false
8: Taking false branch
3410 args->flags |= __EXEC_HAS_RELOC;
3411
3412 eb.exec = exec;
3413 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
3414 eb.vma[0].vma = NULL;
3415 eb.batch_pool = NULL;
3416
3417 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
3418 reloc_cache_init(&eb.reloc_cache, eb.i915);
3419
3420 eb.buffer_count = args->buffer_count;
3421 eb.batch_start_offset = args->batch_start_offset;
3422 eb.trampoline = NULL;
3423
3424 eb.fences = NULL;
3425 eb.num_fences = 0;
3426
3427 eb_capture_list_clear(&eb);
9: Calling 'eb_capture_list_clear'
11: Returning from 'eb_capture_list_clear'
3428
3429 memset(eb.requests, 0, sizeof(struct i915_request *) *
3430 ARRAY_SIZE(eb.requests));
3431 eb.composite_fence = NULL;
3432
3433 eb.batch_flags = 0;
3434 if (args->flags & I915_EXEC_SECURE) {
12: Assuming the condition is false
13: Taking false branch
3435 if (GRAPHICS_VER(i915) >= 11)
3436 return -ENODEV;
3437
3438 /* Return -EPERM to trigger fallback code on old binaries. */
3439 if (!HAS_SECURE_BATCHES(i915))
3440 return -EPERM;
3441
3442 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
3443 return -EPERM;
3444
3445 eb.batch_flags |= I915_DISPATCH_SECURE;
3446 }
3447 if (args->flags & I915_EXEC_IS_PINNED)
14: Assuming the condition is false
15: Taking false branch
3448 eb.batch_flags |= I915_DISPATCH_PINNED;
3449
3450 err = parse_execbuf2_extensions(args, &eb);
16: Calling 'parse_execbuf2_extensions'
23: Returning from 'parse_execbuf2_extensions'
3451 if (err)
24: Assuming 'err' is 0
25: Taking false branch
3452 goto err_ext;
3453
3454 err = add_fence_array(&eb);
26: Calling 'add_fence_array'
32: Returning from 'add_fence_array'
3455 if (err)
32.1: 'err' is 0
33: Taking false branch
3456 goto err_ext;
3457
3458 #define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
3459 if (args->flags & IN_FENCES) {
34: Assuming the condition is false
35: Taking false branch
3460 if ((args->flags & IN_FENCES) == IN_FENCES)
3461 return -EINVAL;
3462
3463 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
3464 if (!in_fence) {
3465 err = -EINVAL;
3466 goto err_ext;
3467 }
3468 }
3469 #undef IN_FENCES
3470
3471 if (args->flags & I915_EXEC_FENCE_OUT) {
36: Assuming the condition is false
37: Taking false branch
3472 out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
3473 if (out_fence_fd < 0) {
3474 err = out_fence_fd;
3475 goto err_in_fence;
3476 }
3477 }
3478
3479 err = eb_create(&eb);
38: Calling 'eb_create'
3480 if (err)
3481 goto err_out_fence;
3482
3483 GEM_BUG_ON(!eb.lut_size);
3484
3485 err = eb_select_context(&eb);
3486 if (unlikely(err))
3487 goto err_destroy;
3488
3489 err = eb_select_engine(&eb);
3490 if (unlikely(err))
3491 goto err_context;
3492
3493 err = eb_lookup_vmas(&eb);
3494 if (err) {
3495 eb_release_vmas(&eb, true);
3496 goto err_engine;
3497 }
3498
3499 i915_gem_ww_ctx_init(&eb.ww, true);
3500
3501 err = eb_relocate_parse(&eb);
3502 if (err) {
3503 /*
3504 * If the user expects the execobject.offset and
3505 * reloc.presumed_offset to be an exact match,
3506 * as for using NO_RELOC, then we cannot update
3507 * the execobject.offset until we have completed
3508 * relocation.
3509 */
3510 args->flags &= ~__EXEC_HAS_RELOC;
3511 goto err_vma;
3512 }
3513
3514 ww_acquire_done(&eb.ww.ctx);
3515 err = eb_capture_stage(&eb);
3516 if (err)
3517 goto err_vma;
3518
3519 out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
3520 if (IS_ERR(out_fence)) {
3521 err = PTR_ERR(out_fence);
3522 out_fence = NULL;
3523 if (eb.requests[0])
3524 goto err_request;
3525 else
3526 goto err_vma;
3527 }
3528
3529 err = eb_submit(&eb);
3530
3531err_request:
3532 eb_requests_get(&eb);
3533 err = eb_requests_add(&eb, err);
3534
3535 if (eb.fences)
3536 signal_fence_array(&eb, eb.composite_fence ?
3537 eb.composite_fence :
3538 &eb.requests[0]->fence);
3539
3540 if (unlikely(eb.gem_context->syncobj)) {
3541 drm_syncobj_replace_fence(eb.gem_context->syncobj,
3542 eb.composite_fence ?
3543 eb.composite_fence :
3544 &eb.requests[0]->fence);
3545 }
3546
3547 if (out_fence) {
3548 if (err == 0) {
3549 fd_install(out_fence_fd, out_fence->file);
3550 args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
3551 args->rsvd2 |= (u64)out_fence_fd << 32;
3552 out_fence_fd = -1;
3553 } else {
3554 fput(out_fence->file);
3555 }
3556 }
3557
3558 if (!out_fence && eb.composite_fence)
3559 dma_fence_put(eb.composite_fence);
3560
3561 eb_requests_put(&eb);
3562
3563err_vma:
3564 eb_release_vmas(&eb, true);
3565 WARN_ON(err == -EDEADLK);
3566 i915_gem_ww_ctx_fini(&eb.ww);
3567
3568 if (eb.batch_pool)
3569 intel_gt_buffer_pool_put(eb.batch_pool);
3570 err_engine:
3571 eb_put_engine(&eb);
3572 err_context:
3573 i915_gem_context_put(eb.gem_context);
3574 err_destroy:
3575 eb_destroy(&eb);
3576 err_out_fence:
3577 if (out_fence_fd != -1)
3578 put_unused_fd(out_fence_fd);
3579 err_in_fence:
3580 dma_fence_put(in_fence);
3581 err_ext:
3582 put_fence_array(eb.fences, eb.num_fences);
3583 return err;
3584 }
3585
3586 static size_t eb_element_size(void)
3587 {
3588 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
3589 }
3590
3591 static bool check_buffer_count(size_t count)
3592 {
3593 const size_t sz = eb_element_size();
3594
3595 /*
3596 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
3597 * array size (see eb_create()). Otherwise, we can accept an array as
3598 * large as can be addressed (though use large arrays at your peril)!
3599 */
3600
3601 return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
3602 }
3603
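/*
 * ioctl entry point: validate the buffer count, copy the exec-object list
 * from userspace, run the execbuffer, and write any updated offsets back
 * to the user's list before returning.
 */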
3604 int
3605 i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
3606 struct drm_file *file)
3607 {
3608 struct drm_i915_private *i915 = to_i915(dev);
3609 struct drm_i915_gem_execbuffer2 *args = data;
3610 struct drm_i915_gem_exec_object2 *exec2_list;
3611 const size_t count = args->buffer_count;
3612 int err;
3613
3614 if (!check_buffer_count(count)) {
1: Taking false branch
3615 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
3616 return -EINVAL;
3617 }
3618
3619 err = i915_gem_check_execbuffer(args);
3620 if (err)
1.1: 'err' is 0
2: Taking false branch
3621 return err;
3622
3623 /* Allocate extra slots for use by the command parser */
3624 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3625 __GFP_NOWARN | GFP_KERNEL);
3626 if (exec2_list == NULL) {
3: Assuming 'exec2_list' is not equal to NULL
4: Taking false branch
3627 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
3628 count);
3629 return -ENOMEM;
3630 }
3631 if (copy_from_user(exec2_list,
5: Taking false branch
3632 u64_to_user_ptr(args->buffers_ptr),
3633 sizeof(*exec2_list) * count)) {
3634 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count);
3635 kvfree(exec2_list);
3636 return -EFAULT;
3637 }
3638
3639 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
6: Calling 'i915_gem_do_execbuffer'
3640
3641 /*
3642 * Now that we have begun execution of the batchbuffer, we ignore
3643 * any new error after this point. Also given that we have already
3644 * updated the associated relocations, we try to write out the current
3645 * object locations irrespective of any error.
3646 */
3647 if (args->flags & __EXEC_HAS_RELOC) {
3648 struct drm_i915_gem_exec_object2 __user *user_exec_list =
3649 u64_to_user_ptr(args->buffers_ptr);
3650 unsigned int i;
3651
3652 /* Copy the new buffer offsets back to the user's exec list. */
3653 /*
3654 * Note: count * sizeof(*user_exec_list) does not overflow,
3655 * because we checked 'count' in check_buffer_count().
3656 *
3657 * And this range already got effectively checked earlier
3658 * when we did the "copy_from_user()" above.
3659 */
3660 if (!user_write_access_begin(user_exec_list,
3661 count * sizeof(*user_exec_list)))
3662 goto end;
3663
3664 for (i = 0; i < args->buffer_count; i++) {
3665 if (!(exec2_list[i].offset & UPDATE))
3666 continue;
3667
3668 exec2_list[i].offset =
3669 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
3670 unsafe_put_user(exec2_list[i].offset,
3671 &user_exec_list[i].offset,
3672 end_user);
3673 }
3674 end_user:
3675 user_write_access_end();
3676 end:;
3677 }
3678
3679 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
3680 kvfree(exec2_list);
3681 return err;
3682 }