Bug Summary

File: dev/pci/drm/i915/gem/i915_gem_execbuffer.c
Warning: line 354, column 52
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long'
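
The path behind this warning: ilog2() here expands to fls()/flsl() minus one, which is -1 when eb->buffer_count is 0, so size in eb_create() starts at 0. The first loop iteration shifts by 0 (well defined), the allocation is assumed to fail, and while (--size) then wraps size to UINT_MAX (4294967295), so the next sizeof(struct hlist_head) << size shifts by a count at least as wide as the type, which is undefined. Whether buffer_count can actually be 0 by the time eb_create() runs depends on validation earlier in the execbuf ioctl path, so this may be a false positive in practice. Below is a minimal standalone sketch of that path, not driver code; ilog2_u32() and alloc_fails() are hypothetical stand-ins for ilog2() and a failing kzalloc().

/* Standalone sketch of the analyzer's path through eb_create(). */
#include <limits.h>
#include <stddef.h>
#include <stdio.h>

struct hlist_head { void *first; };

/* mimics ilog2()/fls(): returns -1 for x == 0, just like fls(0) - 1 */
static int ilog2_u32(unsigned int x)
{
    int bits = 0;

    while (x) {
        x >>= 1;
        bits++;
    }
    return bits - 1;
}

/* stands in for kzalloc() on the failing path the analyzer assumes */
static void *alloc_fails(size_t n)
{
    (void)n;
    return NULL;
}

int main(void)
{
    unsigned int buffer_count = 0;  /* degenerate input: no exec objects */
    unsigned int size = 1 + ilog2_u32(buffer_count);  /* size == 0 */

    do {
        if (size >= sizeof(size_t) * CHAR_BIT) {
            /* second iteration: size has wrapped to UINT_MAX, so the
             * shift below would be undefined; report instead */
            printf("shift count %u exceeds the width of size_t\n", size);
            return 1;
        }
        if (alloc_fails(sizeof(struct hlist_head) << size))
            break;
    } while (--size);   /* 0 - 1 wraps to 4294967295 */

    return 0;
}

Clamping size before the shift, or guaranteeing that buffer_count is non-zero before eb_create() is reached, would be one way to make the report go away.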

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name i915_gem_execbuffer.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc 
-fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
1/*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2008,2010 Intel Corporation
5 */
6
7#include <linux/intel-iommu.h>
8#include <linux/dma-resv.h>
9#include <linux/sync_file.h>
10#include <linux/uaccess.h>
11
12#include <drm/drm_syncobj.h>
13
14#include <dev/pci/pcivar.h>
15#include <dev/pci/agpvar.h>
16
17#include "display/intel_frontbuffer.h"
18
19#include "gem/i915_gem_ioctls.h"
20#include "gt/intel_context.h"
21#include "gt/intel_gt.h"
22#include "gt/intel_gt_buffer_pool.h"
23#include "gt/intel_gt_pm.h"
24#include "gt/intel_ring.h"
25
26#include "i915_drv.h"
27#include "i915_gem_clflush.h"
28#include "i915_gem_context.h"
29#include "i915_gem_ioctls.h"
30#include "i915_trace.h"
31#include "i915_user_extensions.h"
32
33struct eb_vma {
34 struct i915_vma *vma;
35 unsigned int flags;
36
37 /** This vma's place in the execbuf reservation list */
38 struct drm_i915_gem_exec_object2 *exec;
39 struct list_head bind_link;
40 struct list_head reloc_link;
41
42 struct hlist_node node;
43 u32 handle;
44};
45
46enum {
47 FORCE_CPU_RELOC = 1,
48 FORCE_GTT_RELOC,
49 FORCE_GPU_RELOC,
50#define DBG_FORCE_RELOC0 0 /* choose one of the above! */
51};
52
53#define __EXEC_OBJECT_HAS_PIN BIT(31)
54#define __EXEC_OBJECT_HAS_FENCE BIT(30)
55#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
56#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
57#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
58#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
59
60#define __EXEC_HAS_RELOC BIT(31)
61#define __EXEC_ENGINE_PINNED BIT(30)
62#define __EXEC_INTERNAL_FLAGS (~0u << 30)
63#define UPDATE PIN_OFFSET_FIXED
64
65#define BATCH_OFFSET_BIAS (256*1024)
66
67#define __I915_EXEC_ILLEGAL_FLAGS \
68 (__I915_EXEC_UNKNOWN_FLAGS | \
69 I915_EXEC_CONSTANTS_MASK | \
70 I915_EXEC_RESOURCE_STREAMER)
71
72/* Catch emission of unexpected errors for CI! */
73#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0
74#undef EINVAL22
75#define EINVAL22 ({ \
76 DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__)__drm_dbg(DRM_UT_DRIVER, "EINVAL at %s:%d\n", __func__, 76); \
77 22; \
78})
79#endif
80
81/**
82 * DOC: User command execution
83 *
84 * Userspace submits commands to be executed on the GPU as an instruction
85 * stream within a GEM object we call a batchbuffer. This instructions may
86 * refer to other GEM objects containing auxiliary state such as kernels,
87 * samplers, render targets and even secondary batchbuffers. Userspace does
88 * not know where in the GPU memory these objects reside and so before the
89 * batchbuffer is passed to the GPU for execution, those addresses in the
90 * batchbuffer and auxiliary objects are updated. This is known as relocation,
91 * or patching. To try and avoid having to relocate each object on the next
92 * execution, userspace is told the location of those objects in this pass,
93 * but this remains just a hint as the kernel may choose a new location for
94 * any object in the future.
95 *
96 * At the level of talking to the hardware, submitting a batchbuffer for the
97 * GPU to execute is to add content to a buffer from which the HW
98 * command streamer is reading.
99 *
100 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
101 * Execlists, this command is not placed on the same buffer as the
102 * remaining items.
103 *
104 * 2. Add a command to invalidate caches to the buffer.
105 *
106 * 3. Add a batchbuffer start command to the buffer; the start command is
107 * essentially a token together with the GPU address of the batchbuffer
108 * to be executed.
109 *
110 * 4. Add a pipeline flush to the buffer.
111 *
112 * 5. Add a memory write command to the buffer to record when the GPU
113 * is done executing the batchbuffer. The memory write writes the
114 * global sequence number of the request, ``i915_request::global_seqno``;
115 * the i915 driver uses the current value in the register to determine
116 * if the GPU has completed the batchbuffer.
117 *
118 * 6. Add a user interrupt command to the buffer. This command instructs
119 * the GPU to issue an interrupt when the command, pipeline flush and
120 * memory write are completed.
121 *
122 * 7. Inform the hardware of the additional commands added to the buffer
123 * (by updating the tail pointer).
124 *
125 * Processing an execbuf ioctl is conceptually split up into a few phases.
126 *
127 * 1. Validation - Ensure all the pointers, handles and flags are valid.
128 * 2. Reservation - Assign GPU address space for every object
129 * 3. Relocation - Update any addresses to point to the final locations
130 * 4. Serialisation - Order the request with respect to its dependencies
131 * 5. Construction - Construct a request to execute the batchbuffer
132 * 6. Submission (at some point in the future execution)
133 *
134 * Reserving resources for the execbuf is the most complicated phase. We
135 * neither want to have to migrate the object in the address space, nor do
136 * we want to have to update any relocations pointing to this object. Ideally,
137 * we want to leave the object where it is and for all the existing relocations
138 * to match. If the object is given a new address, or if userspace thinks the
139 * object is elsewhere, we have to parse all the relocation entries and update
140 * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that
141 * all the target addresses in all of its objects match the value in the
142 * relocation entries and that they all match the presumed offsets given by the
143 * list of execbuffer objects. Using this knowledge, we know that if we haven't
144 * moved any buffers, all the relocation entries are valid and we can skip
145 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
146 * hang.) The requirement for using I915_EXEC_NO_RELOC are:
147 *
148 * The addresses written in the objects must match the corresponding
149 * reloc.presumed_offset which in turn must match the corresponding
150 * execobject.offset.
151 *
152 * Any render targets written to in the batch must be flagged with
153 * EXEC_OBJECT_WRITE.
154 *
155 * To avoid stalling, execobject.offset should match the current
156 * address of that object within the active context.
157 *
158 * The reservation is done is multiple phases. First we try and keep any
159 * object already bound in its current location - so as long as meets the
160 * constraints imposed by the new execbuffer. Any object left unbound after the
161 * first pass is then fitted into any available idle space. If an object does
162 * not fit, all objects are removed from the reservation and the process rerun
163 * after sorting the objects into a priority order (more difficult to fit
164 * objects are tried first). Failing that, the entire VM is cleared and we try
165 * to fit the execbuf once last time before concluding that it simply will not
166 * fit.
167 *
168 * A small complication to all of this is that we allow userspace not only to
169 * specify an alignment and a size for the object in the address space, but
170 * we also allow userspace to specify the exact offset. This objects are
171 * simpler to place (the location is known a priori) all we have to do is make
172 * sure the space is available.
173 *
174 * Once all the objects are in place, patching up the buried pointers to point
175 * to the final locations is a fairly simple job of walking over the relocation
176 * entry arrays, looking up the right address and rewriting the value into
177 * the object. Simple! ... The relocation entries are stored in user memory
178 * and so to access them we have to copy them into a local buffer. That copy
179 * has to avoid taking any pagefaults as they may lead back to a GEM object
180 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
181 * the relocation into multiple passes. First we try to do everything within an
182 * atomic context (avoid the pagefaults) which requires that we never wait. If
183 * we detect that we may wait, or if we need to fault, then we have to fallback
184 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
185 * bells yet?) Dropping the mutex means that we lose all the state we have
186 * built up so far for the execbuf and we must reset any global data. However,
187 * we do leave the objects pinned in their final locations - which is a
188 * potential issue for concurrent execbufs. Once we have left the mutex, we can
189 * allocate and copy all the relocation entries into a large array at our
190 * leisure, reacquire the mutex, reclaim all the objects and other state and
191 * then proceed to update any incorrect addresses with the objects.
192 *
193 * As we process the relocation entries, we maintain a record of whether the
194 * object is being written to. Using NORELOC, we expect userspace to provide
195 * this information instead. We also check whether we can skip the relocation
196 * by comparing the expected value inside the relocation entry with the target's
197 * final address. If they differ, we have to map the current object and rewrite
198 * the 4 or 8 byte pointer within.
199 *
200 * Serialising an execbuf is quite simple according to the rules of the GEM
201 * ABI. Execution within each context is ordered by the order of submission.
202 * Writes to any GEM object are in order of submission and are exclusive. Reads
203 * from a GEM object are unordered with respect to other reads, but ordered by
204 * writes. A write submitted after a read cannot occur before the read, and
205 * similarly any read submitted after a write cannot occur before the write.
206 * Writes are ordered between engines such that only one write occurs at any
207 * time (completing any reads beforehand) - using semaphores where available
208 * and CPU serialisation otherwise. Other GEM access obey the same rules, any
209 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
210 * reads before starting, and any read (either using set-domain or pread) must
211 * flush all GPU writes before starting. (Note we only employ a barrier before,
212 * we currently rely on userspace not concurrently starting a new execution
213 * whilst reading or writing to an object. This may be an advantage or not
214 * depending on how much you trust userspace not to shoot themselves in the
215 * foot.) Serialisation may just result in the request being inserted into
216 * a DAG awaiting its turn, but most simple is to wait on the CPU until
217 * all dependencies are resolved.
218 *
219 * After all of that, is just a matter of closing the request and handing it to
220 * the hardware (well, leaving it in a queue to be executed). However, we also
221 * offer the ability for batchbuffers to be run with elevated privileges so
222 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
223 * Before any batch is given extra privileges we first must check that it
224 * contains no nefarious instructions, we check that each instruction is from
225 * our whitelist and all registers are also from an allowed list. We first
226 * copy the user's batchbuffer to a shadow (so that the user doesn't have
227 * access to it, either by the CPU or GPU as we scan it) and then parse each
228 * instruction. If everything is ok, we set a flag telling the hardware to run
229 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
230 */
231
232struct eb_fence {
233 struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
234 struct dma_fence *dma_fence;
235 u64 value;
236 struct dma_fence_chain *chain_fence;
237};
238
239struct i915_execbuffer {
240 struct drm_i915_privateinteldrm_softc *i915; /** i915 backpointer */
241 struct drm_file *file; /** per-file lookup tables and limits */
242 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
243 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
244 struct eb_vma *vma;
245
246 struct intel_engine_cs *engine; /** engine to queue the request to */
247 struct intel_context *context; /* logical state for the request */
248 struct i915_gem_context *gem_context; /** caller's context */
249
250 struct i915_request *request; /** our request to build */
251 struct eb_vma *batch; /** identity of the batch obj/vma */
252 struct i915_vma *trampoline; /** trampoline used for chaining */
253
254 /** actual size of execobj[] as we may extend it for the cmdparser */
255 unsigned int buffer_count;
256
257 /** list of vma not yet bound during reservation phase */
258 struct list_head unbound;
259
260 /** list of vma that have execobj.relocation_count */
261 struct list_head relocs;
262
263 struct i915_gem_ww_ctx ww;
264
265 /**
266 * Track the most recently used object for relocations, as we
267 * frequently have to perform multiple relocations within the same
268 * obj/page
269 */
270 struct reloc_cache {
271 struct drm_mm_node node; /** temporary GTT binding */
272 unsigned long vaddr; /** Current kmap address */
273 unsigned long page; /** Currently mapped page index */
274 unsigned int gen; /** Cached value of INTEL_GEN */
275 bool_Bool use_64bit_reloc : 1;
276 bool_Bool has_llc : 1;
277 bool_Bool has_fence : 1;
278 bool_Bool needs_unfenced : 1;
279
280 struct i915_request *rq;
281 u32 *rq_cmd;
282 unsigned int rq_size;
283 struct intel_gt_buffer_pool_node *pool;
284
285 struct agp_map *map;
286 bus_space_tag_t iot;
287 bus_space_handle_t ioh;
288 } reloc_cache;
289
290 struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
291 struct intel_context *reloc_context;
292
293 u64 invalid_flags; /** Set of execobj.flags that are invalid */
294 u32 context_flags; /** Set of execobj.flags to insert from the ctx */
295
296 u64 batch_len; /** Length of batch within object */
297 u32 batch_start_offset; /** Location within object of batch */
298 u32 batch_flags; /** Flags composed for emit_bb_start() */
299 struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
300
301 /**
302 * Indicate either the size of the hastable used to resolve
303 * relocation handles, or if negative that we are using a direct
304 * index into the execobj[].
305 */
306 int lut_size;
307 struct hlist_head *buckets; /** ht for relocation handles */
308
309 struct eb_fence *fences;
310 unsigned long num_fences;
311};
312
313static int eb_parse(struct i915_execbuffer *eb);
314static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
315 bool_Bool throttle);
316static void eb_unpin_engine(struct i915_execbuffer *eb);
317
318static inline bool_Bool eb_use_cmdparser(const struct i915_execbuffer *eb)
319{
320 return intel_engine_requires_cmd_parser(eb->engine) ||
321 (intel_engine_using_cmd_parser(eb->engine) &&
322 eb->args->batch_len);
323}
324
325static int eb_create(struct i915_execbuffer *eb)
326{
327 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT(1<<12))) {
28
Assuming the condition is true
29
Taking true branch
328 unsigned int size = 1 + ilog2(eb->buffer_count)((sizeof(eb->buffer_count) <= 4) ? (fls(eb->buffer_count) - 1) : (flsl(eb->buffer_count) - 1));
30
'?' condition is true
329
330 /*
331 * Without a 1:1 association between relocation handles and
332 * the execobject[] index, we instead create a hashtable.
333 * We size it dynamically based on available memory, starting
334 * first with 1:1 assocative hash and scaling back until
335 * the allocation succeeds.
336 *
337 * Later on we use a positive lut_size to indicate we are
338 * using this hashtable, and a negative value to indicate a
339 * direct lookup.
340 */
341 do {
36
Loop condition is true. Execution continues on line 342
342 gfp_t flags;
343
344 /* While we can still reduce the allocation size, don't
345 * raise a warning and allow the allocation to fail.
346 * On the last pass though, we want to try as hard
347 * as possible to perform the allocation and warn
348 * if it fails.
349 */
350 flags = GFP_KERNEL(0x0001 | 0x0004);
351 if (size > 1)
31
Assuming 'size' is <= 1
32
Taking false branch
37
Assuming 'size' is > 1
38
Taking true branch
352 flags |= __GFP_NORETRY0 | __GFP_NOWARN0;
353
354 eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
39
The result of the left shift is undefined due to shifting by '4294967295', which is greater or equal to the width of type 'unsigned long'
355 flags);
356 if (eb->buckets)
33
Assuming field 'buckets' is null
34
Taking false branch
357 break;
358 } while (--size);
35
Value assigned to 'size'
359
360 if (unlikely(!size)__builtin_expect(!!(!size), 0))
361 return -ENOMEM12;
362
363 eb->lut_size = size;
364 } else {
365 eb->lut_size = -eb->buffer_count;
366 }
367
368 return 0;
369}
370
371static bool_Bool
372eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
373 const struct i915_vma *vma,
374 unsigned int flags)
375{
376 if (vma->node.size < entry->pad_to_size)
377 return true1;
378
379 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)(((vma->node.start) & ((entry->alignment) - 1)) == 0))
380 return true1;
381
382 if (flags & EXEC_OBJECT_PINNED(1<<4) &&
383 vma->node.start != entry->offset)
384 return true1;
385
386 if (flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (28)) &&
387 vma->node.start < BATCH_OFFSET_BIAS(256*1024))
388 return true1;
389
390 if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)) &&
391 (vma->node.start + vma->node.size + 4095) >> 32)
392 return true1;
393
394 if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (29)) &&
395 !i915_vma_is_map_and_fenceable(vma))
396 return true1;
397
398 return false0;
399}
400
401static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
402 unsigned int exec_flags)
403{
404 u64 pin_flags = 0;
405
406 if (exec_flags & EXEC_OBJECT_NEEDS_GTT(1<<1))
407 pin_flags |= PIN_GLOBAL(1ULL << (10));
408
409 /*
410 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
411 * limit address to the first 4GBs for unflagged objects.
412 */
413 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
414 pin_flags |= PIN_ZONE_4G(1ULL << (4));
415
416 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (29)))
417 pin_flags |= PIN_MAPPABLE(1ULL << (3));
418
419 if (exec_flags & EXEC_OBJECT_PINNED(1<<4))
420 pin_flags |= entry->offset | PIN_OFFSET_FIXED(1ULL << (7));
421 else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (28)))
422 pin_flags |= BATCH_OFFSET_BIAS(256*1024) | PIN_OFFSET_BIAS(1ULL << (6));
423
424 return pin_flags;
425}
426
427static inline bool_Bool
428eb_pin_vma(struct i915_execbuffer *eb,
429 const struct drm_i915_gem_exec_object2 *entry,
430 struct eb_vma *ev)
431{
432 struct i915_vma *vma = ev->vma;
433 u64 pin_flags;
434
435 if (vma->node.size)
436 pin_flags = vma->node.start;
437 else
438 pin_flags = entry->offset & PIN_OFFSET_MASK-(1ULL << (12));
439
440 pin_flags |= PIN_USER(1ULL << (11)) | PIN_NOEVICT(1ULL << (0)) | PIN_OFFSET_FIXED(1ULL << (7));
441 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)__builtin_expect(!!(ev->flags & (1<<1)), 0))
442 pin_flags |= PIN_GLOBAL(1ULL << (10));
443
444 /* Attempt to reuse the current location if available */
445 /* TODO: Add -EDEADLK handling here */
446 if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))__builtin_expect(!!(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags)), 0)) {
447 if (entry->flags & EXEC_OBJECT_PINNED(1<<4))
448 return false0;
449
450 /* Failing that pick any _free_ space if suitable */
451 if (unlikely(i915_vma_pin_ww(vma, &eb->ww,
452 entry->pad_to_size,
453 entry->alignment,
454 eb_pin_flags(entry, ev->flags) |
455 PIN_USER | PIN_NOEVICT)))
456 return false0;
457 }
458
459 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
460 if (unlikely(i915_vma_pin_fence(vma))__builtin_expect(!!(i915_vma_pin_fence(vma)), 0)) {
461 i915_vma_unpin(vma);
462 return false0;
463 }
464
465 if (vma->fence)
466 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (30));
467 }
468
469 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (31));
470 return !eb_vma_misplaced(entry, vma, ev->flags);
471}
472
473static inline void
474eb_unreserve_vma(struct eb_vma *ev)
475{
476 if (!(ev->flags & __EXEC_OBJECT_HAS_PIN(1UL << (31))))
477 return;
478
479 if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)__builtin_expect(!!(ev->flags & (1UL << (30))), 0))
480 __i915_vma_unpin_fence(ev->vma);
481
482 __i915_vma_unpin(ev->vma);
483 ev->flags &= ~__EXEC_OBJECT_RESERVED((1UL << (31)) | (1UL << (30)));
484}
485
486static int
487eb_validate_vma(struct i915_execbuffer *eb,
488 struct drm_i915_gem_exec_object2 *entry,
489 struct i915_vma *vma)
490{
491 if (unlikely(entry->flags & eb->invalid_flags)__builtin_expect(!!(entry->flags & eb->invalid_flags), 0))
492 return -EINVAL22;
493
494 if (unlikely(entry->alignment &&
495 !is_power_of_2_u64(entry->alignment)))
496 return -EINVAL22;
497
498 /*
499 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
500 * any non-page-aligned or non-canonical addresses.
501 */
502 if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
503 entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
504 return -EINVAL22;
505
506 /* pad_to_size was once a reserved field, so sanitize it */
507 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE(1<<5)) {
508 if (unlikely(offset_in_page(entry->pad_to_size))__builtin_expect(!!(((vaddr_t)(entry->pad_to_size) & ((1 << 12) - 1))), 0))
509 return -EINVAL22;
510 } else {
511 entry->pad_to_size = 0;
512 }
513 /*
514 * From drm_mm perspective address space is continuous,
515 * so from this point we're always using non-canonical
516 * form internally.
517 */
518 entry->offset = gen8_noncanonical_addr(entry->offset);
519
520 if (!eb->reloc_cache.has_fence) {
521 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE(1<<0);
522 } else {
523 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE(1<<0) ||
524 eb->reloc_cache.needs_unfenced) &&
525 i915_gem_object_is_tiled(vma->obj))
526 entry->flags |= EXEC_OBJECT_NEEDS_GTT(1<<1) | __EXEC_OBJECT_NEEDS_MAP(1UL << (29));
527 }
528
529 if (!(entry->flags & EXEC_OBJECT_PINNED(1<<4)))
530 entry->flags |= eb->context_flags;
531
532 return 0;
533}
534
535static void
536eb_add_vma(struct i915_execbuffer *eb,
537 unsigned int i, unsigned batch_idx,
538 struct i915_vma *vma)
539{
540 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
541 struct eb_vma *ev = &eb->vma[i];
542
543 GEM_BUG_ON(i915_vma_is_closed(vma))((void)0);
544
545 ev->vma = vma;
546 ev->exec = entry;
547 ev->flags = entry->flags;
548
549 if (eb->lut_size > 0) {
550 ev->handle = entry->handle;
551 hlist_add_head(&ev->node,
552 &eb->buckets[hash_32(entry->handle,
553 eb->lut_size)]);
554 }
555
556 if (entry->relocation_count)
557 list_add_tail(&ev->reloc_link, &eb->relocs);
558
559 /*
560 * SNA is doing fancy tricks with compressing batch buffers, which leads
561 * to negative relocation deltas. Usually that works out ok since the
562 * relocate address is still positive, except when the batch is placed
563 * very low in the GTT. Ensure this doesn't happen.
564 *
565 * Note that actual hangs have only been observed on gen7, but for
566 * paranoia do it everywhere.
567 */
568 if (i == batch_idx) {
569 if (entry->relocation_count &&
570 !(ev->flags & EXEC_OBJECT_PINNED(1<<4)))
571 ev->flags |= __EXEC_OBJECT_NEEDS_BIAS(1UL << (28));
572 if (eb->reloc_cache.has_fence)
573 ev->flags |= EXEC_OBJECT_NEEDS_FENCE(1<<0);
574
575 eb->batch = ev;
576 }
577}
578
579static inline int use_cpu_reloc(const struct reloc_cache *cache,
580 const struct drm_i915_gem_object *obj)
581{
582 if (!i915_gem_object_has_struct_page(obj))
583 return false0;
584
585 if (DBG_FORCE_RELOC0 == FORCE_CPU_RELOC)
586 return true1;
587
588 if (DBG_FORCE_RELOC0 == FORCE_GTT_RELOC)
589 return false0;
590
591 return (cache->has_llc ||
592 obj->cache_dirty ||
593 obj->cache_level != I915_CACHE_NONE);
594}
595
596static int eb_reserve_vma(struct i915_execbuffer *eb,
597 struct eb_vma *ev,
598 u64 pin_flags)
599{
600 struct drm_i915_gem_exec_object2 *entry = ev->exec;
601 struct i915_vma *vma = ev->vma;
602 int err;
603
604 if (drm_mm_node_allocated(&vma->node) &&
605 eb_vma_misplaced(entry, vma, ev->flags)) {
606 err = i915_vma_unbind(vma);
607 if (err)
608 return err;
609 }
610
611 err = i915_vma_pin_ww(vma, &eb->ww,
612 entry->pad_to_size, entry->alignment,
613 eb_pin_flags(entry, ev->flags) | pin_flags);
614 if (err)
615 return err;
616
617 if (entry->offset != vma->node.start) {
618 entry->offset = vma->node.start | UPDATE(1ULL << (7));
619 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
620 }
621
622 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
623 err = i915_vma_pin_fence(vma);
624 if (unlikely(err)__builtin_expect(!!(err), 0)) {
625 i915_vma_unpin(vma);
626 return err;
627 }
628
629 if (vma->fence)
630 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (30));
631 }
632
633 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (31));
634 GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags))((void)0);
635
636 return 0;
637}
638
639static int eb_reserve(struct i915_execbuffer *eb)
640{
641 const unsigned int count = eb->buffer_count;
642 unsigned int pin_flags = PIN_USER(1ULL << (11)) | PIN_NONBLOCK(1ULL << (2));
643 struct list_head last;
644 struct eb_vma *ev;
645 unsigned int i, pass;
646 int err = 0;
647
648 /*
649 * Attempt to pin all of the buffers into the GTT.
650 * This is done in 3 phases:
651 *
652 * 1a. Unbind all objects that do not match the GTT constraints for
653 * the execbuffer (fenceable, mappable, alignment etc).
654 * 1b. Increment pin count for already bound objects.
655 * 2. Bind new objects.
656 * 3. Decrement pin count.
657 *
658 * This avoid unnecessary unbinding of later objects in order to make
659 * room for the earlier objects *unless* we need to defragment.
660 */
661 pass = 0;
662 do {
663 list_for_each_entry(ev, &eb->unbound, bind_link) {
664 err = eb_reserve_vma(eb, ev, pin_flags);
665 if (err)
666 break;
667 }
668 if (err != -ENOSPC28)
669 return err;
670
671 /* Resort *all* the objects into priority order */
672 INIT_LIST_HEAD(&eb->unbound);
673 INIT_LIST_HEAD(&last);
674 for (i = 0; i < count; i++) {
675 unsigned int flags;
676
677 ev = &eb->vma[i];
678 flags = ev->flags;
679 if (flags & EXEC_OBJECT_PINNED(1<<4) &&
680 flags & __EXEC_OBJECT_HAS_PIN(1UL << (31)))
681 continue;
682
683 eb_unreserve_vma(ev);
684
685 if (flags & EXEC_OBJECT_PINNED(1<<4))
686 /* Pinned must have their slot */
687 list_add(&ev->bind_link, &eb->unbound);
688 else if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (29)))
689 /* Map require the lowest 256MiB (aperture) */
690 list_add_tail(&ev->bind_link, &eb->unbound);
691 else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
692 /* Prioritise 4GiB region for restricted bo */
693 list_add(&ev->bind_link, &last);
694 else
695 list_add_tail(&ev->bind_link, &last);
696 }
697 list_splice_tail(&last, &eb->unbound);
698
699 switch (pass++) {
700 case 0:
701 break;
702
703 case 1:
704 /* Too fragmented, unbind everything and retry */
705 mutex_lock(&eb->context->vm->mutex)rw_enter_write(&eb->context->vm->mutex);
706 err = i915_gem_evict_vm(eb->context->vm);
707 mutex_unlock(&eb->context->vm->mutex)rw_exit_write(&eb->context->vm->mutex);
708 if (err)
709 return err;
710 break;
711
712 default:
713 return -ENOSPC28;
714 }
715
716 pin_flags = PIN_USER(1ULL << (11));
717 } while (1);
718}
719
720static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
721{
722 if (eb->args->flags & I915_EXEC_BATCH_FIRST(1<<18))
723 return 0;
724 else
725 return eb->buffer_count - 1;
726}
727
728static int eb_select_context(struct i915_execbuffer *eb)
729{
730 struct i915_gem_context *ctx;
731
732 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
733 if (unlikely(!ctx)__builtin_expect(!!(!ctx), 0))
734 return -ENOENT2;
735
736 eb->gem_context = ctx;
737 if (rcu_access_pointer(ctx->vm)(ctx->vm))
738 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT(1<<1);
739
740 eb->context_flags = 0;
741 if (test_bit(UCONTEXT_NO_ZEROMAP0, &ctx->user_flags))
742 eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS(1UL << (28));
743
744 return 0;
745}
746
747static int __eb_add_lut(struct i915_execbuffer *eb,
748 u32 handle, struct i915_vma *vma)
749{
750 struct i915_gem_context *ctx = eb->gem_context;
751 struct i915_lut_handle *lut;
752 int err;
753
754 lut = i915_lut_handle_alloc();
755 if (unlikely(!lut)__builtin_expect(!!(!lut), 0))
756 return -ENOMEM12;
757
758 i915_vma_get(vma);
759 if (!atomic_fetch_inc(&vma->open_count)__sync_fetch_and_add(&vma->open_count, 1))
760 i915_vma_reopen(vma);
761 lut->handle = handle;
762 lut->ctx = ctx;
763
764 /* Check that the context hasn't been closed in the meantime */
765 err = -EINTR4;
766 if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
767 struct i915_address_space *vm = rcu_access_pointer(ctx->vm)(ctx->vm);
768
769 if (unlikely(vm && vma->vm != vm)__builtin_expect(!!(vm && vma->vm != vm), 0))
770 err = -EAGAIN35; /* user racing with ctx set-vm */
771 else if (likely(!i915_gem_context_is_closed(ctx))__builtin_expect(!!(!i915_gem_context_is_closed(ctx)), 1))
772 err = radix_tree_insert(&ctx->handles_vma, handle, vma);
773 else
774 err = -ENOENT2;
775 if (err == 0) { /* And nor has this handle */
776 struct drm_i915_gem_object *obj = vma->obj;
777
778 spin_lock(&obj->lut_lock)mtx_enter(&obj->lut_lock);
779 if (idr_find(&eb->file->object_idr, handle) == obj) {
780 list_add(&lut->obj_link, &obj->lut_list);
781 } else {
782 radix_tree_delete(&ctx->handles_vma, handle);
783 err = -ENOENT2;
784 }
785 spin_unlock(&obj->lut_lock)mtx_leave(&obj->lut_lock);
786 }
787 mutex_unlock(&ctx->lut_mutex)rw_exit_write(&ctx->lut_mutex);
788 }
789 if (unlikely(err)__builtin_expect(!!(err), 0))
790 goto err;
791
792 return 0;
793
794err:
795 i915_vma_close(vma);
796 i915_vma_put(vma);
797 i915_lut_handle_free(lut);
798 return err;
799}
800
801static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
802{
803 struct i915_address_space *vm = eb->context->vm;
804
805 do {
806 struct drm_i915_gem_object *obj;
807 struct i915_vma *vma;
808 int err;
809
810 rcu_read_lock();
811 vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
812 if (likely(vma && vma->vm == vm)__builtin_expect(!!(vma && vma->vm == vm), 1))
813 vma = i915_vma_tryget(vma);
814 rcu_read_unlock();
815 if (likely(vma)__builtin_expect(!!(vma), 1))
816 return vma;
817
818 obj = i915_gem_object_lookup(eb->file, handle);
819 if (unlikely(!obj)__builtin_expect(!!(!obj), 0))
820 return ERR_PTR(-ENOENT2);
821
822 vma = i915_vma_instance(obj, vm, NULL((void *)0));
823 if (IS_ERR(vma)) {
824 i915_gem_object_put(obj);
825 return vma;
826 }
827
828 err = __eb_add_lut(eb, handle, vma);
829 if (likely(!err)__builtin_expect(!!(!err), 1))
830 return vma;
831
832 i915_gem_object_put(obj);
833 if (err != -EEXIST17)
834 return ERR_PTR(err);
835 } while (1);
836}
837
838static int eb_lookup_vmas(struct i915_execbuffer *eb)
839{
840 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
841 unsigned int batch = eb_batch_index(eb);
842 unsigned int i;
843 int err = 0;
844
845 INIT_LIST_HEAD(&eb->relocs);
846
847 for (i = 0; i < eb->buffer_count; i++) {
848 struct i915_vma *vma;
849
850 vma = eb_lookup_vma(eb, eb->exec[i].handle);
851 if (IS_ERR(vma)) {
852 err = PTR_ERR(vma);
853 goto err;
854 }
855
856 err = eb_validate_vma(eb, &eb->exec[i], vma);
857 if (unlikely(err)__builtin_expect(!!(err), 0)) {
858 i915_vma_put(vma);
859 goto err;
860 }
861
862 eb_add_vma(eb, i, batch, vma);
863 }
864
865 if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)__builtin_expect(!!(eb->batch->flags & (1<<2)), 0)) {
866 drm_dbg(&i915->drm,
867 "Attempting to use self-modifying batch buffer\n");
868 return -EINVAL22;
869 }
870
871 if (range_overflows_t(u64,
872 eb->batch_start_offset, eb->batch_len,
873 eb->batch->vma->size)) {
874 drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
875 return -EINVAL22;
876 }
877
878 if (eb->batch_len == 0)
879 eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
880 if (unlikely(eb->batch_len == 0)__builtin_expect(!!(eb->batch_len == 0), 0)) { /* impossible! */
881 drm_dbg(&i915->drm, "Invalid batch length\n");
882 return -EINVAL22;
883 }
884
885 return 0;
886
887err:
888 eb->vma[i].vma = NULL((void *)0);
889 return err;
890}
891
892static int eb_validate_vmas(struct i915_execbuffer *eb)
893{
894 unsigned int i;
895 int err;
896
897 INIT_LIST_HEAD(&eb->unbound);
898
899 for (i = 0; i < eb->buffer_count; i++) {
900 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
901 struct eb_vma *ev = &eb->vma[i];
902 struct i915_vma *vma = ev->vma;
903
904 err = i915_gem_object_lock(vma->obj, &eb->ww);
905 if (err)
906 return err;
907
908 if (eb_pin_vma(eb, entry, ev)) {
909 if (entry->offset != vma->node.start) {
910 entry->offset = vma->node.start | UPDATE(1ULL << (7));
911 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
912 }
913 } else {
914 eb_unreserve_vma(ev);
915
916 list_add_tail(&ev->bind_link, &eb->unbound);
917 if (drm_mm_node_allocated(&vma->node)) {
918 err = i915_vma_unbind(vma);
919 if (err)
920 return err;
921 }
922 }
923
924 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&((void)0)
925 eb_vma_misplaced(&eb->exec[i], vma, ev->flags))((void)0);
926 }
927
928 if (!list_empty(&eb->unbound))
929 return eb_reserve(eb);
930
931 return 0;
932}
933
934static struct eb_vma *
935eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
936{
937 if (eb->lut_size < 0) {
938 if (handle >= -eb->lut_size)
939 return NULL((void *)0);
940 return &eb->vma[handle];
941 } else {
942 struct hlist_head *head;
943 struct eb_vma *ev;
944
945 head = &eb->buckets[hash_32(handle, eb->lut_size)];
946 hlist_for_each_entry(ev, head, node) {
947 if (ev->handle == handle)
948 return ev;
949 }
950 return NULL((void *)0);
951 }
952}
953
954static void eb_release_vmas(struct i915_execbuffer *eb, bool_Bool final)
955{
956 const unsigned int count = eb->buffer_count;
957 unsigned int i;
958
959 for (i = 0; i < count; i++) {
960 struct eb_vma *ev = &eb->vma[i];
961 struct i915_vma *vma = ev->vma;
962
963 if (!vma)
964 break;
965
966 eb_unreserve_vma(ev);
967
968 if (final)
969 i915_vma_put(vma);
970 }
971
972 eb_unpin_engine(eb);
973}
974
975static void eb_destroy(const struct i915_execbuffer *eb)
976{
977 GEM_BUG_ON(eb->reloc_cache.rq)((void)0);
978
979 if (eb->lut_size > 0)
980 kfree(eb->buckets);
981}
982
983static inline u64
984relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
985 const struct i915_vma *target)
986{
987 return gen8_canonical_addr((int)reloc->delta + target->node.start);
988}
989
990static void reloc_cache_clear(struct reloc_cache *cache)
991{
992 cache->rq = NULL((void *)0);
993 cache->rq_cmd = NULL((void *)0);
994 cache->pool = NULL((void *)0);
995 cache->rq_size = 0;
996}
997
998static void reloc_cache_init(struct reloc_cache *cache,
999 struct drm_i915_privateinteldrm_softc *i915)
1000{
1001 cache->page = -1;
1002 cache->vaddr = 0;
1003 /* Must be a variable in the struct to allow GCC to unroll. */
1004 cache->gen = INTEL_GEN(i915)((&(i915)->__info)->gen);
1005 cache->has_llc = HAS_LLC(i915)((&(i915)->__info)->has_llc);
1006 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915)((&(i915)->__info)->has_64bit_reloc);
1007 cache->has_fence = cache->gen < 4;
1008 cache->needs_unfenced = INTEL_INFO(i915)(&(i915)->__info)->unfenced_needs_alignment;
1009 cache->node.flags = 0;
1010
1011 cache->map = i915->agph;
1012 cache->iot = i915->bst;
1013
1014 reloc_cache_clear(cache);
1015}
1016
1017static inline void *unmask_page(unsigned long p)
1018{
1019 return (void *)(uintptr_t)(p & LINUX_PAGE_MASK(~((1 << 12) - 1)));
1020}
1021
1022static inline unsigned int unmask_flags(unsigned long p)
1023{
1024 return p & ~LINUX_PAGE_MASK(~((1 << 12) - 1));
1025}
1026
1027#define KMAP0x4 0x4 /* after CLFLUSH_FLAGS */
1028
1029static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
1030{
1031 struct drm_i915_privateinteldrm_softc *i915 =
1032 container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
1033 return &i915->ggtt;
1034}
1035
1036static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
1037{
1038 if (!cache->pool)
1039 return;
1040
1041 /*
1042 * This is a bit nasty, normally we keep objects locked until the end
1043 * of execbuffer, but we already submit this, and have to unlock before
1044 * dropping the reference. Fortunately we can only hold 1 pool node at
1045 * a time, so this should be harmless.
1046 */
1047 i915_gem_ww_unlock_single(cache->pool->obj);
1048 intel_gt_buffer_pool_put(cache->pool);
1049 cache->pool = NULL((void *)0);
1050}
1051
1052static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
1053{
1054 struct drm_i915_gem_object *obj = cache->rq->batch->obj;
1055
1056 GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32))((void)0);
1057 cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END(((0x0a) << 23) | (0));
1058
1059 i915_gem_object_flush_map(obj);
1060 i915_gem_object_unpin_map(obj);
1061
1062 intel_gt_chipset_flush(cache->rq->engine->gt);
1063
1064 i915_request_add(cache->rq);
1065 reloc_cache_put_pool(eb, cache);
1066 reloc_cache_clear(cache);
1067
1068 eb->reloc_pool = NULL((void *)0);
1069}
1070
1071static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
1072{
1073 void *vaddr;
1074
1075 if (cache->rq)
1076 reloc_gpu_flush(eb, cache);
1077
1078 if (!cache->vaddr)
1079 return;
1080
1081 vaddr = unmask_page(cache->vaddr);
1082 if (cache->vaddr & KMAP0x4) {
1083 struct drm_i915_gem_object *obj =
1084 (struct drm_i915_gem_object *)cache->node.mm;
1085 if (cache->vaddr & CLFLUSH_AFTER(1UL << (1)))
1086 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1087
1088 kunmap_atomic(vaddr);
1089 i915_gem_object_finish_access(obj);
1090 } else {
1091 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1092
1093 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1094#ifdef __linux__
1095 io_mapping_unmap_atomic((void __iomem *)vaddr);
1096#else
1097 agp_unmap_atomic(cache->map, cache->ioh);
1098#endif
1099
1100 if (drm_mm_node_allocated(&cache->node)) {
1101 ggtt->vm.clear_range(&ggtt->vm,
1102 cache->node.start,
1103 cache->node.size);
1104 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1105 drm_mm_remove_node(&cache->node);
1106 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1107 } else {
1108 i915_vma_unpin((struct i915_vma *)cache->node.mm);
1109 }
1110 }
1111
1112 cache->vaddr = 0;
1113 cache->page = -1;
1114}
1115
1116static void *reloc_kmap(struct drm_i915_gem_object *obj,
1117 struct reloc_cache *cache,
1118 unsigned long pageno)
1119{
1120 void *vaddr;
1121 struct vm_page *page;
1122
1123 if (cache->vaddr) {
1124 kunmap_atomic(unmask_page(cache->vaddr));
1125 } else {
1126 unsigned int flushes;
1127 int err;
1128
1129 err = i915_gem_object_prepare_write(obj, &flushes);
1130 if (err)
1131 return ERR_PTR(err);
1132
1133 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
1134 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK);
1135
1136 cache->vaddr = flushes | KMAP0x4;
1137 cache->node.mm = (void *)obj;
1138 if (flushes)
1139 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1140 }
1141
1142 page = i915_gem_object_get_page(obj, pageno);
1143 if (!obj->mm.dirty)
1144 set_page_dirty(page)x86_atomic_clearbits_u32(&page->pg_flags, 0x00000008);
1145
1146 vaddr = kmap_atomic(page);
1147 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1148 cache->page = pageno;
1149
1150 return vaddr;
1151}
1152
1153static void *reloc_iomap(struct drm_i915_gem_object *obj,
1154 struct i915_execbuffer *eb,
1155 unsigned long page)
1156{
1157 struct reloc_cache *cache = &eb->reloc_cache;
1158 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1159 unsigned long offset;
1160 void *vaddr;
1161
1162 if (cache->vaddr) {
1163 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1164#ifdef __linux__
1165 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1166#else
1167 agp_unmap_atomic(cache->map, cache->ioh);
1168#endif
1169 } else {
1170 struct i915_vma *vma;
1171 int err;
1172
1173 if (i915_gem_object_is_tiled(obj))
1174 return ERR_PTR(-EINVAL22);
1175
1176 if (use_cpu_reloc(cache, obj))
1177 return NULL((void *)0);
1178
1179 err = i915_gem_object_set_to_gtt_domain(obj, true1);
1180 if (err)
1181 return ERR_PTR(err);
1182
1183 vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL((void *)0), 0, 0,
1184 PIN_MAPPABLE(1ULL << (3)) |
1185 PIN_NONBLOCK(1ULL << (2)) /* NOWARN */ |
1186 PIN_NOEVICT(1ULL << (0)));
1187 if (vma == ERR_PTR(-EDEADLK11))
1188 return vma;
1189
1190 if (IS_ERR(vma)) {
1191 memset(&cache->node, 0, sizeof(cache->node))__builtin_memset((&cache->node), (0), (sizeof(cache->node)));
1192 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1193 err = drm_mm_insert_node_in_range
1194 (&ggtt->vm.mm, &cache->node,
1195 PAGE_SIZE(1 << 12), 0, I915_COLOR_UNEVICTABLE(-1),
1196 0, ggtt->mappable_end,
1197 DRM_MM_INSERT_LOW);
1198 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1199 if (err) /* no inactive aperture space, use cpu reloc */
1200 return NULL((void *)0);
1201 } else {
1202 cache->node.start = vma->node.start;
1203 cache->node.mm = (void *)vma;
1204 }
1205 }
1206
1207 offset = cache->node.start;
1208 if (drm_mm_node_allocated(&cache->node)) {
1209 ggtt->vm.insert_page(&ggtt->vm,
1210 i915_gem_object_get_dma_address(obj, page),
1211 offset, I915_CACHE_NONE, 0);
1212 } else {
1213 offset += page << PAGE_SHIFT12;
1214 }
1215
1216#ifdef __linux__
1217 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1218 offset);
1219#else
1220 agp_map_atomic(cache->map, offset, &cache->ioh);
1221 vaddr = bus_space_vaddr(cache->iot, cache->ioh)((cache->iot)->vaddr((cache->ioh)));
1222#endif
1223 cache->page = page;
1224 cache->vaddr = (unsigned long)vaddr;
1225
1226 return vaddr;
1227}
1228
1229static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1230 struct i915_execbuffer *eb,
1231 unsigned long page)
1232{
1233 struct reloc_cache *cache = &eb->reloc_cache;
1234 void *vaddr;
1235
1236 if (cache->page == page) {
1237 vaddr = unmask_page(cache->vaddr);
1238 } else {
1239 vaddr = NULL((void *)0);
1240 if ((cache->vaddr & KMAP0x4) == 0)
1241 vaddr = reloc_iomap(obj, eb, page);
1242 if (!vaddr)
1243 vaddr = reloc_kmap(obj, cache, page);
1244 }
1245
1246 return vaddr;
1247}
1248
1249static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1250{
1251 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))__builtin_expect(!!(flushes & ((1UL << (0)) | (1UL << (1)))), 0)) {
1252 if (flushes & CLFLUSH_BEFORE(1UL << (0))) {
1253 clflushopt(addr);
1254 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1255 }
1256
1257 *addr = value;
1258
1259 /*
1260 * Writes to the same cacheline are serialised by the CPU
1261 * (including clflush). On the write path, we only require
1262 * that it hits memory in an orderly fashion and place
1263 * mb barriers at the start and end of the relocation phase
1264 * to ensure ordering of clflush wrt to the system.
1265 */
1266 if (flushes & CLFLUSH_AFTER(1UL << (1)))
1267 clflushopt(addr);
1268 } else
1269 *addr = value;
1270}
1271
1272static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
1273{
1274 struct drm_i915_gem_object *obj = vma->obj;
1275 int err;
1276
1277 assert_vma_held(vma)do { (void)(&((vma)->resv)->lock.base); } while(0);
1278
1279 if (obj->cache_dirty & ~obj->cache_coherent)
1280 i915_gem_clflush_object(obj, 0);
1281 obj->write_domain = 0;
1282
1283 err = i915_request_await_object(rq, vma->obj, true1);
1284 if (err == 0)
1285 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE(1<<2));
1286
1287 return err;
1288}
1289
1290static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1291 struct intel_engine_cs *engine,
1292 struct i915_vma *vma,
1293 unsigned int len)
1294{
1295 struct reloc_cache *cache = &eb->reloc_cache;
1296 struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
1297 struct i915_request *rq;
1298 struct i915_vma *batch;
1299 u32 *cmd;
1300 int err;
1301
1302 if (!pool) {
1303 pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE(1 << 12));
1304 if (IS_ERR(pool))
1305 return PTR_ERR(pool);
1306 }
1307 eb->reloc_pool = NULL((void *)0);
1308
1309 err = i915_gem_object_lock(pool->obj, &eb->ww);
1310 if (err)
1311 goto err_pool;
1312
1313 cmd = i915_gem_object_pin_map(pool->obj,
1314 cache->has_llc ?
1315 I915_MAP_FORCE_WB :
1316 I915_MAP_FORCE_WC);
1317 if (IS_ERR(cmd)) {
1318 err = PTR_ERR(cmd);
1319 goto err_pool;
1320 }
1321
1322 memset32(cmd, 0, pool->obj->base.size / sizeof(u32));
1323
1324 batch = i915_vma_instance(pool->obj, vma->vm, NULL((void *)0));
1325 if (IS_ERR(batch)) {
1326 err = PTR_ERR(batch);
1327 goto err_unmap;
1328 }
1329
1330 err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER(1ULL << (11)) | PIN_NONBLOCK(1ULL << (2)));
1331 if (err)
1332 goto err_unmap;
1333
1334 if (engine == eb->context->engine) {
1335 rq = i915_request_create(eb->context);
1336 } else {
1337 struct intel_context *ce = eb->reloc_context;
1338
1339 if (!ce) {
1340 ce = intel_context_create(engine);
1341 if (IS_ERR(ce)) {
1342 err = PTR_ERR(ce);
1343 goto err_unpin;
1344 }
1345
1346 i915_vm_put(ce->vm);
1347 ce->vm = i915_vm_get(eb->context->vm);
1348 eb->reloc_context = ce;
1349 }
1350
1351 err = intel_context_pin_ww(ce, &eb->ww);
1352 if (err)
1353 goto err_unpin;
1354
1355 rq = i915_request_create(ce);
1356 intel_context_unpin(ce);
1357 }
1358 if (IS_ERR(rq)) {
1359 err = PTR_ERR(rq);
1360 goto err_unpin;
1361 }
1362
1363 err = intel_gt_buffer_pool_mark_active(pool, rq);
1364 if (err)
1365 goto err_request;
1366
1367 err = reloc_move_to_gpu(rq, vma);
1368 if (err)
1369 goto err_request;
1370
1371 err = eb->engine->emit_bb_start(rq,
1372 batch->node.start, PAGE_SIZE(1 << 12),
1373 cache->gen > 5 ? 0 : I915_DISPATCH_SECURE(1UL << (0)));
1374 if (err)
1375 goto skip_request;
1376
1377 assert_vma_held(batch)do { (void)(&((batch)->resv)->lock.base); } while(0);
1378 err = i915_request_await_object(rq, batch->obj, false0);
1379 if (err == 0)
1380 err = i915_vma_move_to_active(batch, rq, 0);
1381 if (err)
1382 goto skip_request;
1383
1384 rq->batch = batch;
1385 i915_vma_unpin(batch);
1386
1387 cache->rq = rq;
1388 cache->rq_cmd = cmd;
1389 cache->rq_size = 0;
1390 cache->pool = pool;
1391
1392 /* Return with batch mapping (cmd) still pinned */
1393 return 0;
1394
1395skip_request:
1396 i915_request_set_error_once(rq, err);
1397err_request:
1398 i915_request_add(rq);
1399err_unpin:
1400 i915_vma_unpin(batch);
1401err_unmap:
1402 i915_gem_object_unpin_map(pool->obj);
1403err_pool:
1404 eb->reloc_pool = pool;
1405 return err;
1406}
1407
1408static bool_Bool reloc_can_use_engine(const struct intel_engine_cs *engine)
1409{
1410 return engine->class != VIDEO_DECODE_CLASS1 || !IS_GEN(engine->i915, 6)(0 + (&(engine->i915)->__info)->gen == (6));
1411}
1412
1413static u32 *reloc_gpu(struct i915_execbuffer *eb,
1414 struct i915_vma *vma,
1415 unsigned int len)
1416{
1417 struct reloc_cache *cache = &eb->reloc_cache;
1418 u32 *cmd;
1419
1420 if (cache->rq_size > PAGE_SIZE(1 << 12)/sizeof(u32) - (len + 1))
1421 reloc_gpu_flush(eb, cache);
1422
1423 if (unlikely(!cache->rq)__builtin_expect(!!(!cache->rq), 0)) {
1424 int err;
1425 struct intel_engine_cs *engine = eb->engine;
1426
1427 /* If we need to copy for the cmdparser, we will stall anyway */
1428 if (eb_use_cmdparser(eb))
1429 return ERR_PTR(-EWOULDBLOCK35);
1430
1431 if (!reloc_can_use_engine(engine)) {
1432 engine = engine->gt->engine_class[COPY_ENGINE_CLASS3][0];
1433 if (!engine)
1434 return ERR_PTR(-ENODEV19);
1435 }
1436
1437 err = __reloc_gpu_alloc(eb, engine, vma, len);
1438 if (unlikely(err)__builtin_expect(!!(err), 0))
1439 return ERR_PTR(err);
1440 }
1441
1442 cmd = cache->rq_cmd + cache->rq_size;
1443 cache->rq_size += len;
1444
1445 return cmd;
1446}
1447
1448static inline bool_Bool use_reloc_gpu(struct i915_vma *vma)
1449{
1450 if (DBG_FORCE_RELOC0 == FORCE_GPU_RELOC)
1451 return true1;
1452
1453 if (DBG_FORCE_RELOC0)
1454 return false0;
1455
1456 return !dma_resv_test_signaled_rcu(vma->resv, true1);
1457}
1458
1459static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
1460{
1461 struct vm_page *page;
1462 unsigned long addr;
1463
1464 GEM_BUG_ON(vma->pages != vma->obj->mm.pages)((void)0);
1465
1466 page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT12);
1467 addr = PFN_PHYS(page_to_pfn(page))(((((page)->phys_addr) / (1 << 12))) << 12);
1468 GEM_BUG_ON(overflows_type(addr, u32))((void)0); /* expected dma32 */
1469
1470 return addr + offset_in_page(offset)((vaddr_t)(offset) & ((1 << 12) - 1));
1471}
1472
1473static int __reloc_entry_gpu(struct i915_execbuffer *eb,
1474 struct i915_vma *vma,
1475 u64 offset,
1476 u64 target_addr)
1477{
1478 const unsigned int gen = eb->reloc_cache.gen;
1479 unsigned int len;
1480 u32 *batch;
1481 u64 addr;
1482
1483 if (gen >= 8)
1484 len = offset & 7 ? 8 : 5;
1485 else if (gen >= 4)
1486 len = 4;
1487 else
1488 len = 3;
1489
1490 batch = reloc_gpu(eb, vma, len);
1491 if (batch == ERR_PTR(-EDEADLK11))
1492 return -EDEADLK11;
1493 else if (IS_ERR(batch))
1494 return false0;
1495
1496 addr = gen8_canonical_addr(vma->node.start + offset);
1497 if (gen >= 8) {
1498 if (offset & 7) {
1499 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1500 *batch++ = lower_32_bits(addr)((u32)(addr));
1501 *batch++ = upper_32_bits(addr)((u32)(((addr) >> 16) >> 16));
1502 *batch++ = lower_32_bits(target_addr)((u32)(target_addr));
1503
1504 addr = gen8_canonical_addr(addr + 4);
1505
1506 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1507 *batch++ = lower_32_bits(addr)((u32)(addr));
1508 *batch++ = upper_32_bits(addr)((u32)(((addr) >> 16) >> 16));
1509 *batch++ = upper_32_bits(target_addr)((u32)(((target_addr) >> 16) >> 16));
1510 } else {
1511 *batch++ = (MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2)) | (1 << 21)) + 1;
1512 *batch++ = lower_32_bits(addr)((u32)(addr));
1513 *batch++ = upper_32_bits(addr)((u32)(((addr) >> 16) >> 16));
1514 *batch++ = lower_32_bits(target_addr)((u32)(target_addr));
1515 *batch++ = upper_32_bits(target_addr)((u32)(((target_addr) >> 16) >> 16));
1516 }
1517 } else if (gen >= 6) {
1518 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1519 *batch++ = 0;
1520 *batch++ = addr;
1521 *batch++ = target_addr;
1522 } else if (IS_I965G(eb->i915)IS_PLATFORM(eb->i915, INTEL_I965G)) {
1523 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1524 *batch++ = 0;
1525 *batch++ = vma_phys_addr(vma, offset);
1526 *batch++ = target_addr;
1527 } else if (gen >= 4) {
1528 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2)) | MI_USE_GGTT(1 << 22);
1529 *batch++ = 0;
1530 *batch++ = addr;
1531 *batch++ = target_addr;
1532 } else if (gen >= 3 &&
1533 !(IS_I915G(eb->i915)IS_PLATFORM(eb->i915, INTEL_I915G) || IS_I915GM(eb->i915)IS_PLATFORM(eb->i915, INTEL_I915GM))) {
1534 *batch++ = MI_STORE_DWORD_IMM(((0x20) << 23) | (1)) | MI_MEM_VIRTUAL(1 << 22);
1535 *batch++ = addr;
1536 *batch++ = target_addr;
1537 } else {
1538 *batch++ = MI_STORE_DWORD_IMM(((0x20) << 23) | (1));
1539 *batch++ = vma_phys_addr(vma, offset);
1540 *batch++ = target_addr;
1541 }
1542
1543 return true1;
1544}
1545
1546static int reloc_entry_gpu(struct i915_execbuffer *eb,
1547 struct i915_vma *vma,
1548 u64 offset,
1549 u64 target_addr)
1550{
1551 if (eb->reloc_cache.vaddr)
1552 return false0;
1553
1554 if (!use_reloc_gpu(vma))
1555 return false0;
1556
1557 return __reloc_entry_gpu(eb, vma, offset, target_addr);
1558}
1559
1560static u64
1561relocate_entry(struct i915_vma *vma,
1562 const struct drm_i915_gem_relocation_entry *reloc,
1563 struct i915_execbuffer *eb,
1564 const struct i915_vma *target)
1565{
1566 u64 target_addr = relocation_target(reloc, target);
1567 u64 offset = reloc->offset;
1568 int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
1569
1570 if (reloc_gpu < 0)
1571 return reloc_gpu;
1572
1573 if (!reloc_gpu) {
1574 bool_Bool wide = eb->reloc_cache.use_64bit_reloc;
1575 void *vaddr;
1576
1577repeat:
1578 vaddr = reloc_vaddr(vma->obj, eb,
1579 offset >> PAGE_SHIFT12);
1580 if (IS_ERR(vaddr))
1581 return PTR_ERR(vaddr);
1582
1583 GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)))((void)0);
1584 clflush_write32(vaddr + offset_in_page(offset)((vaddr_t)(offset) & ((1 << 12) - 1)),
1585 lower_32_bits(target_addr)((u32)(target_addr)),
1586 eb->reloc_cache.vaddr);
1587
1588 if (wide) {
1589 offset += sizeof(u32);
1590 target_addr >>= 32;
1591 wide = false0;
1592 goto repeat;
1593 }
1594 }
1595
1596 return target->node.start | UPDATE(1ULL << (7));
1597}
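
As an illustration of the CPU relocation write performed in relocate_entry() above, the following standalone C sketch stores a 64-bit presumed address as two 32-bit writes, the low half at the relocation offset and the high half four bytes later, mirroring the `wide`/`repeat` loop. The buffer, offsets and function names are invented for the example; this is a sketch, not driver code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Write target_addr into obj at offset the way relocate_entry() does on the
 * CPU path: one 32-bit store, or two (low then high) when 64-bit
 * relocations are in use. */
static void cpu_reloc_write(uint8_t *obj, uint64_t offset,
                            uint64_t target_addr, int use_64bit_reloc)
{
	int wide = use_64bit_reloc;
	uint32_t lower;

	for (;;) {
		lower = (uint32_t)target_addr;	/* low 32 bits of the remaining value */
		memcpy(obj + offset, &lower, sizeof(lower));

		if (!wide)
			break;

		offset += sizeof(uint32_t);	/* high half goes 4 bytes further on */
		target_addr >>= 32;
		wide = 0;
	}
}

int main(void)
{
	uint8_t obj[16] = {0};
	uint32_t lo, hi;

	cpu_reloc_write(obj, 4, 0x00000001a2b3c4d5ull, 1);
	memcpy(&lo, &obj[4], sizeof(lo));
	memcpy(&hi, &obj[8], sizeof(hi));
	printf("%08x %08x\n", lo, hi);	/* prints a2b3c4d5 00000001 */
	return 0;
}
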
1598
1599static u64
1600eb_relocate_entry(struct i915_execbuffer *eb,
1601 struct eb_vma *ev,
1602 const struct drm_i915_gem_relocation_entry *reloc)
1603{
1604 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
1605 struct eb_vma *target;
1606 int err;
1607
1608 /* we already hold a reference to all valid objects */
1609 target = eb_get_vma(eb, reloc->target_handle);
1610 if (unlikely(!target)__builtin_expect(!!(!target), 0))
1611 return -ENOENT2;
1612
1613 /* Validate that the target is in a valid r/w GPU domain */
1614 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))__builtin_expect(!!(reloc->write_domain & (reloc->write_domain
- 1)), 0)
) {
1615 drm_dbg(&i915->drm, "reloc with multiple write domains: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1616 "target %d offset %d "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1617 "read %08x write %08x",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1618 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1619 (int) reloc->offset,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1620 reloc->read_domains,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1621 reloc->write_domain)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
;
1622 return -EINVAL22;
1623 }
1624 if (unlikely((reloc->write_domain | reloc->read_domains)__builtin_expect(!!((reloc->write_domain | reloc->read_domains
) & ~(0x00000002 | 0x00000004 | 0x00000008 | 0x00000010 |
0x00000020)), 0)
1625 & ~I915_GEM_GPU_DOMAINS)__builtin_expect(!!((reloc->write_domain | reloc->read_domains
) & ~(0x00000002 | 0x00000004 | 0x00000008 | 0x00000010 |
0x00000020)), 0)
) {
1626 drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1627 "target %d offset %d "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1628 "read %08x write %08x",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1629 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1630 (int) reloc->offset,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1631 reloc->read_domains,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1632 reloc->write_domain)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
;
1633 return -EINVAL22;
1634 }
1635
1636 if (reloc->write_domain) {
1637 target->flags |= EXEC_OBJECT_WRITE(1<<2);
1638
1639 /*
1640 * Sandybridge PPGTT errata: We need a global gtt mapping
1641 * for MI and pipe_control writes because the gpu doesn't
1642 * properly redirect them through the ppgtt for non_secure
1643 * batchbuffers.
1644 */
1645 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION0x00000010 &&
1646 IS_GEN(eb->i915, 6)(0 + (&(eb->i915)->__info)->gen == (6))) {
1647 err = i915_vma_bind(target->vma,
1648 target->vma->obj->cache_level,
1649 PIN_GLOBAL(1ULL << (10)), NULL((void *)0));
1650 if (err)
1651 return err;
1652 }
1653 }
1654
1655 /*
1656 * If the relocation already has the right value in it, no
1657 * more work needs to be done.
1658 */
1659 if (!DBG_FORCE_RELOC0 &&
1660 gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
1661 return 0;
1662
1663 /* Check that the relocation address is valid... */
1664 if (unlikely(reloc->offset >__builtin_expect(!!(reloc->offset > ev->vma->size
- (eb->reloc_cache.use_64bit_reloc ? 8 : 4)), 0)
1665 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))__builtin_expect(!!(reloc->offset > ev->vma->size
- (eb->reloc_cache.use_64bit_reloc ? 8 : 4)), 0)
) {
1666 drm_dbg(&i915->drm, "Relocation beyond object bounds: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1667 "target %d offset %d size %d.\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1668 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1669 (int)reloc->offset,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1670 (int)ev->vma->size)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
;
1671 return -EINVAL22;
1672 }
1673 if (unlikely(reloc->offset & 3)__builtin_expect(!!(reloc->offset & 3), 0)) {
1674 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1675 "target %d offset %d.\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1676 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1677 (int)reloc->offset)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
;
1678 return -EINVAL22;
1679 }
1680
1681 /*
1682 * If we write into the object, we need to force the synchronisation
1683 * barrier, either with an asynchronous clflush or if we executed the
1684 * patching using the GPU (though that should be serialised by the
1685 * timeline). To be completely sure, and since we are required to
1686 * do relocations (meaning we are already stalling), disable the user's
1687 * opt-out of our synchronisation.
1688 */
1689 ev->flags &= ~EXEC_OBJECT_ASYNC(1<<6);
1690
1691 /* and update the user's relocation entry */
1692 return relocate_entry(ev->vma, reloc, eb, target->vma);
1693}
1694
1695static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
1696{
1697#define N_RELOC(x)((x) / sizeof(struct drm_i915_gem_relocation_entry)) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1698 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)((512) / sizeof(struct drm_i915_gem_relocation_entry))];
1699 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1700 struct drm_i915_gem_relocation_entry __user *urelocs =
1701 u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1702 unsigned long remain = entry->relocation_count;
1703
1704 if (unlikely(remain > N_RELOC(ULONG_MAX))__builtin_expect(!!(remain > ((0xffffffffffffffffUL) / sizeof
(struct drm_i915_gem_relocation_entry))), 0)
)
1705 return -EINVAL22;
1706
1707 /*
1708 * We must check that the entire relocation array is safe
1709 * to read. However, if the array is not writable the user loses
1710 * the updated relocation values.
1711 */
1712 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))__builtin_expect(!!(!access_ok(urelocs, remain * sizeof(*urelocs
))), 0)
)
1713 return -EFAULT14;
1714
1715 do {
1716 struct drm_i915_gem_relocation_entry *r = stack;
1717 unsigned int count =
1718 min_t(unsigned long, remain, ARRAY_SIZE(stack))({ unsigned long __min_a = (remain); unsigned long __min_b = (
(sizeof((stack)) / sizeof((stack)[0]))); __min_a < __min_b
? __min_a : __min_b; })
;
1719 unsigned int copied;
1720
1721 /*
1722 * This is the fast path and we cannot handle a pagefault
1723 * whilst holding the struct mutex lest the user pass in the
1724 * relocations contained within a mmaped bo. In such a case
1725 * the page fault handler would call i915_gem_fault() and
1726 * we would try to acquire the struct mutex again. Obviously
1727 * this is bad and so lockdep complains vehemently.
1728 */
1729 pagefault_disable();
1730 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1731 pagefault_enable();
1732 if (unlikely(copied)__builtin_expect(!!(copied), 0)) {
1733 remain = -EFAULT14;
1734 goto out;
1735 }
1736
1737 remain -= count;
1738 do {
1739 u64 offset = eb_relocate_entry(eb, ev, r);
1740
1741 if (likely(offset == 0)__builtin_expect(!!(offset == 0), 1)) {
1742 } else if ((s64)offset < 0) {
1743 remain = (int)offset;
1744 goto out;
1745 } else {
1746 /*
1747 * Note that reporting an error now
1748 * leaves everything in an inconsistent
1749 * state as we have *already* changed
1750 * the relocation value inside the
1751 * object. As we have not changed the
1752 * reloc.presumed_offset and will not
1753 * change the execobject.offset, on the
1754 * next call we may not rewrite the value
1755 * inside the object, leaving it
1756 * dangling and causing a GPU hang. Unless
1757 * userspace dynamically rebuilds the
1758 * relocations on each execbuf rather than
1759 * presume a static tree.
1760 *
1761 * We did previously check if the relocations
1762 * were writable (access_ok), so an error now
1763 * would be a strange race with mprotect,
1764 * having already demonstrated that we
1765 * can read from this userspace address.
1766 */
1767 offset = gen8_canonical_addr(offset & ~UPDATE(1ULL << (7)));
1768 __put_user(offset,({ __typeof(((offset))) __tmp = ((offset)); -copyout(&(__tmp
), (&urelocs[r - stack].presumed_offset), sizeof(__tmp));
})
1769 &urelocs[r - stack].presumed_offset)({ __typeof(((offset))) __tmp = ((offset)); -copyout(&(__tmp
), (&urelocs[r - stack].presumed_offset), sizeof(__tmp));
})
;
1770 }
1771 } while (r++, --count);
1772 urelocs += ARRAY_SIZE(stack)(sizeof((stack)) / sizeof((stack)[0]));
1773 } while (remain);
1774out:
1775 reloc_cache_reset(&eb->reloc_cache, eb);
1776 return remain;
1777}
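
eb_relocate_vma() above stages the user's relocation array through a small on-stack buffer, copying and processing it in bounded batches while page faults are disabled. Below is a minimal sketch of that batching pattern, with memcpy standing in for __copy_from_user_inatomic() and a placeholder handle_item(); all names here are assumptions for illustration only.

#include <stddef.h>
#include <string.h>

struct item { unsigned long payload; };	/* placeholder element type */

static void handle_item(const struct item *it) { (void)it; }

/* Process a large input array in fixed-size batches staged through a small
 * stack buffer, as eb_relocate_vma() does with its on-stack relocations. */
static void process_in_batches(const struct item *src, size_t count)
{
	struct item stack[64];		/* small, bounded scratch space */

	while (count) {
		size_t n = count < 64 ? count : 64;

		memcpy(stack, src, n * sizeof(*stack));	/* stands in for the atomic user copy */
		for (size_t i = 0; i < n; i++)
			handle_item(&stack[i]);

		src += n;
		count -= n;
	}
}
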
1778
1779static int
1780eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
1781{
1782 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1783 struct drm_i915_gem_relocation_entry *relocs =
1784 u64_to_ptr(typeof(*relocs), entry->relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(entry->relocs_ptr); }
)
;
1785 unsigned int i;
1786 int err;
1787
1788 for (i = 0; i < entry->relocation_count; i++) {
1789 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
1790
1791 if ((s64)offset < 0) {
1792 err = (int)offset;
1793 goto err;
1794 }
1795 }
1796 err = 0;
1797err:
1798 reloc_cache_reset(&eb->reloc_cache, eb);
1799 return err;
1800}
1801
1802static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1803{
1804 const char __user *addr, *end;
1805 unsigned long size;
1806 char __maybe_unused__attribute__((__unused__)) c;
1807
1808 size = entry->relocation_count;
1809 if (size == 0)
1810 return 0;
1811
1812 if (size > N_RELOC(ULONG_MAX)((0xffffffffffffffffUL) / sizeof(struct drm_i915_gem_relocation_entry
))
)
1813 return -EINVAL22;
1814
1815 addr = u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1816 size *= sizeof(struct drm_i915_gem_relocation_entry);
1817 if (!access_ok(addr, size))
1818 return -EFAULT14;
1819
1820 end = addr + size;
1821 for (; addr < end; addr += PAGE_SIZE(1 << 12)) {
1822 int err = __get_user(c, addr)-copyin((addr), &((c)), sizeof((c)));
1823 if (err)
1824 return err;
1825 }
1826 return __get_user(c, end - 1)-copyin((end - 1), &((c)), sizeof((c)));
1827}
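
check_relocations() walks the user array one page at a time, reading a single byte per page (plus the final byte) so the whole range is faulted in and verified readable before the pagefault-disabled fast path runs. A userspace analogue of that prefaulting loop, with a made-up PAGE_SZ and plain pointer reads standing in for the __get_user()/copyin() calls:

#include <stddef.h>

#define PAGE_SZ 4096u	/* illustrative; the driver uses the real page size */

/* Touch one byte in every page of [addr, addr + size), and the last byte,
 * so later accesses in a no-fault section find the pages resident. */
static void prefault_range(const volatile char *addr, size_t size)
{
	const volatile char *end = addr + size;
	char c;

	if (size == 0)
		return;

	for (; addr < end; addr += PAGE_SZ)
		c = *addr;
	c = *(end - 1);
	(void)c;
}

In the driver the per-page read doubles as an access check and returns -EFAULT on failure rather than faulting the caller.
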
1828
1829static int eb_copy_relocations(const struct i915_execbuffer *eb)
1830{
1831 struct drm_i915_gem_relocation_entry *relocs;
1832 const unsigned int count = eb->buffer_count;
1833 unsigned int i;
1834 int err;
1835
1836 for (i = 0; i < count; i++) {
1837 const unsigned int nreloc = eb->exec[i].relocation_count;
1838 struct drm_i915_gem_relocation_entry __user *urelocs;
1839 unsigned long size;
1840 unsigned long copied;
1841
1842 if (nreloc == 0)
1843 continue;
1844
1845 err = check_relocations(&eb->exec[i]);
1846 if (err)
1847 goto err;
1848
1849 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr)((void *)(uintptr_t)(eb->exec[i].relocs_ptr));
1850 size = nreloc * sizeof(*relocs);
1851
1852 relocs = kvmalloc_array(size, 1, GFP_KERNEL(0x0001 | 0x0004));
1853 if (!relocs) {
1854 err = -ENOMEM12;
1855 goto err;
1856 }
1857
1858 /* copy_from_user is limited to < 4GiB */
1859 copied = 0;
1860 do {
1861 unsigned int len =
1862 min_t(u64, BIT_ULL(31), size - copied)({ u64 __min_a = ((1ULL << (31))); u64 __min_b = (size -
copied); __min_a < __min_b ? __min_a : __min_b; })
;
1863
1864 if (__copy_from_user((char *)relocs + copied,
1865 (char __user *)urelocs + copied,
1866 len))
1867 goto end;
1868
1869 copied += len;
1870 } while (copied < size);
1871
1872 /*
1873 * As we do not update the known relocation offsets after
1874 * relocating (due to the complexities in lock handling),
1875 * we need to mark them as invalid now so that we force the
1876 * relocation processing next time. Just in case the target
1877 * object is evicted and then rebound into its old
1878 * presumed_offset before the next execbuffer - if that
1879 * happened we would make the mistake of assuming that the
1880 * relocations were valid.
1881 */
1882 if (!user_access_begin(urelocs, size)access_ok(urelocs, size))
1883 goto end;
1884
1885 for (copied = 0; copied < nreloc; copied++)
1886 unsafe_put_user(-1,({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
1887 &urelocs[copied].presumed_offset,({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
1888 end_user)({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
;
1889 user_access_end();
1890
1891 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1892 }
1893
1894 return 0;
1895
1896end_user:
1897 user_access_end();
1898end:
1899 kvfree(relocs);
1900 err = -EFAULT14;
1901err:
1902 while (i--) {
1903 relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(eb->exec[i].relocs_ptr
); })
;
1904 if (eb->exec[i].relocation_count)
1905 kvfree(relocs);
1906 }
1907 return err;
1908}
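
The inner loop of eb_copy_relocations() copies the relocation array in pieces of at most 2 GiB because, as its comment notes, copy_from_user is limited to sizes below 4 GiB. The chunking pattern in isolation, with memcpy standing in for __copy_from_user and error handling omitted; a sketch, not the driver's code:

#include <stdint.h>
#include <string.h>

/* Copy `size` bytes from src to dst in pieces no larger than 2 GiB,
 * mirroring the copied/len loop above. */
static void copy_in_chunks(void *dst, const void *src, uint64_t size)
{
	uint64_t copied = 0;

	while (copied < size) {
		uint64_t len = size - copied;

		if (len > (1ull << 31))
			len = 1ull << 31;	/* BIT_ULL(31): 2 GiB cap per call */

		memcpy((char *)dst + copied, (const char *)src + copied, (size_t)len);
		copied += len;
	}
}
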
1909
1910static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1911{
1912 const unsigned int count = eb->buffer_count;
1913 unsigned int i;
1914
1915 for (i = 0; i < count; i++) {
1916 int err;
1917
1918 err = check_relocations(&eb->exec[i]);
1919 if (err)
1920 return err;
1921 }
1922
1923 return 0;
1924}
1925
1926static noinline__attribute__((__noinline__)) int eb_relocate_parse_slow(struct i915_execbuffer *eb,
1927 struct i915_request *rq)
1928{
1929 bool_Bool have_copy = false0;
1930 struct eb_vma *ev;
1931 int err = 0;
1932
1933repeat:
1934 if (signal_pending(current)(((({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" :
"=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_curproc)->p_siglist | (({struct cpu_info
*__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof
(struct cpu_info, ci_self))); __ci;})->ci_curproc)->p_p
->ps_siglist) & ~(({struct cpu_info *__ci; asm volatile
("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct
cpu_info, ci_self))); __ci;})->ci_curproc)->p_sigmask)
) {
1935 err = -ERESTARTSYS4;
1936 goto out;
1937 }
1938
1939 /* We may process another execbuffer during the unlock... */
1940 eb_release_vmas(eb, false0);
1941 i915_gem_ww_ctx_fini(&eb->ww);
1942
1943 if (rq) {
1944 /* nonblocking is always false */
1945 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE(1UL << (0)),
1946 MAX_SCHEDULE_TIMEOUT(0x7fffffff)) < 0) {
1947 i915_request_put(rq);
1948 rq = NULL((void *)0);
1949
1950 err = -EINTR4;
1951 goto err_relock;
1952 }
1953
1954 i915_request_put(rq);
1955 rq = NULL((void *)0);
1956 }
1957
1958 /*
1959 * We take 3 passes through the slowpath.
1960 *
1961 * 1 - we try to just prefault all the user relocation entries and
1962 * then attempt to reuse the atomic pagefault disabled fast path again.
1963 *
1964 * 2 - we copy the user entries to a local buffer here outside of the
1965 * lock and allow ourselves to wait upon any rendering before
1966 * relocations
1967 *
1968 * 3 - we already have a local copy of the relocation entries, but
1969 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1970 */
1971 if (!err) {
1972 err = eb_prefault_relocations(eb);
1973 } else if (!have_copy) {
1974 err = eb_copy_relocations(eb);
1975 have_copy = err == 0;
1976 } else {
1977 cond_resched()do { if (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0"
: "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_schedstate.spc_schedflags & 0x0002) yield
(); } while (0)
;
1978 err = 0;
1979 }
1980
1981 if (!err)
1982 flush_workqueue(eb->i915->mm.userptr_wq);
1983
1984err_relock:
1985 i915_gem_ww_ctx_init(&eb->ww, true1);
1986 if (err)
1987 goto out;
1988
1989 /* reacquire the objects */
1990repeat_validate:
1991 rq = eb_pin_engine(eb, false0);
1992 if (IS_ERR(rq)) {
1993 err = PTR_ERR(rq);
1994 rq = NULL((void *)0);
1995 goto err;
1996 }
1997
1998 /* We didn't throttle, should be NULL */
1999 GEM_WARN_ON(rq)({ __builtin_expect(!!(!!(rq)), 0); });
2000
2001 err = eb_validate_vmas(eb);
2002 if (err)
2003 goto err;
2004
2005 GEM_BUG_ON(!eb->batch)((void)0);
2006
2007 list_for_each_entry(ev, &eb->relocs, reloc_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->reloc_link
) *__mptr = ((&eb->relocs)->next); (__typeof(*ev) *
)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), reloc_link
) );}); &ev->reloc_link != (&eb->relocs); ev = (
{ const __typeof( ((__typeof(*ev) *)0)->reloc_link ) *__mptr
= (ev->reloc_link.next); (__typeof(*ev) *)( (char *)__mptr
- __builtin_offsetof(__typeof(*ev), reloc_link) );}))
{
2008 if (!have_copy) {
2009 pagefault_disable();
2010 err = eb_relocate_vma(eb, ev);
2011 pagefault_enable();
2012 if (err)
2013 break;
2014 } else {
2015 err = eb_relocate_vma_slow(eb, ev);
2016 if (err)
2017 break;
2018 }
2019 }
2020
2021 if (err == -EDEADLK11)
2022 goto err;
2023
2024 if (err && !have_copy)
2025 goto repeat;
2026
2027 if (err)
2028 goto err;
2029
2030 /* as last step, parse the command buffer */
2031 err = eb_parse(eb);
2032 if (err)
2033 goto err;
2034
2035 /*
2036 * Leave the user relocations as they are; this is the painfully slow path,
2037 * and we want to avoid the complication of dropping the lock whilst
2038 * having buffers reserved in the aperture and so causing spurious
2039 * ENOSPC for random operations.
2040 */
2041
2042err:
2043 if (err == -EDEADLK11) {
2044 eb_release_vmas(eb, false0);
2045 err = i915_gem_ww_ctx_backoff(&eb->ww);
2046 if (!err)
2047 goto repeat_validate;
2048 }
2049
2050 if (err == -EAGAIN35)
2051 goto repeat;
2052
2053out:
2054 if (have_copy) {
2055 const unsigned int count = eb->buffer_count;
2056 unsigned int i;
2057
2058 for (i = 0; i < count; i++) {
2059 const struct drm_i915_gem_exec_object2 *entry =
2060 &eb->exec[i];
2061 struct drm_i915_gem_relocation_entry *relocs;
2062
2063 if (!entry->relocation_count)
2064 continue;
2065
2066 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(entry->relocs_ptr); }
)
;
2067 kvfree(relocs);
2068 }
2069 }
2070
2071 if (rq)
2072 i915_request_put(rq);
2073
2074 return err;
2075}
2076
2077static int eb_relocate_parse(struct i915_execbuffer *eb)
2078{
2079 int err;
2080 struct i915_request *rq = NULL((void *)0);
2081 bool_Bool throttle = true1;
2082
2083retry:
2084 rq = eb_pin_engine(eb, throttle);
2085 if (IS_ERR(rq)) {
2086 err = PTR_ERR(rq);
2087 rq = NULL((void *)0);
2088 if (err != -EDEADLK11)
2089 return err;
2090
2091 goto err;
2092 }
2093
2094 if (rq) {
2095#ifdef __linux__
2096 bool_Bool nonblock = eb->file->filp->f_flags & O_NONBLOCK0x0004;
2097#else
2098 bool_Bool nonblock = eb->file->filp->f_flag & FNONBLOCK0x0004;
2099#endif
2100
2101 /* Need to drop all locks now for throttling, take slowpath */
2102 err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE(1UL << (0)), 0);
2103 if (err == -ETIME60) {
2104 if (nonblock) {
2105 err = -EWOULDBLOCK35;
2106 i915_request_put(rq);
2107 goto err;
2108 }
2109 goto slow;
2110 }
2111 i915_request_put(rq);
2112 rq = NULL((void *)0);
2113 }
2114
2115 /* only throttle once, even if we didn't need to throttle */
2116 throttle = false0;
2117
2118 err = eb_validate_vmas(eb);
2119 if (err == -EAGAIN35)
2120 goto slow;
2121 else if (err)
2122 goto err;
2123
2124 /* The objects are in their final locations, apply the relocations. */
2125 if (eb->args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
2126 struct eb_vma *ev;
2127
2128 list_for_each_entry(ev, &eb->relocs, reloc_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->reloc_link
) *__mptr = ((&eb->relocs)->next); (__typeof(*ev) *
)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), reloc_link
) );}); &ev->reloc_link != (&eb->relocs); ev = (
{ const __typeof( ((__typeof(*ev) *)0)->reloc_link ) *__mptr
= (ev->reloc_link.next); (__typeof(*ev) *)( (char *)__mptr
- __builtin_offsetof(__typeof(*ev), reloc_link) );}))
{
2129 err = eb_relocate_vma(eb, ev);
2130 if (err)
2131 break;
2132 }
2133
2134 if (err == -EDEADLK11)
2135 goto err;
2136 else if (err)
2137 goto slow;
2138 }
2139
2140 if (!err)
2141 err = eb_parse(eb);
2142
2143err:
2144 if (err == -EDEADLK11) {
2145 eb_release_vmas(eb, false0);
2146 err = i915_gem_ww_ctx_backoff(&eb->ww);
2147 if (!err)
2148 goto retry;
2149 }
2150
2151 return err;
2152
2153slow:
2154 err = eb_relocate_parse_slow(eb, rq);
2155 if (err)
2156 /*
2157 * If the user expects the execobject.offset and
2158 * reloc.presumed_offset to be an exact match,
2159 * as for using NO_RELOC, then we cannot update
2160 * the execobject.offset until we have completed
2161 * relocation.
2162 */
2163 eb->args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
2164
2165 return err;
2166}
2167
2168static int eb_move_to_gpu(struct i915_execbuffer *eb)
2169{
2170 const unsigned int count = eb->buffer_count;
2171 unsigned int i = count;
2172 int err = 0;
2173
2174 while (i--) {
2175 struct eb_vma *ev = &eb->vma[i];
2176 struct i915_vma *vma = ev->vma;
2177 unsigned int flags = ev->flags;
2178 struct drm_i915_gem_object *obj = vma->obj;
2179
2180 assert_vma_held(vma)do { (void)(&((vma)->resv)->lock.base); } while(0);
2181
2182 if (flags & EXEC_OBJECT_CAPTURE(1<<7)) {
2183 struct i915_capture_list *capture;
2184
2185 capture = kmalloc(sizeof(*capture), GFP_KERNEL(0x0001 | 0x0004));
2186 if (capture) {
2187 capture->next = eb->request->capture_list;
2188 capture->vma = vma;
2189 eb->request->capture_list = capture;
2190 }
2191 }
2192
2193 /*
2194 * If the GPU is not _reading_ through the CPU cache, we need
2195 * to make sure that any writes (both previous GPU writes from
2196 * before a change in snooping levels and normal CPU writes)
2197 * caught in that cache are flushed to main memory.
2198 *
2199 * We want to say
2200 * obj->cache_dirty &&
2201 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
2202 * but gcc's optimiser doesn't handle that as well and emits
2203 * two jumps instead of one. Maybe one day...
2204 */
2205 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)__builtin_expect(!!(obj->cache_dirty & ~obj->cache_coherent
), 0)
) {
2206 if (i915_gem_clflush_object(obj, 0))
2207 flags &= ~EXEC_OBJECT_ASYNC(1<<6);
2208 }
2209
2210 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC(1<<6))) {
2211 err = i915_request_await_object
2212 (eb->request, obj, flags & EXEC_OBJECT_WRITE(1<<2));
2213 }
2214
2215 if (err == 0)
2216 err = i915_vma_move_to_active(vma, eb->request, flags);
2217 }
2218
2219 if (unlikely(err)__builtin_expect(!!(err), 0))
2220 goto err_skip;
2221
2222 /* Unconditionally flush any chipset caches (for streaming writes). */
2223 intel_gt_chipset_flush(eb->engine->gt);
2224 return 0;
2225
2226err_skip:
2227 i915_request_set_error_once(eb->request, err);
2228 return err;
2229}
2230
2231static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
2232{
2233 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS((-((1 << 21) << 1)) | (3<<6) | (1<<15
))
)
2234 return -EINVAL22;
2235
2236 /* Kernel clipping was a DRI1 misfeature */
2237 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY(1<<19) |
2238 I915_EXEC_USE_EXTENSIONS(1 << 21)))) {
2239 if (exec->num_cliprects || exec->cliprects_ptr)
2240 return -EINVAL22;
2241 }
2242
2243 if (exec->DR4 == 0xffffffff) {
2244 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n")__drm_dbg(DRM_UT_CORE, "UXA submitting garbage DR4, fixing up\n"
)
;
2245 exec->DR4 = 0;
2246 }
2247 if (exec->DR1 || exec->DR4)
2248 return -EINVAL22;
2249
2250 if ((exec->batch_start_offset | exec->batch_len) & 0x7)
2251 return -EINVAL22;
2252
2253 return 0;
2254}
2255
2256static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
2257{
2258 u32 *cs;
2259 int i;
2260
2261 if (!IS_GEN(rq->engine->i915, 7)(0 + (&(rq->engine->i915)->__info)->gen == (7
))
|| rq->engine->id != RCS0) {
2262 drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n")drm_dev_dbg((&rq->engine->i915->drm)->dev, DRM_UT_DRIVER
, "sol reset is gen7/rcs only\n")
;
2263 return -EINVAL22;
2264 }
2265
2266 cs = intel_ring_begin(rq, 4 * 2 + 2);
2267 if (IS_ERR(cs))
2268 return PTR_ERR(cs);
2269
2270 *cs++ = MI_LOAD_REGISTER_IMM(4)(((0x22) << 23) | (2*(4)-1));
2271 for (i = 0; i < 4; i++) {
2272 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)((const i915_reg_t){ .reg = (0x5280 + (i) * 4) }));
2273 *cs++ = 0;
2274 }
2275 *cs++ = MI_NOOP(((0) << 23) | (0));
2276 intel_ring_advance(rq, cs);
2277
2278 return 0;
2279}
2280
2281static struct i915_vma *
2282shadow_batch_pin(struct i915_execbuffer *eb,
2283 struct drm_i915_gem_object *obj,
2284 struct i915_address_space *vm,
2285 unsigned int flags)
2286{
2287 struct i915_vma *vma;
2288 int err;
2289
2290 vma = i915_vma_instance(obj, vm, NULL((void *)0));
2291 if (IS_ERR(vma))
2292 return vma;
2293
2294 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
2295 if (err)
2296 return ERR_PTR(err);
2297
2298 return vma;
2299}
2300
2301static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
2302{
2303 /*
2304 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2305 * batch" bit. Hence we need to pin secure batches into the global gtt.
2306 * hsw should have this fixed, but bdw mucks it up again. */
2307 if (eb->batch_flags & I915_DISPATCH_SECURE(1UL << (0)))
2308 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL((void *)0), 0, 0, 0);
2309
2310 return NULL((void *)0);
2311}
2312
2313static int eb_parse(struct i915_execbuffer *eb)
2314{
2315 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2316 struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
2317 struct i915_vma *shadow, *trampoline, *batch;
2318 unsigned long len;
2319 int err;
2320
2321 if (!eb_use_cmdparser(eb)) {
2322 batch = eb_dispatch_secure(eb, eb->batch->vma);
2323 if (IS_ERR(batch))
2324 return PTR_ERR(batch);
2325
2326 goto secure_batch;
2327 }
2328
2329 len = eb->batch_len;
2330 if (!CMDPARSER_USES_GGTT(eb->i915)(0 + (&(eb->i915)->__info)->gen == (7))) {
2331 /*
2332 * ppGTT backed shadow buffers must be mapped RO, to prevent
2333 * post-scan tampering
2334 */
2335 if (!eb->context->vm->has_read_only) {
2336 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Cannot prevent post-scan tampering without RO capable vm\n"
)
2337 "Cannot prevent post-scan tampering without RO capable vm\n")drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Cannot prevent post-scan tampering without RO capable vm\n"
)
;
2338 return -EINVAL22;
2339 }
2340 } else {
2341 len += I915_CMD_PARSER_TRAMPOLINE_SIZE8;
2342 }
2343 if (unlikely(len < eb->batch_len)__builtin_expect(!!(len < eb->batch_len), 0)) /* last paranoid check of overflow */
2344 return -EINVAL22;
2345
2346 if (!pool) {
2347 pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
2348 if (IS_ERR(pool))
2349 return PTR_ERR(pool);
2350 eb->batch_pool = pool;
2351 }
2352
2353 err = i915_gem_object_lock(pool->obj, &eb->ww);
2354 if (err)
2355 goto err;
2356
2357 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER(1ULL << (11)));
2358 if (IS_ERR(shadow)) {
2359 err = PTR_ERR(shadow);
2360 goto err;
2361 }
2362 i915_gem_object_set_readonly(shadow->obj);
2363 shadow->private = pool;
2364
2365 trampoline = NULL((void *)0);
2366 if (CMDPARSER_USES_GGTT(eb->i915)(0 + (&(eb->i915)->__info)->gen == (7))) {
2367 trampoline = shadow;
2368
2369 shadow = shadow_batch_pin(eb, pool->obj,
2370 &eb->engine->gt->ggtt->vm,
2371 PIN_GLOBAL(1ULL << (10)));
2372 if (IS_ERR(shadow)) {
2373 err = PTR_ERR(shadow);
2374 shadow = trampoline;
2375 goto err_shadow;
2376 }
2377 shadow->private = pool;
2378
2379 eb->batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
2380 }
2381
2382 batch = eb_dispatch_secure(eb, shadow);
2383 if (IS_ERR(batch)) {
2384 err = PTR_ERR(batch);
2385 goto err_trampoline;
2386 }
2387
2388 err = intel_engine_cmd_parser(eb->engine,
2389 eb->batch->vma,
2390 eb->batch_start_offset,
2391 eb->batch_len,
2392 shadow, trampoline);
2393 if (err)
2394 goto err_unpin_batch;
2395
2396 eb->batch = &eb->vma[eb->buffer_count++];
2397 eb->batch->vma = i915_vma_get(shadow);
2398 eb->batch->flags = __EXEC_OBJECT_HAS_PIN(1UL << (31));
2399
2400 eb->trampoline = trampoline;
2401 eb->batch_start_offset = 0;
2402
2403secure_batch:
2404 if (batch) {
2405 eb->batch = &eb->vma[eb->buffer_count++];
2406 eb->batch->flags = __EXEC_OBJECT_HAS_PIN(1UL << (31));
2407 eb->batch->vma = i915_vma_get(batch);
2408 }
2409 return 0;
2410
2411err_unpin_batch:
2412 if (batch)
2413 i915_vma_unpin(batch);
2414err_trampoline:
2415 if (trampoline)
2416 i915_vma_unpin(trampoline);
2417err_shadow:
2418 i915_vma_unpin(shadow);
2419err:
2420 return err;
2421}
2422
2423static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
2424{
2425 int err;
2426
2427 err = eb_move_to_gpu(eb);
2428 if (err)
2429 return err;
2430
2431 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET(1<<8)) {
2432 err = i915_reset_gen7_sol_offsets(eb->request);
2433 if (err)
2434 return err;
2435 }
2436
2437 /*
2438 * After we have completed waiting for other engines (using HW semaphores),
2439 * we can signal that this request/batch is ready to run. This
2440 * allows us to determine if the batch is still waiting on the GPU
2441 * or actually running by checking the breadcrumb.
2442 */
2443 if (eb->engine->emit_init_breadcrumb) {
2444 err = eb->engine->emit_init_breadcrumb(eb->request);
2445 if (err)
2446 return err;
2447 }
2448
2449 err = eb->engine->emit_bb_start(eb->request,
2450 batch->node.start +
2451 eb->batch_start_offset,
2452 eb->batch_len,
2453 eb->batch_flags);
2454 if (err)
2455 return err;
2456
2457 if (eb->trampoline) {
2458 GEM_BUG_ON(eb->batch_start_offset)((void)0);
2459 err = eb->engine->emit_bb_start(eb->request,
2460 eb->trampoline->node.start +
2461 eb->batch_len,
2462 0, 0);
2463 if (err)
2464 return err;
2465 }
2466
2467 if (intel_context_nopreempt(eb->context))
2468 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
2469
2470 return 0;
2471}
2472
2473static int num_vcs_engines(const struct drm_i915_privateinteldrm_softc *i915)
2474{
2475 return hweight64(VDBOX_MASK(&i915->gt)({ unsigned int first__ = (VCS0); unsigned int count__ = (4);
((&i915->gt)->info.engine_mask & (((~0UL) >>
(64 - (first__ + count__ - 1) - 1)) & ((~0UL) << (
first__)))) >> first__; })
);
2476}
2477
2478/*
2479 * Find one BSD ring to dispatch the corresponding BSD command.
2480 * The engine index is returned.
2481 */
2482static unsigned int
2483gen8_dispatch_bsd_engine(struct drm_i915_privateinteldrm_softc *dev_priv,
2484 struct drm_file *file)
2485{
2486 struct drm_i915_file_private *file_priv = file->driver_priv;
2487
2488 /* Check whether the file_priv has already selected one ring. */
2489 if ((int)file_priv->bsd_engine < 0)
2490 file_priv->bsd_engine =
2491 get_random_int()arc4random() % num_vcs_engines(dev_priv);
2492
2493 return file_priv->bsd_engine;
2494}
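
gen8_dispatch_bsd_engine() above picks a video engine at random the first time a client submits I915_EXEC_BSD without naming a ring, then caches the choice in the file's private data so later submissions stay on the same engine. A toy version of that sticky random selection, with rand() standing in for the kernel's random source and an invented struct for the per-file state:

#include <stdlib.h>

struct file_state {
	int bsd_engine;		/* -1 until an engine has been chosen */
};

/* Return a stable engine index in [0, num_engines), chosen at random on
 * first use so submissions from different clients spread across engines. */
static int pick_bsd_engine(struct file_state *fs, int num_engines)
{
	if (fs->bsd_engine < 0)
		fs->bsd_engine = rand() % num_engines;

	return fs->bsd_engine;
}
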
2495
2496static const enum intel_engine_id user_ring_map[] = {
2497 [I915_EXEC_DEFAULT(0<<0)] = RCS0,
2498 [I915_EXEC_RENDER(1<<0)] = RCS0,
2499 [I915_EXEC_BLT(3<<0)] = BCS0,
2500 [I915_EXEC_BSD(2<<0)] = VCS0,
2501 [I915_EXEC_VEBOX(4<<0)] = VECS0
2502};
2503
2504static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
2505{
2506 struct intel_ring *ring = ce->ring;
2507 struct intel_timeline *tl = ce->timeline;
2508 struct i915_request *rq;
2509
2510 /*
2511 * Completely unscientific finger-in-the-air estimates for suitable
2512 * maximum user request size (to avoid blocking) and then backoff.
2513 */
2514 if (intel_ring_update_space(ring) >= PAGE_SIZE(1 << 12))
2515 return NULL((void *)0);
2516
2517 /*
2518 * Find a request such that, after waiting upon it, there will be at least half
2519 * the ring available. The hysteresis allows us to compete for the
2520 * shared ring and should mean that we sleep less often prior to
2521 * claiming our resources, but not so long that the ring completely
2522 * drains before we can submit our next request.
2523 */
2524 list_for_each_entry(rq, &tl->requests, link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *
__mptr = ((&tl->requests)->next); (__typeof(*rq) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );
}); &rq->link != (&tl->requests); rq = ({ const
__typeof( ((__typeof(*rq) *)0)->link ) *__mptr = (rq->
link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), link) );}))
{
2525 if (rq->ring != ring)
2526 continue;
2527
2528 if (__intel_ring_space(rq->postfix,
2529 ring->emit, ring->size) > ring->size / 2)
2530 break;
2531 }
2532 if (&rq->link == &tl->requests)
2533 return NULL((void *)0); /* weird, we will check again later for real */
2534
2535 return i915_request_get(rq);
2536}
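
eb_throttle() scans the timeline's outstanding requests for the first one whose completion would leave more than half the ring free, so the caller waits only long enough to regain headroom rather than draining the ring. A simplified model of that heuristic, assuming a power-of-two ring size and a naive circular-buffer space calculation in place of __intel_ring_space(); the structures and names are invented for the sketch.

#include <stddef.h>

struct pending_req {
	unsigned int postfix;	/* ring offset just past this request's commands */
};

/* Free space in a power-of-two circular buffer once `head` reaches `postfix`. */
static unsigned int ring_space(unsigned int head, unsigned int tail,
                               unsigned int size)
{
	return (head - tail - 1) & (size - 1);
}

/* Pick the oldest request whose retirement frees more than half the ring,
 * or NULL if no request qualifies (the caller simply re-checks later). */
static const struct pending_req *
pick_throttle_target(const struct pending_req *reqs, size_t nreq,
                     unsigned int emit, unsigned int size)
{
	for (size_t i = 0; i < nreq; i++) {
		if (ring_space(reqs[i].postfix, emit, size) > size / 2)
			return &reqs[i];
	}
	return NULL;
}
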
2537
2538static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool_Bool throttle)
2539{
2540 struct intel_context *ce = eb->context;
2541 struct intel_timeline *tl;
2542 struct i915_request *rq = NULL((void *)0);
2543 int err;
2544
2545 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED)((void)0);
2546
2547 if (unlikely(intel_context_is_banned(ce))__builtin_expect(!!(intel_context_is_banned(ce)), 0))
2548 return ERR_PTR(-EIO5);
2549
2550 /*
2551 * Pinning the contexts may generate requests in order to acquire
2552 * GGTT space, so do this first before we reserve a seqno for
2553 * ourselves.
2554 */
2555 err = intel_context_pin_ww(ce, &eb->ww);
2556 if (err)
2557 return ERR_PTR(err);
2558
2559 /*
2560 * Take a local wakeref for preparing to dispatch the execbuf as
2561 * we expect to access the hardware fairly frequently in the
2562 * process, and require the engine to be kept awake between accesses.
2563 * Upon dispatch, we acquire another prolonged wakeref that we hold
2564 * until the timeline is idle, which in turn releases the wakeref
2565 * taken on the engine, and the parent device.
2566 */
2567 tl = intel_context_timeline_lock(ce);
2568 if (IS_ERR(tl)) {
2569 intel_context_unpin(ce);
2570 return ERR_CAST(tl);
2571 }
2572
2573 intel_context_enter(ce);
2574 if (throttle)
2575 rq = eb_throttle(eb, ce);
2576 intel_context_timeline_unlock(tl);
2577
2578 eb->args->flags |= __EXEC_ENGINE_PINNED(1UL << (30));
2579 return rq;
2580}
2581
2582static void eb_unpin_engine(struct i915_execbuffer *eb)
2583{
2584 struct intel_context *ce = eb->context;
2585 struct intel_timeline *tl = ce->timeline;
2586
2587 if (!(eb->args->flags & __EXEC_ENGINE_PINNED(1UL << (30))))
2588 return;
2589
2590 eb->args->flags &= ~__EXEC_ENGINE_PINNED(1UL << (30));
2591
2592 mutex_lock(&tl->mutex)rw_enter_write(&tl->mutex);
2593 intel_context_exit(ce);
2594 mutex_unlock(&tl->mutex)rw_exit_write(&tl->mutex);
2595
2596 intel_context_unpin(ce);
2597}
2598
2599static unsigned int
2600eb_select_legacy_ring(struct i915_execbuffer *eb)
2601{
2602 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2603 struct drm_i915_gem_execbuffer2 *args = eb->args;
2604 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK(0x3f);
2605
2606 if (user_ring_id != I915_EXEC_BSD(2<<0) &&
2607 (args->flags & I915_EXEC_BSD_MASK(3 << (13)))) {
2608 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
2609 "execbuf with non bsd ring but with invalid "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
2610 "bsd dispatch flags: %d\n", (int)(args->flags))drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
;
2611 return -1;
2612 }
2613
2614 if (user_ring_id == I915_EXEC_BSD(2<<0) && num_vcs_engines(i915) > 1) {
2615 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK(3 << (13));
2616
2617 if (bsd_idx == I915_EXEC_BSD_DEFAULT(0 << (13))) {
2618 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
2619 } else if (bsd_idx >= I915_EXEC_BSD_RING1(1 << (13)) &&
2620 bsd_idx <= I915_EXEC_BSD_RING2(2 << (13))) {
2621 bsd_idx >>= I915_EXEC_BSD_SHIFT(13);
2622 bsd_idx--;
2623 } else {
2624 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
2625 "execbuf with unknown bsd ring: %u\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
2626 bsd_idx)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
;
2627 return -1;
2628 }
2629
2630 return _VCS(bsd_idx)(VCS0 + (bsd_idx));
2631 }
2632
2633 if (user_ring_id >= ARRAY_SIZE(user_ring_map)(sizeof((user_ring_map)) / sizeof((user_ring_map)[0]))) {
2634 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
2635 user_ring_id)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
;
2636 return -1;
2637 }
2638
2639 return user_ring_map[user_ring_id];
2640}
2641
2642static int
2643eb_select_engine(struct i915_execbuffer *eb)
2644{
2645 struct intel_context *ce;
2646 unsigned int idx;
2647 int err;
2648
2649 if (i915_gem_context_user_engines(eb->gem_context))
2650 idx = eb->args->flags & I915_EXEC_RING_MASK(0x3f);
2651 else
2652 idx = eb_select_legacy_ring(eb);
2653
2654 ce = i915_gem_context_get_engine(eb->gem_context, idx);
2655 if (IS_ERR(ce))
2656 return PTR_ERR(ce);
2657
2658 intel_gt_pm_get(ce->engine->gt);
2659
2660 if (!test_bit(CONTEXT_ALLOC_BIT1, &ce->flags)) {
2661 err = intel_context_alloc_state(ce);
2662 if (err)
2663 goto err;
2664 }
2665
2666 /*
2667 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2668 * EIO if the GPU is already wedged.
2669 */
2670 err = intel_gt_terminally_wedged(ce->engine->gt);
2671 if (err)
2672 goto err;
2673
2674 eb->context = ce;
2675 eb->engine = ce->engine;
2676
2677 /*
2678 * Make sure engine pool stays alive even if we call intel_context_put
2679 * during ww handling. The pool is destroyed when the last pm reference
2680 * is dropped, which breaks our -EDEADLK handling.
2681 */
2682 return err;
2683
2684err:
2685 intel_gt_pm_put(ce->engine->gt);
2686 intel_context_put(ce);
2687 return err;
2688}
2689
2690static void
2691eb_put_engine(struct i915_execbuffer *eb)
2692{
2693 intel_gt_pm_put(eb->engine->gt);
2694 intel_context_put(eb->context);
2695}
2696
2697static void
2698__free_fence_array(struct eb_fence *fences, unsigned int n)
2699{
2700 while (n--) {
2701 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)({ unsigned long __v = (unsigned long)(fences[n].syncobj); (typeof
(fences[n].syncobj))(__v & -(1UL << (2))); })
);
2702 dma_fence_put(fences[n].dma_fence);
2703 kfree(fences[n].chain_fence);
2704 }
2705 kvfree(fences);
2706}
2707
2708static int
2709add_timeline_fence_array(struct i915_execbuffer *eb,
2710 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
2711{
2712 struct drm_i915_gem_exec_fence __user *user_fences;
2713 u64 __user *user_values;
2714 struct eb_fence *f;
2715 u64 nfences;
2716 int err = 0;
2717
2718 nfences = timeline_fences->fence_count;
2719 if (!nfences)
2720 return 0;
2721
2722 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2723 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long))extern char _ctassert[(!(sizeof(size_t) > sizeof(unsigned long
))) ? 1 : -1 ] __attribute__((__unused__))
;
2724 if (nfences > min_t(unsigned long,({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user_fences
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
)); __min_a < __min_b ? __min_a : __min_b; })
2725 ULONG_MAX / sizeof(*user_fences),({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user_fences
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
)); __min_a < __min_b ? __min_a : __min_b; })
2726 SIZE_MAX / sizeof(*f))({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user_fences
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
)); __min_a < __min_b ? __min_a : __min_b; })
- eb->num_fences)
2727 return -EINVAL22;
2728
2729 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr)((void *)(uintptr_t)(timeline_fences->handles_ptr));
2730 if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
2731 return -EFAULT14;
2732
2733 user_values = u64_to_user_ptr(timeline_fences->values_ptr)((void *)(uintptr_t)(timeline_fences->values_ptr));
2734 if (!access_ok(user_values, nfences * sizeof(*user_values)))
2735 return -EFAULT14;
2736
2737#ifdef __linux__
2738 f = krealloc(eb->fences,
2739 (eb->num_fences + nfences) * sizeof(*f),
2740 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2741 if (!f)
2742 return -ENOMEM12;
2743#else
2744 f = kmalloc((eb->num_fences + nfences) * sizeof(*f),
2745 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2746 if (!f)
2747 return -ENOMEM12;
2748 memcpy(f, eb->fences, eb->num_fences * sizeof(*f))__builtin_memcpy((f), (eb->fences), (eb->num_fences * sizeof
(*f)))
;
2749 kfree(eb->fences);
2750#endif
2751
2752 eb->fences = f;
2753 f += eb->num_fences;
2754
2755#ifdef notyet
2756 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
2757 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS)extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
;
2758#endif
2759
2760 while (nfences--) {
2761 struct drm_i915_gem_exec_fence user_fence;
2762 struct drm_syncobj *syncobj;
2763 struct dma_fence *fence = NULL((void *)0);
2764 u64 point;
2765
2766 if (__copy_from_user(&user_fence,
2767 user_fences++,
2768 sizeof(user_fence)))
2769 return -EFAULT14;
2770
2771 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS(-((1<<1) << 1)))
2772 return -EINVAL22;
2773
2774 if (__get_user(point, user_values++)-copyin((user_values++), &((point)), sizeof((point))))
2775 return -EFAULT14;
2776
2777 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2778 if (!syncobj) {
2779 DRM_DEBUG("Invalid syncobj handle provided\n")__drm_dbg(DRM_UT_CORE, "Invalid syncobj handle provided\n");
2780 return -ENOENT2;
2781 }
2782
2783 fence = drm_syncobj_fence_get(syncobj);
2784
2785 if (!fence && user_fence.flags &&
2786 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2787 DRM_DEBUG("Syncobj handle has no fence\n")__drm_dbg(DRM_UT_CORE, "Syncobj handle has no fence\n");
2788 drm_syncobj_put(syncobj);
2789 return -EINVAL22;
2790 }
2791
2792 if (fence)
2793 err = dma_fence_chain_find_seqno(&fence, point);
2794
2795 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2796 DRM_DEBUG("Syncobj handle missing requested point %llu\n", point)__drm_dbg(DRM_UT_CORE, "Syncobj handle missing requested point %llu\n"
, point)
;
2797 dma_fence_put(fence);
2798 drm_syncobj_put(syncobj);
2799 return err;
2800 }
2801
2802 /*
2803 * A point might have been signaled already and
2804 * garbage collected from the timeline. In this case
2805 * just ignore the point and carry on.
2806 */
2807 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2808 drm_syncobj_put(syncobj);
2809 continue;
2810 }
2811
2812 /*
2813 * For timeline syncobjs we need to preallocate chains for
2814 * later signaling.
2815 */
2816 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1)) {
2817 /*
2818 * Waiting and signaling the same point (when point !=
2819 * 0) would break the timeline.
2820 */
2821 if (user_fence.flags & I915_EXEC_FENCE_WAIT(1<<0)) {
2822 DRM_DEBUG("Trying to wait & signal the same timeline point.\n")__drm_dbg(DRM_UT_CORE, "Trying to wait & signal the same timeline point.\n"
)
;
2823 dma_fence_put(fence);
2824 drm_syncobj_put(syncobj);
2825 return -EINVAL22;
2826 }
2827
2828 f->chain_fence =
2829 kmalloc(sizeof(*f->chain_fence),
2830 GFP_KERNEL(0x0001 | 0x0004));
2831 if (!f->chain_fence) {
2832 drm_syncobj_put(syncobj);
2833 dma_fence_put(fence);
2834 return -ENOMEM12;
2835 }
2836 } else {
2837 f->chain_fence = NULL((void *)0);
2838 }
2839
2840 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2)({ unsigned long __bits = (user_fence.flags); ((void)0); ((typeof
(syncobj))((unsigned long)(syncobj) | __bits)); })
;
2841 f->dma_fence = fence;
2842 f->value = point;
2843 f++;
2844 eb->num_fences++;
2845 }
2846
2847 return 0;
2848}
2849
2850static int add_fence_array(struct i915_execbuffer *eb)
2851{
2852 struct drm_i915_gem_execbuffer2 *args = eb->args;
2853 struct drm_i915_gem_exec_fence __user *user;
2854 unsigned long num_fences = args->num_cliprects;
2855 struct eb_fence *f;
2856
2857 if (!(args->flags & I915_EXEC_FENCE_ARRAY(1<<19)))
2858 return 0;
2859
2860 if (!num_fences)
2861 return 0;
2862
2863 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2864 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long))extern char _ctassert[(!(sizeof(size_t) > sizeof(unsigned long
))) ? 1 : -1 ] __attribute__((__unused__))
;
2865 if (num_fences > min_t(unsigned long,({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
) - eb->num_fences); __min_a < __min_b ? __min_a : __min_b
; })
2866 ULONG_MAX / sizeof(*user),({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
) - eb->num_fences); __min_a < __min_b ? __min_a : __min_b
; })
2867 SIZE_MAX / sizeof(*f) - eb->num_fences)({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
) - eb->num_fences); __min_a < __min_b ? __min_a : __min_b
; })
)
2868 return -EINVAL22;
2869
2870 user = u64_to_user_ptr(args->cliprects_ptr)((void *)(uintptr_t)(args->cliprects_ptr));
2871 if (!access_ok(user, num_fences * sizeof(*user)))
2872 return -EFAULT14;
2873
2874#ifdef __linux__
2875 f = krealloc(eb->fences,
2876 (eb->num_fences + num_fences) * sizeof(*f),
2877 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2878 if (!f)
2879 return -ENOMEM12;
2880#else
2881 f = kmalloc((eb->num_fences + num_fences) * sizeof(*f),
2882 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2883 if (!f)
2884 return -ENOMEM12;
2885 memcpy(f, eb->fences, eb->num_fences * sizeof(*f))__builtin_memcpy((f), (eb->fences), (eb->num_fences * sizeof
(*f)))
;
2886 kfree(eb->fences);
2887#endif
2888
2889 eb->fences = f;
2890 f += eb->num_fences;
2891 while (num_fences--) {
2892 struct drm_i915_gem_exec_fence user_fence;
2893 struct drm_syncobj *syncobj;
2894 struct dma_fence *fence = NULL((void *)0);
2895
2896 if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
2897 return -EFAULT14;
2898
2899 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS(-((1<<1) << 1)))
2900 return -EINVAL22;
2901
2902 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2903 if (!syncobj) {
2904 DRM_DEBUG("Invalid syncobj handle provided\n")__drm_dbg(DRM_UT_CORE, "Invalid syncobj handle provided\n");
2905 return -ENOENT2;
2906 }
2907
2908 if (user_fence.flags & I915_EXEC_FENCE_WAIT(1<<0)) {
2909 fence = drm_syncobj_fence_get(syncobj);
2910 if (!fence) {
2911 DRM_DEBUG("Syncobj handle has no fence\n")__drm_dbg(DRM_UT_CORE, "Syncobj handle has no fence\n");
2912 drm_syncobj_put(syncobj);
2913 return -EINVAL22;
2914 }
2915 }
2916
2917#ifdef notyet
2918 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
2919 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS)extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
;
2920#endif
2921
2922 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2)({ unsigned long __bits = (user_fence.flags); ((void)0); ((typeof
(syncobj))((unsigned long)(syncobj) | __bits)); })
;
2923 f->dma_fence = fence;
2924 f->value = 0;
2925 f->chain_fence = NULL((void *)0);
2926 f++;
2927 eb->num_fences++;
2928 }
2929
2930 return 0;
2931}
2932
2933static void put_fence_array(struct eb_fence *fences, int num_fences)
2934{
2935 if (fences)
2936 __free_fence_array(fences, num_fences);
2937}
2938
2939static int
2940await_fence_array(struct i915_execbuffer *eb)
2941{
2942 unsigned int n;
2943 int err;
2944
2945 for (n = 0; n < eb->num_fences; n++) {
2946 struct drm_syncobj *syncobj;
2947 unsigned int flags;
2948
2949 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2)({ unsigned long __v = (unsigned long)(eb->fences[n].syncobj
); *(&flags) = __v & ((1UL << (2)) - 1); (typeof
(eb->fences[n].syncobj))(__v & -(1UL << (2))); }
)
;
2950
2951 if (!eb->fences[n].dma_fence)
2952 continue;
2953
2954 err = i915_request_await_dma_fence(eb->request,
2955 eb->fences[n].dma_fence);
2956 if (err < 0)
2957 return err;
2958 }
2959
2960 return 0;
2961}
2962
2963static void signal_fence_array(const struct i915_execbuffer *eb)
2964{
2965 struct dma_fence * const fence = &eb->request->fence;
2966 unsigned int n;
2967
2968 for (n = 0; n < eb->num_fences; n++) {
2969 struct drm_syncobj *syncobj;
2970 unsigned int flags;
2971
2972 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2)({ unsigned long __v = (unsigned long)(eb->fences[n].syncobj
); *(&flags) = __v & ((1UL << (2)) - 1); (typeof
(eb->fences[n].syncobj))(__v & -(1UL << (2))); }
)
;
2973 if (!(flags & I915_EXEC_FENCE_SIGNAL(1<<1)))
2974 continue;
2975
2976 if (eb->fences[n].chain_fence) {
2977 drm_syncobj_add_point(syncobj,
2978 eb->fences[n].chain_fence,
2979 fence,
2980 eb->fences[n].value);
2981 /*
2982 * The chain's ownership is transferred to the
2983 * timeline.
2984 */
2985 eb->fences[n].chain_fence = NULL((void *)0);
2986 } else {
2987 drm_syncobj_replace_fence(syncobj, fence);
2988 }
2989 }
2990}
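
The fence bookkeeping above never stores the user's wait/signal flags separately: when the array is built they are packed into the two low bits of the drm_syncobj pointer (ptr_pack_bits with 2 bits), and await_fence_array()/signal_fence_array() recover them with ptr_unpack_bits; the BUILD_BUG_ON against ARCH_KMALLOC_MINALIGN checks that kmalloc alignment really leaves those bits free. Below is a minimal standalone sketch of the same pointer-tagging idea; pack_bits()/unpack_bits() and the FLAG_* values are illustrative stand-ins, not the kernel macros.

/*
 * Pointer tagging sketch: aligned allocations leave the two low bits of
 * the pointer free, so per-fence flags can ride along with the pointer.
 * Stand-in code only, not the kernel's ptr_pack_bits()/ptr_unpack_bits().
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define FLAG_WAIT   0x1u   /* stands in for I915_EXEC_FENCE_WAIT   */
#define FLAG_SIGNAL 0x2u   /* stands in for I915_EXEC_FENCE_SIGNAL */

static void *pack_bits(void *ptr, unsigned int bits)
{
        assert(((uintptr_t)ptr & 0x3) == 0);   /* alignment gives two spare bits */
        return (void *)((uintptr_t)ptr | (bits & 0x3));
}

static void *unpack_bits(void *packed, unsigned int *bits)
{
        *bits = (uintptr_t)packed & 0x3;       /* recover the flags */
        return (void *)((uintptr_t)packed & ~(uintptr_t)0x3); /* and the pointer */
}

int main(void)
{
        int *obj = malloc(sizeof(*obj));       /* stands in for a drm_syncobj */
        void *packed = pack_bits(obj, FLAG_SIGNAL);
        unsigned int flags;
        int *back = unpack_bits(packed, &flags);

        printf("same pointer: %d, signal: %d\n", back == obj, !!(flags & FLAG_SIGNAL));
        free(obj);
        return 0;
}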
2991
2992static int
2993parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
2994{
2995 struct i915_execbuffer *eb = data;
2996 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
2997
2998 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
2999 return -EFAULT14;
3000
3001 return add_timeline_fence_array(eb, &timeline_fences);
3002}
3003
3004static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
3005{
3006 struct i915_request *rq, *rn;
3007
3008 list_for_each_entry_safe(rq, rn, &tl->requests, link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *__mptr = ((&tl->requests)->next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );}), rn = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *__mptr = (rq->link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );}); &rq->link != (&tl->requests); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->link ) *__mptr = (rn->link.next); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rn), link) );}))
3009 if (rq == end || !i915_request_retire(rq))
3010 break;
3011}
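
The list_for_each_entry_safe() expansion shown above is only container_of() arithmetic plus a pre-fetched next pointer, so the request being visited may be retired and unlinked without breaking the walk; retire_requests() stops at the watermark request or at the first request that cannot be retired. A standalone sketch of that pattern, assuming the BSD <sys/queue.h> TAILQ macros and hypothetical fake_request/timeline types in place of the i915 structures:

/*
 * Delete-while-iterating sketch: 'rn' is captured before 'rq' is
 * unlinked, mirroring the rq/rn pair in retire_requests() above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct fake_request {
        int seqno;
        int busy;                              /* 1 => cannot be retired yet */
        TAILQ_ENTRY(fake_request) link;
};

TAILQ_HEAD(timeline, fake_request);

static void retire_up_to(struct timeline *tl, struct fake_request *end)
{
        struct fake_request *rq, *rn;

        TAILQ_FOREACH_SAFE(rq, tl, link, rn) {
                if (rq == end || rq->busy)     /* mirrors "rq == end || !i915_request_retire(rq)" */
                        break;
                TAILQ_REMOVE(tl, rq, link);    /* safe: rn was saved before removal */
                free(rq);
        }
}

int main(void)
{
        struct timeline tl = TAILQ_HEAD_INITIALIZER(tl);
        struct fake_request *end = NULL;

        for (int i = 0; i < 4; i++) {
                struct fake_request *rq = calloc(1, sizeof(*rq));
                rq->seqno = i;
                TAILQ_INSERT_TAIL(&tl, rq, link);
                if (i == 2)
                        end = rq;              /* retire everything queued before this one */
        }

        retire_up_to(&tl, end);

        struct fake_request *rq;
        TAILQ_FOREACH(rq, &tl, link)
                printf("still queued: %d\n", rq->seqno);   /* prints 2 and 3 */
        return 0;
}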
3012
3013static int eb_request_add(struct i915_execbuffer *eb, int err)
3014{
3015 struct i915_request *rq = eb->request;
3016 struct intel_timeline * const tl = i915_request_timeline(rq);
3017 struct i915_sched_attr attr = {};
3018 struct i915_request *prev;
3019
3020 lockdep_assert_held(&tl->mutex)do { (void)(&tl->mutex); } while(0);
3021 lockdep_unpin_lock(&tl->mutex, rq->cookie);
3022
3023 trace_i915_request_add(rq);
3024
3025 prev = __i915_request_commit(rq);
3026
3027 /* Check that the context wasn't destroyed before submission */
3028 if (likely(!intel_context_is_closed(eb->context))__builtin_expect(!!(!intel_context_is_closed(eb->context)), 1)) {
3029 attr = eb->gem_context->sched;
3030 } else {
3031 /* Serialise with context_close via the add_to_timeline */
3032 i915_request_set_error_once(rq, -ENOENT2);
3033 __i915_request_skip(rq);
3034 err = -ENOENT2; /* override any transient errors */
3035 }
3036
3037 __i915_request_queue(rq, &attr);
3038
3039 /* Try to clean up the client's timeline after submitting the request */
3040 if (prev)
3041 retire_requests(tl, prev);
3042
3043 mutex_unlock(&tl->mutex)rw_exit_write(&tl->mutex);
3044
3045 return err;
3046}
3047
3048static const i915_user_extension_fn execbuf_extensions[] = {
3049 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES0] = parse_timeline_fences,
3050};
3051
3052static int
3053parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
3054 struct i915_execbuffer *eb)
3055{
3056 if (!(args->flags & I915_EXEC_USE_EXTENSIONS(1 << 21)))
17
Taking true branch
3057 return 0;
18
Returning without writing to 'eb->buckets', which participates in a condition later
3058
3059 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
3060 * have another flag also using it at the same time.
3061 */
3062 if (eb->args->flags & I915_EXEC_FENCE_ARRAY(1<<19))
3063 return -EINVAL22;
3064
3065 if (args->num_cliprects != 0)
3066 return -EINVAL22;
3067
3068 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr)((void *)(uintptr_t)(args->cliprects_ptr)),
3069 execbuf_extensions,
3070 ARRAY_SIZE(execbuf_extensions)(sizeof((execbuf_extensions)) / sizeof((execbuf_extensions)[0])),
3071 eb);
3072}
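
parse_execbuf2_extensions() treats cliprects_ptr as the head of a chain of struct i915_user_extension headers once I915_EXEC_USE_EXTENSIONS is set, which is why num_cliprects must be zero and I915_EXEC_FENCE_ARRAY cannot be used at the same time. A hedged userspace-side sketch of building such a chain for the timeline-fences extension follows; the helper name is hypothetical, and the struct layout and field names (base, fence_count, handles_ptr, values_ptr) are assumed from the drm uapi header, not taken from this file.

/*
 * Userspace sketch (assumed uapi layout from the i915 drm header):
 * attach one timeline-fences extension to an execbuf2 call.
 */
#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>                        /* assumes the kernel/libdrm uapi header */

static void chain_timeline_fences(struct drm_i915_gem_execbuffer2 *execbuf,
                                  struct drm_i915_gem_execbuffer_ext_timeline_fences *ext,
                                  struct drm_i915_gem_exec_fence *fences,
                                  uint64_t *points, uint64_t count)
{
        memset(ext, 0, sizeof(*ext));
        ext->base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES;
        ext->base.next_extension = 0;            /* single-entry chain */
        ext->fence_count = count;
        ext->handles_ptr = (uintptr_t)fences;    /* array of drm_i915_gem_exec_fence */
        ext->values_ptr = (uintptr_t)points;     /* one timeline point per fence */

        execbuf->flags |= I915_EXEC_USE_EXTENSIONS;
        execbuf->num_cliprects = 0;              /* must stay 0 when extensions are used */
        execbuf->cliprects_ptr = (uintptr_t)ext; /* head of the extension chain */
}

Reusing cliprects_ptr this way keeps the ioctl struct size stable while still letting new per-execbuf data be attached, at the cost of the mutual-exclusion checks seen above.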
3073
3074static int
3075i915_gem_do_execbuffer(struct drm_device *dev,
3076 struct drm_file *file,
3077 struct drm_i915_gem_execbuffer2 *args,
3078 struct drm_i915_gem_exec_object2 *exec)
3079{
3080 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3081 struct i915_execbuffer eb;
3082 struct dma_fence *in_fence = NULL((void *)0);
3083 struct sync_file *out_fence = NULL((void *)0);
3084 struct i915_vma *batch;
3085 int out_fence_fd = -1;
3086 int err;
3087
3088 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS)extern char _ctassert[(!((~0u << 30) & ~((-((1 << 21) << 1)) | (3<<6) | (1<<15)))) ? 1 : -1 ] __attribute__((__unused__));
3089 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
3090 ~__EXEC_OBJECT_UNKNOWN_FLAGS)extern char _ctassert[(!((~0u << 28) & ~-((1<<7)<<1))) ? 1 : -1 ] __attribute__((__unused__));
3091
3092 eb.i915 = i915;
3093 eb.file = file;
3094 eb.args = args;
3095 if (DBG_FORCE_RELOC0 || !(args->flags & I915_EXEC_NO_RELOC(1<<11)))
13
Taking true branch
3096 args->flags |= __EXEC_HAS_RELOC(1UL << (31));
3097
3098 eb.exec = exec;
3099 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
3100 eb.vma[0].vma = NULL((void *)0);
3101 eb.reloc_pool = eb.batch_pool = NULL((void *)0);
3102 eb.reloc_context = NULL((void *)0);
3103
3104 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS-((1<<7)<<1);
3105 reloc_cache_init(&eb.reloc_cache, eb.i915);
3106
3107 eb.buffer_count = args->buffer_count;
3108 eb.batch_start_offset = args->batch_start_offset;
3109 eb.batch_len = args->batch_len;
3110 eb.trampoline = NULL((void *)0);
3111
3112 eb.fences = NULL((void *)0);
3113 eb.num_fences = 0;
3114
3115 eb.batch_flags = 0;
3116 if (args->flags & I915_EXEC_SECURE(1<<9)) {
14
Taking false branch
3117 if (INTEL_GEN(i915)((&(i915)->__info)->gen) >= 11)
3118 return -ENODEV19;
3119
3120 /* Return -EPERM to trigger fallback code on old binaries. */
3121 if (!HAS_SECURE_BATCHES(i915)(((&(i915)->__info)->gen) < 6))
3122 return -EPERM1;
3123
3124 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN0x1))
3125 return -EPERM1;
3126
3127 eb.batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
3128 }
3129 if (args->flags & I915_EXEC_IS_PINNED(1<<10))
15
Taking false branch
3130 eb.batch_flags |= I915_DISPATCH_PINNED(1UL << (1));
3131
3132 err = parse_execbuf2_extensions(args, &eb);
16
Calling 'parse_execbuf2_extensions'
19
Returning from 'parse_execbuf2_extensions'
3133 if (err)
19.1
'err' is 0
20
Taking false branch
3134 goto err_ext;
3135
3136 err = add_fence_array(&eb);
3137 if (err)
21
Assuming 'err' is 0
22
Taking false branch
3138 goto err_ext;
3139
3140#define IN_FENCES (I915_EXEC_FENCE_IN(1<<16) | I915_EXEC_FENCE_SUBMIT(1 << 20))
3141 if (args->flags & IN_FENCES) {
23
Assuming the condition is false
24
Taking false branch
3142 if ((args->flags & IN_FENCES) == IN_FENCES)
3143 return -EINVAL22;
3144
3145 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)((u32)(args->rsvd2)));
3146 if (!in_fence) {
3147 err = -EINVAL22;
3148 goto err_ext;
3149 }
3150 }
3151#undef IN_FENCES
3152
3153 if (args->flags & I915_EXEC_FENCE_OUT(1<<17)) {
25
Assuming the condition is false
26
Taking false branch
3154 out_fence_fd = get_unused_fd_flags(O_CLOEXEC0x10000);
3155 if (out_fence_fd < 0) {
3156 err = out_fence_fd;
3157 goto err_in_fence;
3158 }
3159 }
3160
3161 err = eb_create(&eb);
27
Calling 'eb_create'
3162 if (err)
3163 goto err_out_fence;
3164
3165 GEM_BUG_ON(!eb.lut_size)((void)0);
3166
3167 err = eb_select_context(&eb);
3168 if (unlikely(err)__builtin_expect(!!(err), 0))
3169 goto err_destroy;
3170
3171 err = eb_select_engine(&eb);
3172 if (unlikely(err)__builtin_expect(!!(err), 0))
3173 goto err_context;
3174
3175 err = eb_lookup_vmas(&eb);
3176 if (err) {
3177 eb_release_vmas(&eb, true1);
3178 goto err_engine;
3179 }
3180
3181 i915_gem_ww_ctx_init(&eb.ww, true1);
3182
3183 err = eb_relocate_parse(&eb);
3184 if (err) {
3185 /*
3186 * If the user expects the execobject.offset and
3187 * reloc.presumed_offset to be an exact match,
3188 * as for using NO_RELOC, then we cannot update
3189 * the execobject.offset until we have completed
3190 * relocation.
3191 */
3192 args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
3193 goto err_vma;
3194 }
3195
3196 ww_acquire_done(&eb.ww.ctx);
3197
3198 batch = eb.batch->vma;
3199
3200 /* All GPU relocation batches must be submitted prior to the user rq */
3201 GEM_BUG_ON(eb.reloc_cache.rq)((void)0);
3202
3203 /* Allocate a request for this batch buffer nice and early. */
3204 eb.request = i915_request_create(eb.context);
3205 if (IS_ERR(eb.request)) {
3206 err = PTR_ERR(eb.request);
3207 goto err_vma;
3208 }
3209
3210 if (in_fence) {
3211 if (args->flags & I915_EXEC_FENCE_SUBMIT(1 << 20))
3212 err = i915_request_await_execution(eb.request,
3213 in_fence,
3214 eb.engine->bond_execute);
3215 else
3216 err = i915_request_await_dma_fence(eb.request,
3217 in_fence);
3218 if (err < 0)
3219 goto err_request;
3220 }
3221
3222 if (eb.fences) {
3223 err = await_fence_array(&eb);
3224 if (err)
3225 goto err_request;
3226 }
3227
3228 if (out_fence_fd != -1) {
3229 out_fence = sync_file_create(&eb.request->fence);
3230 if (!out_fence) {
3231 err = -ENOMEM12;
3232 goto err_request;
3233 }
3234 }
3235
3236 /*
3237 * Whilst this request exists, batch_obj will be on the
3238 * active_list, and so will hold the active reference. Only when this
3239 * request is retired will the batch_obj be moved onto the
3240 * inactive_list and lose its active reference. Hence we do not need
3241 * to explicitly hold another reference here.
3242 */
3243 eb.request->batch = batch;
3244 if (eb.batch_pool)
3245 intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
3246
3247 trace_i915_request_queue(eb.request, eb.batch_flags);
3248 err = eb_submit(&eb, batch);
3249err_request:
3250 i915_request_get(eb.request);
3251 err = eb_request_add(&eb, err);
3252
3253 if (eb.fences)
3254 signal_fence_array(&eb);
3255
3256 if (out_fence) {
3257 if (err == 0) {
3258 fd_install(out_fence_fd, out_fence->file);
3259 args->rsvd2 &= GENMASK_ULL(31, 0)(((~0ULL) >> (64 - (31) - 1)) & ((~0ULL) << (0))); /* keep in-fence */
3260 args->rsvd2 |= (u64)out_fence_fd << 32;
3261 out_fence_fd = -1;
3262 } else {
3263 fput(out_fence->file);
3264 }
3265 }
3266 i915_request_put(eb.request);
3267
3268err_vma:
3269 eb_release_vmas(&eb, true1);
3270 if (eb.trampoline)
3271 i915_vma_unpin(eb.trampoline);
3272 WARN_ON(err == -EDEADLK)({ int __ret = !!((err == -11)); if (__ret) printf("%s", "WARN_ON(" "err == -11" ")"); __builtin_expect(!!(__ret), 0); });
3273 i915_gem_ww_ctx_fini(&eb.ww);
3274
3275 if (eb.batch_pool)
3276 intel_gt_buffer_pool_put(eb.batch_pool);
3277 if (eb.reloc_pool)
3278 intel_gt_buffer_pool_put(eb.reloc_pool);
3279 if (eb.reloc_context)
3280 intel_context_put(eb.reloc_context);
3281err_engine:
3282 eb_put_engine(&eb);
3283err_context:
3284 i915_gem_context_put(eb.gem_context);
3285err_destroy:
3286 eb_destroy(&eb);
3287err_out_fence:
3288 if (out_fence_fd != -1)
3289 put_unused_fd(out_fence_fd);
3290err_in_fence:
3291 dma_fence_put(in_fence);
3292err_ext:
3293 put_fence_array(eb.fences, eb.num_fences);
3294 return err;
3295}
3296
3297static size_t eb_element_size(void)
3298{
3299 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
3300}
3301
3302static bool_Bool check_buffer_count(size_t count)
3303{
3304 const size_t sz = eb_element_size();
3305
3306 /*
3307 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
3308 * array size (see eb_create()). Otherwise, we can accept an array as
3309 * large as can be addressed (though use large arrays at your peril)!
3310 */
3311
3312 return !(count < 1 || count > INT_MAX0x7fffffff || count > SIZE_MAX0xffffffffffffffffUL / sz - 1);
3313}
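
check_buffer_count() is what stands between the user-supplied buffer_count and the kvmalloc_array()/copy_from_user() calls below: it requires at least one buffer, keeps the count within INT_MAX for the lookup table in eb_create(), and bounds it so that count * eb_element_size() still leaves room for at least one more element before SIZE_MAX. A standalone illustration of that arithmetic, with a hypothetical element size standing in for eb_element_size():

/*
 * Sketch of the three bounds enforced above; 'sz' is a stand-in
 * parameter, not the real per-buffer footprint.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool check_buffer_count(size_t count, size_t sz)
{
        /* at least one buffer, a LUT index that fits in an int,
         * and (count + 1) * sz guaranteed not to wrap past SIZE_MAX */
        return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
}

int main(void)
{
        const size_t sz = 128;                               /* hypothetical element size */

        printf("%d\n", check_buffer_count(0, sz));           /* 0: empty list rejected */
        printf("%d\n", check_buffer_count(1024, sz));        /* 1: well within bounds */
        printf("%d\n", check_buffer_count(SIZE_MAX / sz, sz)); /* 0: far too large */
        return 0;
}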
3314
3315/*
3316 * Legacy execbuffer just creates an exec2 list from the original exec object
3317 * list array and passes it to the real function.
3318 */
3319int
3320i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
3321 struct drm_file *file)
3322{
3323 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3324 struct drm_i915_gem_execbuffer *args = data;
3325 struct drm_i915_gem_execbuffer2 exec2;
3326 struct drm_i915_gem_exec_object *exec_list = NULL((void *)0);
3327 struct drm_i915_gem_exec_object2 *exec2_list = NULL((void *)0);
3328 const size_t count = args->buffer_count;
3329 unsigned int i;
3330 int err;
3331
3332 if (!check_buffer_count(count)) {
1
Taking false branch
3333 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf2 with %zd buffers\n", count);
3334 return -EINVAL22;
3335 }
3336
3337 exec2.buffers_ptr = args->buffers_ptr;
3338 exec2.buffer_count = args->buffer_count;
3339 exec2.batch_start_offset = args->batch_start_offset;
3340 exec2.batch_len = args->batch_len;
3341 exec2.DR1 = args->DR1;
3342 exec2.DR4 = args->DR4;
3343 exec2.num_cliprects = args->num_cliprects;
3344 exec2.cliprects_ptr = args->cliprects_ptr;
3345 exec2.flags = I915_EXEC_RENDER(1<<0);
3346 i915_execbuffer2_set_context_id(exec2, 0)(exec2).rsvd1 = 0 & (0xffffffff);
3347
3348 err = i915_gem_check_execbuffer(&exec2);
3349 if (err)
1.1
'err' is 0
2
Taking false branch
3350 return err;
3351
3352 /* Copy in the exec list from userland */
3353 exec_list = kvmalloc_array(count, sizeof(*exec_list),
3354 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3355
3356 /* Allocate extra slots for use by the command parser */
3357 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3358 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3359 if (exec_list == NULL((void *)0) || exec2_list == NULL((void *)0)) {
3
Assuming 'exec_list' is not equal to NULL
4
Assuming 'exec2_list' is not equal to NULL
5
Taking false branch
3360 drm_dbg(&i915->drm,
3361 "Failed to allocate exec list for %d buffers\n",
3362 args->buffer_count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %d buffers\n", args->buffer_count);
3363 kvfree(exec_list);
3364 kvfree(exec2_list);
3365 return -ENOMEM12;
3366 }
3367 err = copy_from_user(exec_list,
3368 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr)),
3369 sizeof(*exec_list) * count);
3370 if (err) {
5.1
'err' is 0
6
Taking false branch
3371 drm_dbg(&i915->drm, "copy %d exec entries failed %d\n",
3372 args->buffer_count, err)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "copy %d exec entries failed %d\n", args->buffer_count, err);
3373 kvfree(exec_list);
3374 kvfree(exec2_list);
3375 return -EFAULT14;
3376 }
3377
3378 for (i = 0; i < args->buffer_count; i++) {
6.1
'i' is < field 'buffer_count'
7
Loop condition is true. Entering loop body
10
Assuming 'i' is >= field 'buffer_count'
11
Loop condition is false. Execution continues on line 3390
3379 exec2_list[i].handle = exec_list[i].handle;
3380 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3381 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3382 exec2_list[i].alignment = exec_list[i].alignment;
3383 exec2_list[i].offset = exec_list[i].offset;
3384 if (INTEL_GEN(to_i915(dev))((&(to_i915(dev))->__info)->gen) < 4)
8
Assuming field 'gen' is >= 4
9
Taking false branch
3385 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE(1<<0);
3386 else
3387 exec2_list[i].flags = 0;
3388 }
3389
3390 err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
12
Calling 'i915_gem_do_execbuffer'
3391 if (exec2.flags & __EXEC_HAS_RELOC(1UL << (31))) {
3392 struct drm_i915_gem_exec_object __user *user_exec_list =
3393 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr));
3394
3395 /* Copy the new buffer offsets back to the user's exec list. */
3396 for (i = 0; i < args->buffer_count; i++) {
3397 if (!(exec2_list[i].offset & UPDATE(1ULL << (7))))
3398 continue;
3399
3400 exec2_list[i].offset =
3401 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK-(1ULL << (12)));
3402 exec2_list[i].offset &= PIN_OFFSET_MASK-(1ULL << (12));
3403 if (__copy_to_user(&user_exec_list[i].offset,
3404 &exec2_list[i].offset,
3405 sizeof(user_exec_list[i].offset)))
3406 break;
3407 }
3408 }
3409
3410 kvfree(exec_list);
3411 kvfree(exec2_list);
3412 return err;
3413}
3414
3415int
3416i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
3417 struct drm_file *file)
3418{
3419 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3420 struct drm_i915_gem_execbuffer2 *args = data;
3421 struct drm_i915_gem_exec_object2 *exec2_list;
3422 const size_t count = args->buffer_count;
3423 int err;
3424
3425 if (!check_buffer_count(count)) {
3426 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf2 with %zd buffers\n", count);
3427 return -EINVAL22;
3428 }
3429
3430 err = i915_gem_check_execbuffer(args);
3431 if (err)
3432 return err;
3433
3434 /* Allocate extra slots for use by the command parser */
3435 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3436 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3437 if (exec2_list == NULL((void *)0)) {
3438 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
3439 count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %zd buffers\n", count);
3440 return -ENOMEM12;
3441 }
3442 if (copy_from_user(exec2_list,
3443 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr)),
3444 sizeof(*exec2_list) * count)) {
3445 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "copy %zd exec entries failed\n", count);
3446 kvfree(exec2_list);
3447 return -EFAULT14;
3448 }
3449
3450 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
3451
3452 /*
3453 * Now that we have begun execution of the batchbuffer, we ignore
3454 * any new error after this point. Also given that we have already
3455 * updated the associated relocations, we try to write out the current
3456 * object locations irrespective of any error.
3457 */
3458 if (args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
3459 struct drm_i915_gem_exec_object2 __user *user_exec_list =
3460 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr));
3461 unsigned int i;
3462
3463 /* Copy the new buffer offsets back to the user's exec list. */
3464 /*
3465 * Note: count * sizeof(*user_exec_list) does not overflow,
3466 * because we checked 'count' in check_buffer_count().
3467 *
3468 * And this range already got effectively checked earlier
3469 * when we did the "copy_from_user()" above.
3470 */
3471 if (!user_write_access_begin(user_exec_list,
3472 count * sizeof(*user_exec_list))access_ok(user_exec_list, count * sizeof(*user_exec_list)))
3473 goto end;
3474
3475 for (i = 0; i < args->buffer_count; i++) {
3476 if (!(exec2_list[i].offset & UPDATE(1ULL << (7))))
3477 continue;
3478
3479 exec2_list[i].offset =
3480 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK-(1ULL << (12)));
3481 unsafe_put_user(exec2_list[i].offset,
3482 &user_exec_list[i].offset,
3483 end_user)({ __typeof((exec2_list[i].offset)) __tmp = (exec2_list[i].offset); if (copyout(&(__tmp), &user_exec_list[i].offset, sizeof(__tmp)) != 0) goto end_user; });
3484 }
3485end_user:
3486 user_write_access_end();
3487end:;
3488 }
3489
3490 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS(-((1 << 21) << 1));
3491 kvfree(exec2_list);
3492 return err;
3493}
3494
3495#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0
3496#include "selftests/i915_gem_execbuffer.c"
3497#endif