Bug Summary

File: dev/pci/drm/i915/gem/i915_gem_execbuffer.c
Warning: line 3143, column 12
Potential leak of memory pointed to by 'eb.fences'
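The warning means the analyzer found at least one path through the function on which the array reachable only through 'eb.fences' is still allocated when the function returns, i.e. an early error return that skips the cleanup freeing the fence array. As a rough sketch of that pattern only (hypothetical helper names, not the driver's actual code):

	struct eb_fence *fences;
	int err;

	fences = kcalloc(nfences, sizeof(*fences), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	err = prepare_request();	/* hypothetical step that can fail */
	if (err)
		return err;		/* leak: returns without kfree(fences) */

	kfree(fences);			/* only this path releases the array */
	return 0;

The usual fix is to route every failure after the allocation through a common cleanup label (or call the matching free helper) before returning.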

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name i915_gem_execbuffer.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc 
-fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c

/usr/src/sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c

1/*
2 * SPDX-License-Identifier: MIT
3 *
4 * Copyright © 2008,2010 Intel Corporation
5 */
6
7#include <linux/intel-iommu.h>
8#include <linux/dma-resv.h>
9#include <linux/sync_file.h>
10#include <linux/uaccess.h>
11
12#include <drm/drm_syncobj.h>
13
14#include <dev/pci/pcivar.h>
15#include <dev/pci/agpvar.h>
16
17#include "display/intel_frontbuffer.h"
18
19#include "gem/i915_gem_ioctls.h"
20#include "gt/intel_context.h"
21#include "gt/intel_gt.h"
22#include "gt/intel_gt_buffer_pool.h"
23#include "gt/intel_gt_pm.h"
24#include "gt/intel_ring.h"
25
26#include "i915_drv.h"
27#include "i915_gem_clflush.h"
28#include "i915_gem_context.h"
29#include "i915_gem_ioctls.h"
30#include "i915_trace.h"
31#include "i915_user_extensions.h"
32
33struct eb_vma {
34 struct i915_vma *vma;
35 unsigned int flags;
36
37 /** This vma's place in the execbuf reservation list */
38 struct drm_i915_gem_exec_object2 *exec;
39 struct list_head bind_link;
40 struct list_head reloc_link;
41
42 struct hlist_node node;
43 u32 handle;
44};
45
46enum {
47 FORCE_CPU_RELOC = 1,
48 FORCE_GTT_RELOC,
49 FORCE_GPU_RELOC,
50#define DBG_FORCE_RELOC0 0 /* choose one of the above! */
51};
52
53#define __EXEC_OBJECT_HAS_PIN(1UL << (31)) BIT(31)(1UL << (31))
54#define __EXEC_OBJECT_HAS_FENCE(1UL << (30)) BIT(30)(1UL << (30))
55#define __EXEC_OBJECT_NEEDS_MAP(1UL << (29)) BIT(29)(1UL << (29))
56#define __EXEC_OBJECT_NEEDS_BIAS(1UL << (28)) BIT(28)(1UL << (28))
57#define __EXEC_OBJECT_INTERNAL_FLAGS(~0u << 28) (~0u << 28) /* all of the above */
58#define __EXEC_OBJECT_RESERVED((1UL << (31)) | (1UL << (30))) (__EXEC_OBJECT_HAS_PIN(1UL << (31)) | __EXEC_OBJECT_HAS_FENCE(1UL << (30)))
59
60#define __EXEC_HAS_RELOC(1UL << (31)) BIT(31)(1UL << (31))
61#define __EXEC_ENGINE_PINNED(1UL << (30)) BIT(30)(1UL << (30))
62#define __EXEC_INTERNAL_FLAGS(~0u << 30) (~0u << 30)
63#define UPDATE(1ULL << (7)) PIN_OFFSET_FIXED(1ULL << (7))
64
65#define BATCH_OFFSET_BIAS(256*1024) (256*1024)
66
67#define __I915_EXEC_ILLEGAL_FLAGS((-((1 << 21) << 1)) | (3<<6) | (1<<15)) \
68 (__I915_EXEC_UNKNOWN_FLAGS(-((1 << 21) << 1)) | \
69 I915_EXEC_CONSTANTS_MASK(3<<6) | \
70 I915_EXEC_RESOURCE_STREAMER(1<<15))
71
72/* Catch emission of unexpected errors for CI! */
73#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0
74#undef EINVAL22
75#define EINVAL22 ({ \
76 DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__)__drm_dbg(DRM_UT_DRIVER, "EINVAL at %s:%d\n", __func__, 76); \
77 22; \
78})
79#endif
80
81/**
82 * DOC: User command execution
83 *
84 * Userspace submits commands to be executed on the GPU as an instruction
84 * stream within a GEM object we call a batchbuffer. These instructions may
86 * refer to other GEM objects containing auxiliary state such as kernels,
87 * samplers, render targets and even secondary batchbuffers. Userspace does
88 * not know where in the GPU memory these objects reside and so before the
89 * batchbuffer is passed to the GPU for execution, those addresses in the
90 * batchbuffer and auxiliary objects are updated. This is known as relocation,
91 * or patching. To try and avoid having to relocate each object on the next
92 * execution, userspace is told the location of those objects in this pass,
93 * but this remains just a hint as the kernel may choose a new location for
94 * any object in the future.
95 *
96 * At the level of talking to the hardware, submitting a batchbuffer for the
97 * GPU to execute is to add content to a buffer from which the HW
98 * command streamer is reading.
99 *
100 * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
101 * Execlists, this command is not placed on the same buffer as the
102 * remaining items.
103 *
104 * 2. Add a command to invalidate caches to the buffer.
105 *
106 * 3. Add a batchbuffer start command to the buffer; the start command is
107 * essentially a token together with the GPU address of the batchbuffer
108 * to be executed.
109 *
110 * 4. Add a pipeline flush to the buffer.
111 *
112 * 5. Add a memory write command to the buffer to record when the GPU
113 * is done executing the batchbuffer. The memory write writes the
114 * global sequence number of the request, ``i915_request::global_seqno``;
115 * the i915 driver uses the current value in the register to determine
116 * if the GPU has completed the batchbuffer.
117 *
118 * 6. Add a user interrupt command to the buffer. This command instructs
119 * the GPU to issue an interrupt when the command, pipeline flush and
120 * memory write are completed.
121 *
122 * 7. Inform the hardware of the additional commands added to the buffer
123 * (by updating the tail pointer).
124 *
125 * Processing an execbuf ioctl is conceptually split up into a few phases.
126 *
127 * 1. Validation - Ensure all the pointers, handles and flags are valid.
128 * 2. Reservation - Assign GPU address space for every object
129 * 3. Relocation - Update any addresses to point to the final locations
130 * 4. Serialisation - Order the request with respect to its dependencies
131 * 5. Construction - Construct a request to execute the batchbuffer
132 * 6. Submission (at some point in the future execution)
133 *
134 * Reserving resources for the execbuf is the most complicated phase. We
135 * neither want to have to migrate the object in the address space, nor do
136 * we want to have to update any relocations pointing to this object. Ideally,
137 * we want to leave the object where it is and for all the existing relocations
138 * to match. If the object is given a new address, or if userspace thinks the
139 * object is elsewhere, we have to parse all the relocation entries and update
140 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
141 * all the target addresses in all of its objects match the value in the
142 * relocation entries and that they all match the presumed offsets given by the
143 * list of execbuffer objects. Using this knowledge, we know that if we haven't
144 * moved any buffers, all the relocation entries are valid and we can skip
145 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
146 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
147 *
148 * The addresses written in the objects must match the corresponding
149 * reloc.presumed_offset which in turn must match the corresponding
150 * execobject.offset.
151 *
152 * Any render targets written to in the batch must be flagged with
153 * EXEC_OBJECT_WRITE.
154 *
155 * To avoid stalling, execobject.offset should match the current
156 * address of that object within the active context.
157 *
158 * The reservation is done in multiple phases. First we try to keep any
159 * object already bound at its current location - so long as that meets the
160 * constraints imposed by the new execbuffer. Any object left unbound after the
161 * first pass is then fitted into any available idle space. If an object does
162 * not fit, all objects are removed from the reservation and the process rerun
163 * after sorting the objects into a priority order (more difficult to fit
164 * objects are tried first). Failing that, the entire VM is cleared and we try
165 * to fit the execbuf one last time before concluding that it simply will not
166 * fit.
167 *
168 * A small complication to all of this is that we allow userspace not only to
169 * specify an alignment and a size for the object in the address space, but
170 * we also allow userspace to specify the exact offset. These objects are
171 * simpler to place (the location is known a priori); all we have to do is make
172 * sure the space is available.
173 *
174 * Once all the objects are in place, patching up the buried pointers to point
175 * to the final locations is a fairly simple job of walking over the relocation
176 * entry arrays, looking up the right address and rewriting the value into
177 * the object. Simple! ... The relocation entries are stored in user memory
178 * and so to access them we have to copy them into a local buffer. That copy
179 * has to avoid taking any pagefaults as they may lead back to a GEM object
180 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
181 * the relocation into multiple passes. First we try to do everything within an
182 * atomic context (avoid the pagefaults) which requires that we never wait. If
183 * we detect that we may wait, or if we need to fault, then we have to fallback
184 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
185 * bells yet?) Dropping the mutex means that we lose all the state we have
186 * built up so far for the execbuf and we must reset any global data. However,
187 * we do leave the objects pinned in their final locations - which is a
188 * potential issue for concurrent execbufs. Once we have left the mutex, we can
189 * allocate and copy all the relocation entries into a large array at our
190 * leisure, reacquire the mutex, reclaim all the objects and other state and
191 * then proceed to update any incorrect addresses with the objects.
192 *
193 * As we process the relocation entries, we maintain a record of whether the
194 * object is being written to. Using NORELOC, we expect userspace to provide
195 * this information instead. We also check whether we can skip the relocation
196 * by comparing the expected value inside the relocation entry with the target's
197 * final address. If they differ, we have to map the current object and rewrite
198 * the 4 or 8 byte pointer within.
199 *
200 * Serialising an execbuf is quite simple according to the rules of the GEM
201 * ABI. Execution within each context is ordered by the order of submission.
202 * Writes to any GEM object are in order of submission and are exclusive. Reads
203 * from a GEM object are unordered with respect to other reads, but ordered by
204 * writes. A write submitted after a read cannot occur before the read, and
205 * similarly any read submitted after a write cannot occur before the write.
206 * Writes are ordered between engines such that only one write occurs at any
207 * time (completing any reads beforehand) - using semaphores where available
208 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
209 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
210 * reads before starting, and any read (either using set-domain or pread) must
211 * flush all GPU writes before starting. (Note we only employ a barrier before,
212 * we currently rely on userspace not concurrently starting a new execution
213 * whilst reading or writing to an object. This may be an advantage or not
214 * depending on how much you trust userspace not to shoot themselves in the
215 * foot.) Serialisation may just result in the request being inserted into
216 * a DAG awaiting its turn, but most simple is to wait on the CPU until
217 * all dependencies are resolved.
218 *
219 * After all of that, it is just a matter of closing the request and handing it to
220 * the hardware (well, leaving it in a queue to be executed). However, we also
221 * offer the ability for batchbuffers to be run with elevated privileges so
222 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
223 * Before any batch is given extra privileges we first must check that it
224 * contains no nefarious instructions, we check that each instruction is from
225 * our whitelist and all registers are also from an allowed list. We first
226 * copy the user's batchbuffer to a shadow (so that the user doesn't have
227 * access to it, either by the CPU or GPU as we scan it) and then parse each
228 * instruction. If everything is ok, we set a flag telling the hardware to run
229 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
230 */
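To make the flow above concrete, here is a minimal userspace-side sketch of how a single execbuf with one relocation is assembled before the kernel runs the validation, reservation and relocation phases described above. It is illustrative only: error handling is omitted, and the device fd and GEM handles are assumed to have been created earlier.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int submit_batch(int fd, uint32_t batch_handle, uint32_t target_handle,
				uint64_t presumed_target_offset)
	{
		struct drm_i915_gem_relocation_entry reloc;
		struct drm_i915_gem_exec_object2 obj[2];
		struct drm_i915_gem_execbuffer2 execbuf;

		memset(&reloc, 0, sizeof(reloc));
		reloc.target_handle = target_handle;
		reloc.offset = 4 * sizeof(uint32_t);	/* where the pointer sits in the batch */
		reloc.presumed_offset = presumed_target_offset; /* hint only; the kernel may relocate */
		reloc.read_domains = I915_GEM_DOMAIN_RENDER;

		memset(obj, 0, sizeof(obj));
		obj[0].handle = target_handle;		/* auxiliary object */
		obj[1].handle = batch_handle;		/* batch goes last unless I915_EXEC_BATCH_FIRST */
		obj[1].relocation_count = 1;
		obj[1].relocs_ptr = (uintptr_t)&reloc;

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = (uintptr_t)obj;
		execbuf.buffer_count = 2;
		execbuf.batch_len = 0;			/* 0: use the whole batch object */
		execbuf.flags = I915_EXEC_RENDER;

		return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
	}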
231
232struct eb_fence {
233 struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
234 struct dma_fence *dma_fence;
235 u64 value;
236 struct dma_fence_chain *chain_fence;
237};
238
239struct i915_execbuffer {
240 struct drm_i915_privateinteldrm_softc *i915; /** i915 backpointer */
241 struct drm_file *file; /** per-file lookup tables and limits */
242 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
243 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
244 struct eb_vma *vma;
245
246 struct intel_engine_cs *engine; /** engine to queue the request to */
247 struct intel_context *context; /* logical state for the request */
248 struct i915_gem_context *gem_context; /** caller's context */
249
250 struct i915_request *request; /** our request to build */
251 struct eb_vma *batch; /** identity of the batch obj/vma */
252 struct i915_vma *trampoline; /** trampoline used for chaining */
253
254 /** actual size of execobj[] as we may extend it for the cmdparser */
255 unsigned int buffer_count;
256
257 /** list of vma not yet bound during reservation phase */
258 struct list_head unbound;
259
260 /** list of vma that have execobj.relocation_count */
261 struct list_head relocs;
262
263 struct i915_gem_ww_ctx ww;
264
265 /**
266 * Track the most recently used object for relocations, as we
267 * frequently have to perform multiple relocations within the same
268 * obj/page
269 */
270 struct reloc_cache {
271 struct drm_mm_node node; /** temporary GTT binding */
272 unsigned long vaddr; /** Current kmap address */
273 unsigned long page; /** Currently mapped page index */
274 unsigned int gen; /** Cached value of INTEL_GEN */
275 bool_Bool use_64bit_reloc : 1;
276 bool_Bool has_llc : 1;
277 bool_Bool has_fence : 1;
278 bool_Bool needs_unfenced : 1;
279
280 struct i915_request *rq;
281 u32 *rq_cmd;
282 unsigned int rq_size;
283 struct intel_gt_buffer_pool_node *pool;
284
285 struct agp_map *map;
286 bus_space_tag_t iot;
287 bus_space_handle_t ioh;
288 } reloc_cache;
289
290 struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
291 struct intel_context *reloc_context;
292
293 u64 invalid_flags; /** Set of execobj.flags that are invalid */
294 u32 context_flags; /** Set of execobj.flags to insert from the ctx */
295
296 u64 batch_len; /** Length of batch within object */
297 u32 batch_start_offset; /** Location within object of batch */
298 u32 batch_flags; /** Flags composed for emit_bb_start() */
299 struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
300
301 /**
302 * Indicate either the size of the hashtable used to resolve
303 * relocation handles, or if negative that we are using a direct
304 * index into the execobj[].
305 */
306 int lut_size;
307 struct hlist_head *buckets; /** ht for relocation handles */
308
309 struct eb_fence *fences;
310 unsigned long num_fences;
311};
312
313static int eb_parse(struct i915_execbuffer *eb);
314static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb,
315 bool_Bool throttle);
316static void eb_unpin_engine(struct i915_execbuffer *eb);
317
318static inline bool_Bool eb_use_cmdparser(const struct i915_execbuffer *eb)
319{
320 return intel_engine_requires_cmd_parser(eb->engine) ||
321 (intel_engine_using_cmd_parser(eb->engine) &&
322 eb->args->batch_len);
323}
324
325static int eb_create(struct i915_execbuffer *eb)
326{
327 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT(1<<12))) {
328 unsigned int size = 1 + ilog2(eb->buffer_count)((sizeof(eb->buffer_count) <= 4) ? (fls(eb->buffer_count) - 1) : (flsl(eb->buffer_count) - 1));
329
330 /*
331 * Without a 1:1 association between relocation handles and
332 * the execobject[] index, we instead create a hashtable.
333 * We size it dynamically based on available memory, starting
334 * first with 1:1 associative hash and scaling back until
335 * the allocation succeeds.
336 *
337 * Later on we use a positive lut_size to indicate we are
338 * using this hashtable, and a negative value to indicate a
339 * direct lookup.
340 */
341 do {
342 gfp_t flags;
343
344 /* While we can still reduce the allocation size, don't
345 * raise a warning and allow the allocation to fail.
346 * On the last pass though, we want to try as hard
347 * as possible to perform the allocation and warn
348 * if it fails.
349 */
350 flags = GFP_KERNEL(0x0001 | 0x0004);
351 if (size > 1)
352 flags |= __GFP_NORETRY0 | __GFP_NOWARN0;
353
354 eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
355 flags);
356 if (eb->buckets)
357 break;
358 } while (--size);
359
360 if (unlikely(!size)__builtin_expect(!!(!size), 0))
361 return -ENOMEM12;
362
363 eb->lut_size = size;
364 } else {
365 eb->lut_size = -eb->buffer_count;
366 }
367
368 return 0;
369}
370
371static bool_Bool
372eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
373 const struct i915_vma *vma,
374 unsigned int flags)
375{
376 if (vma->node.size < entry->pad_to_size)
377 return true1;
378
379 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)(((vma->node.start) & ((entry->alignment) - 1)) == 0))
380 return true1;
381
382 if (flags & EXEC_OBJECT_PINNED(1<<4) &&
383 vma->node.start != entry->offset)
384 return true1;
385
386 if (flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (28)) &&
387 vma->node.start < BATCH_OFFSET_BIAS(256*1024))
388 return true1;
389
390 if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)) &&
391 (vma->node.start + vma->node.size + 4095) >> 32)
392 return true1;
393
394 if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (29)) &&
395 !i915_vma_is_map_and_fenceable(vma))
396 return true1;
397
398 return false0;
399}
400
401static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
402 unsigned int exec_flags)
403{
404 u64 pin_flags = 0;
405
406 if (exec_flags & EXEC_OBJECT_NEEDS_GTT(1<<1))
407 pin_flags |= PIN_GLOBAL(1ULL << (10));
408
409 /*
410 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
411 * limit address to the first 4GBs for unflagged objects.
412 */
413 if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
414 pin_flags |= PIN_ZONE_4G(1ULL << (4));
415
416 if (exec_flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (29)))
417 pin_flags |= PIN_MAPPABLE(1ULL << (3));
418
419 if (exec_flags & EXEC_OBJECT_PINNED(1<<4))
420 pin_flags |= entry->offset | PIN_OFFSET_FIXED(1ULL << (7));
421 else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS(1UL << (28)))
422 pin_flags |= BATCH_OFFSET_BIAS(256*1024) | PIN_OFFSET_BIAS(1ULL << (6));
423
424 return pin_flags;
425}
426
427static inline bool_Bool
428eb_pin_vma(struct i915_execbuffer *eb,
429 const struct drm_i915_gem_exec_object2 *entry,
430 struct eb_vma *ev)
431{
432 struct i915_vma *vma = ev->vma;
433 u64 pin_flags;
434
435 if (vma->node.size)
436 pin_flags = vma->node.start;
437 else
438 pin_flags = entry->offset & PIN_OFFSET_MASK-(1ULL << (12));
439
440 pin_flags |= PIN_USER(1ULL << (11)) | PIN_NOEVICT(1ULL << (0)) | PIN_OFFSET_FIXED(1ULL << (7));
441 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT)__builtin_expect(!!(ev->flags & (1<<1)), 0))
442 pin_flags |= PIN_GLOBAL(1ULL << (10));
443
444 /* Attempt to reuse the current location if available */
445 /* TODO: Add -EDEADLK handling here */
446 if (unlikely(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags))__builtin_expect(!!(i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags)), 0)) {
447 if (entry->flags & EXEC_OBJECT_PINNED(1<<4))
448 return false0;
449
450 /* Failing that pick any _free_ space if suitable */
451 if (unlikely(i915_vma_pin_ww(vma, &eb->ww,__builtin_expect(!!(i915_vma_pin_ww(vma, &eb->ww, entry->pad_to_size, entry->alignment, eb_pin_flags(entry, ev->flags) | (1ULL << (11)) | (1ULL << (0)))), 0)
452 entry->pad_to_size,
453 entry->alignment,
454 eb_pin_flags(entry, ev->flags) |
455 PIN_USER | PIN_NOEVICT)))
456 return false0;
457 }
458
459 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
460 if (unlikely(i915_vma_pin_fence(vma))__builtin_expect(!!(i915_vma_pin_fence(vma)), 0)) {
461 i915_vma_unpin(vma);
462 return false0;
463 }
464
465 if (vma->fence)
466 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (30));
467 }
468
469 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (31));
470 return !eb_vma_misplaced(entry, vma, ev->flags);
471}
472
473static inline void
474eb_unreserve_vma(struct eb_vma *ev)
475{
476 if (!(ev->flags & __EXEC_OBJECT_HAS_PIN(1UL << (31))))
477 return;
478
479 if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE)__builtin_expect(!!(ev->flags & (1UL << (30))), 0))
480 __i915_vma_unpin_fence(ev->vma);
481
482 __i915_vma_unpin(ev->vma);
483 ev->flags &= ~__EXEC_OBJECT_RESERVED((1UL << (31)) | (1UL << (30)));
484}
485
486static int
487eb_validate_vma(struct i915_execbuffer *eb,
488 struct drm_i915_gem_exec_object2 *entry,
489 struct i915_vma *vma)
490{
491 if (unlikely(entry->flags & eb->invalid_flags)__builtin_expect(!!(entry->flags & eb->invalid_flags), 0))
492 return -EINVAL22;
493
494 if (unlikely(entry->alignment &&__builtin_expect(!!(entry->alignment && !is_power_of_2_u64(entry->alignment)), 0)
495 !is_power_of_2_u64(entry->alignment)))
496 return -EINVAL22;
497
498 /*
499 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
500 * any non-page-aligned or non-canonical addresses.
501 */
502 if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&__builtin_expect(!!(entry->flags & (1<<4) && entry->offset != gen8_canonical_addr(entry->offset & -(1ULL << (12)))), 0)
503 entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
504 return -EINVAL22;
505
506 /* pad_to_size was once a reserved field, so sanitize it */
507 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE(1<<5)) {
508 if (unlikely(offset_in_page(entry->pad_to_size))__builtin_expect(!!(((vaddr_t)(entry->pad_to_size) & ((1 << 12) - 1))), 0))
509 return -EINVAL22;
510 } else {
511 entry->pad_to_size = 0;
512 }
513 /*
514 * From drm_mm perspective address space is continuous,
515 * so from this point we're always using non-canonical
516 * form internally.
517 */
518 entry->offset = gen8_noncanonical_addr(entry->offset);
519
520 if (!eb->reloc_cache.has_fence) {
521 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE(1<<0);
522 } else {
523 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE(1<<0) ||
524 eb->reloc_cache.needs_unfenced) &&
525 i915_gem_object_is_tiled(vma->obj))
526 entry->flags |= EXEC_OBJECT_NEEDS_GTT(1<<1) | __EXEC_OBJECT_NEEDS_MAP(1UL << (29));
527 }
528
529 if (!(entry->flags & EXEC_OBJECT_PINNED(1<<4)))
530 entry->flags |= eb->context_flags;
531
532 return 0;
533}
534
535static void
536eb_add_vma(struct i915_execbuffer *eb,
537 unsigned int i, unsigned batch_idx,
538 struct i915_vma *vma)
539{
540 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
541 struct eb_vma *ev = &eb->vma[i];
542
543 GEM_BUG_ON(i915_vma_is_closed(vma))((void)0);
544
545 ev->vma = vma;
546 ev->exec = entry;
547 ev->flags = entry->flags;
548
549 if (eb->lut_size > 0) {
550 ev->handle = entry->handle;
551 hlist_add_head(&ev->node,
552 &eb->buckets[hash_32(entry->handle,
553 eb->lut_size)]);
554 }
555
556 if (entry->relocation_count)
557 list_add_tail(&ev->reloc_link, &eb->relocs);
558
559 /*
560 * SNA is doing fancy tricks with compressing batch buffers, which leads
561 * to negative relocation deltas. Usually that works out ok since the
562 * relocate address is still positive, except when the batch is placed
563 * very low in the GTT. Ensure this doesn't happen.
564 *
565 * Note that actual hangs have only been observed on gen7, but for
566 * paranoia do it everywhere.
567 */
568 if (i == batch_idx) {
569 if (entry->relocation_count &&
570 !(ev->flags & EXEC_OBJECT_PINNED(1<<4)))
571 ev->flags |= __EXEC_OBJECT_NEEDS_BIAS(1UL << (28));
572 if (eb->reloc_cache.has_fence)
573 ev->flags |= EXEC_OBJECT_NEEDS_FENCE(1<<0);
574
575 eb->batch = ev;
576 }
577}
578
579static inline int use_cpu_reloc(const struct reloc_cache *cache,
580 const struct drm_i915_gem_object *obj)
581{
582 if (!i915_gem_object_has_struct_page(obj))
583 return false0;
584
585 if (DBG_FORCE_RELOC0 == FORCE_CPU_RELOC)
586 return true1;
587
588 if (DBG_FORCE_RELOC0 == FORCE_GTT_RELOC)
589 return false0;
590
591 return (cache->has_llc ||
592 obj->cache_dirty ||
593 obj->cache_level != I915_CACHE_NONE);
594}
595
596static int eb_reserve_vma(struct i915_execbuffer *eb,
597 struct eb_vma *ev,
598 u64 pin_flags)
599{
600 struct drm_i915_gem_exec_object2 *entry = ev->exec;
601 struct i915_vma *vma = ev->vma;
602 int err;
603
604 if (drm_mm_node_allocated(&vma->node) &&
605 eb_vma_misplaced(entry, vma, ev->flags)) {
606 err = i915_vma_unbind(vma);
607 if (err)
608 return err;
609 }
610
611 err = i915_vma_pin_ww(vma, &eb->ww,
612 entry->pad_to_size, entry->alignment,
613 eb_pin_flags(entry, ev->flags) | pin_flags);
614 if (err)
615 return err;
616
617 if (entry->offset != vma->node.start) {
618 entry->offset = vma->node.start | UPDATE(1ULL << (7));
619 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
620 }
621
622 if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)__builtin_expect(!!(ev->flags & (1<<0)), 0)) {
623 err = i915_vma_pin_fence(vma);
624 if (unlikely(err)__builtin_expect(!!(err), 0)) {
625 i915_vma_unpin(vma);
626 return err;
627 }
628
629 if (vma->fence)
630 ev->flags |= __EXEC_OBJECT_HAS_FENCE(1UL << (30));
631 }
632
633 ev->flags |= __EXEC_OBJECT_HAS_PIN(1UL << (31));
634 GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags))((void)0);
635
636 return 0;
637}
638
639static int eb_reserve(struct i915_execbuffer *eb)
640{
641 const unsigned int count = eb->buffer_count;
642 unsigned int pin_flags = PIN_USER(1ULL << (11)) | PIN_NONBLOCK(1ULL << (2));
643 struct list_head last;
644 struct eb_vma *ev;
645 unsigned int i, pass;
646 int err = 0;
647
648 /*
649 * Attempt to pin all of the buffers into the GTT.
650 * This is done in 3 phases:
651 *
652 * 1a. Unbind all objects that do not match the GTT constraints for
653 * the execbuffer (fenceable, mappable, alignment etc).
654 * 1b. Increment pin count for already bound objects.
655 * 2. Bind new objects.
656 * 3. Decrement pin count.
657 *
658 * This avoids unnecessary unbinding of later objects in order to make
659 * room for the earlier objects *unless* we need to defragment.
660 */
661 pass = 0;
662 do {
663 list_for_each_entry(ev, &eb->unbound, bind_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->bind_link ) *__mptr = ((&eb->unbound)->next); (__typeof(*ev) *)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), bind_link) );}); &ev->bind_link != (&eb->unbound); ev = ({ const __typeof( ((__typeof(*ev) *)0)->bind_link ) *__mptr = (ev->bind_link.next); (__typeof(*ev) *)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), bind_link) );})) {
664 err = eb_reserve_vma(eb, ev, pin_flags);
665 if (err)
666 break;
667 }
668 if (err != -ENOSPC28)
669 return err;
670
671 /* Resort *all* the objects into priority order */
672 INIT_LIST_HEAD(&eb->unbound);
673 INIT_LIST_HEAD(&last);
674 for (i = 0; i < count; i++) {
675 unsigned int flags;
676
677 ev = &eb->vma[i];
678 flags = ev->flags;
679 if (flags & EXEC_OBJECT_PINNED(1<<4) &&
680 flags & __EXEC_OBJECT_HAS_PIN(1UL << (31)))
681 continue;
682
683 eb_unreserve_vma(ev);
684
685 if (flags & EXEC_OBJECT_PINNED(1<<4))
686 /* Pinned must have their slot */
687 list_add(&ev->bind_link, &eb->unbound);
688 else if (flags & __EXEC_OBJECT_NEEDS_MAP(1UL << (29)))
689 /* Map require the lowest 256MiB (aperture) */
690 list_add_tail(&ev->bind_link, &eb->unbound);
691 else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS(1<<3)))
692 /* Prioritise 4GiB region for restricted bo */
693 list_add(&ev->bind_link, &last);
694 else
695 list_add_tail(&ev->bind_link, &last);
696 }
697 list_splice_tail(&last, &eb->unbound);
698
699 switch (pass++) {
700 case 0:
701 break;
702
703 case 1:
704 /* Too fragmented, unbind everything and retry */
705 mutex_lock(&eb->context->vm->mutex)rw_enter_write(&eb->context->vm->mutex);
706 err = i915_gem_evict_vm(eb->context->vm);
707 mutex_unlock(&eb->context->vm->mutex)rw_exit_write(&eb->context->vm->mutex);
708 if (err)
709 return err;
710 break;
711
712 default:
713 return -ENOSPC28;
714 }
715
716 pin_flags = PIN_USER(1ULL << (11));
717 } while (1);
718}
719
720static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
721{
722 if (eb->args->flags & I915_EXEC_BATCH_FIRST(1<<18))
723 return 0;
724 else
725 return eb->buffer_count - 1;
726}
727
728static int eb_select_context(struct i915_execbuffer *eb)
729{
730 struct i915_gem_context *ctx;
731
732 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
733 if (unlikely(!ctx)__builtin_expect(!!(!ctx), 0))
734 return -ENOENT2;
735
736 eb->gem_context = ctx;
737 if (rcu_access_pointer(ctx->vm)(ctx->vm))
738 eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT(1<<1);
739
740 eb->context_flags = 0;
741 if (test_bit(UCONTEXT_NO_ZEROMAP0, &ctx->user_flags))
742 eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS(1UL << (28));
743
744 return 0;
745}
746
747static int __eb_add_lut(struct i915_execbuffer *eb,
748 u32 handle, struct i915_vma *vma)
749{
750 struct i915_gem_context *ctx = eb->gem_context;
751 struct i915_lut_handle *lut;
752 int err;
753
754 lut = i915_lut_handle_alloc();
755 if (unlikely(!lut)__builtin_expect(!!(!lut), 0))
756 return -ENOMEM12;
757
758 i915_vma_get(vma);
759 if (!atomic_fetch_inc(&vma->open_count)__sync_fetch_and_add(&vma->open_count, 1))
760 i915_vma_reopen(vma);
761 lut->handle = handle;
762 lut->ctx = ctx;
763
764 /* Check that the context hasn't been closed in the meantime */
765 err = -EINTR4;
766 if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
767 struct i915_address_space *vm = rcu_access_pointer(ctx->vm)(ctx->vm);
768
769 if (unlikely(vm && vma->vm != vm)__builtin_expect(!!(vm && vma->vm != vm), 0))
770 err = -EAGAIN35; /* user racing with ctx set-vm */
771 else if (likely(!i915_gem_context_is_closed(ctx))__builtin_expect(!!(!i915_gem_context_is_closed(ctx)), 1))
772 err = radix_tree_insert(&ctx->handles_vma, handle, vma);
773 else
774 err = -ENOENT2;
775 if (err == 0) { /* And nor has this handle */
776 struct drm_i915_gem_object *obj = vma->obj;
777
778 spin_lock(&obj->lut_lock)mtx_enter(&obj->lut_lock);
779 if (idr_find(&eb->file->object_idr, handle) == obj) {
780 list_add(&lut->obj_link, &obj->lut_list);
781 } else {
782 radix_tree_delete(&ctx->handles_vma, handle);
783 err = -ENOENT2;
784 }
785 spin_unlock(&obj->lut_lock)mtx_leave(&obj->lut_lock);
786 }
787 mutex_unlock(&ctx->lut_mutex)rw_exit_write(&ctx->lut_mutex);
788 }
789 if (unlikely(err)__builtin_expect(!!(err), 0))
790 goto err;
791
792 return 0;
793
794err:
795 i915_vma_close(vma);
796 i915_vma_put(vma);
797 i915_lut_handle_free(lut);
798 return err;
799}
800
801static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
802{
803 struct i915_address_space *vm = eb->context->vm;
804
805 do {
806 struct drm_i915_gem_object *obj;
807 struct i915_vma *vma;
808 int err;
809
810 rcu_read_lock();
811 vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
812 if (likely(vma && vma->vm == vm)__builtin_expect(!!(vma && vma->vm == vm), 1))
813 vma = i915_vma_tryget(vma);
814 rcu_read_unlock();
815 if (likely(vma)__builtin_expect(!!(vma), 1))
816 return vma;
817
818 obj = i915_gem_object_lookup(eb->file, handle);
819 if (unlikely(!obj)__builtin_expect(!!(!obj), 0))
820 return ERR_PTR(-ENOENT2);
821
822 vma = i915_vma_instance(obj, vm, NULL((void *)0));
823 if (IS_ERR(vma)) {
824 i915_gem_object_put(obj);
825 return vma;
826 }
827
828 err = __eb_add_lut(eb, handle, vma);
829 if (likely(!err)__builtin_expect(!!(!err), 1))
830 return vma;
831
832 i915_gem_object_put(obj);
833 if (err != -EEXIST17)
834 return ERR_PTR(err);
835 } while (1);
836}
837
838static int eb_lookup_vmas(struct i915_execbuffer *eb)
839{
840 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
841 unsigned int batch = eb_batch_index(eb);
842 unsigned int i;
843 int err = 0;
844
845 INIT_LIST_HEAD(&eb->relocs);
846
847 for (i = 0; i < eb->buffer_count; i++) {
848 struct i915_vma *vma;
849
850 vma = eb_lookup_vma(eb, eb->exec[i].handle);
851 if (IS_ERR(vma)) {
852 err = PTR_ERR(vma);
853 goto err;
854 }
855
856 err = eb_validate_vma(eb, &eb->exec[i], vma);
857 if (unlikely(err)__builtin_expect(!!(err), 0)) {
858 i915_vma_put(vma);
859 goto err;
860 }
861
862 eb_add_vma(eb, i, batch, vma);
863 }
864
865 if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)__builtin_expect(!!(eb->batch->flags & (1<<2)), 0)) {
866 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Attempting to use self-modifying batch buffer\n")
867 "Attempting to use self-modifying batch buffer\n");
868 return -EINVAL22;
869 }
870
871 if (range_overflows_t(u64,({ typeof((u64)(eb->batch_start_offset)) start__ = ((u64)(eb->batch_start_offset)); typeof((u64)(eb->batch_len)) size__ = ((u64)(eb->batch_len)); typeof((u64)(eb->batch->vma->size)) max__ = ((u64)(eb->batch->vma->size)); (void)(&start__ == &size__); (void)(&start__ == &max__); start__ >= max__ || size__ > max__ - start__; })
872 eb->batch_start_offset, eb->batch_len,
873 eb->batch->vma->size)) {
874 drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n")drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Attempting to use out-of-bounds batch\n");
875 return -EINVAL22;
876 }
877
878 if (eb->batch_len == 0)
879 eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
880 if (unlikely(eb->batch_len == 0)__builtin_expect(!!(eb->batch_len == 0), 0)) { /* impossible! */
881 drm_dbg(&i915->drm, "Invalid batch length\n")drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Invalid batch length\n");
882 return -EINVAL22;
883 }
884
885 return 0;
886
887err:
888 eb->vma[i].vma = NULL((void *)0);
889 return err;
890}
891
892static int eb_validate_vmas(struct i915_execbuffer *eb)
893{
894 unsigned int i;
895 int err;
896
897 INIT_LIST_HEAD(&eb->unbound);
898
899 for (i = 0; i < eb->buffer_count; i++) {
900 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
901 struct eb_vma *ev = &eb->vma[i];
902 struct i915_vma *vma = ev->vma;
903
904 err = i915_gem_object_lock(vma->obj, &eb->ww);
905 if (err)
906 return err;
907
908 if (eb_pin_vma(eb, entry, ev)) {
909 if (entry->offset != vma->node.start) {
910 entry->offset = vma->node.start | UPDATE(1ULL << (7));
911 eb->args->flags |= __EXEC_HAS_RELOC(1UL << (31));
912 }
913 } else {
914 eb_unreserve_vma(ev);
915
916 list_add_tail(&ev->bind_link, &eb->unbound);
917 if (drm_mm_node_allocated(&vma->node)) {
918 err = i915_vma_unbind(vma);
919 if (err)
920 return err;
921 }
922 }
923
924 GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&((void)0)
925 eb_vma_misplaced(&eb->exec[i], vma, ev->flags))((void)0);
926 }
927
928 if (!list_empty(&eb->unbound))
929 return eb_reserve(eb);
930
931 return 0;
932}
933
934static struct eb_vma *
935eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
936{
937 if (eb->lut_size < 0) {
938 if (handle >= -eb->lut_size)
939 return NULL((void *)0);
940 return &eb->vma[handle];
941 } else {
942 struct hlist_head *head;
943 struct eb_vma *ev;
944
945 head = &eb->buckets[hash_32(handle, eb->lut_size)];
946 hlist_for_each_entry(ev, head, node)for (ev = (((head)->first) ? ({ const __typeof( ((__typeof(*ev) *)0)->node ) *__mptr = ((head)->first); (__typeof(*ev) *)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), node) );}) : ((void *)0)); ev != ((void *)0); ev = (((ev)->node.next) ? ({ const __typeof( ((__typeof(*ev) *)0)->node ) *__mptr = ((ev)->node.next); (__typeof(*ev) *)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), node) );}) : ((void *)0))) {
947 if (ev->handle == handle)
948 return ev;
949 }
950 return NULL((void *)0);
951 }
952}
953
954static void eb_release_vmas(struct i915_execbuffer *eb, bool_Bool final)
955{
956 const unsigned int count = eb->buffer_count;
957 unsigned int i;
958
959 for (i = 0; i < count; i++) {
960 struct eb_vma *ev = &eb->vma[i];
961 struct i915_vma *vma = ev->vma;
962
963 if (!vma)
964 break;
965
966 eb_unreserve_vma(ev);
967
968 if (final)
969 i915_vma_put(vma);
970 }
971
972 eb_unpin_engine(eb);
973}
974
975static void eb_destroy(const struct i915_execbuffer *eb)
976{
977 GEM_BUG_ON(eb->reloc_cache.rq)((void)0);
978
979 if (eb->lut_size > 0)
980 kfree(eb->buckets);
981}
982
983static inline u64
984relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
985 const struct i915_vma *target)
986{
987 return gen8_canonical_addr((int)reloc->delta + target->node.start);
988}
989
990static void reloc_cache_clear(struct reloc_cache *cache)
991{
992 cache->rq = NULL((void *)0);
993 cache->rq_cmd = NULL((void *)0);
994 cache->pool = NULL((void *)0);
995 cache->rq_size = 0;
996}
997
998static void reloc_cache_init(struct reloc_cache *cache,
999 struct drm_i915_privateinteldrm_softc *i915)
1000{
1001 cache->page = -1;
1002 cache->vaddr = 0;
1003 /* Must be a variable in the struct to allow GCC to unroll. */
1004 cache->gen = INTEL_GEN(i915)((&(i915)->__info)->gen);
1005 cache->has_llc = HAS_LLC(i915)((&(i915)->__info)->has_llc);
1006 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915)((&(i915)->__info)->has_64bit_reloc);
1007 cache->has_fence = cache->gen < 4;
1008 cache->needs_unfenced = INTEL_INFO(i915)(&(i915)->__info)->unfenced_needs_alignment;
1009 cache->node.flags = 0;
1010
1011 cache->map = i915->agph;
1012 cache->iot = i915->bst;
1013
1014 reloc_cache_clear(cache);
1015}
1016
1017static inline void *unmask_page(unsigned long p)
1018{
1019 return (void *)(uintptr_t)(p & LINUX_PAGE_MASK(~((1 << 12) - 1)));
1020}
1021
1022static inline unsigned int unmask_flags(unsigned long p)
1023{
1024 return p & ~LINUX_PAGE_MASK(~((1 << 12) - 1));
1025}
1026
1027#define KMAP0x4 0x4 /* after CLFLUSH_FLAGS */
1028
1029static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
1030{
1031 struct drm_i915_privateinteldrm_softc *i915 =
1032 container_of(cache, struct i915_execbuffer, reloc_cache)({ const __typeof( ((struct i915_execbuffer *)0)->reloc_cache ) *__mptr = (cache); (struct i915_execbuffer *)( (char *)__mptr - __builtin_offsetof(struct i915_execbuffer, reloc_cache) );})->i915;
1033 return &i915->ggtt;
1034}
1035
1036static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
1037{
1038 if (!cache->pool)
1039 return;
1040
1041 /*
1042 * This is a bit nasty, normally we keep objects locked until the end
1043 * of execbuffer, but we already submit this, and have to unlock before
1044 * dropping the reference. Fortunately we can only hold 1 pool node at
1045 * a time, so this should be harmless.
1046 */
1047 i915_gem_ww_unlock_single(cache->pool->obj);
1048 intel_gt_buffer_pool_put(cache->pool);
1049 cache->pool = NULL((void *)0);
1050}
1051
1052static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
1053{
1054 struct drm_i915_gem_object *obj = cache->rq->batch->obj;
1055
1056 GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32))((void)0);
1057 cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END(((0x0a) << 23) | (0));
1058
1059 i915_gem_object_flush_map(obj);
1060 i915_gem_object_unpin_map(obj);
1061
1062 intel_gt_chipset_flush(cache->rq->engine->gt);
1063
1064 i915_request_add(cache->rq);
1065 reloc_cache_put_pool(eb, cache);
1066 reloc_cache_clear(cache);
1067
1068 eb->reloc_pool = NULL((void *)0);
1069}
1070
1071static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
1072{
1073 void *vaddr;
1074
1075 if (cache->rq)
1076 reloc_gpu_flush(eb, cache);
1077
1078 if (!cache->vaddr)
1079 return;
1080
1081 vaddr = unmask_page(cache->vaddr);
1082 if (cache->vaddr & KMAP0x4) {
1083 struct drm_i915_gem_object *obj =
1084 (struct drm_i915_gem_object *)cache->node.mm;
1085 if (cache->vaddr & CLFLUSH_AFTER(1UL << (1)))
1086 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1087
1088 kunmap_atomic(vaddr);
1089 i915_gem_object_finish_access(obj);
1090 } else {
1091 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1092
1093 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1094#ifdef __linux__
1095 io_mapping_unmap_atomic((void __iomem *)vaddr);
1096#else
1097 agp_unmap_atomic(cache->map, cache->ioh);
1098#endif
1099
1100 if (drm_mm_node_allocated(&cache->node)) {
1101 ggtt->vm.clear_range(&ggtt->vm,
1102 cache->node.start,
1103 cache->node.size);
1104 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1105 drm_mm_remove_node(&cache->node);
1106 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1107 } else {
1108 i915_vma_unpin((struct i915_vma *)cache->node.mm);
1109 }
1110 }
1111
1112 cache->vaddr = 0;
1113 cache->page = -1;
1114}
1115
1116static void *reloc_kmap(struct drm_i915_gem_object *obj,
1117 struct reloc_cache *cache,
1118 unsigned long pageno)
1119{
1120 void *vaddr;
1121 struct vm_page *page;
1122
1123 if (cache->vaddr) {
1124 kunmap_atomic(unmask_page(cache->vaddr));
1125 } else {
1126 unsigned int flushes;
1127 int err;
1128
1129 err = i915_gem_object_prepare_write(obj, &flushes);
1130 if (err)
1131 return ERR_PTR(err);
1132
1133 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS)extern char _ctassert[(!(0x4 & ((1UL << (0)) | (1UL << (1))))) ? 1 : -1 ] __attribute__((__unused__));
1134 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & LINUX_PAGE_MASK)extern char _ctassert[(!((0x4 | ((1UL << (0)) | (1UL << (1)))) & (~((1 << 12) - 1)))) ? 1 : -1 ] __attribute__((__unused__));
1135
1136 cache->vaddr = flushes | KMAP0x4;
1137 cache->node.mm = (void *)obj;
1138 if (flushes)
1139 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1140 }
1141
1142 page = i915_gem_object_get_page(obj, pageno);
1143 if (!obj->mm.dirty)
1144 set_page_dirty(page)x86_atomic_clearbits_u32(&page->pg_flags, 0x00000008);
1145
1146 vaddr = kmap_atomic(page);
1147 cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
1148 cache->page = pageno;
1149
1150 return vaddr;
1151}
1152
1153static void *reloc_iomap(struct drm_i915_gem_object *obj,
1154 struct i915_execbuffer *eb,
1155 unsigned long page)
1156{
1157 struct reloc_cache *cache = &eb->reloc_cache;
1158 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
1159 unsigned long offset;
1160 void *vaddr;
1161
1162 if (cache->vaddr) {
1163 intel_gt_flush_ggtt_writes(ggtt->vm.gt);
1164#ifdef __linux__
1165 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
1166#else
1167 agp_unmap_atomic(cache->map, cache->ioh);
1168#endif
1169 } else {
1170 struct i915_vma *vma;
1171 int err;
1172
1173 if (i915_gem_object_is_tiled(obj))
1174 return ERR_PTR(-EINVAL22);
1175
1176 if (use_cpu_reloc(cache, obj))
1177 return NULL((void *)0);
1178
1179 err = i915_gem_object_set_to_gtt_domain(obj, true1);
1180 if (err)
1181 return ERR_PTR(err);
1182
1183 vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL((void *)0), 0, 0,
1184 PIN_MAPPABLE(1ULL << (3)) |
1185 PIN_NONBLOCK(1ULL << (2)) /* NOWARN */ |
1186 PIN_NOEVICT(1ULL << (0)));
1187 if (vma == ERR_PTR(-EDEADLK11))
1188 return vma;
1189
1190 if (IS_ERR(vma)) {
1191 memset(&cache->node, 0, sizeof(cache->node))__builtin_memset((&cache->node), (0), (sizeof(cache->node)));
1192 mutex_lock(&ggtt->vm.mutex)rw_enter_write(&ggtt->vm.mutex);
1193 err = drm_mm_insert_node_in_range
1194 (&ggtt->vm.mm, &cache->node,
1195 PAGE_SIZE(1 << 12), 0, I915_COLOR_UNEVICTABLE(-1),
1196 0, ggtt->mappable_end,
1197 DRM_MM_INSERT_LOW);
1198 mutex_unlock(&ggtt->vm.mutex)rw_exit_write(&ggtt->vm.mutex);
1199 if (err) /* no inactive aperture space, use cpu reloc */
1200 return NULL((void *)0);
1201 } else {
1202 cache->node.start = vma->node.start;
1203 cache->node.mm = (void *)vma;
1204 }
1205 }
1206
1207 offset = cache->node.start;
1208 if (drm_mm_node_allocated(&cache->node)) {
1209 ggtt->vm.insert_page(&ggtt->vm,
1210 i915_gem_object_get_dma_address(obj, page),
1211 offset, I915_CACHE_NONE, 0);
1212 } else {
1213 offset += page << PAGE_SHIFT12;
1214 }
1215
1216#ifdef __linux__
1217 vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1218 offset);
1219#else
1220 agp_map_atomic(cache->map, offset, &cache->ioh);
1221 vaddr = bus_space_vaddr(cache->iot, cache->ioh)((cache->iot)->vaddr((cache->ioh)));
1222#endif
1223 cache->page = page;
1224 cache->vaddr = (unsigned long)vaddr;
1225
1226 return vaddr;
1227}
1228
1229static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1230 struct i915_execbuffer *eb,
1231 unsigned long page)
1232{
1233 struct reloc_cache *cache = &eb->reloc_cache;
1234 void *vaddr;
1235
1236 if (cache->page == page) {
1237 vaddr = unmask_page(cache->vaddr);
1238 } else {
1239 vaddr = NULL((void *)0);
1240 if ((cache->vaddr & KMAP0x4) == 0)
1241 vaddr = reloc_iomap(obj, eb, page);
1242 if (!vaddr)
1243 vaddr = reloc_kmap(obj, cache, page);
1244 }
1245
1246 return vaddr;
1247}
1248
1249static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1250{
1251 if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))__builtin_expect(!!(flushes & ((1UL << (0)) | (1UL << (1)))), 0)) {
1252 if (flushes & CLFLUSH_BEFORE(1UL << (0))) {
1253 clflushopt(addr);
1254 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
1255 }
1256
1257 *addr = value;
1258
1259 /*
1260 * Writes to the same cacheline are serialised by the CPU
1261 * (including clflush). On the write path, we only require
1262 * that it hits memory in an orderly fashion and place
1263 * mb barriers at the start and end of the relocation phase
1264 * to ensure ordering of clflush wrt to the system.
1265 */
1266 if (flushes & CLFLUSH_AFTER(1UL << (1)))
1267 clflushopt(addr);
1268 } else
1269 *addr = value;
1270}
1271
1272static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
1273{
1274 struct drm_i915_gem_object *obj = vma->obj;
1275 int err;
1276
1277 assert_vma_held(vma)do { (void)(&((vma)->resv)->lock.base); } while(0);
1278
1279 if (obj->cache_dirty & ~obj->cache_coherent)
1280 i915_gem_clflush_object(obj, 0);
1281 obj->write_domain = 0;
1282
1283 err = i915_request_await_object(rq, vma->obj, true1);
1284 if (err == 0)
1285 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE(1<<2));
1286
1287 return err;
1288}
1289
1290static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1291 struct intel_engine_cs *engine,
1292 struct i915_vma *vma,
1293 unsigned int len)
1294{
1295 struct reloc_cache *cache = &eb->reloc_cache;
1296 struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
1297 struct i915_request *rq;
1298 struct i915_vma *batch;
1299 u32 *cmd;
1300 int err;
1301
1302 if (!pool) {
1303 pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE(1 << 12));
1304 if (IS_ERR(pool))
1305 return PTR_ERR(pool);
1306 }
1307 eb->reloc_pool = NULL((void *)0);
1308
1309 err = i915_gem_object_lock(pool->obj, &eb->ww);
1310 if (err)
1311 goto err_pool;
1312
1313 cmd = i915_gem_object_pin_map(pool->obj,
1314 cache->has_llc ?
1315 I915_MAP_FORCE_WB :
1316 I915_MAP_FORCE_WC);
1317 if (IS_ERR(cmd)) {
1318 err = PTR_ERR(cmd);
1319 goto err_pool;
1320 }
1321
1322 memset32(cmd, 0, pool->obj->base.size / sizeof(u32));
1323
1324 batch = i915_vma_instance(pool->obj, vma->vm, NULL((void *)0));
1325 if (IS_ERR(batch)) {
1326 err = PTR_ERR(batch);
1327 goto err_unmap;
1328 }
1329
1330 err = i915_vma_pin_ww(batch, &eb->ww, 0, 0, PIN_USER(1ULL << (11)) | PIN_NONBLOCK(1ULL << (2)));
1331 if (err)
1332 goto err_unmap;
1333
1334 if (engine == eb->context->engine) {
1335 rq = i915_request_create(eb->context);
1336 } else {
1337 struct intel_context *ce = eb->reloc_context;
1338
1339 if (!ce) {
1340 ce = intel_context_create(engine);
1341 if (IS_ERR(ce)) {
1342 err = PTR_ERR(ce);
1343 goto err_unpin;
1344 }
1345
1346 i915_vm_put(ce->vm);
1347 ce->vm = i915_vm_get(eb->context->vm);
1348 eb->reloc_context = ce;
1349 }
1350
1351 err = intel_context_pin_ww(ce, &eb->ww);
1352 if (err)
1353 goto err_unpin;
1354
1355 rq = i915_request_create(ce);
1356 intel_context_unpin(ce);
1357 }
1358 if (IS_ERR(rq)) {
1359 err = PTR_ERR(rq);
1360 goto err_unpin;
1361 }
1362
1363 err = intel_gt_buffer_pool_mark_active(pool, rq);
1364 if (err)
1365 goto err_request;
1366
1367 err = reloc_move_to_gpu(rq, vma);
1368 if (err)
1369 goto err_request;
1370
1371 err = eb->engine->emit_bb_start(rq,
1372 batch->node.start, PAGE_SIZE(1 << 12),
1373 cache->gen > 5 ? 0 : I915_DISPATCH_SECURE(1UL << (0)));
1374 if (err)
1375 goto skip_request;
1376
1377 assert_vma_held(batch)do { (void)(&((batch)->resv)->lock.base); } while(0);
1378 err = i915_request_await_object(rq, batch->obj, false0);
1379 if (err == 0)
1380 err = i915_vma_move_to_active(batch, rq, 0);
1381 if (err)
1382 goto skip_request;
1383
1384 rq->batch = batch;
1385 i915_vma_unpin(batch);
1386
1387 cache->rq = rq;
1388 cache->rq_cmd = cmd;
1389 cache->rq_size = 0;
1390 cache->pool = pool;
1391
1392 /* Return with batch mapping (cmd) still pinned */
1393 return 0;
1394
1395skip_request:
1396 i915_request_set_error_once(rq, err);
1397err_request:
1398 i915_request_add(rq);
1399err_unpin:
1400 i915_vma_unpin(batch);
1401err_unmap:
1402 i915_gem_object_unpin_map(pool->obj);
1403err_pool:
1404 eb->reloc_pool = pool;
1405 return err;
1406}
1407
1408static bool_Bool reloc_can_use_engine(const struct intel_engine_cs *engine)
1409{
1410 return engine->class != VIDEO_DECODE_CLASS1 || !IS_GEN(engine->i915, 6)(0 + (&(engine->i915)->__info)->gen == (6));
1411}
1412
1413static u32 *reloc_gpu(struct i915_execbuffer *eb,
1414 struct i915_vma *vma,
1415 unsigned int len)
1416{
1417 struct reloc_cache *cache = &eb->reloc_cache;
1418 u32 *cmd;
1419
1420 if (cache->rq_size > PAGE_SIZE(1 << 12)/sizeof(u32) - (len + 1))
1421 reloc_gpu_flush(eb, cache);
1422
1423 if (unlikely(!cache->rq)__builtin_expect(!!(!cache->rq), 0)) {
1424 int err;
1425 struct intel_engine_cs *engine = eb->engine;
1426
1427 /* If we need to copy for the cmdparser, we will stall anyway */
1428 if (eb_use_cmdparser(eb))
1429 return ERR_PTR(-EWOULDBLOCK35);
1430
1431 if (!reloc_can_use_engine(engine)) {
1432 engine = engine->gt->engine_class[COPY_ENGINE_CLASS3][0];
1433 if (!engine)
1434 return ERR_PTR(-ENODEV19);
1435 }
1436
1437 err = __reloc_gpu_alloc(eb, engine, vma, len);
1438 if (unlikely(err)__builtin_expect(!!(err), 0))
1439 return ERR_PTR(err);
1440 }
1441
1442 cmd = cache->rq_cmd + cache->rq_size;
1443 cache->rq_size += len;
1444
1445 return cmd;
1446}
1447
1448static inline bool_Bool use_reloc_gpu(struct i915_vma *vma)
1449{
1450 if (DBG_FORCE_RELOC0 == FORCE_GPU_RELOC)
1451 return true1;
1452
1453 if (DBG_FORCE_RELOC0)
1454 return false0;
1455
1456 return !dma_resv_test_signaled_rcu(vma->resv, true1);
1457}
1458
1459static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
1460{
1461 struct vm_page *page;
1462 unsigned long addr;
1463
1464 GEM_BUG_ON(vma->pages != vma->obj->mm.pages)((void)0);
1465
1466 page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT12);
1467 addr = PFN_PHYS(page_to_pfn(page))(((((page)->phys_addr) / (1 << 12))) << 12);
1468 GEM_BUG_ON(overflows_type(addr, u32))((void)0); /* expected dma32 */
1469
1470 return addr + offset_in_page(offset)((vaddr_t)(offset) & ((1 << 12) - 1));
1471}
1472
1473static int __reloc_entry_gpu(struct i915_execbuffer *eb,
1474 struct i915_vma *vma,
1475 u64 offset,
1476 u64 target_addr)
1477{
1478 const unsigned int gen = eb->reloc_cache.gen;
1479 unsigned int len;
1480 u32 *batch;
1481 u64 addr;
1482
1483 if (gen >= 8)
1484 len = offset & 7 ? 8 : 5;
1485 else if (gen >= 4)
1486 len = 4;
1487 else
1488 len = 3;
1489
1490 batch = reloc_gpu(eb, vma, len);
1491 if (batch == ERR_PTR(-EDEADLK11))
1492 return -EDEADLK11;
1493 else if (IS_ERR(batch))
1494 return false0;
1495
1496 addr = gen8_canonical_addr(vma->node.start + offset);
1497 if (gen >= 8) {
1498 if (offset & 7) {
1499 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1500 *batch++ = lower_32_bits(addr)((u32)(addr));
1501 *batch++ = upper_32_bits(addr)((u32)(((addr) >> 16) >> 16));
1502 *batch++ = lower_32_bits(target_addr)((u32)(target_addr));
1503
1504 addr = gen8_canonical_addr(addr + 4);
1505
1506 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1507 *batch++ = lower_32_bits(addr)((u32)(addr));
1508 *batch++ = upper_32_bits(addr)((u32)(((addr) >> 16) >> 16));
1509 *batch++ = upper_32_bits(target_addr)((u32)(((target_addr) >> 16) >> 16));
1510 } else {
1511 *batch++ = (MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2)) | (1 << 21)) + 1;
1512 *batch++ = lower_32_bits(addr)((u32)(addr));
1513 *batch++ = upper_32_bits(addr)((u32)(((addr) >> 16) >> 16));
1514 *batch++ = lower_32_bits(target_addr)((u32)(target_addr));
1515 *batch++ = upper_32_bits(target_addr)((u32)(((target_addr) >> 16) >> 16));
1516 }
1517 } else if (gen >= 6) {
1518 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1519 *batch++ = 0;
1520 *batch++ = addr;
1521 *batch++ = target_addr;
1522 } else if (IS_I965G(eb->i915)IS_PLATFORM(eb->i915, INTEL_I965G)) {
1523 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2));
1524 *batch++ = 0;
1525 *batch++ = vma_phys_addr(vma, offset);
1526 *batch++ = target_addr;
1527 } else if (gen >= 4) {
1528 *batch++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2)) | MI_USE_GGTT(1 << 22);
1529 *batch++ = 0;
1530 *batch++ = addr;
1531 *batch++ = target_addr;
1532 } else if (gen >= 3 &&
1533 !(IS_I915G(eb->i915)IS_PLATFORM(eb->i915, INTEL_I915G) || IS_I915GM(eb->i915)IS_PLATFORM(eb->i915, INTEL_I915GM))) {
1534 *batch++ = MI_STORE_DWORD_IMM(((0x20) << 23) | (1)) | MI_MEM_VIRTUAL(1 << 22);
1535 *batch++ = addr;
1536 *batch++ = target_addr;
1537 } else {
1538 *batch++ = MI_STORE_DWORD_IMM(((0x20) << 23) | (1));
1539 *batch++ = vma_phys_addr(vma, offset);
1540 *batch++ = target_addr;
1541 }
1542
1543 return true1;
1544}
1545
1546static int reloc_entry_gpu(struct i915_execbuffer *eb,
1547 struct i915_vma *vma,
1548 u64 offset,
1549 u64 target_addr)
1550{
1551 if (eb->reloc_cache.vaddr)
1552 return false0;
1553
1554 if (!use_reloc_gpu(vma))
1555 return false0;
1556
1557 return __reloc_entry_gpu(eb, vma, offset, target_addr);
1558}
1559
1560static u64
1561relocate_entry(struct i915_vma *vma,
1562 const struct drm_i915_gem_relocation_entry *reloc,
1563 struct i915_execbuffer *eb,
1564 const struct i915_vma *target)
1565{
1566 u64 target_addr = relocation_target(reloc, target);
1567 u64 offset = reloc->offset;
1568 int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
1569
1570 if (reloc_gpu < 0)
1571 return reloc_gpu;
1572
1573 if (!reloc_gpu) {
1574 bool_Bool wide = eb->reloc_cache.use_64bit_reloc;
1575 void *vaddr;
1576
1577repeat:
1578 vaddr = reloc_vaddr(vma->obj, eb,
1579 offset >> PAGE_SHIFT12);
1580 if (IS_ERR(vaddr))
1581 return PTR_ERR(vaddr);
1582
1583 GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)))((void)0);
1584 clflush_write32(vaddr + offset_in_page(offset)((vaddr_t)(offset) & ((1 << 12) - 1)),
1585 lower_32_bits(target_addr)((u32)(target_addr)),
1586 eb->reloc_cache.vaddr);
1587
1588 if (wide) {
1589 offset += sizeof(u32);
1590 target_addr >>= 32;
1591 wide = false0;
1592 goto repeat;
1593 }
1594 }
1595
1596 return target->node.start | UPDATE(1ULL << (7));
1597}
1598
1599static u64
1600eb_relocate_entry(struct i915_execbuffer *eb,
1601 struct eb_vma *ev,
1602 const struct drm_i915_gem_relocation_entry *reloc)
1603{
1604 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
1605 struct eb_vma *target;
1606 int err;
1607
1608 /* we already hold a reference to all valid objects */
1609 target = eb_get_vma(eb, reloc->target_handle);
1610 if (unlikely(!target)__builtin_expect(!!(!target), 0))
1611 return -ENOENT2;
1612
1613 /* Validate that the target is in a valid r/w GPU domain */
1614 if (unlikely(reloc->write_domain & (reloc->write_domain - 1))__builtin_expect(!!(reloc->write_domain & (reloc->write_domain
- 1)), 0)
) {
1615 drm_dbg(&i915->drm, "reloc with multiple write domains: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1616 "target %d offset %d "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1617 "read %08x write %08x",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1618 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1619 (int) reloc->offset,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1620 reloc->read_domains,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1621 reloc->write_domain)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with multiple write domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
;
1622 return -EINVAL22;
1623 }
1624 if (unlikely((reloc->write_domain | reloc->read_domains)__builtin_expect(!!((reloc->write_domain | reloc->read_domains
) & ~(0x00000002 | 0x00000004 | 0x00000008 | 0x00000010 |
0x00000020)), 0)
1625 & ~I915_GEM_GPU_DOMAINS)__builtin_expect(!!((reloc->write_domain | reloc->read_domains
) & ~(0x00000002 | 0x00000004 | 0x00000008 | 0x00000010 |
0x00000020)), 0)
) {
1626 drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1627 "target %d offset %d "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1628 "read %08x write %08x",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1629 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1630 (int) reloc->offset,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1631 reloc->read_domains,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
1632 reloc->write_domain)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "reloc with read/write non-GPU domains: "
"target %d offset %d " "read %08x write %08x", reloc->target_handle
, (int) reloc->offset, reloc->read_domains, reloc->write_domain
)
;
1633 return -EINVAL22;
1634 }
1635
1636 if (reloc->write_domain) {
1637 target->flags |= EXEC_OBJECT_WRITE(1<<2);
1638
1639 /*
1640 * Sandybridge PPGTT errata: We need a global gtt mapping
1641 * for MI and pipe_control writes because the gpu doesn't
1642 * properly redirect them through the ppgtt for non_secure
1643 * batchbuffers.
1644 */
1645 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION0x00000010 &&
1646 IS_GEN(eb->i915, 6)(0 + (&(eb->i915)->__info)->gen == (6))) {
1647 err = i915_vma_bind(target->vma,
1648 target->vma->obj->cache_level,
1649 PIN_GLOBAL(1ULL << (10)), NULL((void *)0));
1650 if (err)
1651 return err;
1652 }
1653 }
1654
1655 /*
1656 * If the relocation already has the right value in it, no
1657 * more work needs to be done.
1658 */
1659 if (!DBG_FORCE_RELOC0 &&
1660 gen8_canonical_addr(target->vma->node.start) == reloc->presumed_offset)
1661 return 0;
1662
1663 /* Check that the relocation address is valid... */
1664 if (unlikely(reloc->offset >__builtin_expect(!!(reloc->offset > ev->vma->size
- (eb->reloc_cache.use_64bit_reloc ? 8 : 4)), 0)
1665 ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))__builtin_expect(!!(reloc->offset > ev->vma->size
- (eb->reloc_cache.use_64bit_reloc ? 8 : 4)), 0)
) {
1666 drm_dbg(&i915->drm, "Relocation beyond object bounds: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1667 "target %d offset %d size %d.\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1668 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1669 (int)reloc->offset,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
1670 (int)ev->vma->size)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation beyond object bounds: "
"target %d offset %d size %d.\n", reloc->target_handle, (
int)reloc->offset, (int)ev->vma->size)
;
1671 return -EINVAL22;
1672 }
1673 if (unlikely(reloc->offset & 3)__builtin_expect(!!(reloc->offset & 3), 0)) {
1674 drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1675 "target %d offset %d.\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1676 reloc->target_handle,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
1677 (int)reloc->offset)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Relocation not 4-byte aligned: "
"target %d offset %d.\n", reloc->target_handle, (int)reloc
->offset)
;
1678 return -EINVAL22;
1679 }
1680
1681 /*
1682 * If we write into the object, we need to force the synchronisation
1683 * barrier, either with an asynchronous clflush or if we executed the
1684 * patching using the GPU (though that should be serialised by the
1685 * timeline). To be completely sure, and since being required to
1686 * do relocations means we are already stalling, disable the user's
1687 * opt out of our synchronisation.
1688 */
1689 ev->flags &= ~EXEC_OBJECT_ASYNC(1<<6);
1690
1691 /* and update the user's relocation entry */
1692 return relocate_entry(ev->vma, reloc, eb, target->vma);
1693}
1694
1695static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
1696{
1697#define N_RELOC(x)((x) / sizeof(struct drm_i915_gem_relocation_entry)) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1698 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)((512) / sizeof(struct drm_i915_gem_relocation_entry))];
1699 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1700 struct drm_i915_gem_relocation_entry __user *urelocs =
1701 u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1702 unsigned long remain = entry->relocation_count;
1703
1704 if (unlikely(remain > N_RELOC(ULONG_MAX))__builtin_expect(!!(remain > ((0xffffffffffffffffUL) / sizeof
(struct drm_i915_gem_relocation_entry))), 0)
)
1705 return -EINVAL22;
1706
1707 /*
1708 * We must check that the entire relocation array is safe
1709 * to read. However, if the array is not writable the user loses
1710 * the updated relocation values.
1711 */
1712 if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs)))__builtin_expect(!!(!access_ok(urelocs, remain * sizeof(*urelocs
))), 0)
)
1713 return -EFAULT14;
1714
1715 do {
1716 struct drm_i915_gem_relocation_entry *r = stack;
1717 unsigned int count =
1718 min_t(unsigned long, remain, ARRAY_SIZE(stack))({ unsigned long __min_a = (remain); unsigned long __min_b = (
(sizeof((stack)) / sizeof((stack)[0]))); __min_a < __min_b
? __min_a : __min_b; })
;
1719 unsigned int copied;
1720
1721 /*
1722 * This is the fast path and we cannot handle a pagefault
1723 * whilst holding the struct mutex lest the user pass in the
1724 * relocations contained within an mmapped bo. In such a case
1725 * the page fault handler would call i915_gem_fault() and
1726 * we would try to acquire the struct mutex again. Obviously
1727 * this is bad and so lockdep complains vehemently.
1728 */
1729 pagefault_disable();
1730 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1731 pagefault_enable();
1732 if (unlikely(copied)__builtin_expect(!!(copied), 0)) {
1733 remain = -EFAULT14;
1734 goto out;
1735 }
1736
1737 remain -= count;
1738 do {
1739 u64 offset = eb_relocate_entry(eb, ev, r);
1740
1741 if (likely(offset == 0)__builtin_expect(!!(offset == 0), 1)) {
1742 } else if ((s64)offset < 0) {
1743 remain = (int)offset;
1744 goto out;
1745 } else {
1746 /*
1747 * Note that reporting an error now
1748 * leaves everything in an inconsistent
1749 * state as we have *already* changed
1750 * the relocation value inside the
1751 * object. As we have not changed the
1752 * reloc.presumed_offset and will not
1753 * change the execobject.offset, on the
1754 * next call we may not rewrite the value
1755 * inside the object, leaving it
1756 * dangling and causing a GPU hang, unless
1757 * userspace dynamically rebuilds the
1758 * relocations on each execbuf rather than
1759 * presuming a static tree.
1760 *
1761 * We did previously check if the relocations
1762 * were writable (access_ok), an error now
1763 * would be a strange race with mprotect,
1764 * having already demonstrated that we
1765 * can read from this userspace address.
1766 */
1767 offset = gen8_canonical_addr(offset & ~UPDATE(1ULL << (7)));
1768 __put_user(offset,({ __typeof(((offset))) __tmp = ((offset)); -copyout(&(__tmp
), (&urelocs[r - stack].presumed_offset), sizeof(__tmp));
})
1769 &urelocs[r - stack].presumed_offset)({ __typeof(((offset))) __tmp = ((offset)); -copyout(&(__tmp
), (&urelocs[r - stack].presumed_offset), sizeof(__tmp));
})
;
1770 }
1771 } while (r++, --count);
1772 urelocs += ARRAY_SIZE(stack)(sizeof((stack)) / sizeof((stack)[0]));
1773 } while (remain);
1774out:
1775 reloc_cache_reset(&eb->reloc_cache, eb);
1776 return remain;
1777}
1778
1779static int
1780eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
1781{
1782 const struct drm_i915_gem_exec_object2 *entry = ev->exec;
1783 struct drm_i915_gem_relocation_entry *relocs =
1784 u64_to_ptr(typeof(*relocs), entry->relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(entry->relocs_ptr); }
)
;
1785 unsigned int i;
1786 int err;
1787
1788 for (i = 0; i < entry->relocation_count; i++) {
1789 u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
1790
1791 if ((s64)offset < 0) {
1792 err = (int)offset;
1793 goto err;
1794 }
1795 }
1796 err = 0;
1797err:
1798 reloc_cache_reset(&eb->reloc_cache, eb);
1799 return err;
1800}
1801
1802static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1803{
1804 const char __user *addr, *end;
1805 unsigned long size;
1806 char __maybe_unused__attribute__((__unused__)) c;
1807
1808 size = entry->relocation_count;
1809 if (size == 0)
1810 return 0;
1811
1812 if (size > N_RELOC(ULONG_MAX)((0xffffffffffffffffUL) / sizeof(struct drm_i915_gem_relocation_entry
))
)
1813 return -EINVAL22;
1814
1815 addr = u64_to_user_ptr(entry->relocs_ptr)((void *)(uintptr_t)(entry->relocs_ptr));
1816 size *= sizeof(struct drm_i915_gem_relocation_entry);
1817 if (!access_ok(addr, size))
1818 return -EFAULT14;
1819
1820 end = addr + size;
1821 for (; addr < end; addr += PAGE_SIZE(1 << 12)) {
1822 int err = __get_user(c, addr)-copyin((addr), &((c)), sizeof((c)));
1823 if (err)
1824 return err;
1825 }
1826 return __get_user(c, end - 1)-copyin((end - 1), &((c)), sizeof((c)));
1827}
1828
1829static int eb_copy_relocations(const struct i915_execbuffer *eb)
1830{
1831 struct drm_i915_gem_relocation_entry *relocs;
1832 const unsigned int count = eb->buffer_count;
1833 unsigned int i;
1834 int err;
1835
1836 for (i = 0; i < count; i++) {
1837 const unsigned int nreloc = eb->exec[i].relocation_count;
1838 struct drm_i915_gem_relocation_entry __user *urelocs;
1839 unsigned long size;
1840 unsigned long copied;
1841
1842 if (nreloc == 0)
1843 continue;
1844
1845 err = check_relocations(&eb->exec[i]);
1846 if (err)
1847 goto err;
1848
1849 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr)((void *)(uintptr_t)(eb->exec[i].relocs_ptr));
1850 size = nreloc * sizeof(*relocs);
1851
1852 relocs = kvmalloc_array(size, 1, GFP_KERNEL(0x0001 | 0x0004));
1853 if (!relocs) {
1854 err = -ENOMEM12;
1855 goto err;
1856 }
1857
1858 /* copy_from_user is limited to < 4GiB */
1859 copied = 0;
1860 do {
1861 unsigned int len =
1862 min_t(u64, BIT_ULL(31), size - copied)({ u64 __min_a = ((1ULL << (31))); u64 __min_b = (size -
copied); __min_a < __min_b ? __min_a : __min_b; })
;
1863
1864 if (__copy_from_user((char *)relocs + copied,
1865 (char __user *)urelocs + copied,
1866 len))
1867 goto end;
1868
1869 copied += len;
1870 } while (copied < size);
1871
1872 /*
1873 * As we do not update the known relocation offsets after
1874 * relocating (due to the complexities in lock handling),
1875 * we need to mark them as invalid now so that we force the
1876 * relocation processing next time. Just in case the target
1877 * object is evicted and then rebound into its old
1878 * presumed_offset before the next execbuffer - if that
1879 * happened we would make the mistake of assuming that the
1880 * relocations were valid.
1881 */
1882 if (!user_access_begin(urelocs, size)access_ok(urelocs, size))
1883 goto end;
1884
1885 for (copied = 0; copied < nreloc; copied++)
1886 unsafe_put_user(-1,({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
1887 &urelocs[copied].presumed_offset,({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
1888 end_user)({ __typeof((-1)) __tmp = (-1); if (copyout(&(__tmp), &
urelocs[copied].presumed_offset, sizeof(__tmp)) != 0) goto end_user
; })
;
1889 user_access_end();
1890
1891 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1892 }
1893
1894 return 0;
1895
1896end_user:
1897 user_access_end();
1898end:
1899 kvfree(relocs);
1900 err = -EFAULT14;
1901err:
1902 while (i--) {
1903 relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(eb->exec[i].relocs_ptr
); })
;
1904 if (eb->exec[i].relocation_count)
1905 kvfree(relocs);
1906 }
1907 return err;
1908}
1909
1910static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1911{
1912 const unsigned int count = eb->buffer_count;
1913 unsigned int i;
1914
1915 for (i = 0; i < count; i++) {
1916 int err;
1917
1918 err = check_relocations(&eb->exec[i]);
1919 if (err)
1920 return err;
1921 }
1922
1923 return 0;
1924}
1925
1926static noinline__attribute__((__noinline__)) int eb_relocate_parse_slow(struct i915_execbuffer *eb,
1927 struct i915_request *rq)
1928{
1929 bool_Bool have_copy = false0;
1930 struct eb_vma *ev;
1931 int err = 0;
1932
1933repeat:
1934 if (signal_pending(current)(((({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" :
"=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_curproc)->p_siglist | (({struct cpu_info
*__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof
(struct cpu_info, ci_self))); __ci;})->ci_curproc)->p_p
->ps_siglist) & ~(({struct cpu_info *__ci; asm volatile
("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct
cpu_info, ci_self))); __ci;})->ci_curproc)->p_sigmask)
) {
1935 err = -ERESTARTSYS4;
1936 goto out;
1937 }
1938
1939 /* We may process another execbuffer during the unlock... */
1940 eb_release_vmas(eb, false0);
1941 i915_gem_ww_ctx_fini(&eb->ww);
1942
1943 if (rq) {
1944 /* nonblocking is always false */
1945 if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE(1UL << (0)),
1946 MAX_SCHEDULE_TIMEOUT(0x7fffffff)) < 0) {
1947 i915_request_put(rq);
1948 rq = NULL((void *)0);
1949
1950 err = -EINTR4;
1951 goto err_relock;
1952 }
1953
1954 i915_request_put(rq);
1955 rq = NULL((void *)0);
1956 }
1957
1958 /*
1959 * We take 3 passes through the slowpath.
1960 *
1961 * 1 - we try to just prefault all the user relocation entries and
1962 * then attempt to reuse the atomic pagefault disabled fast path again.
1963 *
1964 * 2 - we copy the user entries to a local buffer here outside of the
1965 * lock and allow ourselves to wait upon any rendering before
1966 * performing the relocations
1967 *
1968 * 3 - we already have a local copy of the relocation entries, but
1969 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1970 */
1971 if (!err) {
1972 err = eb_prefault_relocations(eb);
1973 } else if (!have_copy) {
1974 err = eb_copy_relocations(eb);
1975 have_copy = err == 0;
1976 } else {
1977 cond_resched()do { if (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0"
: "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_schedstate.spc_schedflags & 0x0002) yield
(); } while (0)
;
1978 err = 0;
1979 }
1980
1981 if (!err)
1982 flush_workqueue(eb->i915->mm.userptr_wq);
1983
1984err_relock:
1985 i915_gem_ww_ctx_init(&eb->ww, true1);
1986 if (err)
1987 goto out;
1988
1989 /* reacquire the objects */
1990repeat_validate:
1991 rq = eb_pin_engine(eb, false0);
1992 if (IS_ERR(rq)) {
1993 err = PTR_ERR(rq);
1994 rq = NULL((void *)0);
1995 goto err;
1996 }
1997
1998 /* We didn't throttle, should be NULL */
1999 GEM_WARN_ON(rq)({ __builtin_expect(!!(!!(rq)), 0); });
2000
2001 err = eb_validate_vmas(eb);
2002 if (err)
2003 goto err;
2004
2005 GEM_BUG_ON(!eb->batch)((void)0);
2006
2007 list_for_each_entry(ev, &eb->relocs, reloc_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->reloc_link
) *__mptr = ((&eb->relocs)->next); (__typeof(*ev) *
)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), reloc_link
) );}); &ev->reloc_link != (&eb->relocs); ev = (
{ const __typeof( ((__typeof(*ev) *)0)->reloc_link ) *__mptr
= (ev->reloc_link.next); (__typeof(*ev) *)( (char *)__mptr
- __builtin_offsetof(__typeof(*ev), reloc_link) );}))
{
2008 if (!have_copy) {
2009 pagefault_disable();
2010 err = eb_relocate_vma(eb, ev);
2011 pagefault_enable();
2012 if (err)
2013 break;
2014 } else {
2015 err = eb_relocate_vma_slow(eb, ev);
2016 if (err)
2017 break;
2018 }
2019 }
2020
2021 if (err == -EDEADLK11)
2022 goto err;
2023
2024 if (err && !have_copy)
2025 goto repeat;
2026
2027 if (err)
2028 goto err;
2029
2030 /* as last step, parse the command buffer */
2031 err = eb_parse(eb);
2032 if (err)
2033 goto err;
2034
2035 /*
2036 * Leave the user relocations as they are; this is the painfully slow path,
2037 * and we want to avoid the complication of dropping the lock whilst
2038 * having buffers reserved in the aperture and so causing spurious
2039 * ENOSPC for random operations.
2040 */
2041
2042err:
2043 if (err == -EDEADLK11) {
2044 eb_release_vmas(eb, false0);
2045 err = i915_gem_ww_ctx_backoff(&eb->ww);
2046 if (!err)
2047 goto repeat_validate;
2048 }
2049
2050 if (err == -EAGAIN35)
2051 goto repeat;
2052
2053out:
2054 if (have_copy) {
2055 const unsigned int count = eb->buffer_count;
2056 unsigned int i;
2057
2058 for (i = 0; i < count; i++) {
2059 const struct drm_i915_gem_exec_object2 *entry =
2060 &eb->exec[i];
2061 struct drm_i915_gem_relocation_entry *relocs;
2062
2063 if (!entry->relocation_count)
2064 continue;
2065
2066 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr)({ 1; (typeof(*relocs) *)(uintptr_t)(entry->relocs_ptr); }
)
;
2067 kvfree(relocs);
2068 }
2069 }
2070
2071 if (rq)
2072 i915_request_put(rq);
2073
2074 return err;
2075}
2076
2077static int eb_relocate_parse(struct i915_execbuffer *eb)
2078{
2079 int err;
2080 struct i915_request *rq = NULL((void *)0);
2081 bool_Bool throttle = true1;
2082
2083retry:
2084 rq = eb_pin_engine(eb, throttle);
2085 if (IS_ERR(rq)) {
2086 err = PTR_ERR(rq);
2087 rq = NULL((void *)0);
2088 if (err != -EDEADLK11)
2089 return err;
2090
2091 goto err;
2092 }
2093
2094 if (rq) {
2095#ifdef __linux__
2096 bool_Bool nonblock = eb->file->filp->f_flags & O_NONBLOCK0x0004;
2097#else
2098 bool_Bool nonblock = eb->file->filp->f_flag & FNONBLOCK0x0004;
2099#endif
2100
2101 /* Need to drop all locks now for throttling, take slowpath */
2102 err = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE(1UL << (0)), 0);
2103 if (err == -ETIME60) {
2104 if (nonblock) {
2105 err = -EWOULDBLOCK35;
2106 i915_request_put(rq);
2107 goto err;
2108 }
2109 goto slow;
2110 }
2111 i915_request_put(rq);
2112 rq = NULL((void *)0);
2113 }
2114
2115 /* only throttle once, even if we didn't need to throttle */
2116 throttle = false0;
2117
2118 err = eb_validate_vmas(eb);
2119 if (err == -EAGAIN35)
2120 goto slow;
2121 else if (err)
2122 goto err;
2123
2124 /* The objects are in their final locations, apply the relocations. */
2125 if (eb->args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
2126 struct eb_vma *ev;
2127
2128 list_for_each_entry(ev, &eb->relocs, reloc_link)for (ev = ({ const __typeof( ((__typeof(*ev) *)0)->reloc_link
) *__mptr = ((&eb->relocs)->next); (__typeof(*ev) *
)( (char *)__mptr - __builtin_offsetof(__typeof(*ev), reloc_link
) );}); &ev->reloc_link != (&eb->relocs); ev = (
{ const __typeof( ((__typeof(*ev) *)0)->reloc_link ) *__mptr
= (ev->reloc_link.next); (__typeof(*ev) *)( (char *)__mptr
- __builtin_offsetof(__typeof(*ev), reloc_link) );}))
{
2129 err = eb_relocate_vma(eb, ev);
2130 if (err)
2131 break;
2132 }
2133
2134 if (err == -EDEADLK11)
2135 goto err;
2136 else if (err)
2137 goto slow;
2138 }
2139
2140 if (!err)
2141 err = eb_parse(eb);
2142
2143err:
2144 if (err == -EDEADLK11) {
2145 eb_release_vmas(eb, false0);
2146 err = i915_gem_ww_ctx_backoff(&eb->ww);
2147 if (!err)
2148 goto retry;
2149 }
2150
2151 return err;
2152
2153slow:
2154 err = eb_relocate_parse_slow(eb, rq);
2155 if (err)
2156 /*
2157 * If the user expects the execobject.offset and
2158 * reloc.presumed_offset to be an exact match,
2159 * as for using NO_RELOC, then we cannot update
2160 * the execobject.offset until we have completed
2161 * relocation.
2162 */
2163 eb->args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
2164
2165 return err;
2166}
2167
2168static int eb_move_to_gpu(struct i915_execbuffer *eb)
2169{
2170 const unsigned int count = eb->buffer_count;
2171 unsigned int i = count;
2172 int err = 0;
2173
2174 while (i--) {
2175 struct eb_vma *ev = &eb->vma[i];
2176 struct i915_vma *vma = ev->vma;
2177 unsigned int flags = ev->flags;
2178 struct drm_i915_gem_object *obj = vma->obj;
2179
2180 assert_vma_held(vma)do { (void)(&((vma)->resv)->lock.base); } while(0);
2181
2182 if (flags & EXEC_OBJECT_CAPTURE(1<<7)) {
2183 struct i915_capture_list *capture;
2184
2185 capture = kmalloc(sizeof(*capture), GFP_KERNEL(0x0001 | 0x0004));
2186 if (capture) {
2187 capture->next = eb->request->capture_list;
2188 capture->vma = vma;
2189 eb->request->capture_list = capture;
2190 }
2191 }
2192
2193 /*
2194 * If the GPU is not _reading_ through the CPU cache, we need
2195 * to make sure that any writes (both previous GPU writes from
2196 * before a change in snooping levels and normal CPU writes)
2197 * caught in that cache are flushed to main memory.
2198 *
2199 * We want to say
2200 * obj->cache_dirty &&
2201 * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
2202 * but gcc's optimiser doesn't handle that as well and emits
2203 * two jumps instead of one. Maybe one day...
2204 */
2205 if (unlikely(obj->cache_dirty & ~obj->cache_coherent)__builtin_expect(!!(obj->cache_dirty & ~obj->cache_coherent
), 0)
) {
2206 if (i915_gem_clflush_object(obj, 0))
2207 flags &= ~EXEC_OBJECT_ASYNC(1<<6);
2208 }
2209
2210 if (err == 0 && !(flags & EXEC_OBJECT_ASYNC(1<<6))) {
2211 err = i915_request_await_object
2212 (eb->request, obj, flags & EXEC_OBJECT_WRITE(1<<2));
2213 }
2214
2215 if (err == 0)
2216 err = i915_vma_move_to_active(vma, eb->request, flags);
2217 }
2218
2219 if (unlikely(err)__builtin_expect(!!(err), 0))
2220 goto err_skip;
2221
2222 /* Unconditionally flush any chipset caches (for streaming writes). */
2223 intel_gt_chipset_flush(eb->engine->gt);
2224 return 0;
2225
2226err_skip:
2227 i915_request_set_error_once(eb->request, err);
2228 return err;
2229}
2230
2231static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
2232{
2233 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS((-((1 << 21) << 1)) | (3<<6) | (1<<15
))
)
2234 return -EINVAL22;
2235
2236 /* Kernel clipping was a DRI1 misfeature */
2237 if (!(exec->flags & (I915_EXEC_FENCE_ARRAY(1<<19) |
2238 I915_EXEC_USE_EXTENSIONS(1 << 21)))) {
2239 if (exec->num_cliprects || exec->cliprects_ptr)
2240 return -EINVAL22;
2241 }
2242
2243 if (exec->DR4 == 0xffffffff) {
2244 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n")__drm_dbg(DRM_UT_CORE, "UXA submitting garbage DR4, fixing up\n"
)
;
2245 exec->DR4 = 0;
2246 }
2247 if (exec->DR1 || exec->DR4)
2248 return -EINVAL22;
2249
2250 if ((exec->batch_start_offset | exec->batch_len) & 0x7)
2251 return -EINVAL22;
2252
2253 return 0;
2254}
2255
2256static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
2257{
2258 u32 *cs;
2259 int i;
2260
2261 if (!IS_GEN(rq->engine->i915, 7)(0 + (&(rq->engine->i915)->__info)->gen == (7
))
|| rq->engine->id != RCS0) {
2262 drm_dbg(&rq->engine->i915->drm, "sol reset is gen7/rcs only\n")drm_dev_dbg((&rq->engine->i915->drm)->dev, DRM_UT_DRIVER
, "sol reset is gen7/rcs only\n")
;
2263 return -EINVAL22;
2264 }
2265
2266 cs = intel_ring_begin(rq, 4 * 2 + 2);
2267 if (IS_ERR(cs))
2268 return PTR_ERR(cs);
2269
2270 *cs++ = MI_LOAD_REGISTER_IMM(4)(((0x22) << 23) | (2*(4)-1));
2271 for (i = 0; i < 4; i++) {
2272 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)((const i915_reg_t){ .reg = (0x5280 + (i) * 4) }));
2273 *cs++ = 0;
2274 }
2275 *cs++ = MI_NOOP(((0) << 23) | (0));
2276 intel_ring_advance(rq, cs);
2277
2278 return 0;
2279}
2280
2281static struct i915_vma *
2282shadow_batch_pin(struct i915_execbuffer *eb,
2283 struct drm_i915_gem_object *obj,
2284 struct i915_address_space *vm,
2285 unsigned int flags)
2286{
2287 struct i915_vma *vma;
2288 int err;
2289
2290 vma = i915_vma_instance(obj, vm, NULL((void *)0));
2291 if (IS_ERR(vma))
2292 return vma;
2293
2294 err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
2295 if (err)
2296 return ERR_PTR(err);
2297
2298 return vma;
2299}
2300
2301static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
2302{
2303 /*
2304 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2305 * batch" bit. Hence we need to pin secure batches into the global gtt.
2306 * hsw should have this fixed, but bdw mucks it up again. */
2307 if (eb->batch_flags & I915_DISPATCH_SECURE(1UL << (0)))
2308 return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL((void *)0), 0, 0, 0);
2309
2310 return NULL((void *)0);
2311}
2312
2313static int eb_parse(struct i915_execbuffer *eb)
2314{
2315 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2316 struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
2317 struct i915_vma *shadow, *trampoline, *batch;
2318 unsigned long len;
2319 int err;
2320
2321 if (!eb_use_cmdparser(eb)) {
2322 batch = eb_dispatch_secure(eb, eb->batch->vma);
2323 if (IS_ERR(batch))
2324 return PTR_ERR(batch);
2325
2326 goto secure_batch;
2327 }
2328
2329 len = eb->batch_len;
2330 if (!CMDPARSER_USES_GGTT(eb->i915)(0 + (&(eb->i915)->__info)->gen == (7))) {
2331 /*
2332 * ppGTT backed shadow buffers must be mapped RO, to prevent
2333 * post-scan tampering
2334 */
2335 if (!eb->context->vm->has_read_only) {
2336 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Cannot prevent post-scan tampering without RO capable vm\n"
)
2337 "Cannot prevent post-scan tampering without RO capable vm\n")drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Cannot prevent post-scan tampering without RO capable vm\n"
)
;
2338 return -EINVAL22;
2339 }
2340 } else {
2341 len += I915_CMD_PARSER_TRAMPOLINE_SIZE8;
2342 }
2343 if (unlikely(len < eb->batch_len)__builtin_expect(!!(len < eb->batch_len), 0)) /* last paranoid check of overflow */
2344 return -EINVAL22;
2345
2346 if (!pool) {
2347 pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
2348 if (IS_ERR(pool))
2349 return PTR_ERR(pool);
2350 eb->batch_pool = pool;
2351 }
2352
2353 err = i915_gem_object_lock(pool->obj, &eb->ww);
2354 if (err)
2355 goto err;
2356
2357 shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER(1ULL << (11)));
2358 if (IS_ERR(shadow)) {
2359 err = PTR_ERR(shadow);
2360 goto err;
2361 }
2362 i915_gem_object_set_readonly(shadow->obj);
2363 shadow->private = pool;
2364
2365 trampoline = NULL((void *)0);
2366 if (CMDPARSER_USES_GGTT(eb->i915)(0 + (&(eb->i915)->__info)->gen == (7))) {
2367 trampoline = shadow;
2368
2369 shadow = shadow_batch_pin(eb, pool->obj,
2370 &eb->engine->gt->ggtt->vm,
2371 PIN_GLOBAL(1ULL << (10)));
2372 if (IS_ERR(shadow)) {
2373 err = PTR_ERR(shadow);
2374 shadow = trampoline;
2375 goto err_shadow;
2376 }
2377 shadow->private = pool;
2378
2379 eb->batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
2380 }
2381
2382 batch = eb_dispatch_secure(eb, shadow);
2383 if (IS_ERR(batch)) {
2384 err = PTR_ERR(batch);
2385 goto err_trampoline;
2386 }
2387
2388 err = intel_engine_cmd_parser(eb->engine,
2389 eb->batch->vma,
2390 eb->batch_start_offset,
2391 eb->batch_len,
2392 shadow, trampoline);
2393 if (err)
2394 goto err_unpin_batch;
2395
2396 eb->batch = &eb->vma[eb->buffer_count++];
2397 eb->batch->vma = i915_vma_get(shadow);
2398 eb->batch->flags = __EXEC_OBJECT_HAS_PIN(1UL << (31));
2399
2400 eb->trampoline = trampoline;
2401 eb->batch_start_offset = 0;
2402
2403secure_batch:
2404 if (batch) {
2405 eb->batch = &eb->vma[eb->buffer_count++];
2406 eb->batch->flags = __EXEC_OBJECT_HAS_PIN(1UL << (31));
2407 eb->batch->vma = i915_vma_get(batch);
2408 }
2409 return 0;
2410
2411err_unpin_batch:
2412 if (batch)
2413 i915_vma_unpin(batch);
2414err_trampoline:
2415 if (trampoline)
2416 i915_vma_unpin(trampoline);
2417err_shadow:
2418 i915_vma_unpin(shadow);
2419err:
2420 return err;
2421}
2422
2423static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
2424{
2425 int err;
2426
2427 err = eb_move_to_gpu(eb);
2428 if (err)
2429 return err;
2430
2431 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET(1<<8)) {
2432 err = i915_reset_gen7_sol_offsets(eb->request);
2433 if (err)
2434 return err;
2435 }
2436
2437 /*
2438 * After we completed waiting for other engines (using HW semaphores)
2439 * then we can signal that this request/batch is ready to run. This
2440 * allows us to determine if the batch is still waiting on the GPU
2441 * or actually running by checking the breadcrumb.
2442 */
2443 if (eb->engine->emit_init_breadcrumb) {
2444 err = eb->engine->emit_init_breadcrumb(eb->request);
2445 if (err)
2446 return err;
2447 }
2448
2449 err = eb->engine->emit_bb_start(eb->request,
2450 batch->node.start +
2451 eb->batch_start_offset,
2452 eb->batch_len,
2453 eb->batch_flags);
2454 if (err)
2455 return err;
2456
2457 if (eb->trampoline) {
2458 GEM_BUG_ON(eb->batch_start_offset)((void)0);
2459 err = eb->engine->emit_bb_start(eb->request,
2460 eb->trampoline->node.start +
2461 eb->batch_len,
2462 0, 0);
2463 if (err)
2464 return err;
2465 }
2466
2467 if (intel_context_nopreempt(eb->context))
2468 __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
2469
2470 return 0;
2471}
2472
2473static int num_vcs_engines(const struct drm_i915_privateinteldrm_softc *i915)
2474{
2475 return hweight64(VDBOX_MASK(&i915->gt)({ unsigned int first__ = (VCS0); unsigned int count__ = (4);
((&i915->gt)->info.engine_mask & (((~0UL) >>
(64 - (first__ + count__ - 1) - 1)) & ((~0UL) << (
first__)))) >> first__; })
);
2476}
2477
2478/*
2479 * Find one BSD ring to dispatch the corresponding BSD command.
2480 * The engine index is returned.
2481 */
2482static unsigned int
2483gen8_dispatch_bsd_engine(struct drm_i915_privateinteldrm_softc *dev_priv,
2484 struct drm_file *file)
2485{
2486 struct drm_i915_file_private *file_priv = file->driver_priv;
2487
2488 /* Check whether the file_priv has already selected one ring. */
2489 if ((int)file_priv->bsd_engine < 0)
2490 file_priv->bsd_engine =
2491 get_random_int()arc4random() % num_vcs_engines(dev_priv);
2492
2493 return file_priv->bsd_engine;
2494}
2495
2496static const enum intel_engine_id user_ring_map[] = {
2497 [I915_EXEC_DEFAULT(0<<0)] = RCS0,
2498 [I915_EXEC_RENDER(1<<0)] = RCS0,
2499 [I915_EXEC_BLT(3<<0)] = BCS0,
2500 [I915_EXEC_BSD(2<<0)] = VCS0,
2501 [I915_EXEC_VEBOX(4<<0)] = VECS0
2502};
2503
2504static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
2505{
2506 struct intel_ring *ring = ce->ring;
2507 struct intel_timeline *tl = ce->timeline;
2508 struct i915_request *rq;
2509
2510 /*
2511 * Completely unscientific finger-in-the-air estimates for suitable
2512 * maximum user request size (to avoid blocking) and then backoff.
2513 */
2514 if (intel_ring_update_space(ring) >= PAGE_SIZE(1 << 12))
2515 return NULL((void *)0);
2516
2517 /*
2518 * Find a request such that, after waiting upon it, at least half of
2519 * the ring will be available. The hysteresis allows us to compete for the
2520 * shared ring and should mean that we sleep less often prior to
2521 * claiming our resources, but not so long that the ring completely
2522 * drains before we can submit our next request.
2523 */
2524 list_for_each_entry(rq, &tl->requests, link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *
__mptr = ((&tl->requests)->next); (__typeof(*rq) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );
}); &rq->link != (&tl->requests); rq = ({ const
__typeof( ((__typeof(*rq) *)0)->link ) *__mptr = (rq->
link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), link) );}))
{
2525 if (rq->ring != ring)
2526 continue;
2527
2528 if (__intel_ring_space(rq->postfix,
2529 ring->emit, ring->size) > ring->size / 2)
2530 break;
2531 }
2532 if (&rq->link == &tl->requests)
2533 return NULL((void *)0); /* weird, we will check again later for real */
2534
2535 return i915_request_get(rq);
2536}
2537
2538static struct i915_request *eb_pin_engine(struct i915_execbuffer *eb, bool_Bool throttle)
2539{
2540 struct intel_context *ce = eb->context;
2541 struct intel_timeline *tl;
2542 struct i915_request *rq = NULL((void *)0);
2543 int err;
2544
2545 GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED)((void)0);
2546
2547 if (unlikely(intel_context_is_banned(ce))__builtin_expect(!!(intel_context_is_banned(ce)), 0))
2548 return ERR_PTR(-EIO5);
2549
2550 /*
2551 * Pinning the contexts may generate requests in order to acquire
2552 * GGTT space, so do this first before we reserve a seqno for
2553 * ourselves.
2554 */
2555 err = intel_context_pin_ww(ce, &eb->ww);
2556 if (err)
2557 return ERR_PTR(err);
2558
2559 /*
2560 * Take a local wakeref for preparing to dispatch the execbuf as
2561 * we expect to access the hardware fairly frequently in the
2562 * process, and require the engine to be kept awake between accesses.
2563 * Upon dispatch, we acquire another prolonged wakeref that we hold
2564 * until the timeline is idle, which in turn releases the wakeref
2565 * taken on the engine, and the parent device.
2566 */
2567 tl = intel_context_timeline_lock(ce);
2568 if (IS_ERR(tl)) {
2569 intel_context_unpin(ce);
2570 return ERR_CAST(tl);
2571 }
2572
2573 intel_context_enter(ce);
2574 if (throttle)
2575 rq = eb_throttle(eb, ce);
2576 intel_context_timeline_unlock(tl);
2577
2578 eb->args->flags |= __EXEC_ENGINE_PINNED(1UL << (30));
2579 return rq;
2580}
2581
2582static void eb_unpin_engine(struct i915_execbuffer *eb)
2583{
2584 struct intel_context *ce = eb->context;
2585 struct intel_timeline *tl = ce->timeline;
2586
2587 if (!(eb->args->flags & __EXEC_ENGINE_PINNED(1UL << (30))))
2588 return;
2589
2590 eb->args->flags &= ~__EXEC_ENGINE_PINNED(1UL << (30));
2591
2592 mutex_lock(&tl->mutex)rw_enter_write(&tl->mutex);
2593 intel_context_exit(ce);
2594 mutex_unlock(&tl->mutex)rw_exit_write(&tl->mutex);
2595
2596 intel_context_unpin(ce);
2597}
2598
2599static unsigned int
2600eb_select_legacy_ring(struct i915_execbuffer *eb)
2601{
2602 struct drm_i915_privateinteldrm_softc *i915 = eb->i915;
2603 struct drm_i915_gem_execbuffer2 *args = eb->args;
2604 unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK(0x3f);
2605
2606 if (user_ring_id != I915_EXEC_BSD(2<<0) &&
2607 (args->flags & I915_EXEC_BSD_MASK(3 << (13)))) {
2608 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
2609 "execbuf with non bsd ring but with invalid "drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
2610 "bsd dispatch flags: %d\n", (int)(args->flags))drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with non bsd ring but with invalid "
"bsd dispatch flags: %d\n", (int)(args->flags))
;
2611 return -1;
2612 }
2613
2614 if (user_ring_id == I915_EXEC_BSD(2<<0) && num_vcs_engines(i915) > 1) {
2615 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK(3 << (13));
2616
2617 if (bsd_idx == I915_EXEC_BSD_DEFAULT(0 << (13))) {
2618 bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
2619 } else if (bsd_idx >= I915_EXEC_BSD_RING1(1 << (13)) &&
2620 bsd_idx <= I915_EXEC_BSD_RING2(2 << (13))) {
2621 bsd_idx >>= I915_EXEC_BSD_SHIFT(13);
2622 bsd_idx--;
2623 } else {
2624 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
2625 "execbuf with unknown bsd ring: %u\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
2626 bsd_idx)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown bsd ring: %u\n"
, bsd_idx)
;
2627 return -1;
2628 }
2629
2630 return _VCS(bsd_idx)(VCS0 + (bsd_idx));
2631 }
2632
2633 if (user_ring_id >= ARRAY_SIZE(user_ring_map)(sizeof((user_ring_map)) / sizeof((user_ring_map)[0]))) {
2634 drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
2635 user_ring_id)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf with unknown ring: %u\n"
, user_ring_id)
;
2636 return -1;
2637 }
2638
2639 return user_ring_map[user_ring_id];
2640}
2641
2642static int
2643eb_select_engine(struct i915_execbuffer *eb)
2644{
2645 struct intel_context *ce;
2646 unsigned int idx;
2647 int err;
2648
2649 if (i915_gem_context_user_engines(eb->gem_context))
2650 idx = eb->args->flags & I915_EXEC_RING_MASK(0x3f);
2651 else
2652 idx = eb_select_legacy_ring(eb);
2653
2654 ce = i915_gem_context_get_engine(eb->gem_context, idx);
2655 if (IS_ERR(ce))
2656 return PTR_ERR(ce);
2657
2658 intel_gt_pm_get(ce->engine->gt);
2659
2660 if (!test_bit(CONTEXT_ALLOC_BIT1, &ce->flags)) {
2661 err = intel_context_alloc_state(ce);
2662 if (err)
2663 goto err;
2664 }
2665
2666 /*
2667 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
2668 * EIO if the GPU is already wedged.
2669 */
2670 err = intel_gt_terminally_wedged(ce->engine->gt);
2671 if (err)
2672 goto err;
2673
2674 eb->context = ce;
2675 eb->engine = ce->engine;
2676
2677 /*
2678 * Make sure the engine pool stays alive even if we call intel_context_put
2679 * during ww handling. The pool is destroyed when the last pm reference
2680 * is dropped, which breaks our -EDEADLK handling.
2681 */
2682 return err;
2683
2684err:
2685 intel_gt_pm_put(ce->engine->gt);
2686 intel_context_put(ce);
2687 return err;
2688}
2689
2690static void
2691eb_put_engine(struct i915_execbuffer *eb)
2692{
2693 intel_gt_pm_put(eb->engine->gt);
2694 intel_context_put(eb->context);
2695}
2696
2697static void
2698__free_fence_array(struct eb_fence *fences, unsigned int n)
2699{
2700 while (n--) {
2701 drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)({ unsigned long __v = (unsigned long)(fences[n].syncobj); (typeof
(fences[n].syncobj))(__v & -(1UL << (2))); })
);
2702 dma_fence_put(fences[n].dma_fence);
2703 kfree(fences[n].chain_fence);
2704 }
2705 kvfree(fences);
2706}
2707
2708static int
2709add_timeline_fence_array(struct i915_execbuffer *eb,
2710 const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
2711{
2712 struct drm_i915_gem_exec_fence __user *user_fences;
2713 u64 __user *user_values;
2714 struct eb_fence *f;
2715 u64 nfences;
2716 int err = 0;
2717
2718 nfences = timeline_fences->fence_count;
2719 if (!nfences)
2720 return 0;
2721
2722 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2723 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long))extern char _ctassert[(!(sizeof(size_t) > sizeof(unsigned long
))) ? 1 : -1 ] __attribute__((__unused__))
;
2724 if (nfences > min_t(unsigned long,({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user_fences
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
)); __min_a < __min_b ? __min_a : __min_b; })
2725 ULONG_MAX / sizeof(*user_fences),({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user_fences
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
)); __min_a < __min_b ? __min_a : __min_b; })
2726 SIZE_MAX / sizeof(*f))({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user_fences
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
)); __min_a < __min_b ? __min_a : __min_b; })
- eb->num_fences)
2727 return -EINVAL22;
2728
2729 user_fences = u64_to_user_ptr(timeline_fences->handles_ptr)((void *)(uintptr_t)(timeline_fences->handles_ptr));
2730 if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
2731 return -EFAULT14;
2732
2733 user_values = u64_to_user_ptr(timeline_fences->values_ptr)((void *)(uintptr_t)(timeline_fences->values_ptr));
2734 if (!access_ok(user_values, nfences * sizeof(*user_values)))
2735 return -EFAULT14;
2736
2737#ifdef __linux__
2738 f = krealloc(eb->fences,
2739 (eb->num_fences + nfences) * sizeof(*f),
2740 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2741 if (!f)
2742 return -ENOMEM12;
2743#else
2744 f = kmalloc((eb->num_fences + nfences) * sizeof(*f),
2745 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2746 if (!f)
2747 return -ENOMEM12;
2748 memcpy(f, eb->fences, eb->num_fences * sizeof(*f))__builtin_memcpy((f), (eb->fences), (eb->num_fences * sizeof
(*f)))
;
2749 kfree(eb->fences);
2750#endif
2751
2752 eb->fences = f;
2753 f += eb->num_fences;
2754
2755#ifdef notyet
2756 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
2757 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS)extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
;
2758#endif
2759
2760 while (nfences--) {
2761 struct drm_i915_gem_exec_fence user_fence;
2762 struct drm_syncobj *syncobj;
2763 struct dma_fence *fence = NULL((void *)0);
2764 u64 point;
2765
2766 if (__copy_from_user(&user_fence,
2767 user_fences++,
2768 sizeof(user_fence)))
2769 return -EFAULT14;
2770
2771 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS(-((1<<1) << 1)))
2772 return -EINVAL22;
2773
2774 if (__get_user(point, user_values++)-copyin((user_values++), &((point)), sizeof((point))))
2775 return -EFAULT14;
2776
2777 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2778 if (!syncobj) {
2779 DRM_DEBUG("Invalid syncobj handle provided\n")__drm_dbg(DRM_UT_CORE, "Invalid syncobj handle provided\n");
2780 return -ENOENT2;
2781 }
2782
2783 fence = drm_syncobj_fence_get(syncobj);
2784
2785 if (!fence && user_fence.flags &&
2786 !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2787 DRM_DEBUG("Syncobj handle has no fence\n")__drm_dbg(DRM_UT_CORE, "Syncobj handle has no fence\n");
2788 drm_syncobj_put(syncobj);
2789 return -EINVAL22;
2790 }
2791
2792 if (fence)
2793 err = dma_fence_chain_find_seqno(&fence, point);
2794
2795 if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2796 DRM_DEBUG("Syncobj handle missing requested point %llu\n", point)__drm_dbg(DRM_UT_CORE, "Syncobj handle missing requested point %llu\n"
, point)
;
2797 dma_fence_put(fence);
2798 drm_syncobj_put(syncobj);
2799 return err;
2800 }
2801
2802 /*
2803 * A point might have been signaled already and
2804 * garbage collected from the timeline. In this case
2805 * just ignore the point and carry on.
2806 */
2807 if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1))) {
2808 drm_syncobj_put(syncobj);
2809 continue;
2810 }
2811
2812 /*
2813 * For timeline syncobjs we need to preallocate chains for
2814 * later signaling.
2815 */
2816 if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL(1<<1)) {
2817 /*
2818 * Waiting and signaling the same point (when point !=
2819 * 0) would break the timeline.
2820 */
2821 if (user_fence.flags & I915_EXEC_FENCE_WAIT(1<<0)) {
2822 DRM_DEBUG("Trying to wait & signal the same timeline point.\n")__drm_dbg(DRM_UT_CORE, "Trying to wait & signal the same timeline point.\n"
)
;
2823 dma_fence_put(fence);
2824 drm_syncobj_put(syncobj);
2825 return -EINVAL22;
2826 }
2827
2828 f->chain_fence =
2829 kmalloc(sizeof(*f->chain_fence),
2830 GFP_KERNEL(0x0001 | 0x0004));
2831 if (!f->chain_fence) {
2832 drm_syncobj_put(syncobj);
2833 dma_fence_put(fence);
2834 return -ENOMEM12;
2835 }
2836 } else {
2837 f->chain_fence = NULL((void *)0);
2838 }
2839
2840 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2)({ unsigned long __bits = (user_fence.flags); ((void)0); ((typeof
(syncobj))((unsigned long)(syncobj) | __bits)); })
;
2841 f->dma_fence = fence;
2842 f->value = point;
2843 f++;
2844 eb->num_fences++;
2845 }
2846
2847 return 0;
2848}
2849
2850static int add_fence_array(struct i915_execbuffer *eb)
2851{
2852 struct drm_i915_gem_execbuffer2 *args = eb->args;
2853 struct drm_i915_gem_exec_fence __user *user;
2854 unsigned long num_fences = args->num_cliprects;
2855 struct eb_fence *f;
2856
2857 if (!(args->flags & I915_EXEC_FENCE_ARRAY(1<<19)))
16: Assuming the condition is false
17: Taking false branch
2858 return 0;
2859
2860 if (!num_fences)
18: Assuming 'num_fences' is not equal to 0
19: Taking false branch
2861 return 0;
2862
2863 /* Check multiplication overflow for access_ok() and kvmalloc_array() */
2864 BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long))extern char _ctassert[(!(sizeof(size_t) > sizeof(unsigned long
))) ? 1 : -1 ] __attribute__((__unused__))
;
2865 if (num_fences > min_t(unsigned long,({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
) - eb->num_fences); __min_a < __min_b ? __min_a : __min_b
; })
20: Assuming '__min_a' is >= '__min_b'
21: '?' condition is false
22: Assuming the condition is false
23: Taking false branch
2866 ULONG_MAX / sizeof(*user),({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
) - eb->num_fences); __min_a < __min_b ? __min_a : __min_b
; })
2867 SIZE_MAX / sizeof(*f) - eb->num_fences)({ unsigned long __min_a = (0xffffffffffffffffUL / sizeof(*user
)); unsigned long __min_b = (0xffffffffffffffffUL / sizeof(*f
) - eb->num_fences); __min_a < __min_b ? __min_a : __min_b
; })
)
2868 return -EINVAL22;
2869
2870 user = u64_to_user_ptr(args->cliprects_ptr)((void *)(uintptr_t)(args->cliprects_ptr));
2871 if (!access_ok(user, num_fences * sizeof(*user)))
24: Taking false branch
2872 return -EFAULT14;
2873
2874#ifdef __linux__
2875 f = krealloc(eb->fences,
2876 (eb->num_fences + num_fences) * sizeof(*f),
2877 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2878 if (!f)
2879 return -ENOMEM12;
2880#else
2881 f = kmalloc((eb->num_fences + num_fences) * sizeof(*f),
25: Calling 'kmalloc'
27: Returned allocated memory
2882 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
2883 if (!f)
28: Assuming 'f' is non-null
29: Taking false branch
2884 return -ENOMEM12;
2885 memcpy(f, eb->fences, eb->num_fences * sizeof(*f))__builtin_memcpy((f), (eb->fences), (eb->num_fences * sizeof
(*f)))
;
2886 kfree(eb->fences);
2887#endif
2888
2889 eb->fences = f;
2890 f += eb->num_fences;
2891 while (num_fences--) {
30: Loop condition is true. Entering loop body
38: Loop condition is false. Execution continues on line 2930
2892 struct drm_i915_gem_exec_fence user_fence;
2893 struct drm_syncobj *syncobj;
2894 struct dma_fence *fence = NULL((void *)0);
2895
2896 if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
31: Taking false branch
2897 return -EFAULT14;
2898
2899 if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS(-((1<<1) << 1)))
32: Assuming the condition is false
33: Taking false branch
2900 return -EINVAL22;
2901
2902 syncobj = drm_syncobj_find(eb->file, user_fence.handle);
2903 if (!syncobj) {
34: Assuming 'syncobj' is non-null
35: Taking false branch
2904 DRM_DEBUG("Invalid syncobj handle provided\n")__drm_dbg(DRM_UT_CORE, "Invalid syncobj handle provided\n");
2905 return -ENOENT2;
2906 }
2907
2908 if (user_fence.flags & I915_EXEC_FENCE_WAIT(1<<0)) {
36: Assuming the condition is false
37: Taking false branch
2909 fence = drm_syncobj_fence_get(syncobj);
2910 if (!fence) {
2911 DRM_DEBUG("Syncobj handle has no fence\n")__drm_dbg(DRM_UT_CORE, "Syncobj handle has no fence\n");
2912 drm_syncobj_put(syncobj);
2913 return -EINVAL22;
2914 }
2915 }
2916
2917#ifdef notyet
2918 BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
2919 ~__I915_EXEC_FENCE_UNKNOWN_FLAGS)extern char _ctassert[(!(~(ARCH_KMALLOC_MINALIGN - 1) & ~
(-((1<<1) << 1)))) ? 1 : -1 ] __attribute__((__unused__
))
;
2920#endif
2921
2922 f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2)({ unsigned long __bits = (user_fence.flags); ((void)0); ((typeof
(syncobj))((unsigned long)(syncobj) | __bits)); })
;
2923 f->dma_fence = fence;
2924 f->value = 0;
2925 f->chain_fence = NULL((void *)0);
2926 f++;
2927 eb->num_fences++;
2928 }
2929
2930 return 0;
2931}
2932
2933static void put_fence_array(struct eb_fence *fences, int num_fences)
2934{
2935 if (fences)
2936 __free_fence_array(fences, num_fences);
2937}
2938
2939static int
2940await_fence_array(struct i915_execbuffer *eb)
2941{
2942 unsigned int n;
2943 int err;
2944
2945 for (n = 0; n < eb->num_fences; n++) {
2946 struct drm_syncobj *syncobj;
2947 unsigned int flags;
2948
2949 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2)({ unsigned long __v = (unsigned long)(eb->fences[n].syncobj
); *(&flags) = __v & ((1UL << (2)) - 1); (typeof
(eb->fences[n].syncobj))(__v & -(1UL << (2))); }
)
;
2950
2951 if (!eb->fences[n].dma_fence)
2952 continue;
2953
2954 err = i915_request_await_dma_fence(eb->request,
2955 eb->fences[n].dma_fence);
2956 if (err < 0)
2957 return err;
2958 }
2959
2960 return 0;
2961}
2962
2963static void signal_fence_array(const struct i915_execbuffer *eb)
2964{
2965 struct dma_fence * const fence = &eb->request->fence;
2966 unsigned int n;
2967
2968 for (n = 0; n < eb->num_fences; n++) {
2969 struct drm_syncobj *syncobj;
2970 unsigned int flags;
2971
2972 syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2)({ unsigned long __v = (unsigned long)(eb->fences[n].syncobj); *(&flags) = __v & ((1UL << (2)) - 1); (typeof(eb->fences[n].syncobj))(__v & -(1UL << (2))); });
2973 if (!(flags & I915_EXEC_FENCE_SIGNAL(1<<1)))
2974 continue;
2975
2976 if (eb->fences[n].chain_fence) {
2977 drm_syncobj_add_point(syncobj,
2978 eb->fences[n].chain_fence,
2979 fence,
2980 eb->fences[n].value);
2981 /*
2982 * The chain's ownership is transferred to the
2983 * timeline.
2984 */
2985 eb->fences[n].chain_fence = NULL((void *)0);
2986 } else {
2987 drm_syncobj_replace_fence(syncobj, fence);
2988 }
2989 }
2990}
2991
2992static int
2993parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
2994{
2995 struct i915_execbuffer *eb = data;
2996 struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
2997
2998 if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
2999 return -EFAULT14;
3000
3001 return add_timeline_fence_array(eb, &timeline_fences);
3002}
3003
3004static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
3005{
3006 struct i915_request *rq, *rn;
3007
3008 list_for_each_entry_safe(rq, rn, &tl->requests, link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *__mptr = ((&tl->requests)->next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );}), rn = ({ const __typeof( ((__typeof(*rq) *)0)->link ) *__mptr = (rq->link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), link) );}); &rq->link != (&tl->requests); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->link ) *__mptr = (rn->link.next); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rn), link) );}))
3009 if (rq == end || !i915_request_retire(rq))
3010 break;
3011}
3012
3013static int eb_request_add(struct i915_execbuffer *eb, int err)
3014{
3015 struct i915_request *rq = eb->request;
3016 struct intel_timeline * const tl = i915_request_timeline(rq);
3017 struct i915_sched_attr attr = {};
3018 struct i915_request *prev;
3019
3020 lockdep_assert_held(&tl->mutex)do { (void)(&tl->mutex); } while(0);
3021 lockdep_unpin_lock(&tl->mutex, rq->cookie);
3022
3023 trace_i915_request_add(rq);
3024
3025 prev = __i915_request_commit(rq);
3026
3027 /* Check that the context wasn't destroyed before submission */
3028 if (likely(!intel_context_is_closed(eb->context))__builtin_expect(!!(!intel_context_is_closed(eb->context)), 1)) {
3029 attr = eb->gem_context->sched;
3030 } else {
3031 /* Serialise with context_close via the add_to_timeline */
3032 i915_request_set_error_once(rq, -ENOENT2);
3033 __i915_request_skip(rq);
3034 err = -ENOENT2; /* override any transient errors */
3035 }
3036
3037 __i915_request_queue(rq, &attr);
3038
3039 /* Try to clean up the client's timeline after submitting the request */
3040 if (prev)
3041 retire_requests(tl, prev);
3042
3043 mutex_unlock(&tl->mutex)rw_exit_write(&tl->mutex);
3044
3045 return err;
3046}
3047
3048static const i915_user_extension_fn execbuf_extensions[] = {
3049 [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES0] = parse_timeline_fences,
3050};
3051
3052static int
3053parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
3054 struct i915_execbuffer *eb)
3055{
3056 if (!(args->flags & I915_EXEC_USE_EXTENSIONS(1 << 21)))
3057 return 0;
3058
3059 /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
3060 * have another flag also using it at the same time.
3061 */
3062 if (eb->args->flags & I915_EXEC_FENCE_ARRAY(1<<19))
3063 return -EINVAL22;
3064
3065 if (args->num_cliprects != 0)
3066 return -EINVAL22;
3067
3068 return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr)((void *)(uintptr_t)(args->cliprects_ptr)),
3069 execbuf_extensions,
3070 ARRAY_SIZE(execbuf_extensions)(sizeof((execbuf_extensions)) / sizeof((execbuf_extensions)[0])),
3071 eb);
3072}
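
For context on how this path is reached from userspace: with I915_EXEC_USE_EXTENSIONS set, cliprects_ptr is reinterpreted as the head of a chain of struct i915_user_extension nodes, and each node's name indexes execbuf_extensions[] above. The sketch below is hypothetical userspace code; the extension struct field names are assumed from the i915 uapi headers, which are not part of this listing, and nfences, handles, values and execbuf are caller-provided.

	/* Hypothetical userspace sketch: attach one timeline-fences extension. */
	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
		.base = {
			.next_extension = 0,	/* terminates the chain */
			.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
		},
		.fence_count = nfences,
		.handles_ptr = (uintptr_t)handles,	/* struct drm_i915_gem_exec_fence[] */
		.values_ptr  = (uintptr_t)values,	/* u64 timeline points, one per fence */
	};

	execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
	execbuf.num_cliprects = 0;		/* must be zero, checked at line 3065 */
	execbuf.cliprects_ptr = (uintptr_t)&ext;
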
3073
3074static int
3075i915_gem_do_execbuffer(struct drm_device *dev,
3076 struct drm_file *file,
3077 struct drm_i915_gem_execbuffer2 *args,
3078 struct drm_i915_gem_exec_object2 *exec)
3079{
3080 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3081 struct i915_execbuffer eb;
3082 struct dma_fence *in_fence = NULL((void *)0);
3083 struct sync_file *out_fence = NULL((void *)0);
3084 struct i915_vma *batch;
3085 int out_fence_fd = -1;
3086 int err;
3087
3088 BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS)extern char _ctassert[(!((~0u << 30) & ~((-((1 << 21) << 1)) | (3<<6) | (1<<15)))) ? 1 : -1 ] __attribute__((__unused__));
3089 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &extern char _ctassert[(!((~0u << 28) & ~-((1<<7)<<1))) ? 1 : -1 ] __attribute__((__unused__))
3090 ~__EXEC_OBJECT_UNKNOWN_FLAGS)extern char _ctassert[(!((~0u << 28) & ~-((1<<7)<<1))) ? 1 : -1 ] __attribute__((__unused__));
3091
3092 eb.i915 = i915;
3093 eb.file = file;
3094 eb.args = args;
3095 if (DBG_FORCE_RELOC0 || !(args->flags & I915_EXEC_NO_RELOC(1<<11)))
7
Assuming the condition is false
8
Taking false branch
3096 args->flags |= __EXEC_HAS_RELOC(1UL << (31));
3097
3098 eb.exec = exec;
3099 eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
3100 eb.vma[0].vma = NULL((void *)0);
3101 eb.reloc_pool = eb.batch_pool = NULL((void *)0);
3102 eb.reloc_context = NULL((void *)0);
3103
3104 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS-((1<<7)<<1);
3105 reloc_cache_init(&eb.reloc_cache, eb.i915);
3106
3107 eb.buffer_count = args->buffer_count;
3108 eb.batch_start_offset = args->batch_start_offset;
3109 eb.batch_len = args->batch_len;
3110 eb.trampoline = NULL((void *)0);
3111
3112 eb.fences = NULL((void *)0);
3113 eb.num_fences = 0;
3114
3115 eb.batch_flags = 0;
3116 if (args->flags & I915_EXEC_SECURE(1<<9)) {
9
Assuming the condition is false
10
Taking false branch
3117 if (INTEL_GEN(i915)((&(i915)->__info)->gen) >= 11)
3118 return -ENODEV19;
3119
3120 /* Return -EPERM to trigger fallback code on old binaries. */
3121 if (!HAS_SECURE_BATCHES(i915)(((&(i915)->__info)->gen) < 6))
3122 return -EPERM1;
3123
3124 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN0x1))
3125 return -EPERM1;
3126
3127 eb.batch_flags |= I915_DISPATCH_SECURE(1UL << (0));
3128 }
3129 if (args->flags & I915_EXEC_IS_PINNED(1<<10))
11
Assuming the condition is false
12
Taking false branch
3130 eb.batch_flags |= I915_DISPATCH_PINNED(1UL << (1));
3131
3132 err = parse_execbuf2_extensions(args, &eb);
3133 if (err)
13
Assuming 'err' is 0
14
Taking false branch
3134 goto err_ext;
3135
3136 err = add_fence_array(&eb);
15
Calling 'add_fence_array'
39
Returned allocated memory
3137 if (err
39.1
'err' is 0
)
40
Taking false branch
3138 goto err_ext;
3139
3140#define IN_FENCES (I915_EXEC_FENCE_IN(1<<16) | I915_EXEC_FENCE_SUBMIT(1 << 20))
3141 if (args->flags & IN_FENCES) {
41
Assuming the condition is true
42
Taking true branch
3142 if ((args->flags & IN_FENCES) == IN_FENCES)
43
Assuming the condition is true
44
Taking true branch
3143 return -EINVAL22;
45
Potential leak of memory pointed to by 'eb.fences'
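
The leak reported here follows directly from the path above: add_fence_array(), called at line 3136, allocated the array now held in eb.fences, but the early return at line 3143 never reaches the err_ext label at line 3292, and put_fence_array(eb.fences, eb.num_fences) on line 3293 is the only place that memory is released. A minimal sketch of one possible fix, assuming nothing else requires the direct return, is to route the error through the existing cleanup label; this is illustrative only, not a change taken from the tree:

	if (args->flags & IN_FENCES) {
		if ((args->flags & IN_FENCES) == IN_FENCES) {
			err = -EINVAL;
			goto err_ext;	/* err_ext calls put_fence_array(), freeing eb.fences */
		}

		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
		if (!in_fence) {
			err = -EINVAL;
			goto err_ext;
		}
	}
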
3144
3145 in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)((u32)(args->rsvd2)));
3146 if (!in_fence) {
3147 err = -EINVAL22;
3148 goto err_ext;
3149 }
3150 }
3151#undef IN_FENCES
3152
3153 if (args->flags & I915_EXEC_FENCE_OUT(1<<17)) {
3154 out_fence_fd = get_unused_fd_flags(O_CLOEXEC0x10000);
3155 if (out_fence_fd < 0) {
3156 err = out_fence_fd;
3157 goto err_in_fence;
3158 }
3159 }
3160
3161 err = eb_create(&eb);
3162 if (err)
3163 goto err_out_fence;
3164
3165 GEM_BUG_ON(!eb.lut_size)((void)0);
3166
3167 err = eb_select_context(&eb);
3168 if (unlikely(err)__builtin_expect(!!(err), 0))
3169 goto err_destroy;
3170
3171 err = eb_select_engine(&eb);
3172 if (unlikely(err)__builtin_expect(!!(err), 0))
3173 goto err_context;
3174
3175 err = eb_lookup_vmas(&eb);
3176 if (err) {
3177 eb_release_vmas(&eb, true1);
3178 goto err_engine;
3179 }
3180
3181 i915_gem_ww_ctx_init(&eb.ww, true1);
3182
3183 err = eb_relocate_parse(&eb);
3184 if (err) {
3185 /*
3186 * If the user expects the execobject.offset and
3187 * reloc.presumed_offset to be an exact match,
3188 * as for using NO_RELOC, then we cannot update
3189 * the execobject.offset until we have completed
3190 * relocation.
3191 */
3192 args->flags &= ~__EXEC_HAS_RELOC(1UL << (31));
3193 goto err_vma;
3194 }
3195
3196 ww_acquire_done(&eb.ww.ctx);
3197
3198 batch = eb.batch->vma;
3199
3200 /* All GPU relocation batches must be submitted prior to the user rq */
3201 GEM_BUG_ON(eb.reloc_cache.rq)((void)0);
3202
3203 /* Allocate a request for this batch buffer nice and early. */
3204 eb.request = i915_request_create(eb.context);
3205 if (IS_ERR(eb.request)) {
3206 err = PTR_ERR(eb.request);
3207 goto err_vma;
3208 }
3209
3210 if (in_fence) {
3211 if (args->flags & I915_EXEC_FENCE_SUBMIT(1 << 20))
3212 err = i915_request_await_execution(eb.request,
3213 in_fence,
3214 eb.engine->bond_execute);
3215 else
3216 err = i915_request_await_dma_fence(eb.request,
3217 in_fence);
3218 if (err < 0)
3219 goto err_request;
3220 }
3221
3222 if (eb.fences) {
3223 err = await_fence_array(&eb);
3224 if (err)
3225 goto err_request;
3226 }
3227
3228 if (out_fence_fd != -1) {
3229 out_fence = sync_file_create(&eb.request->fence);
3230 if (!out_fence) {
3231 err = -ENOMEM12;
3232 goto err_request;
3233 }
3234 }
3235
3236 /*
3237 * Whilst this request exists, batch_obj will be on the
3238 * active_list, and so will hold the active reference. Only when this
3239 * request is retired will the batch_obj be moved onto the
3240 * inactive_list and lose its active reference. Hence we do not need
3241 * to explicitly hold another reference here.
3242 */
3243 eb.request->batch = batch;
3244 if (eb.batch_pool)
3245 intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
3246
3247 trace_i915_request_queue(eb.request, eb.batch_flags);
3248 err = eb_submit(&eb, batch);
3249err_request:
3250 i915_request_get(eb.request);
3251 err = eb_request_add(&eb, err);
3252
3253 if (eb.fences)
3254 signal_fence_array(&eb);
3255
3256 if (out_fence) {
3257 if (err == 0) {
3258 fd_install(out_fence_fd, out_fence->file);
3259 args->rsvd2 &= GENMASK_ULL(31, 0)(((~0ULL) >> (64 - (31) - 1)) & ((~0ULL) << (0))); /* keep in-fence */
3260 args->rsvd2 |= (u64)out_fence_fd << 32;
3261 out_fence_fd = -1;
3262 } else {
3263 fput(out_fence->file);
3264 }
3265 }
3266 i915_request_put(eb.request);
3267
3268err_vma:
3269 eb_release_vmas(&eb, true1);
3270 if (eb.trampoline)
3271 i915_vma_unpin(eb.trampoline);
3272 WARN_ON(err == -EDEADLK)({ int __ret = !!((err == -11)); if (__ret) printf("%s", "WARN_ON(" "err == -11" ")"); __builtin_expect(!!(__ret), 0); });
3273 i915_gem_ww_ctx_fini(&eb.ww);
3274
3275 if (eb.batch_pool)
3276 intel_gt_buffer_pool_put(eb.batch_pool);
3277 if (eb.reloc_pool)
3278 intel_gt_buffer_pool_put(eb.reloc_pool);
3279 if (eb.reloc_context)
3280 intel_context_put(eb.reloc_context);
3281err_engine:
3282 eb_put_engine(&eb);
3283err_context:
3284 i915_gem_context_put(eb.gem_context);
3285err_destroy:
3286 eb_destroy(&eb);
3287err_out_fence:
3288 if (out_fence_fd != -1)
3289 put_unused_fd(out_fence_fd);
3290err_in_fence:
3291 dma_fence_put(in_fence);
3292err_ext:
3293 put_fence_array(eb.fences, eb.num_fences);
3294 return err;
3295}
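
One detail of the function above that is easy to miss is the fd plumbing on lines 3259-3261: when I915_EXEC_FENCE_OUT is set, the new sync_file descriptor is returned in the upper 32 bits of rsvd2, while the lower 32 bits keep whatever in-fence fd the caller passed. A hypothetical userspace sketch, assuming libdrm's drmIoctl() and the execbuffer2_wr ioctl name from the uapi headers (neither appears in this listing); drm_fd, in_fence_fd and execbuf are caller state:

	execbuf.flags |= I915_EXEC_FENCE_OUT;
	execbuf.rsvd2 = (uint32_t)in_fence_fd;	/* lower 32 bits only consumed when an in-fence flag is also set */

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf) == 0) {
		int out_fence_fd = (int)(execbuf.rsvd2 >> 32);
		/* wait on or forward out_fence_fd, then close() it */
	}
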
3296
3297static size_t eb_element_size(void)
3298{
3299 return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
3300}
3301
3302static bool_Bool check_buffer_count(size_t count)
3303{
3304 const size_t sz = eb_element_size();
3305
3306 /*
3307 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
3308 * array size (see eb_create()). Otherwise, we can accept an array as
3309 * large as can be addressed (though use large arrays at your peril)!
3310 */
3311
3312 return !(count < 1 || count > INT_MAX0x7fffffff || count > SIZE_MAX0xffffffffffffffffUL / sz - 1);
3313}
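
The expression on line 3312 is an overflow guard as much as a range check: rejecting count > SIZE_MAX / sz - 1 guarantees that (count + 1) * sz cannot wrap, so the later kvmalloc_array() calls are handed a sane byte count. A standalone sketch of the same pattern, using an arbitrary element size and assuming a 64-bit size_t (nothing here is taken from the driver):

	#include <limits.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static bool check_count(size_t count, size_t sz)
	{
		return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
	}

	int main(void)
	{
		size_t sz = (size_t)1 << 40;		/* arbitrary large element size */
		size_t limit = SIZE_MAX / sz - 1;	/* largest accepted count */

		printf("%d\n", check_count(limit, sz));		/* 1: (limit + 1) * sz still fits */
		printf("%d\n", check_count(limit + 1, sz));	/* 0: rejected, the product would wrap */
		return 0;
	}
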
3314
3315/*
3316 * Legacy execbuffer just creates an exec2 list from the original exec object
3317 * list array and passes it to the real function.
3318 */
3319int
3320i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
3321 struct drm_file *file)
3322{
3323 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3324 struct drm_i915_gem_execbuffer *args = data;
3325 struct drm_i915_gem_execbuffer2 exec2;
3326 struct drm_i915_gem_exec_object *exec_list = NULL((void *)0);
3327 struct drm_i915_gem_exec_object2 *exec2_list = NULL((void *)0);
3328 const size_t count = args->buffer_count;
3329 unsigned int i;
3330 int err;
3331
3332 if (!check_buffer_count(count)) {
3333 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf2 with %zd buffers\n", count);
3334 return -EINVAL22;
3335 }
3336
3337 exec2.buffers_ptr = args->buffers_ptr;
3338 exec2.buffer_count = args->buffer_count;
3339 exec2.batch_start_offset = args->batch_start_offset;
3340 exec2.batch_len = args->batch_len;
3341 exec2.DR1 = args->DR1;
3342 exec2.DR4 = args->DR4;
3343 exec2.num_cliprects = args->num_cliprects;
3344 exec2.cliprects_ptr = args->cliprects_ptr;
3345 exec2.flags = I915_EXEC_RENDER(1<<0);
3346 i915_execbuffer2_set_context_id(exec2, 0)(exec2).rsvd1 = 0 & (0xffffffff);
3347
3348 err = i915_gem_check_execbuffer(&exec2);
3349 if (err)
3350 return err;
3351
3352 /* Copy in the exec list from userland */
3353 exec_list = kvmalloc_array(count, sizeof(*exec_list),
3354 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3355
3356 /* Allocate extra slots for use by the command parser */
3357 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3358 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3359 if (exec_list == NULL((void *)0) || exec2_list == NULL((void *)0)) {
3360 drm_dbg(&i915->drm,drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %d buffers\n", args->buffer_count)
3361 "Failed to allocate exec list for %d buffers\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %d buffers\n", args->buffer_count)
3362 args->buffer_count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %d buffers\n", args->buffer_count);
3363 kvfree(exec_list);
3364 kvfree(exec2_list);
3365 return -ENOMEM12;
3366 }
3367 err = copy_from_user(exec_list,
3368 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr)),
3369 sizeof(*exec_list) * count);
3370 if (err) {
3371 drm_dbg(&i915->drm, "copy %d exec entries failed %d\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "copy %d exec entries failed %d\n", args->buffer_count, err)
3372 args->buffer_count, err)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "copy %d exec entries failed %d\n", args->buffer_count, err);
3373 kvfree(exec_list);
3374 kvfree(exec2_list);
3375 return -EFAULT14;
3376 }
3377
3378 for (i = 0; i < args->buffer_count; i++) {
3379 exec2_list[i].handle = exec_list[i].handle;
3380 exec2_list[i].relocation_count = exec_list[i].relocation_count;
3381 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
3382 exec2_list[i].alignment = exec_list[i].alignment;
3383 exec2_list[i].offset = exec_list[i].offset;
3384 if (INTEL_GEN(to_i915(dev))((&(to_i915(dev))->__info)->gen) < 4)
3385 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE(1<<0);
3386 else
3387 exec2_list[i].flags = 0;
3388 }
3389
3390 err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
3391 if (exec2.flags & __EXEC_HAS_RELOC(1UL << (31))) {
3392 struct drm_i915_gem_exec_object __user *user_exec_list =
3393 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr));
3394
3395 /* Copy the new buffer offsets back to the user's exec list. */
3396 for (i = 0; i < args->buffer_count; i++) {
3397 if (!(exec2_list[i].offset & UPDATE(1ULL << (7))))
3398 continue;
3399
3400 exec2_list[i].offset =
3401 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK-(1ULL << (12)));
3402 exec2_list[i].offset &= PIN_OFFSET_MASK-(1ULL << (12));
3403 if (__copy_to_user(&user_exec_list[i].offset,
3404 &exec2_list[i].offset,
3405 sizeof(user_exec_list[i].offset)))
3406 break;
3407 }
3408 }
3409
3410 kvfree(exec_list);
3411 kvfree(exec2_list);
3412 return err;
3413}
3414
3415int
3416i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
3417 struct drm_file *file)
3418{
3419 struct drm_i915_privateinteldrm_softc *i915 = to_i915(dev);
3420 struct drm_i915_gem_execbuffer2 *args = data;
3421 struct drm_i915_gem_exec_object2 *exec2_list;
3422 const size_t count = args->buffer_count;
3423 int err;
3424
3425 if (!check_buffer_count(count)) {
1
Taking false branch
3426 drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "execbuf2 with %zd buffers\n", count);
3427 return -EINVAL22;
3428 }
3429
3430 err = i915_gem_check_execbuffer(args);
3431 if (err
1.1
'err' is 0
)
2
Taking false branch
3432 return err;
3433
3434 /* Allocate extra slots for use by the command parser */
3435 exec2_list = kvmalloc_array(count + 2, eb_element_size(),
3436 __GFP_NOWARN0 | GFP_KERNEL(0x0001 | 0x0004));
3437 if (exec2_list == NULL((void *)0)) {
3
Assuming 'exec2_list' is not equal to NULL
4
Taking false branch
3438 drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %zd buffers\n", count)
3439 count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "Failed to allocate exec list for %zd buffers\n", count);
3440 return -ENOMEM12;
3441 }
3442 if (copy_from_user(exec2_list,
5
Taking false branch
3443 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr)),
3444 sizeof(*exec2_list) * count)) {
3445 drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count)drm_dev_dbg((&i915->drm)->dev, DRM_UT_DRIVER, "copy %zd exec entries failed\n", count);
3446 kvfree(exec2_list);
3447 return -EFAULT14;
3448 }
3449
3450 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
6
Calling 'i915_gem_do_execbuffer'
3451
3452 /*
3453 * Now that we have begun execution of the batchbuffer, we ignore
3454 * any new error after this point. Also given that we have already
3455 * updated the associated relocations, we try to write out the current
3456 * object locations irrespective of any error.
3457 */
3458 if (args->flags & __EXEC_HAS_RELOC(1UL << (31))) {
3459 struct drm_i915_gem_exec_object2 __user *user_exec_list =
3460 u64_to_user_ptr(args->buffers_ptr)((void *)(uintptr_t)(args->buffers_ptr));
3461 unsigned int i;
3462
3463 /* Copy the new buffer offsets back to the user's exec list. */
3464 /*
3465 * Note: count * sizeof(*user_exec_list) does not overflow,
3466 * because we checked 'count' in check_buffer_count().
3467 *
3468 * And this range already got effectively checked earlier
3469 * when we did the "copy_from_user()" above.
3470 */
3471 if (!user_write_access_begin(user_exec_list,access_ok(user_exec_list, count * sizeof(*user_exec_list))
3472 count * sizeof(*user_exec_list))access_ok(user_exec_list, count * sizeof(*user_exec_list)))
3473 goto end;
3474
3475 for (i = 0; i < args->buffer_count; i++) {
3476 if (!(exec2_list[i].offset & UPDATE(1ULL << (7))))
3477 continue;
3478
3479 exec2_list[i].offset =
3480 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK-(1ULL << (12)));
3481 unsafe_put_user(exec2_list[i].offset,({ __typeof((exec2_list[i].offset)) __tmp = (exec2_list[i].offset); if (copyout(&(__tmp), &user_exec_list[i].offset, sizeof(__tmp)) != 0) goto end_user; })
3482 &user_exec_list[i].offset,({ __typeof((exec2_list[i].offset)) __tmp = (exec2_list[i].offset); if (copyout(&(__tmp), &user_exec_list[i].offset, sizeof(__tmp)) != 0) goto end_user; })
3483 end_user)({ __typeof((exec2_list[i].offset)) __tmp = (exec2_list[i].offset); if (copyout(&(__tmp), &user_exec_list[i].offset, sizeof(__tmp)) != 0) goto end_user; });
3484 }
3485end_user:
3486 user_write_access_end();
3487end:;
3488 }
3489
3490 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS(-((1 << 21) << 1));
3491 kvfree(exec2_list);
3492 return err;
3493}
3494
3495#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0
3496#include "selftests/i915_gem_execbuffer.c"
3497#endif

/usr/src/sys/dev/pci/drm/include/linux/slab.h

1/* Public domain. */
2
3#ifndef _LINUX_SLAB_H
4#define _LINUX_SLAB_H
5
6#include <sys/types.h>
7#include <sys/malloc.h>
8
9#include <linux/types.h>
10#include <linux/workqueue.h>
11#include <linux/gfp.h>
12
13#include <linux/processor.h> /* for CACHELINESIZE */
14
15static inline void *
16kmalloc(size_t size, int flags)
17{
18 return malloc(size, M_DRM145, flags);
26
Memory is allocated
19}
20
21static inline void *
22kmalloc_array(size_t n, size_t size, int flags)
23{
24 if (n != 0 && SIZE_MAX0xffffffffffffffffUL / n < size)
25 return NULL((void *)0);
26 return malloc(n * size, M_DRM145, flags);
27}
28
29static inline void *
30kcalloc(size_t n, size_t size, int flags)
31{
32 if (n != 0 && SIZE_MAX0xffffffffffffffffUL / n < size)
33 return NULL((void *)0);
34 return malloc(n * size, M_DRM145, flags | M_ZERO0x0008);
35}
36
37static inline void *
38kzalloc(size_t size, int flags)
39{
40 return malloc(size, M_DRM145, flags | M_ZERO0x0008);
41}
42
43static inline void
44kfree(const void *objp)
45{
46 free((void *)objp, M_DRM145, 0);
47}
48
49#endif
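
These shims map the Linux slab calls used by the driver onto OpenBSD's malloc(9)/free(9) with the M_DRM type; the size argument of 0 in kfree() simply means the caller is not supplying the allocation size. The n != 0 && SIZE_MAX / n < size test in kmalloc_array() and kcalloc() is the usual multiplication-overflow guard, sketched below with made-up numbers (illustrative only, not driver code):

	/* A request whose byte count would wrap returns NULL instead of
	 * silently allocating a truncated buffer.
	 */
	void *p = kmalloc_array(SIZE_MAX / 8 + 1, 16, GFP_KERNEL);
	/* SIZE_MAX / n is 7 here, which is < 16, so the guard fires and
	 * p == NULL before malloc(9) is ever called.
	 */
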