Bug Summary

File: dev/pci/drm/i915/gt/intel_lrc.c
Warning: line 1584, column 26
Value stored to 'engine' during its initialization is never read

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name intel_lrc.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free 
-fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/dev/pci/drm/i915/gt/intel_lrc.c
1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Ben Widawsky <ben@bwidawsk.net>
25 * Michel Thierry <michel.thierry@intel.com>
26 * Thomas Daniel <thomas.daniel@intel.com>
27 * Oscar Mateo <oscar.mateo@intel.com>
28 *
29 */
30
31/**
32 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33 *
34 * Motivation:
35 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36 * These expanded contexts enable a number of new abilities, especially
37 * "Execlists" (also implemented in this file).
38 *
39 * One of the main differences with the legacy HW contexts is that logical
40 * ring contexts incorporate many more things to the context's state, like
41 * PDPs or ringbuffer control registers:
42 *
43 * The reason why PDPs are included in the context is straightforward: as
44 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45 * contained there mean you don't need to do a ppgtt->switch_mm yourself,
46 * instead, the GPU will do it for you on the context switch.
47 *
48 * But, what about the ringbuffer control registers (head, tail, etc..)?
49 * shouldn't we just need a set of those per engine command streamer? This is
50 * where the name "Logical Rings" starts to make sense: by virtualizing the
51 * rings, the engine cs shifts to a new "ring buffer" with every context
52 * switch. When you want to submit a workload to the GPU you: A) choose your
53 * context, B) find its appropriate virtualized ring, C) write commands to it
54 * and then, finally, D) tell the GPU to switch to that context.
55 *
56 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57 * to a contexts is via a context execution list, ergo "Execlists".
58 *
59 * LRC implementation:
60 * Regarding the creation of contexts, we have:
61 *
62 * - One global default context.
63 * - One local default context for each opened fd.
64 * - One local extra context for each context create ioctl call.
65 *
66 * Now that ringbuffers belong per-context (and not per-engine, like before)
67 * and that contexts are uniquely tied to a given engine (and not reusable,
68 * like before) we need:
69 *
70 * - One ringbuffer per-engine inside each context.
71 * - One backing object per-engine inside each context.
72 *
73 * The global default context starts its life with these new objects fully
74 * allocated and populated. The local default context for each opened fd is
75 * more complex, because we don't know at creation time which engine is going
76 * to use them. To handle this, we have implemented a deferred creation of LR
77 * contexts:
78 *
79 * The local context starts its life as a hollow or blank holder, that only
80 * gets populated for a given engine once we receive an execbuffer. If later
81 * on we receive another execbuffer ioctl for the same context but a different
82 * engine, we allocate/populate a new ringbuffer and context backing object and
83 * so on.
84 *
85 * Finally, regarding local contexts created using the ioctl call: as they are
86 * only allowed with the render ring, we can allocate & populate them right
87 * away (no need to defer anything, at least for now).
88 *
89 * Execlists implementation:
90 * Execlists are the new method by which, on gen8+ hardware, workloads are
91 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92 * This method works as follows:
93 *
94 * When a request is committed, its commands (the BB start and any leading or
95 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96 * for the appropriate context. The tail pointer in the hardware context is not
97 * updated at this time, but instead, kept by the driver in the ringbuffer
98 * structure. A structure representing this request is added to a request queue
99 * for the appropriate engine: this structure contains a copy of the context's
100 * tail after the request was written to the ring buffer and a pointer to the
101 * context itself.
102 *
103 * If the engine's request queue was empty before the request was added, the
104 * queue is processed immediately. Otherwise the queue will be processed during
105 * a context switch interrupt. In any case, elements on the queue will get sent
106 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107 * globally unique 20-bits submission ID.
108 *
109 * When execution of a request completes, the GPU updates the context status
110 * buffer with a context complete event and generates a context switch interrupt.
111 * During the interrupt handling, the driver examines the events in the buffer:
112 * for each context complete event, if the announced ID matches that on the head
113 * of the request queue, then that request is retired and removed from the queue.
114 *
115 * After processing, if any requests were retired and the queue is not empty
116 * then a new execution list can be submitted. The two requests at the front of
117 * the queue are next to be submitted but since a context may not occur twice in
118 * an execution list, if subsequent requests have the same ID as the first then
119 * the two requests must be combined. This is done simply by discarding requests
120 * at the head of the queue until either only one requests is left (in which case
121 * we use a NULL second context) or the first two requests have unique IDs.
122 *
123 * By always executing the first two requests in the queue the driver ensures
124 * that the GPU is kept as busy as possible. In the case where a single context
125 * completes but a second context is still executing, the request for this second
126 * context will be at the head of the queue when we remove the first one. This
127 * request will then be resubmitted along with a new request for a different context,
128 * which will cause the hardware to continue executing the second request and queue
129 * the new request (the GPU detects the condition of a context getting preempted
130 * with the same context and optimizes the context switch flow by not doing
131 * preemption, but just sampling the new tail pointer).
132 *
133 */
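
The ELSP pairing rule described above (at most two elements per execution list, and never the same context twice) can be restated as a small self-contained sketch. This is an illustrative toy model only, not the driver's dequeue code; the toy_req type and pick_elsp_pair() helper are hypothetical stand-ins for the execlists port handling further down in this file.

#include <stddef.h>

/* Toy model: each queued request carries only a context id. */
struct toy_req { unsigned int ctx_id; };

/*
 * Pick up to two elements for the ELSP from the head of a queue of n
 * requests.  A context may not appear twice in one execution list, so
 * requests sharing the head's context are combined into the first element
 * and the second element takes the next *different* context, or stays NULL.
 */
static void pick_elsp_pair(struct toy_req *queue, size_t n,
                           struct toy_req **elsp0, struct toy_req **elsp1)
{
        size_t i;

        *elsp0 = n ? &queue[0] : NULL;
        *elsp1 = NULL;

        for (i = 1; i < n; i++) {
                if (queue[i].ctx_id != queue[0].ctx_id) {
                        *elsp1 = &queue[i];
                        break;
                }
                /* same context as element 0: it rides along in element 0 */
        }
}
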
134#include <linux/interrupt.h>
135
136#include "i915_drv.h"
137#include "i915_perf.h"
138#include "i915_trace.h"
139#include "i915_vgpu.h"
140#include "intel_breadcrumbs.h"
141#include "intel_context.h"
142#include "intel_engine_pm.h"
143#include "intel_gt.h"
144#include "intel_gt_pm.h"
145#include "intel_gt_requests.h"
146#include "intel_lrc_reg.h"
147#include "intel_mocs.h"
148#include "intel_reset.h"
149#include "intel_ring.h"
150#include "intel_workarounds.h"
151#include "shmem_utils.h"
152
153#define RING_EXECLIST_QFULL (1 << 0x2)
154#define RING_EXECLIST1_VALID (1 << 0x3)
155#define RING_EXECLIST0_VALID (1 << 0x4)
156#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE)
157#define RING_EXECLIST1_ACTIVE (1 << 0x11)
158#define RING_EXECLIST0_ACTIVE (1 << 0x12)
159
160#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0)
161#define GEN8_CTX_STATUS_PREEMPTED (1 << 1)
162#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2)
163#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3)
164#define GEN8_CTX_STATUS_COMPLETE (1 << 4)
165#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15)
166
167#define GEN8_CTX_STATUS_COMPLETED_MASK \
168 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
169
170#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
171
172#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
173#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
174#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
175#define GEN12_IDLE_CTX_ID 0x7FF
176#define GEN12_CSB_CTX_VALID(csb_dw) \
177 (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
178
179/* Typical size of the average request (2 pipecontrols and a MI_BB) */
180#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
181
182struct virtual_engine {
183 struct intel_engine_cs base;
184 struct intel_context context;
185 struct rcu_work rcu;
186
187 /*
188 * We allow only a single request through the virtual engine at a time
189 * (each request in the timeline waits for the completion fence of
190 * the previous before being submitted). By restricting ourselves to
191 * only submitting a single request, each request is placed on to a
192 * physical to maximise load spreading (by virtue of the late greedy
193 * scheduling -- each real engine takes the next available request
194 * upon idling).
195 */
196 struct i915_request *request;
197
198 /*
199 * We keep a rbtree of available virtual engines inside each physical
200 * engine, sorted by priority. Here we preallocate the nodes we need
201 * for the virtual engine, indexed by physical_engine->id.
202 */
203 struct ve_node {
204 struct rb_node rb;
205 int prio;
206 } nodes[I915_NUM_ENGINES];
207
208 /*
209 * Keep track of bonded pairs -- restrictions upon on our selection
210 * of physical engines any particular request may be submitted to.
211 * If we receive a submit-fence from a master engine, we will only
212 * use one of sibling_mask physical engines.
213 */
214 struct ve_bond {
215 const struct intel_engine_cs *master;
216 intel_engine_mask_t sibling_mask;
217 } *bonds;
218 unsigned int num_bonds;
219
220 /* And finally, which physical engines this virtual engine maps onto. */
221 unsigned int num_siblings;
222 struct intel_engine_cs *siblings[];
223};
224
225static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
226{
227 GEM_BUG_ON(!intel_engine_is_virtual(engine))((void)0);
228 return container_of(engine, struct virtual_engine, base);
229}
230
231static int __execlists_context_alloc(struct intel_context *ce,
232 struct intel_engine_cs *engine);
233
234static void execlists_init_reg_state(u32 *reg_state,
235 const struct intel_context *ce,
236 const struct intel_engine_cs *engine,
237 const struct intel_ring *ring,
238 bool_Bool close);
239static void
240__execlists_update_reg_state(const struct intel_context *ce,
241 const struct intel_engine_cs *engine,
242 u32 head);
243
244static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
245{
246 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
247 return 0x60;
248 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9)
249 return 0x54;
250 else if (engine->class == RENDER_CLASS0)
251 return 0x58;
252 else
253 return -1;
254}
255
256static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
257{
258 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
259 return 0x74;
260 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9)
261 return 0x68;
262 else if (engine->class == RENDER_CLASS0)
263 return 0xd8;
264 else
265 return -1;
266}
267
268static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
269{
270 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
271 return 0x12;
272 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9 || engine->class == RENDER_CLASS0)
273 return 0x18;
274 else
275 return -1;
276}
277
278static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
279{
280 int x;
281
282 x = lrc_ring_wa_bb_per_ctx(engine);
283 if (x < 0)
284 return x;
285
286 return x + 2;
287}
288
289static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
290{
291 int x;
292
293 x = lrc_ring_indirect_ptr(engine);
294 if (x < 0)
295 return x;
296
297 return x + 2;
298}
299
300static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
301{
302 if (engine->class != RENDER_CLASS0)
303 return -1;
304
305 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
306 return 0xb6;
307 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11)
308 return 0xaa;
309 else
310 return -1;
311}
312
313static u32
314lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
315{
316 switch (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen)) {
317 default:
318 MISSING_CASE(INTEL_GEN(engine->i915));
319 fallthrough;
320 case 12:
321 return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0xD;
322 case 11:
323 return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x1A;
324 case 10:
325 return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x19;
326 case 9:
327 return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x26;
328 case 8:
329 return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x17;
330 }
331}
332
333static void
334lrc_ring_setup_indirect_ctx(u32 *regs,
335 const struct intel_engine_cs *engine,
336 u32 ctx_bb_ggtt_addr,
337 u32 size)
338{
339 GEM_BUG_ON(!size)((void)0);
340 GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES))((void)0);
341 GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1)((void)0);
342 regs[lrc_ring_indirect_ptr(engine) + 1] =
343 ctx_bb_ggtt_addr | (size / CACHELINE_BYTES64);
344
345 GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1)((void)0);
346 regs[lrc_ring_indirect_offset(engine) + 1] =
347 lrc_ring_indirect_offset_default(engine) << 6;
348}
349
350static u32 intel_context_get_runtime(const struct intel_context *ce)
351{
352 /*
353 * We can use either ppHWSP[16] which is recorded before the context
354 * switch (and so excludes the cost of context switches) or use the
355 * value from the context image itself, which is saved/restored earlier
356 * and so includes the cost of the save.
357 */
358 return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
359}
360
361static void mark_eio(struct i915_request *rq)
362{
363 if (i915_request_completed(rq))
364 return;
365
366 GEM_BUG_ON(i915_request_signaled(rq))((void)0);
367
368 i915_request_set_error_once(rq, -EIO5);
369 i915_request_mark_complete(rq);
370}
371
372static struct i915_request *
373active_request(const struct intel_timeline * const tl, struct i915_request *rq)
374{
375 struct i915_request *active = rq;
376
377 rcu_read_lock();
378 list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
379 if (i915_request_completed(rq))
380 break;
381
382 active = rq;
383 }
384 rcu_read_unlock();
385
386 return active;
387}
388
389static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
390{
391 return (i915_ggtt_offset(engine->status_page.vma) +
392 I915_GEM_HWS_PREEMPT_ADDR(0x32 * sizeof(u32)));
393}
394
395static inline void
396ring_set_paused(const struct intel_engine_cs *engine, int state)
397{
398 /*
399 * We inspect HWS_PREEMPT with a semaphore inside
400 * engine->emit_fini_breadcrumb. If the dword is true,
401 * the ring is paused as the semaphore will busywait
402 * until the dword is false.
403 */
404 engine->status_page.addr[I915_GEM_HWS_PREEMPT0x32] = state;
405 if (state)
406 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0);
407}
408
409static inline struct i915_priolist *to_priolist(struct rb_node *rb)
410{
411 return rb_entry(rb, struct i915_priolist, node);
412}
413
414static inline int rq_prio(const struct i915_request *rq)
415{
416 return READ_ONCE(rq->sched.attr.priority);
417}
418
419static int effective_prio(const struct i915_request *rq)
420{
421 int prio = rq_prio(rq);
422
423 /*
424 * If this request is special and must not be interrupted at any
425 * cost, so be it. Note we are only checking the most recent request
426 * in the context and so may be masking an earlier vip request. It
427 * is hoped that under the conditions where nopreempt is used, this
428 * will not matter (i.e. all requests to that context will be
429 * nopreempt for as long as desired).
430 */
431 if (i915_request_has_nopreempt(rq))
432 prio = I915_PRIORITY_UNPREEMPTABLE0x7fffffff;
433
434 return prio;
435}
436
437static int queue_prio(const struct intel_engine_execlists *execlists)
438{
439 struct i915_priolist *p;
440 struct rb_node *rb;
441
442 rb = rb_first_cached(&execlists->queue);
443 if (!rb)
444 return INT_MIN(-0x7fffffff-1);
445
446 /*
447 * As the priolist[] are inverted, with the highest priority in [0],
448 * we have to flip the index value to become priority.
449 */
450 p = to_priolist(rb);
451 if (!I915_USER_PRIORITY_SHIFT0)
452 return p->priority;
453
454 return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT0) - ffs(p->used);
455}
456
457static inline bool_Bool need_preempt(const struct intel_engine_cs *engine,
458 const struct i915_request *rq,
459 struct rb_node *rb)
460{
461 int last_prio;
462
463 if (!intel_engine_has_semaphores(engine))
464 return false0;
465
466 /*
467 * Check if the current priority hint merits a preemption attempt.
468 *
469 * We record the highest value priority we saw during rescheduling
470 * prior to this dequeue, therefore we know that if it is strictly
471 * less than the current tail of ESLP[0], we do not need to force
472 * a preempt-to-idle cycle.
473 *
474 * However, the priority hint is a mere hint that we may need to
475 * preempt. If that hint is stale or we may be trying to preempt
476 * ourselves, ignore the request.
477 *
478 * More naturally we would write
479 * prio >= max(0, last);
480 * except that we wish to prevent triggering preemption at the same
481 * priority level: the task that is running should remain running
482 * to preserve FIFO ordering of dependencies.
483 */
484 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
485 if (engine->execlists.queue_priority_hint <= last_prio)
486 return false0;
487
488 /*
489 * Check against the first request in ELSP[1], it will, thanks to the
490 * power of PI, be the highest priority of that context.
491 */
492 if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
493 rq_prio(list_next_entry(rq, sched.link)) > last_prio)
494 return true1;
495
496 if (rb) {
497 struct virtual_engine *ve =
498 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
499 bool_Bool preempt = false0;
500
501 if (engine == ve->siblings[0]) { /* only preempt one sibling */
502 struct i915_request *next;
503
504 rcu_read_lock();
505 next = READ_ONCE(ve->request);
506 if (next)
507 preempt = rq_prio(next) > last_prio;
508 rcu_read_unlock();
509 }
510
511 if (preempt)
512 return preempt;
513 }
514
515 /*
516 * If the inflight context did not trigger the preemption, then maybe
517 * it was the set of queued requests? Pick the highest priority in
518 * the queue (the first active priolist) and see if it deserves to be
519 * running instead of ELSP[0].
520 *
521 * The highest priority request in the queue can not be either
522 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
523 * context, it's priority would not exceed ELSP[0] aka last_prio.
524 */
525 return queue_prio(&engine->execlists) > last_prio;
526}
527
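
The core of need_preempt() above reduces to a strict comparison against the running request's effective priority. The sketch below is an illustrative restatement only; it ignores the ELSP[1] and virtual-engine checks that follow in the real function, and toy_need_preempt() with its parameters is hypothetical.

/*
 * Toy restatement of the core check: preempt only when the pending priority
 * hint is strictly greater than the running request's effective priority,
 * clamped to at least (normal - 1), so equal-priority work keeps FIFO order.
 */
static int toy_need_preempt(int queue_priority_hint, int running_prio,
                            int priority_normal)
{
        int last_prio = running_prio > priority_normal - 1 ?
                        running_prio : priority_normal - 1;

        return queue_priority_hint > last_prio;
}
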
528__maybe_unused__attribute__((__unused__)) static inline bool_Bool
529assert_priority_queue(const struct i915_request *prev,
530 const struct i915_request *next)
531{
532 /*
533 * Without preemption, the prev may refer to the still active element
534 * which we refuse to let go.
535 *
536 * Even with preemption, there are times when we think it is better not
537 * to preempt and leave an ostensibly lower priority request in flight.
538 */
539 if (i915_request_is_active(prev))
540 return true1;
541
542 return rq_prio(prev) >= rq_prio(next);
543}
544
545/*
546 * The context descriptor encodes various attributes of a context,
547 * including its GTT address and some flags. Because it's fairly
548 * expensive to calculate, we'll just do it once and cache the result,
549 * which remains valid until the context is unpinned.
550 *
551 * This is what a descriptor looks like, from LSB to MSB::
552 *
553 * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
554 * bits 12-31: LRCA, GTT address of (the HWSP of) this context
555 * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
556 * bits 53-54: mbz, reserved for use by hardware
557 * bits 55-63: group ID, currently unused and set to 0
558 *
559 * Starting from Gen11, the upper dword of the descriptor has a new format:
560 *
561 * bits 32-36: reserved
562 * bits 37-47: SW context ID
563 * bits 48:53: engine instance
564 * bit 54: mbz, reserved for use by hardware
565 * bits 55-60: SW counter
566 * bits 61-63: engine class
567 *
568 * engine info, SW context ID and SW counter need to form a unique number
569 * (Context ID) per lrc.
570 */
571static u32
572lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
573{
574 u32 desc;
575
576 desc = INTEL_LEGACY_32B_CONTEXT;
577 if (i915_vm_is_4lvl(ce->vm))
578 desc = INTEL_LEGACY_64B_CONTEXT;
579 desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT3;
580
581 desc |= GEN8_CTX_VALID(1 << 0) | GEN8_CTX_PRIVILEGE(1 << 8);
582 if (IS_GEN(engine->i915, 8)(0 + (&(engine->i915)->__info)->gen == (8)))
583 desc |= GEN8_CTX_L3LLC_COHERENT(1 << 5);
584
585 return i915_ggtt_offset(ce->state) | desc;
586}
587
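
As a worked example of the layout documented above, the sketch below recomputes the low descriptor dword the way lrc_descriptor() does, using only the flag values visible in this listing (valid = bit 0, privilege = bit 8, addressing-mode shift = 3, gen8-only L3LLC coherency = bit 5). It is illustrative; toy_lrc_desc_lo() and its parameters are hypothetical stand-ins for driver state.

/*
 * Recompute the low descriptor dword from an addressing mode and a page
 * aligned GGTT offset; the state object is 4 KiB aligned, so the offset only
 * touches bits 12-31 and never collides with the flag bits.
 */
static unsigned int toy_lrc_desc_lo(unsigned int addr_mode,
                                    unsigned int ggtt_offset_4k_aligned,
                                    int is_gen8)
{
        unsigned int desc = addr_mode << 3;     /* GEN8_CTX_ADDRESSING_MODE_SHIFT */

        desc |= (1u << 0) | (1u << 8);          /* GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE */
        if (is_gen8)
                desc |= 1u << 5;                /* GEN8_CTX_L3LLC_COHERENT */

        return ggtt_offset_4k_aligned | desc;
}
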
588static inline unsigned int dword_in_page(void *addr)
589{
590 return offset_in_page(addr)((vaddr_t)(addr) & ((1 << 12) - 1)) / sizeof(u32);
591}
592
593static void set_offsets(u32 *regs,
594 const u8 *data,
595 const struct intel_engine_cs *engine,
596 bool_Bool clear)
597#define NOP(x) (BIT(7)(1UL << (7)) | (x))
598#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))0)
599#define POSTED(1UL << (0)) BIT(0)(1UL << (0))
600#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)0)
601#define REG16(x) \
602 (((x) >> 9) | BIT(7)(1UL << (7)) | BUILD_BUG_ON_ZERO(x >= 0x10000)0), \
603 (((x) >> 2) & 0x7f)
604#define END(total_state_size) 0, (total_state_size)
605{
606 const u32 base = engine->mmio_base;
607
608 while (*data) {
609 u8 count, flags;
610
611 if (*data & BIT(7)(1UL << (7))) { /* skip */
612 count = *data++ & ~BIT(7)(1UL << (7));
613 if (clear)
614 memset32(regs, MI_NOOP(((0) << 23) | (0)), count);
615 regs += count;
616 continue;
617 }
618
619 count = *data & 0x3f;
620 flags = *data >> 6;
621 data++;
622
623 *regs = MI_LOAD_REGISTER_IMM(count)(((0x22) << 23) | (2*(count)-1));
624 if (flags & POSTED(1UL << (0)))
625 *regs |= MI_LRI_FORCE_POSTED(1<<12);
626 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11)
627 *regs |= MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
628 regs++;
629
630 GEM_BUG_ON(!count)((void)0);
631 do {
632 u32 offset = 0;
633 u8 v;
634
635 do {
636 v = *data++;
637 offset <<= 7;
638 offset |= v & ~BIT(7)(1UL << (7));
639 } while (v & BIT(7)(1UL << (7)));
640
641 regs[0] = base + (offset << 2);
642 if (clear)
643 regs[1] = 0;
644 regs += 2;
645 } while (--count);
646 }
647
648 if (clear) {
649 u8 count = *++data;
650
651 /* Clear past the tail for HW access */
652 GEM_BUG_ON(dword_in_page(regs) > count)((void)0);
653 memset32(regs, MI_NOOP(((0) << 23) | (0)), count - dword_in_page(regs));
654
655 /* Close the batch; used mainly by live_lrc_layout() */
656 *regs = MI_BATCH_BUFFER_END(((0x0a) << 23) | (0));
657 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 10)
658 *regs |= BIT(0)(1UL << (0));
659 }
660}
661
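
The offset tables that follow are a tiny bytecode consumed by set_offsets() above: a header byte is either a skip count (bit 7 set) or an LRI count plus flags, and each register offset is stored 7 bits per byte with bit 7 as a continuation flag. The standalone decoder below restates the inner loop as a sketch; toy_decode_reg() is hypothetical and not part of the driver.

#include <stdint.h>

/*
 * Decode one variable-length register offset: 7 payload bits per byte, most
 * significant chunk first, bit 7 meaning another byte follows; the result is
 * scaled by 4 and added to the engine's mmio base.
 */
static uint32_t toy_decode_reg(const uint8_t **data, uint32_t mmio_base)
{
        uint32_t offset = 0;
        uint8_t v;

        do {
                v = *(*data)++;
                offset <<= 7;
                offset |= v & 0x7f;     /* low 7 bits carry payload */
        } while (v & 0x80);             /* bit 7 set means another byte follows */

        return mmio_base + (offset << 2);
}

For example, REG16(0x244) encodes as { 0x81, 0x11 }; decoding yields offset (0x01 << 7) | 0x11 = 0x91, and 0x91 << 2 lands back on mmio_base + 0x244.
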
662static const u8 gen8_xcs_offsets[] = {
663 NOP(1),
664 LRI(11, 0),
665 REG16(0x244),
666 REG(0x034),
667 REG(0x030),
668 REG(0x038),
669 REG(0x03c),
670 REG(0x168),
671 REG(0x140),
672 REG(0x110),
673 REG(0x11c),
674 REG(0x114),
675 REG(0x118),
676
677 NOP(9),
678 LRI(9, 0),
679 REG16(0x3a8),
680 REG16(0x28c),
681 REG16(0x288),
682 REG16(0x284),
683 REG16(0x280),
684 REG16(0x27c),
685 REG16(0x278),
686 REG16(0x274),
687 REG16(0x270),
688
689 NOP(13),
690 LRI(2, 0),
691 REG16(0x200),
692 REG(0x028),
693
694 END(80)
695};
696
697static const u8 gen9_xcs_offsets[] = {
698 NOP(1),
699 LRI(14, POSTED(1UL << (0))),
700 REG16(0x244),
701 REG(0x034),
702 REG(0x030),
703 REG(0x038),
704 REG(0x03c),
705 REG(0x168),
706 REG(0x140),
707 REG(0x110),
708 REG(0x11c),
709 REG(0x114),
710 REG(0x118),
711 REG(0x1c0),
712 REG(0x1c4),
713 REG(0x1c8),
714
715 NOP(3),
716 LRI(9, POSTED(1UL << (0))),
717 REG16(0x3a8),
718 REG16(0x28c),
719 REG16(0x288),
720 REG16(0x284),
721 REG16(0x280),
722 REG16(0x27c),
723 REG16(0x278),
724 REG16(0x274),
725 REG16(0x270),
726
727 NOP(13),
728 LRI(1, POSTED(1UL << (0))),
729 REG16(0x200),
730
731 NOP(13),
732 LRI(44, POSTED(1UL << (0))),
733 REG(0x028),
734 REG(0x09c),
735 REG(0x0c0),
736 REG(0x178),
737 REG(0x17c),
738 REG16(0x358),
739 REG(0x170),
740 REG(0x150),
741 REG(0x154),
742 REG(0x158),
743 REG16(0x41c),
744 REG16(0x600),
745 REG16(0x604),
746 REG16(0x608),
747 REG16(0x60c),
748 REG16(0x610),
749 REG16(0x614),
750 REG16(0x618),
751 REG16(0x61c),
752 REG16(0x620),
753 REG16(0x624),
754 REG16(0x628),
755 REG16(0x62c),
756 REG16(0x630),
757 REG16(0x634),
758 REG16(0x638),
759 REG16(0x63c),
760 REG16(0x640),
761 REG16(0x644),
762 REG16(0x648),
763 REG16(0x64c),
764 REG16(0x650),
765 REG16(0x654),
766 REG16(0x658),
767 REG16(0x65c),
768 REG16(0x660),
769 REG16(0x664),
770 REG16(0x668),
771 REG16(0x66c),
772 REG16(0x670),
773 REG16(0x674),
774 REG16(0x678),
775 REG16(0x67c),
776 REG(0x068),
777
778 END(176)
779};
780
781static const u8 gen12_xcs_offsets[] = {
782 NOP(1),
783 LRI(13, POSTED(1UL << (0))),
784 REG16(0x244),
785 REG(0x034),
786 REG(0x030),
787 REG(0x038),
788 REG(0x03c),
789 REG(0x168),
790 REG(0x140),
791 REG(0x110),
792 REG(0x1c0),
793 REG(0x1c4),
794 REG(0x1c8),
795 REG(0x180),
796 REG16(0x2b4),
797
798 NOP(5),
799 LRI(9, POSTED(1UL << (0))),
800 REG16(0x3a8),
801 REG16(0x28c),
802 REG16(0x288),
803 REG16(0x284),
804 REG16(0x280),
805 REG16(0x27c),
806 REG16(0x278),
807 REG16(0x274),
808 REG16(0x270),
809
810 END(80)
811};
812
813static const u8 gen8_rcs_offsets[] = {
814 NOP(1),
815 LRI(14, POSTED(1UL << (0))),
816 REG16(0x244),
817 REG(0x034),
818 REG(0x030),
819 REG(0x038),
820 REG(0x03c),
821 REG(0x168),
822 REG(0x140),
823 REG(0x110),
824 REG(0x11c),
825 REG(0x114),
826 REG(0x118),
827 REG(0x1c0),
828 REG(0x1c4),
829 REG(0x1c8),
830
831 NOP(3),
832 LRI(9, POSTED(1UL << (0))),
833 REG16(0x3a8),
834 REG16(0x28c),
835 REG16(0x288),
836 REG16(0x284),
837 REG16(0x280),
838 REG16(0x27c),
839 REG16(0x278),
840 REG16(0x274),
841 REG16(0x270),
842
843 NOP(13),
844 LRI(1, 0),
845 REG(0x0c8),
846
847 END(80)
848};
849
850static const u8 gen9_rcs_offsets[] = {
851 NOP(1),
852 LRI(14, POSTED(1UL << (0))),
853 REG16(0x244),
854 REG(0x34),
855 REG(0x30),
856 REG(0x38),
857 REG(0x3c),
858 REG(0x168),
859 REG(0x140),
860 REG(0x110),
861 REG(0x11c),
862 REG(0x114),
863 REG(0x118),
864 REG(0x1c0),
865 REG(0x1c4),
866 REG(0x1c8),
867
868 NOP(3),
869 LRI(9, POSTED(1UL << (0))),
870 REG16(0x3a8),
871 REG16(0x28c),
872 REG16(0x288),
873 REG16(0x284),
874 REG16(0x280),
875 REG16(0x27c),
876 REG16(0x278),
877 REG16(0x274),
878 REG16(0x270),
879
880 NOP(13),
881 LRI(1, 0),
882 REG(0xc8),
883
884 NOP(13),
885 LRI(44, POSTED(1UL << (0))),
886 REG(0x28),
887 REG(0x9c),
888 REG(0xc0),
889 REG(0x178),
890 REG(0x17c),
891 REG16(0x358),
892 REG(0x170),
893 REG(0x150),
894 REG(0x154),
895 REG(0x158),
896 REG16(0x41c),
897 REG16(0x600),
898 REG16(0x604),
899 REG16(0x608),
900 REG16(0x60c),
901 REG16(0x610),
902 REG16(0x614),
903 REG16(0x618),
904 REG16(0x61c),
905 REG16(0x620),
906 REG16(0x624),
907 REG16(0x628),
908 REG16(0x62c),
909 REG16(0x630),
910 REG16(0x634),
911 REG16(0x638),
912 REG16(0x63c),
913 REG16(0x640),
914 REG16(0x644),
915 REG16(0x648),
916 REG16(0x64c),
917 REG16(0x650),
918 REG16(0x654),
919 REG16(0x658),
920 REG16(0x65c),
921 REG16(0x660),
922 REG16(0x664),
923 REG16(0x668),
924 REG16(0x66c),
925 REG16(0x670),
926 REG16(0x674),
927 REG16(0x678),
928 REG16(0x67c),
929 REG(0x68),
930
931 END(176)
932};
933
934static const u8 gen11_rcs_offsets[] = {
935 NOP(1),
936 LRI(15, POSTED(1UL << (0))),
937 REG16(0x244),
938 REG(0x034),
939 REG(0x030),
940 REG(0x038),
941 REG(0x03c),
942 REG(0x168),
943 REG(0x140),
944 REG(0x110),
945 REG(0x11c),
946 REG(0x114),
947 REG(0x118),
948 REG(0x1c0),
949 REG(0x1c4),
950 REG(0x1c8),
951 REG(0x180),
952
953 NOP(1),
954 LRI(9, POSTED(1UL << (0))),
955 REG16(0x3a8),
956 REG16(0x28c),
957 REG16(0x288),
958 REG16(0x284),
959 REG16(0x280),
960 REG16(0x27c),
961 REG16(0x278),
962 REG16(0x274),
963 REG16(0x270),
964
965 LRI(1, POSTED(1UL << (0))),
966 REG(0x1b0),
967
968 NOP(10),
969 LRI(1, 0),
970 REG(0x0c8),
971
972 END(80)
973};
974
975static const u8 gen12_rcs_offsets[] = {
976 NOP(1),
977 LRI(13, POSTED(1UL << (0))),
978 REG16(0x244),
979 REG(0x034),
980 REG(0x030),
981 REG(0x038),
982 REG(0x03c),
983 REG(0x168),
984 REG(0x140),
985 REG(0x110),
986 REG(0x1c0),
987 REG(0x1c4),
988 REG(0x1c8),
989 REG(0x180),
990 REG16(0x2b4),
991
992 NOP(5),
993 LRI(9, POSTED(1UL << (0))),
994 REG16(0x3a8),
995 REG16(0x28c),
996 REG16(0x288),
997 REG16(0x284),
998 REG16(0x280),
999 REG16(0x27c),
1000 REG16(0x278),
1001 REG16(0x274),
1002 REG16(0x270),
1003
1004 LRI(3, POSTED(1UL << (0))),
1005 REG(0x1b0),
1006 REG16(0x5a8),
1007 REG16(0x5ac),
1008
1009 NOP(6),
1010 LRI(1, 0),
1011 REG(0x0c8),
1012 NOP(3 + 9 + 1),
1013
1014 LRI(51, POSTED(1UL << (0))),
1015 REG16(0x588),
1016 REG16(0x588),
1017 REG16(0x588),
1018 REG16(0x588),
1019 REG16(0x588),
1020 REG16(0x588),
1021 REG(0x028),
1022 REG(0x09c),
1023 REG(0x0c0),
1024 REG(0x178),
1025 REG(0x17c),
1026 REG16(0x358),
1027 REG(0x170),
1028 REG(0x150),
1029 REG(0x154),
1030 REG(0x158),
1031 REG16(0x41c),
1032 REG16(0x600),
1033 REG16(0x604),
1034 REG16(0x608),
1035 REG16(0x60c),
1036 REG16(0x610),
1037 REG16(0x614),
1038 REG16(0x618),
1039 REG16(0x61c),
1040 REG16(0x620),
1041 REG16(0x624),
1042 REG16(0x628),
1043 REG16(0x62c),
1044 REG16(0x630),
1045 REG16(0x634),
1046 REG16(0x638),
1047 REG16(0x63c),
1048 REG16(0x640),
1049 REG16(0x644),
1050 REG16(0x648),
1051 REG16(0x64c),
1052 REG16(0x650),
1053 REG16(0x654),
1054 REG16(0x658),
1055 REG16(0x65c),
1056 REG16(0x660),
1057 REG16(0x664),
1058 REG16(0x668),
1059 REG16(0x66c),
1060 REG16(0x670),
1061 REG16(0x674),
1062 REG16(0x678),
1063 REG16(0x67c),
1064 REG(0x068),
1065 REG(0x084),
1066 NOP(1),
1067
1068 END(192)
1069};
1070
1071#undef END
1072#undef REG16
1073#undef REG
1074#undef LRI
1075#undef NOP
1076
1077static const u8 *reg_offsets(const struct intel_engine_cs *engine)
1078{
1079 /*
1080 * The gen12+ lists only have the registers we program in the basic
1081 * default state. We rely on the context image using relative
1082 * addressing to automatic fixup the register state between the
1083 * physical engines for virtual engine.
1084 */
1085 GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
1086 !intel_engine_has_relative_mmio(engine));
1087
1088 if (engine->class == RENDER_CLASS0) {
1089 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
1090 return gen12_rcs_offsets;
1091 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11)
1092 return gen11_rcs_offsets;
1093 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9)
1094 return gen9_rcs_offsets;
1095 else
1096 return gen8_rcs_offsets;
1097 } else {
1098 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
1099 return gen12_xcs_offsets;
1100 else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9)
1101 return gen9_xcs_offsets;
1102 else
1103 return gen8_xcs_offsets;
1104 }
1105}
1106
1107static struct i915_request *
1108__unwind_incomplete_requests(struct intel_engine_cs *engine)
1109{
1110 struct i915_request *rq, *rn, *active = NULL((void *)0);
1111 struct list_head *pl;
1112 int prio = I915_PRIORITY_INVALID((-0x7fffffff-1) | (u8)((1UL << (0)) - 1));
1113
1114 lockdep_assert_held(&engine->active.lock)do { (void)(&engine->active.lock); } while(0);
1115
1116 list_for_each_entry_safe_reverse(rq, rn,
1117 &engine->active.requests,
1118 sched.link) {
1119 if (i915_request_completed(rq))
1120 continue; /* XXX */
1121
1122 __i915_request_unsubmit(rq);
1123
1124 /*
1125 * Push the request back into the queue for later resubmission.
1126 * If this request is not native to this physical engine (i.e.
1127 * it came from a virtual source), push it back onto the virtual
1128 * engine so that it can be moved across onto another physical
1129 * engine as load dictates.
1130 */
1131 if (likely(rq->execution_mask == engine->mask)) {
1132 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID)((void)0);
1133 if (rq_prio(rq) != prio) {
1134 prio = rq_prio(rq);
1135 pl = i915_sched_lookup_priolist(engine, prio);
1136 }
1137 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))((void)0);
1138
1139 list_move(&rq->sched.link, pl);
1140 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1141
1142 /* Check in case we rollback so far we wrap [size/2] */
1143 if (intel_ring_direction(rq->ring,
1144 rq->tail,
1145 rq->ring->tail + 8) > 0)
1146 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2));
1147
1148 active = rq;
1149 } else {
1150 struct intel_engine_cs *owner = rq->context->engine;
1151
1152 WRITE_ONCE(rq->engine, owner);
1153 owner->submit_request(rq);
1154 active = NULL((void *)0);
1155 }
1156 }
1157
1158 return active;
1159}
1160
1161struct i915_request *
1162execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
1163{
1164 struct intel_engine_cs *engine =
1165 container_of(execlists, typeof(*engine), execlists);
1166
1167 return __unwind_incomplete_requests(engine);
1168}
1169
1170static inline void
1171execlists_context_status_change(struct i915_request *rq, unsigned long status)
1172{
1173 /*
1174 * Only used when GVT-g is enabled now. When GVT-g is disabled,
1175 * The compiler should eliminate this function as dead-code.
1176 */
1177 if (!IS_ENABLED(CONFIG_DRM_I915_GVT)0)
1178 return;
1179
1180#ifdef notyet
1181 atomic_notifier_call_chain(&rq->engine->context_status_notifier,
1182 status, rq);
1183#endif
1184}
1185
1186static void intel_engine_context_in(struct intel_engine_cs *engine)
1187{
1188 unsigned long flags;
1189
1190 if (atomic_add_unless(&engine->stats.active, 1, 0))
1191 return;
1192
1193 write_seqlock_irqsave(&engine->stats.lock, flags);
1194 if (!atomic_add_unless(&engine->stats.active, 1, 0)) {
1195 engine->stats.start = ktime_get();
1196 atomic_inc(&engine->stats.active)__sync_fetch_and_add(&engine->stats.active, 1);
1197 }
1198 write_sequnlock_irqrestore(&engine->stats.lock, flags);
1199}
1200
1201static void intel_engine_context_out(struct intel_engine_cs *engine)
1202{
1203 unsigned long flags;
1204
1205 GEM_BUG_ON(!atomic_read(&engine->stats.active))((void)0);
1206
1207 if (atomic_add_unless(&engine->stats.active, -1, 1))
1208 return;
1209
1210 write_seqlock_irqsave(&engine->stats.lock, flags);
1211 if (atomic_dec_and_test(&engine->stats.active)) {
1212 engine->stats.total =
1213 ktime_add(engine->stats.total,
1214 ktime_sub(ktime_get(), engine->stats.start));
1215 }
1216 write_sequnlock_irqrestore(&engine->stats.lock, flags);
1217}
1218
1219static void
1220execlists_check_context(const struct intel_context *ce,
1221 const struct intel_engine_cs *engine)
1222{
1223 const struct intel_ring *ring = ce->ring;
1224 u32 *regs = ce->lrc_reg_state;
1225 bool_Bool valid = true1;
1226 int x;
1227
1228 if (regs[CTX_RING_START(0x08 + 1)] != i915_ggtt_offset(ring->vma)) {
1229 pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",printk("\0013" "%s: context submitted with incorrect RING_START [%08x], expected %08x\n"
, engine->name, regs[(0x08 + 1)], i915_ggtt_offset(ring->
vma))
1230 engine->name,printk("\0013" "%s: context submitted with incorrect RING_START [%08x], expected %08x\n"
, engine->name, regs[(0x08 + 1)], i915_ggtt_offset(ring->
vma))
1231 regs[CTX_RING_START],printk("\0013" "%s: context submitted with incorrect RING_START [%08x], expected %08x\n"
, engine->name, regs[(0x08 + 1)], i915_ggtt_offset(ring->
vma))
1232 i915_ggtt_offset(ring->vma))printk("\0013" "%s: context submitted with incorrect RING_START [%08x], expected %08x\n"
, engine->name, regs[(0x08 + 1)], i915_ggtt_offset(ring->
vma))
;
1233 regs[CTX_RING_START(0x08 + 1)] = i915_ggtt_offset(ring->vma);
1234 valid = false0;
1235 }
1236
1237 if ((regs[CTX_RING_CTL(0x0a + 1)] & ~(RING_WAIT(1 << 11) | RING_WAIT_SEMAPHORE(1 << 10))) !=
1238 (RING_CTL_SIZE(ring->size)((ring->size) - (1 << 12)) | RING_VALID0x00000001)) {
1239 pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",printk("\0013" "%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n"
, engine->name, regs[(0x0a + 1)], (u32)(((ring->size) -
(1 << 12)) | 0x00000001))
1240 engine->name,printk("\0013" "%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n"
, engine->name, regs[(0x0a + 1)], (u32)(((ring->size) -
(1 << 12)) | 0x00000001))
1241 regs[CTX_RING_CTL],printk("\0013" "%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n"
, engine->name, regs[(0x0a + 1)], (u32)(((ring->size) -
(1 << 12)) | 0x00000001))
1242 (u32)(RING_CTL_SIZE(ring->size) | RING_VALID))printk("\0013" "%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n"
, engine->name, regs[(0x0a + 1)], (u32)(((ring->size) -
(1 << 12)) | 0x00000001))
;
1243 regs[CTX_RING_CTL(0x0a + 1)] = RING_CTL_SIZE(ring->size)((ring->size) - (1 << 12)) | RING_VALID0x00000001;
1244 valid = false0;
1245 }
1246
1247 x = lrc_ring_mi_mode(engine);
1248 if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING(1 << 8)) {
1249 pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",printk("\0013" "%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n"
, engine->name, regs[x + 1])
1250 engine->name, regs[x + 1])printk("\0013" "%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n"
, engine->name, regs[x + 1])
;
1251 regs[x + 1] &= ~STOP_RING(1 << 8);
1252 regs[x + 1] |= STOP_RING(1 << 8) << 16;
1253 valid = false0;
1254 }
1255
1256 WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
1257}
1258
1259static void restore_default_state(struct intel_context *ce,
1260 struct intel_engine_cs *engine)
1261{
1262 u32 *regs;
1263
1264 regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE);
1265 execlists_init_reg_state(regs, ce, engine, ce->ring, true1);
1266
1267 ce->runtime.last = intel_context_get_runtime(ce);
1268}
1269
1270static void reset_active(struct i915_request *rq,
1271 struct intel_engine_cs *engine)
1272{
1273 struct intel_context * const ce = rq->context;
1274 u32 head;
1275
1276 /*
1277 * The executing context has been cancelled. We want to prevent
1278 * further execution along this context and propagate the error on
1279 * to anything depending on its results.
1280 *
1281 * In __i915_request_submit(), we apply the -EIO and remove the
1282 * requests' payloads for any banned requests. But first, we must
1283 * rewind the context back to the start of the incomplete request so
1284 * that we do not jump back into the middle of the batch.
1285 *
1286 * We preserve the breadcrumbs and semaphores of the incomplete
1287 * requests so that inter-timeline dependencies (i.e other timelines)
1288 * remain correctly ordered. And we defer to __i915_request_submit()
1289 * so that all asynchronous waits are correctly handled.
1290 */
1291 ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
1292 rq->fence.context, rq->fence.seqno);
1293
1294 /* On resubmission of the active request, payload will be scrubbed */
1295 if (i915_request_completed(rq))
1296 head = rq->tail;
1297 else
1298 head = active_request(ce->timeline, rq)->head;
1299 head = intel_ring_wrap(ce->ring, head);
1300
1301 /* Scrub the context image to prevent replaying the previous batch */
1302 restore_default_state(ce, engine);
1303 __execlists_update_reg_state(ce, engine, head);
1304
1305 /* We've switched away, so this should be a no-op, but intent matters */
1306 ce->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2));
1307}
1308
1309static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
1310{
1311#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0
1312 ce->runtime.num_underflow += dt < 0;
1313 ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
1314#endif
1315}
1316
1317static void intel_context_update_runtime(struct intel_context *ce)
1318{
1319 u32 old;
1320 s32 dt;
1321
1322 if (intel_context_is_barrier(ce))
1323 return;
1324
1325 old = ce->runtime.last;
1326 ce->runtime.last = intel_context_get_runtime(ce);
1327 dt = ce->runtime.last - old;
1328
1329 if (unlikely(dt <= 0)__builtin_expect(!!(dt <= 0), 0)) {
1330 CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",do { const struct intel_context *ce__ = (ce); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (ce__->
engine); do { } while (0); } while (0); } while (0)
1331 old, ce->runtime.last, dt)do { const struct intel_context *ce__ = (ce); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (ce__->
engine); do { } while (0); } while (0); } while (0)
;
1332 st_update_runtime_underflow(ce, dt);
1333 return;
1334 }
1335
1336 ewma_runtime_add(&ce->runtime.avg, dt);
1337 ce->runtime.total += dt;
1338}
1339
1340static inline struct intel_engine_cs *
1341__execlists_schedule_in(struct i915_request *rq)
1342{
1343 struct intel_engine_cs * const engine = rq->engine;
1344 struct intel_context * const ce = rq->context;
1345
1346 intel_context_get(ce);
1347
1348 if (unlikely(intel_context_is_banned(ce))__builtin_expect(!!(intel_context_is_banned(ce)), 0))
1349 reset_active(rq, engine);
1350
1351 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
1352 execlists_check_context(ce, engine);
1353
1354 if (ce->tag) {
1355 /* Use a fixed tag for OA and friends */
1356 GEM_BUG_ON(ce->tag <= BITS_PER_LONG)((void)0);
1357 ce->lrc.ccid = ce->tag;
1358 } else {
1359 /* We don't need a strict matching tag, just different values */
1360 unsigned int tag = ffs(READ_ONCE(engine->context_tag));
1361
1362 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG)((void)0);
1363 clear_bit(tag - 1, &engine->context_tag);
1364 ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT37 - 32);
1365
1366 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID);
1367 }
1368
1369 ce->lrc.ccid |= engine->execlists.ccid;
1370
1371 __intel_gt_pm_get(engine->gt);
1372 if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)__sync_fetch_and_add(&engine->fw_active, 1))
1373 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
1374 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
1375 intel_engine_context_in(engine);
1376
1377 return engine;
1378}
1379
1380static inline struct i915_request *
1381execlists_schedule_in(struct i915_request *rq, int idx)
1382{
1383 struct intel_context * const ce = rq->context;
1384 struct intel_engine_cs *old;
1385
1386 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine))((void)0);
1387 trace_i915_request_in(rq, idx);
1388
1389 old = READ_ONCE(ce->inflight);
1390 do {
1391 if (!old) {
1392 WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
1393 break;
1394 }
1395 } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
1396
1397 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine)((void)0);
1398 return i915_request_get(rq);
1399}
1400
1401static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
1402{
1403 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
1404 struct i915_request *next = READ_ONCE(ve->request);
1405
1406 if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
1407 tasklet_hi_schedule(&ve->base.execlists.tasklet);
1408}
1409
1410static inline void
1411__execlists_schedule_out(struct i915_request *rq,
1412 struct intel_engine_cs * const engine,
1413 unsigned int ccid)
1414{
1415 struct intel_context * const ce = rq->context;
1416
1417 /*
1418 * NB process_csb() is not under the engine->active.lock and hence
1419 * schedule_out can race with schedule_in meaning that we should
1420 * refrain from doing non-trivial work here.
1421 */
1422
1423 /*
1424 * If we have just completed this context, the engine may now be
1425 * idle and we want to re-enter powersaving.
1426 */
1427 if (list_is_last_rcu(&rq->link, &ce->timeline->requests) &&
1428 i915_request_completed(rq))
1429 intel_engine_add_retire(engine, ce->timeline);
1430
1431 ccid >>= GEN11_SW_CTX_ID_SHIFT37 - 32;
1432 ccid &= GEN12_MAX_CONTEXT_HW_ID((1<<11) - 1);
1433 if (ccid < BITS_PER_LONG64) {
1434 GEM_BUG_ON(ccid == 0)((void)0);
1435 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag))((void)0);
1436 set_bit(ccid - 1, &engine->context_tag);
1437 }
1438
1439 intel_context_update_runtime(ce);
1440 intel_engine_context_out(engine);
1441 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
1442 if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)__sync_sub_and_fetch((&engine->fw_active), 1))
1443 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
1444 intel_gt_pm_put_async(engine->gt);
1445
1446 /*
1447 * If this is part of a virtual engine, its next request may
1448 * have been blocked waiting for access to the active context.
1449 * We have to kick all the siblings again in case we need to
1450 * switch (e.g. the next request is not runnable on this
1451 * engine). Hopefully, we will already have submitted the next
1452 * request before the tasklet runs and do not need to rebuild
1453 * each virtual tree and kick everyone again.
1454 */
1455 if (ce->engine != engine)
1456 kick_siblings(rq, ce);
1457
1458 intel_context_put(ce);
1459}
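
The tag recycling above pulls the software context id out of the upper CSB dword (shift by 37 - 32, mask to 11 bits) and hands the tag back to a per-engine bitmask. A minimal standalone sketch of that unpack-and-release step follows, using the same constants; the helper and variable names are illustrative, not the driver's API.

/*
 * Standalone sketch (illustrative names; same constants as the expansion
 * above: shift 37 - 32, 11-bit id, one unsigned long of tags).
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SW_CTX_ID_SHIFT   37
#define MAX_CONTEXT_HW_ID ((1u << 11) - 1)
#define TAG_BITS          (8 * sizeof(unsigned long))

static void release_ccid_tag(uint32_t ccid_upper, unsigned long *tag_pool)
{
    /* Recover the software context id from the upper CSB dword. */
    uint32_t id = (ccid_upper >> (SW_CTX_ID_SHIFT - 32)) & MAX_CONTEXT_HW_ID;

    if (id == 0 || id >= TAG_BITS)      /* 0 is reserved, large ids unmanaged */
        return;

    assert(!(*tag_pool & (1UL << (id - 1))));   /* tag must still be busy */
    *tag_pool |= 1UL << (id - 1);               /* mark it free again */
}

int main(void)
{
    unsigned long pool = 0;                     /* all tags currently in use */

    release_ccid_tag(3u << (SW_CTX_ID_SHIFT - 32), &pool);
    printf("pool=%#lx\n", pool);                /* bit 2 set: tag 3 is free */
    return 0;
}
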
1460
1461static inline void
1462execlists_schedule_out(struct i915_request *rq)
1463{
1464 struct intel_context * const ce = rq->context;
1465 struct intel_engine_cs *cur, *old;
1466 u32 ccid;
1467
1468 trace_i915_request_out(rq);
1469
1470 ccid = rq->context->lrc.ccid;
1471 old = READ_ONCE(ce->inflight)({ typeof(ce->inflight) __tmp = *(volatile typeof(ce->inflight
) *)&(ce->inflight); membar_datadep_consumer(); __tmp;
})
;
1472 do
1473 cur = ptr_unmask_bits(old, 2)((unsigned long)(old) & ((1UL << (2)) - 1)) ? ptr_dec(old)({ unsigned long __v = (unsigned long)(old); (typeof(old))(__v
- 1); })
: NULL((void *)0);
1474 while (!try_cmpxchg(&ce->inflight, &old, cur)({ __typeof(&ce->inflight) __op = (__typeof((&ce->
inflight)))(&old); __typeof(*(&ce->inflight)) __o =
*__op; __typeof(*(&ce->inflight)) __p = __sync_val_compare_and_swap
((&ce->inflight), (__o), (cur)); if (__p != __o) *__op
= __p; (__p == __o); })
);
1475 if (!cur)
1476 __execlists_schedule_out(rq, old, ccid);
1477
1478 i915_request_put(rq);
1479}
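
execlists_schedule_in() and execlists_schedule_out() above keep a small nesting count packed into the low bits of ce->inflight, relying on pointer alignment to keep those bits free. The following self-contained sketch reproduces the idea with C11 atomics; the 2-bit count and all names are assumptions made for illustration.

/*
 * Self-contained model of the tagged ce->inflight pointer: an engine
 * pointer with a small nesting count in its (alignment-guaranteed zero)
 * low bits. Names and the 2-bit count are assumptions of the sketch.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define COUNT_MASK 3UL                  /* low two bits hold the extra submissions */

static _Atomic uintptr_t inflight;      /* engine pointer | count */

static void schedule_in(uintptr_t engine)
{
    uintptr_t old = atomic_load(&inflight);

    for (;;) {
        /* First submission installs the engine, later ones bump the count. */
        uintptr_t fresh = old ? old + 1 : engine;

        if (atomic_compare_exchange_weak(&inflight, &old, fresh))
            break;                      /* on failure 'old' is refreshed, retry */
    }
}

static int schedule_out(void)
{
    uintptr_t old = atomic_load(&inflight), cur;

    do {                                /* drop one count, clear when last */
        cur = (old & COUNT_MASK) ? old - 1 : 0;
    } while (!atomic_compare_exchange_weak(&inflight, &old, cur));

    return cur == 0;                    /* 1: caller does the real schedule-out */
}

int main(void)
{
    static long engine_obj;             /* any sufficiently aligned object */
    uintptr_t engine = (uintptr_t)&engine_obj;

    schedule_in(engine);
    schedule_in(engine);                /* nested submission of the same context */
    printf("last? %d\n", schedule_out());   /* 0: one submission still in flight */
    printf("last? %d\n", schedule_out());   /* 1: final reference dropped */
    return 0;
}
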
1480
1481static u64 execlists_update_context(struct i915_request *rq)
1482{
1483 struct intel_context *ce = rq->context;
1484 u64 desc = ce->lrc.desc;
1485 u32 tail, prev;
1486
1487 /*
1488 * WaIdleLiteRestore:bdw,skl
1489 *
1490 * We should never submit the context with the same RING_TAIL twice
1491 * just in case we submit an empty ring, which confuses the HW.
1492 *
1493 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
1494 * the normal request to be able to always advance the RING_TAIL on
1495 * subsequent resubmissions (for lite restore). Should that fail us,
1496 * and we try and submit the same tail again, force the context
1497 * reload.
1498 *
1499 * If we need to return to a preempted context, we need to skip the
1500 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
1501 * HW has a tendency to ignore us rewinding the TAIL to the end of
1502 * an earlier request.
1503 */
1504 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail)((void)0);
1505 prev = rq->ring->tail;
1506 tail = intel_ring_set_tail(rq->ring, rq->tail);
1507 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)__builtin_expect(!!(intel_ring_direction(rq->ring, tail, prev
) <= 0), 0)
)
1508 desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2));
1509 ce->lrc_reg_state[CTX_RING_TAIL(0x06 + 1)] = tail;
1510 rq->tail = rq->wa_tail;
1511
1512 /*
1513 * Make sure the context image is complete before we submit it to HW.
1514 *
1515 * Ostensibly, writes (including the WCB) should be flushed prior to
1516 * an uncached write such as our mmio register access, but the empirical
1517 * evidence (esp. on Braswell) suggests that the WC write into memory
1518 * may not be visible to the HW prior to the completion of the UC
1519 * register write and that we may begin execution from the context
1520 * before its image is complete leading to invalid PD chasing.
1521 */
1522 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0);
1523
1524 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE(1ULL << (2));
1525 return desc;
1526}
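
The WaIdleLiteRestore handling above only forces a context reload when the new RING_TAIL has not advanced past the previous one. A simplified standalone model of that decision is shown below; it assumes a power-of-two ring and treats any wrapped distance of at most half the ring as forward motion, which is an assumption of the sketch rather than the exact convention of intel_ring_direction().

/*
 * Simplified force-restore decision. The half-ring rule for "advanced"
 * is an assumption of this sketch, not the driver's exact convention.
 */
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE     4096u             /* power of two, bytes */
#define FORCE_RESTORE (1ULL << 2)

static int ring_advanced(uint32_t next, uint32_t prev)
{
    uint32_t dist = (next - prev) & (RING_SIZE - 1);    /* wrapped distance */

    return dist != 0 && dist <= RING_SIZE / 2;          /* treat as forward */
}

static uint64_t update_desc(uint64_t desc, uint32_t tail, uint32_t prev)
{
    if (!ring_advanced(tail, prev))
        desc |= FORCE_RESTORE;          /* same or rewound TAIL: full reload */
    return desc;
}

int main(void)
{
    printf("%#llx\n", (unsigned long long)update_desc(0x100, 0x40, 0x20)); /* 0x100 */
    printf("%#llx\n", (unsigned long long)update_desc(0x100, 0x20, 0x20)); /* 0x104 */
    return 0;
}
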
1527
1528static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
1529{
1530 if (execlists->ctrl_reg) {
1531 writel(lower_32_bits(desc), execlists->submit_reg + port * 2)iowrite32(((u32)(desc)), execlists->submit_reg + port * 2);
1532 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1)iowrite32(((u32)(((desc) >> 16) >> 16)), execlists
->submit_reg + port * 2 + 1)
;
1533 } else {
1534 writel(upper_32_bits(desc), execlists->submit_reg)iowrite32(((u32)(((desc) >> 16) >> 16)), execlists
->submit_reg)
;
1535 writel(lower_32_bits(desc), execlists->submit_reg)iowrite32(((u32)(desc)), execlists->submit_reg);
1536 }
1537}
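
write_desc() above emits the 64-bit descriptor as two 32-bit writes: low then high into per-port submit-queue slots when a control register exists, or high then low into the single legacy ELSP register. A toy version follows, with plain arrays standing in for the MMIO space and illustrative names only.

/*
 * Toy model of the two submission paths; arrays stand in for MMIO.
 * Names are illustrative only.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t submit_q[4];            /* 2 ports x 2 dwords (ELSQ-style) */
static uint32_t elsp_fifo[4];           /* records the legacy write order */
static unsigned elsp_idx;

static void write_desc(int have_ctrl_reg, uint64_t desc, uint32_t port)
{
    if (have_ctrl_reg) {
        submit_q[port * 2 + 0] = (uint32_t)desc;         /* lower dword */
        submit_q[port * 2 + 1] = (uint32_t)(desc >> 32); /* upper dword */
    } else {
        elsp_fifo[elsp_idx++] = (uint32_t)(desc >> 32);  /* upper first */
        elsp_fifo[elsp_idx++] = (uint32_t)desc;          /* then lower */
    }
}

int main(void)
{
    write_desc(1, 0x1122334455667788ULL, 0);
    write_desc(0, 0x1122334455667788ULL, 0);
    printf("elsq port0: %08" PRIx32 " %08" PRIx32 "\n", submit_q[0], submit_q[1]);
    printf("elsp order: %08" PRIx32 " %08" PRIx32 "\n", elsp_fifo[0], elsp_fifo[1]);
    return 0;
}
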
1538
1539static __maybe_unused__attribute__((__unused__)) char *
1540dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
1541{
1542 if (!rq)
1543 return "";
1544
1545 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
1546 prefix,
1547 rq->context->lrc.ccid,
1548 rq->fence.context, rq->fence.seqno,
1549 i915_request_completed(rq) ? "!" :
1550 i915_request_started(rq) ? "*" :
1551 "",
1552 rq_prio(rq));
1553
1554 return buf;
1555}
1556
1557static __maybe_unused__attribute__((__unused__)) void
1558trace_ports(const struct intel_engine_execlists *execlists,
1559 const char *msg,
1560 struct i915_request * const *ports)
1561{
1562 const struct intel_engine_cs *engine =
1563 container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr
= (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*engine), execlists) );})
;
1564 char __maybe_unused__attribute__((__unused__)) p0[40], p1[40];
1565
1566 if (!ports[0])
1567 return;
1568
1569 ENGINE_TRACE(engine, "%s { %s%s }\n", msg,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1570 dump_port(p0, sizeof(p0), "", ports[0]),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1571 dump_port(p1, sizeof(p1), ", ", ports[1]))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1572}
1573
1574static inline bool_Bool
1575reset_in_progress(const struct intel_engine_execlists *execlists)
1576{
1577 return unlikely(!__tasklet_is_enabled(&execlists->tasklet))__builtin_expect(!!(!__tasklet_is_enabled(&execlists->
tasklet)), 0)
;
1578}
1579
1580static __maybe_unused__attribute__((__unused__)) bool_Bool
1581assert_pending_valid(const struct intel_engine_execlists *execlists,
1582 const char *msg)
1583{
1584 struct intel_engine_cs *engine =
Value stored to 'engine' during its initialization is never read
1585 container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr
= (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*engine), execlists) );})
;
1586 struct i915_request * const *port, *rq;
1587 struct intel_context *ce = NULL((void *)0);
1588 bool_Bool sentinel = false0;
1589 u32 ccid = -1;
1590
1591 trace_ports(execlists, msg, execlists->pending);
1592
1593 /* We may be messing around with the lists during reset, lalala */
1594 if (reset_in_progress(execlists))
1595 return true1;
1596
1597 if (!execlists->pending[0]) {
1598 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",do { } while (0)
1599 engine->name)do { } while (0);
1600 return false0;
1601 }
1602
1603 if (execlists->pending[execlists_num_ports(execlists)]) {
1604 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",do { } while (0)
1605 engine->name, execlists_num_ports(execlists))do { } while (0);
1606 return false0;
1607 }
1608
1609 for (port = execlists->pending; (rq = *port); port++) {
1610 unsigned long flags;
1611 bool_Bool ok = true1;
1612
1613 GEM_BUG_ON(!kref_read(&rq->fence.refcount))((void)0);
1614 GEM_BUG_ON(!i915_request_is_active(rq))((void)0);
1615
1616 if (ce == rq->context) {
1617 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",do { } while (0)
1618 engine->name,do { } while (0)
1619 ce->timeline->fence_context,do { } while (0)
1620 port - execlists->pending)do { } while (0);
1621 return false0;
1622 }
1623 ce = rq->context;
1624
1625 if (ccid == ce->lrc.ccid) {
1626 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",do { } while (0)
1627 engine->name,do { } while (0)
1628 ccid, ce->timeline->fence_context,do { } while (0)
1629 port - execlists->pending)do { } while (0);
1630 return false0;
1631 }
1632 ccid = ce->lrc.ccid;
1633
1634 /*
1635 * Sentinels are supposed to be the last request so they flush
1636 * the current execution off the HW. Check that they are the only
1637 * request in the pending submission.
1638 */
1639 if (sentinel) {
1640 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",do { } while (0)
1641 engine->name,do { } while (0)
1642 ce->timeline->fence_context,do { } while (0)
1643 port - execlists->pending)do { } while (0);
1644 return false0;
1645 }
1646 sentinel = i915_request_has_sentinel(rq);
1647
1648 /* Hold tightly onto the lock to prevent concurrent retires! */
1649 if (!spin_trylock_irqsave(&rq->lock, flags)({ (void)(flags); mtx_enter_try(&rq->lock) ? 1 : 0; }))
1650 continue;
1651
1652 if (i915_request_completed(rq))
1653 goto unlock;
1654
1655 if (i915_active_is_idle(&ce->active) &&
1656 !intel_context_is_barrier(ce)) {
1657 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",do { } while (0)
1658 engine->name,do { } while (0)
1659 ce->timeline->fence_context,do { } while (0)
1660 port - execlists->pending)do { } while (0);
1661 ok = false0;
1662 goto unlock;
1663 }
1664
1665 if (!i915_vma_is_pinned(ce->state)) {
1666 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",do { } while (0)
1667 engine->name,do { } while (0)
1668 ce->timeline->fence_context,do { } while (0)
1669 port - execlists->pending)do { } while (0);
1670 ok = false0;
1671 goto unlock;
1672 }
1673
1674 if (!i915_vma_is_pinned(ce->ring->vma)) {
1675 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",do { } while (0)
1676 engine->name,do { } while (0)
1677 ce->timeline->fence_context,do { } while (0)
1678 port - execlists->pending)do { } while (0);
1679 ok = false0;
1680 goto unlock;
1681 }
1682
1683unlock:
1684 spin_unlock_irqrestore(&rq->lock, flags)do { (void)(flags); mtx_leave(&rq->lock); } while (0);
1685 if (!ok)
1686 return false0;
1687 }
1688
1689 return ce;
1690}
1691
1692static void execlists_submit_ports(struct intel_engine_cs *engine)
1693{
1694 struct intel_engine_execlists *execlists = &engine->execlists;
1695 unsigned int n;
1696
1697 GEM_BUG_ON(!assert_pending_valid(execlists, "submit"))((void)0);
1698
1699 /*
1700 * We can skip acquiring intel_runtime_pm_get() here as it was taken
1701 * on our behalf by the request (see i915_gem_mark_busy()) and it will
1702 * not be relinquished until the device is idle (see
1703 * i915_gem_idle_work_handler()). As a precaution, we make sure
1704 * that all ELSP are drained i.e. we have processed the CSB,
1705 * before allowing ourselves to idle and calling intel_runtime_pm_put().
1706 */
1707 GEM_BUG_ON(!intel_engine_pm_is_awake(engine))((void)0);
1708
1709 /*
1710 * ELSQ note: the submit queue is not cleared after being submitted
1711 * to the HW so we need to make sure we always clean it up. This is
1712 * currently ensured by the fact that we always write the same number
1713 * of elsq entries, keep this in mind before changing the loop below.
1714 */
1715 for (n = execlists_num_ports(execlists); n--; ) {
1716 struct i915_request *rq = execlists->pending[n];
1717
1718 write_desc(execlists,
1719 rq ? execlists_update_context(rq) : 0,
1720 n);
1721 }
1722
1723 /* we need to manually load the submit queue */
1724 if (execlists->ctrl_reg)
1725 writel(EL_CTRL_LOAD, execlists->ctrl_reg)iowrite32((1 << 0), execlists->ctrl_reg);
1726}
1727
1728static bool_Bool ctx_single_port_submission(const struct intel_context *ce)
1729{
1730 return (IS_ENABLED(CONFIG_DRM_I915_GVT)0 &&
1731 intel_context_force_single_submission(ce));
1732}
1733
1734static bool_Bool can_merge_ctx(const struct intel_context *prev,
1735 const struct intel_context *next)
1736{
1737 if (prev != next)
1738 return false0;
1739
1740 if (ctx_single_port_submission(prev))
1741 return false0;
1742
1743 return true1;
1744}
1745
1746static unsigned long i915_request_flags(const struct i915_request *rq)
1747{
1748 return READ_ONCE(rq->fence.flags)({ typeof(rq->fence.flags) __tmp = *(volatile typeof(rq->
fence.flags) *)&(rq->fence.flags); membar_datadep_consumer
(); __tmp; })
;
1749}
1750
1751static bool_Bool can_merge_rq(const struct i915_request *prev,
1752 const struct i915_request *next)
1753{
1754 GEM_BUG_ON(prev == next)((void)0);
1755 GEM_BUG_ON(!assert_priority_queue(prev, next))((void)0);
1756
1757 /*
1758 * We do not submit known completed requests. Therefore if the next
1759 * request is already completed, we can pretend to merge it in
1760 * with the previous context (and we will skip updating the ELSP
1761 * and tracking). Thus hopefully keeping the ELSP full with active
1762 * contexts, despite the best efforts of preempt-to-busy to confuse
1763 * us.
1764 */
1765 if (i915_request_completed(next))
1766 return true1;
1767
1768 if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &__builtin_expect(!!((i915_request_flags(prev) ^ i915_request_flags
(next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | (
1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)
1769 (BIT(I915_FENCE_FLAG_NOPREEMPT) |__builtin_expect(!!((i915_request_flags(prev) ^ i915_request_flags
(next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | (
1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)
1770 BIT(I915_FENCE_FLAG_SENTINEL)))__builtin_expect(!!((i915_request_flags(prev) ^ i915_request_flags
(next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | (
1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)
)
1771 return false0;
1772
1773 if (!can_merge_ctx(prev->context, next->context))
1774 return false0;
1775
1776 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno))((void)0);
1777 return true1;
1778}
1779
1780static void virtual_update_register_offsets(u32 *regs,
1781 struct intel_engine_cs *engine)
1782{
1783 set_offsets(regs, reg_offsets(engine), engine, false0);
1784}
1785
1786static bool_Bool virtual_matches(const struct virtual_engine *ve,
1787 const struct i915_request *rq,
1788 const struct intel_engine_cs *engine)
1789{
1790 const struct intel_engine_cs *inflight;
1791
1792 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1793 return false0;
1794
1795 /*
1796 * We track when the HW has completed saving the context image
1797 * (i.e. when we have seen the final CS event switching out of
1798 * the context) and must not overwrite the context image before
1799 * then. This restricts us to only using the active engine
1800 * while the previous virtualized request is inflight (so
1801 * we reuse the register offsets). This is a very small
1802 * hysteresis on the greedy selection algorithm.
1803 */
1804 inflight = intel_context_inflight(&ve->context)({ unsigned long __v = (unsigned long)(({ typeof((&ve->
context)->inflight) __tmp = *(volatile typeof((&ve->
context)->inflight) *)&((&ve->context)->inflight
); membar_datadep_consumer(); __tmp; })); (typeof(({ typeof((
&ve->context)->inflight) __tmp = *(volatile typeof(
(&ve->context)->inflight) *)&((&ve->context
)->inflight); membar_datadep_consumer(); __tmp; })))(__v &
-(1UL << (2))); })
;
1805 if (inflight && inflight != engine)
1806 return false0;
1807
1808 return true1;
1809}
1810
1811static void virtual_xfer_context(struct virtual_engine *ve,
1812 struct intel_engine_cs *engine)
1813{
1814 unsigned int n;
1815
1816 if (likely(engine == ve->siblings[0])__builtin_expect(!!(engine == ve->siblings[0]), 1))
1817 return;
1818
1819 GEM_BUG_ON(READ_ONCE(ve->context.inflight))((void)0);
1820 if (!intel_engine_has_relative_mmio(engine))
1821 virtual_update_register_offsets(ve->context.lrc_reg_state,
1822 engine);
1823
1824 /*
1825 * Move the bound engine to the top of the list for
1826 * future execution. We then kick this tasklet first
1827 * before checking others, so that we preferentially
1828 * reuse this set of bound registers.
1829 */
1830 for (n = 1; n < ve->num_siblings; n++) {
1831 if (ve->siblings[n] == engine) {
1832 swap(ve->siblings[n], ve->siblings[0])do { __typeof(ve->siblings[n]) __tmp = (ve->siblings[n]
); (ve->siblings[n]) = (ve->siblings[0]); (ve->siblings
[0]) = __tmp; } while(0)
;
1833 break;
1834 }
1835 }
1836}
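
The sibling rotation at the end of virtual_xfer_context() is a plain move-to-front: once a physical engine is bound, swap it into slot 0 so its register set is preferred next time. A minimal standalone version, with integers standing in for the engine pointers:

/*
 * Minimal move-to-front; integers stand in for the engine pointers.
 */
#include <stdio.h>

static void move_to_front(int *siblings, int count, int chosen)
{
    for (int n = 1; n < count; n++)
        if (siblings[n] == chosen) {
            int tmp = siblings[n];      /* swap with the current head */
            siblings[n] = siblings[0];
            siblings[0] = tmp;
            break;
        }
}

int main(void)
{
    int siblings[] = { 10, 11, 12 };

    move_to_front(siblings, 3, 12);
    printf("%d %d %d\n", siblings[0], siblings[1], siblings[2]);  /* 12 11 10 */
    return 0;
}
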
1837
1838#define for_each_waiter(p__, rq__)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link
) *__mptr = ((&(rq__)->sched.waiters_list)->next);
(__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}); &p__->wait_link != (&(rq__
)->sched.waiters_list); p__ = ({ const __typeof( ((__typeof
(*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}))
\
1839 list_for_each_entry_lockless(p__, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link
) *__mptr = ((&(rq__)->sched.waiters_list)->next);
(__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}); &p__->wait_link != (&(rq__
)->sched.waiters_list); p__ = ({ const __typeof( ((__typeof
(*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}))
1840 &(rq__)->sched.waiters_list, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link
) *__mptr = ((&(rq__)->sched.waiters_list)->next);
(__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}); &p__->wait_link != (&(rq__
)->sched.waiters_list); p__ = ({ const __typeof( ((__typeof
(*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}))
1841 wait_link)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link
) *__mptr = ((&(rq__)->sched.waiters_list)->next);
(__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}); &p__->wait_link != (&(rq__
)->sched.waiters_list); p__ = ({ const __typeof( ((__typeof
(*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), wait_link) );}))
1842
1843#define for_each_signaler(p__, rq__)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link
) *__mptr = ((&(rq__)->sched.signalers_list)->next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), signal_link) );}); &p__->signal_link != (&
(rq__)->sched.signalers_list); p__ = ({ const __typeof( ((
__typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link
.next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*p__), signal_link) );}))
\
1844 list_for_each_entry_rcu(p__, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link
) *__mptr = ((&(rq__)->sched.signalers_list)->next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), signal_link) );}); &p__->signal_link != (&
(rq__)->sched.signalers_list); p__ = ({ const __typeof( ((
__typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link
.next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*p__), signal_link) );}))
1845 &(rq__)->sched.signalers_list, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link
) *__mptr = ((&(rq__)->sched.signalers_list)->next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), signal_link) );}); &p__->signal_link != (&
(rq__)->sched.signalers_list); p__ = ({ const __typeof( ((
__typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link
.next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*p__), signal_link) );}))
1846 signal_link)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link
) *__mptr = ((&(rq__)->sched.signalers_list)->next
); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p__), signal_link) );}); &p__->signal_link != (&
(rq__)->sched.signalers_list); p__ = ({ const __typeof( ((
__typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link
.next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*p__), signal_link) );}))
1847
1848static void defer_request(struct i915_request *rq, struct list_head * const pl)
1849{
1850 DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) };
1851
1852 /*
1853 * We want to move the interrupted request to the back of
1854 * the round-robin list (i.e. its priority level), but
1855 * in doing so, we must then move all requests that were in
1856 * flight and were waiting for the interrupted request to
1857 * be run after it again.
1858 */
1859 do {
1860 struct i915_dependency *p;
1861
1862 GEM_BUG_ON(i915_request_is_active(rq))((void)0);
1863 list_move_tail(&rq->sched.link, pl);
1864
1865 for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link
) *__mptr = ((&(rq)->sched.waiters_list)->next); (
__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), wait_link) );}); &p->wait_link != (&(rq)->
sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *)
0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof
(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link
) );}))
{
1866 struct i915_request *w =
1867 container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p
->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof
(typeof(*w), sched) );})
;
1868
1869 if (p->flags & I915_DEPENDENCY_WEAK(1UL << (2)))
1870 continue;
1871
1872 /* Leave semaphores spinning on the other engines */
1873 if (w->engine != rq->engine)
1874 continue;
1875
1876 /* No waiter should start before its signaler */
1877 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&((void)0)
1878 i915_request_started(w) &&((void)0)
1879 !i915_request_completed(rq))((void)0);
1880
1881 GEM_BUG_ON(i915_request_is_active(w))((void)0);
1882 if (!i915_request_is_ready(w))
1883 continue;
1884
1885 if (rq_prio(w) < rq_prio(rq))
1886 continue;
1887
1888 GEM_BUG_ON(rq_prio(w) > rq_prio(rq))((void)0);
1889 list_move_tail(&w->sched.link, &list);
1890 }
1891
1892 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof
(*rq) *)0)->sched.link ) *__mptr = ((&list)->next);
(typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof(
*rq), sched.link) );}))
;
1893 } while (rq);
1894}
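
defer_request() above pushes the interrupted request to the back of its priority list and then keeps pulling in any waiter that must run after it on the same engine. The condensed model below captures that breadth-first walk with plain arrays; the data layout and fixed sizes are purely illustrative.

/*
 * Condensed model of the deferral walk; arrays of indices stand in for
 * the intrusive lists, and the fixed sizes are illustrative only.
 */
#include <stdio.h>

#define NREQ 4

/* waiters[i][j] = 1 means request j waits on request i (same engine). */
static int waiters[NREQ][NREQ] = {
    [0] = { [1] = 1, [2] = 1 },
    [1] = { [3] = 1 },
};

int main(void)
{
    int order[NREQ], n = 0;
    int work[NREQ], head = 0, tail = 0;
    int seen[NREQ] = { 0 };

    work[tail++] = 0;                   /* defer the interrupted request */
    seen[0] = 1;

    while (head < tail) {
        int rq = work[head++];

        order[n++] = rq;                /* "list_move_tail" to the back */
        for (int w = 0; w < NREQ; w++)  /* then defer its waiters as well */
            if (waiters[rq][w] && !seen[w]) {
                seen[w] = 1;
                work[tail++] = w;
            }
    }

    for (int i = 0; i < n; i++)         /* resulting tail order: 0 1 2 3 */
        printf("%d ", order[i]);
    printf("\n");
    return 0;
}
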
1895
1896static void defer_active(struct intel_engine_cs *engine)
1897{
1898 struct i915_request *rq;
1899
1900 rq = __unwind_incomplete_requests(engine);
1901 if (!rq)
1902 return;
1903
1904 defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
1905}
1906
1907static bool_Bool
1908need_timeslice(const struct intel_engine_cs *engine,
1909 const struct i915_request *rq,
1910 const struct rb_node *rb)
1911{
1912 int hint;
1913
1914 if (!intel_engine_has_timeslices(engine))
1915 return false0;
1916
1917 hint = engine->execlists.queue_priority_hint;
1918
1919 if (rb) {
1920 const struct virtual_engine *ve =
1921 rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id
].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof
(typeof(*ve), nodes[engine->id].rb) );})
;
1922 const struct intel_engine_cs *inflight =
1923 intel_context_inflight(&ve->context)({ unsigned long __v = (unsigned long)(({ typeof((&ve->
context)->inflight) __tmp = *(volatile typeof((&ve->
context)->inflight) *)&((&ve->context)->inflight
); membar_datadep_consumer(); __tmp; })); (typeof(({ typeof((
&ve->context)->inflight) __tmp = *(volatile typeof(
(&ve->context)->inflight) *)&((&ve->context
)->inflight); membar_datadep_consumer(); __tmp; })))(__v &
-(1UL << (2))); })
;
1924
1925 if (!inflight || inflight == engine) {
1926 struct i915_request *next;
1927
1928 rcu_read_lock();
1929 next = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request
) *)&(ve->request); membar_datadep_consumer(); __tmp; }
)
;
1930 if (next)
1931 hint = max(hint, rq_prio(next))(((hint)>(rq_prio(next)))?(hint):(rq_prio(next)));
1932 rcu_read_unlock();
1933 }
1934 }
1935
1936 if (!list_is_last(&rq->sched.link, &engine->active.requests))
1937 hint = max(hint, rq_prio(list_next_entry(rq, sched.link)))(((hint)>(rq_prio(({ const __typeof( ((typeof(*(rq)) *)0)->
sched.link ) *__mptr = (((rq)->sched.link.next)); (typeof(
*(rq)) *)( (char *)__mptr - __builtin_offsetof(typeof(*(rq)),
sched.link) );}))))?(hint):(rq_prio(({ const __typeof( ((typeof
(*(rq)) *)0)->sched.link ) *__mptr = (((rq)->sched.link
.next)); (typeof(*(rq)) *)( (char *)__mptr - __builtin_offsetof
(typeof(*(rq)), sched.link) );}))))
;
1938
1939 GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE)((void)0);
1940 return hint >= effective_prio(rq);
1941}
1942
1943static bool_Bool
1944timeslice_yield(const struct intel_engine_execlists *el,
1945 const struct i915_request *rq)
1946{
1947 /*
1948 * Once bitten, forever smitten!
1949 *
1950 * If the active context ever busy-waited on a semaphore,
1951 * it will be treated as a hog until the end of its timeslice (i.e.
1952 * until it is scheduled out and replaced by a new submission,
1953 * possibly even its own lite-restore). The HW only sends an interrupt
1954 * on the first miss, and we do not know if that semaphore has been
1955 * signaled, or even if it is now stuck on another semaphore. Play
1956 * safe, yield if it might be stuck -- it will be given a fresh
1957 * timeslice in the near future.
1958 */
1959 return rq->context->lrc.ccid == READ_ONCE(el->yield)({ typeof(el->yield) __tmp = *(volatile typeof(el->yield
) *)&(el->yield); membar_datadep_consumer(); __tmp; })
;
1960}
1961
1962static bool_Bool
1963timeslice_expired(const struct intel_engine_execlists *el,
1964 const struct i915_request *rq)
1965{
1966 return timer_expired(&el->timer) || timeslice_yield(el, rq);
1967}
1968
1969static int
1970switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
1971{
1972 if (list_is_last(&rq->sched.link, &engine->active.requests))
1973 return engine->execlists.queue_priority_hint;
1974
1975 return rq_prio(list_next_entry(rq, sched.link)({ const __typeof( ((typeof(*(rq)) *)0)->sched.link ) *__mptr
= (((rq)->sched.link.next)); (typeof(*(rq)) *)( (char *)__mptr
- __builtin_offsetof(typeof(*(rq)), sched.link) );})
);
1976}
1977
1978static inline unsigned long
1979timeslice(const struct intel_engine_cs *engine)
1980{
1981 return READ_ONCE(engine->props.timeslice_duration_ms)({ typeof(engine->props.timeslice_duration_ms) __tmp = *(volatile
typeof(engine->props.timeslice_duration_ms) *)&(engine
->props.timeslice_duration_ms); membar_datadep_consumer();
__tmp; })
;
1982}
1983
1984static unsigned long active_timeslice(const struct intel_engine_cs *engine)
1985{
1986 const struct intel_engine_execlists *execlists = &engine->execlists;
1987 const struct i915_request *rq = *execlists->active;
1988
1989 if (!rq || i915_request_completed(rq))
1990 return 0;
1991
1992 if (READ_ONCE(execlists->switch_priority_hint)({ typeof(execlists->switch_priority_hint) __tmp = *(volatile
typeof(execlists->switch_priority_hint) *)&(execlists
->switch_priority_hint); membar_datadep_consumer(); __tmp;
})
< effective_prio(rq))
1993 return 0;
1994
1995 return timeslice(engine);
1996}
1997
1998static void set_timeslice(struct intel_engine_cs *engine)
1999{
2000 unsigned long duration;
2001
2002 if (!intel_engine_has_timeslices(engine))
2003 return;
2004
2005 duration = active_timeslice(engine);
2006 ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2007
2008 set_timer_ms(&engine->execlists.timer, duration);
2009}
2010
2011static void start_timeslice(struct intel_engine_cs *engine, int prio)
2012{
2013 struct intel_engine_execlists *execlists = &engine->execlists;
2014 unsigned long duration;
2015
2016 if (!intel_engine_has_timeslices(engine))
2017 return;
2018
2019 WRITE_ONCE(execlists->switch_priority_hint, prio)({ typeof(execlists->switch_priority_hint) __tmp = (prio);
*(volatile typeof(execlists->switch_priority_hint) *)&
(execlists->switch_priority_hint) = __tmp; __tmp; })
;
2020 if (prio == INT_MIN(-0x7fffffff-1))
2021 return;
2022
2023 if (timer_pending(&execlists->timer)(((&execlists->timer))->to_flags & 0x02))
2024 return;
2025
2026 duration = timeslice(engine);
2027 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2028 "start timeslicing, prio:%d, interval:%lu",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2029 prio, duration)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2030
2031 set_timer_ms(&execlists->timer, duration);
2032}
2033
2034static void record_preemption(struct intel_engine_execlists *execlists)
2035{
2036 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++)0;
2037}
2038
2039static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
2040 const struct i915_request *rq)
2041{
2042 if (!rq)
2043 return 0;
2044
2045 /* Force a fast reset for terminated contexts (ignoring sysfs!) */
2046 if (unlikely(intel_context_is_banned(rq->context))__builtin_expect(!!(intel_context_is_banned(rq->context)),
0)
)
2047 return 1;
2048
2049 return READ_ONCE(engine->props.preempt_timeout_ms)({ typeof(engine->props.preempt_timeout_ms) __tmp = *(volatile
typeof(engine->props.preempt_timeout_ms) *)&(engine->
props.preempt_timeout_ms); membar_datadep_consumer(); __tmp; }
)
;
2050}
2051
2052static void set_preempt_timeout(struct intel_engine_cs *engine,
2053 const struct i915_request *rq)
2054{
2055 if (!intel_engine_has_preempt_reset(engine))
2056 return;
2057
2058 set_timer_ms(&engine->execlists.preempt,
2059 active_preempt_timeout(engine, rq));
2060}
2061
2062static inline void clear_ports(struct i915_request **ports, int count)
2063{
2064 memset_p((void **)ports, NULL((void *)0), count);
2065}
2066
2067static inline void
2068copy_ports(struct i915_request **dst, struct i915_request **src, int count)
2069{
2070 /* A memcpy_p() would be very useful here! */
2071 while (count--)
2072 WRITE_ONCE(*dst++, *src++)({ typeof(*dst++) __tmp = (*src++); *(volatile typeof(*dst++)
*)&(*dst++) = __tmp; __tmp; })
; /* avoid write tearing */
2073}
2074
2075static void execlists_dequeue(struct intel_engine_cs *engine)
2076{
2077 struct intel_engine_execlists * const execlists = &engine->execlists;
2078 struct i915_request **port = execlists->pending;
2079 struct i915_request ** const last_port = port + execlists->port_mask;
2080 struct i915_request * const *active;
2081 struct i915_request *last;
2082 struct rb_node *rb;
2083 bool_Bool submit = false0;
2084
2085 /*
2086 * Hardware submission is through 2 ports. Conceptually each port
2087 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
2088 * static for a context, and unique to each, so we only execute
2089 * requests belonging to a single context from each ring. RING_HEAD
2090 * is maintained by the CS in the context image, it marks the place
2091 * where it got up to last time, and through RING_TAIL we tell the CS
2092 * where we want to execute up to this time.
2093 *
2094 * In this list the requests are in order of execution. Consecutive
2095 * requests from the same context are adjacent in the ringbuffer. We
2096 * can combine these requests into a single RING_TAIL update:
2097 *
2098 * RING_HEAD...req1...req2
2099 * ^- RING_TAIL
2100 * since to execute req2 the CS must first execute req1.
2101 *
2102 * Our goal then is to point each port to the end of a consecutive
2103 * sequence of requests as being the most optimal (fewest wake ups
2104 * and context switches) submission.
2105 */
2106
2107 for (rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists
->virtual)->rb_root), -1)
; rb; ) {
2108 struct virtual_engine *ve =
2109 rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id
].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof
(typeof(*ve), nodes[engine->id].rb) );})
;
2110 struct i915_request *rq = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request
) *)&(ve->request); membar_datadep_consumer(); __tmp; }
)
;
2111
2112 if (!rq) { /* lazily cleanup after another engine handled rq */
2113 rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists
->virtual)->rb_root), (rb))
;
2114 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
2115 rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists
->virtual)->rb_root), -1)
;
2116 continue;
2117 }
2118
2119 if (!virtual_matches(ve, rq, engine)) {
2120 rb = rb_next(rb)linux_root_RB_NEXT((rb));
2121 continue;
2122 }
2123
2124 break;
2125 }
2126
2127 /*
2128 * If the queue is higher priority than the last
2129 * request in the currently active context, submit afresh.
2130 * We will resubmit again afterwards in case we need to split
2131 * the active context to interject the preemption request,
2132 * i.e. we will retrigger preemption following the ack in case
2133 * of trouble.
2134 */
2135 active = READ_ONCE(execlists->active)({ typeof(execlists->active) __tmp = *(volatile typeof(execlists
->active) *)&(execlists->active); membar_datadep_consumer
(); __tmp; })
;
2136
2137 /*
2138 * In theory we can skip over completed contexts that have not
2139 * yet been processed by events (as those events are in flight):
2140 *
2141 * while ((last = *active) && i915_request_completed(last))
2142 * active++;
2143 *
2144 * However, the GPU cannot handle this as it will ultimately
2145 * find itself trying to jump back into a context it has just
2146 * completed and barf.
2147 */
2148
2149 if ((last = *active)) {
2150 if (need_preempt(engine, last, rb)) {
2151 if (i915_request_completed(last)) {
2152 tasklet_hi_schedule(&execlists->tasklet);
2153 return;
2154 }
2155
2156 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2157 "preempting last=%llx:%lld, prio=%d, hint=%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2158 last->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2159 last->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2160 last->sched.attr.priority,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2161 execlists->queue_priority_hint)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2162 record_preemption(execlists);
2163
2164 /*
2165 * Don't let the RING_HEAD advance past the breadcrumb
2166 * as we unwind (and until we resubmit) so that we do
2167 * not accidentally tell it to go backwards.
2168 */
2169 ring_set_paused(engine, 1);
2170
2171 /*
2172 * Note that we have not stopped the GPU at this point,
2173 * so we are unwinding the incomplete requests as they
2174 * remain inflight and so by the time we do complete
2175 * the preemption, some of the unwound requests may
2176 * complete!
2177 */
2178 __unwind_incomplete_requests(engine);
2179
2180 last = NULL((void *)0);
2181 } else if (need_timeslice(engine, last, rb) &&
2182 timeslice_expired(execlists, last)) {
2183 if (i915_request_completed(last)) {
2184 tasklet_hi_schedule(&execlists->tasklet);
2185 return;
2186 }
2187
2188 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2189 "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2190 last->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2191 last->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2192 last->sched.attr.priority,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2193 execlists->queue_priority_hint,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2194 yesno(timeslice_yield(execlists, last)))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2195
2196 ring_set_paused(engine, 1);
2197 defer_active(engine);
2198
2199 /*
2200 * Unlike for preemption, if we rewind and continue
2201 * executing the same context as previously active,
2202 * the order of execution will remain the same and
2203 * the tail will only advance. We do not need to
2204 * force a full context restore, as a lite-restore
2205 * is sufficient to resample the monotonic TAIL.
2206 *
2207 * If we switch to any other context, similarly we
2208 * will not rewind TAIL of current context, and
2209 * normal save/restore will preserve state and allow
2210 * us to later continue executing the same request.
2211 */
2212 last = NULL((void *)0);
2213 } else {
2214 /*
2215 * Otherwise if we already have a request pending
2216 * for execution after the current one, we can
2217 * just wait until the next CS event before
2218 * queuing more. In either case we will force a
2219 * lite-restore preemption event, but if we wait
2220 * we hopefully coalesce several updates into a single
2221 * submission.
2222 */
2223 if (!list_is_last(&last->sched.link,
2224 &engine->active.requests)) {
2225 /*
2226 * Even if ELSP[1] is occupied and not worthy
2227 * of timeslices, our queue might be.
2228 */
2229 start_timeslice(engine, queue_prio(execlists));
2230 return;
2231 }
2232 }
2233 }
2234
2235 while (rb) { /* XXX virtual is always taking precedence */
2236 struct virtual_engine *ve =
2237 rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id
].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof
(typeof(*ve), nodes[engine->id].rb) );})
;
2238 struct i915_request *rq;
2239
2240 spin_lock(&ve->base.active.lock)mtx_enter(&ve->base.active.lock);
2241
2242 rq = ve->request;
2243 if (unlikely(!rq)__builtin_expect(!!(!rq), 0)) { /* lost the race to a sibling */
2244 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
2245 rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists
->virtual)->rb_root), (rb))
;
2246 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
2247 rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists
->virtual)->rb_root), -1)
;
2248 continue;
2249 }
2250
2251 GEM_BUG_ON(rq != ve->request)((void)0);
2252 GEM_BUG_ON(rq->engine != &ve->base)((void)0);
2253 GEM_BUG_ON(rq->context != &ve->context)((void)0);
2254
2255 if (rq_prio(rq) >= queue_prio(execlists)) {
2256 if (!virtual_matches(ve, rq, engine)) {
2257 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
2258 rb = rb_next(rb)linux_root_RB_NEXT((rb));
2259 continue;
2260 }
2261
2262 if (last && !can_merge_rq(last, rq)) {
2263 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
2264 start_timeslice(engine, rq_prio(rq));
2265 return; /* leave this for another sibling */
2266 }
2267
2268 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2269 "virtual rq=%llx:%lld%s, new engine? %s\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2270 rq->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2271 rq->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2272 i915_request_completed(rq) ? "!" :do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2273 i915_request_started(rq) ? "*" :do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2274 "",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2275 yesno(engine != ve->siblings[0]))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2276
2277 WRITE_ONCE(ve->request, NULL)({ typeof(ve->request) __tmp = (((void *)0)); *(volatile typeof
(ve->request) *)&(ve->request) = __tmp; __tmp; })
;
2278 WRITE_ONCE(ve->base.execlists.queue_priority_hint,({ typeof(ve->base.execlists.queue_priority_hint) __tmp = (
(-0x7fffffff-1)); *(volatile typeof(ve->base.execlists.queue_priority_hint
) *)&(ve->base.execlists.queue_priority_hint) = __tmp;
__tmp; })
2279 INT_MIN)({ typeof(ve->base.execlists.queue_priority_hint) __tmp = (
(-0x7fffffff-1)); *(volatile typeof(ve->base.execlists.queue_priority_hint
) *)&(ve->base.execlists.queue_priority_hint) = __tmp;
__tmp; })
;
2280 rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists
->virtual)->rb_root), (rb))
;
2281 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
2282
2283 GEM_BUG_ON(!(rq->execution_mask & engine->mask))((void)0);
2284 WRITE_ONCE(rq->engine, engine)({ typeof(rq->engine) __tmp = (engine); *(volatile typeof(
rq->engine) *)&(rq->engine) = __tmp; __tmp; })
;
2285
2286 if (__i915_request_submit(rq)) {
2287 /*
2288 * Only after we confirm that we will submit
2289 * this request (i.e. it has not already
2290 * completed), do we want to update the context.
2291 *
2292 * This serves two purposes. It avoids
2293 * unnecessary work if we are resubmitting an
2294 * already completed request after timeslicing.
2295 * But more importantly, it prevents us altering
2296 * ve->siblings[] on an idle context, where
2297 * we may be using ve->siblings[] in
2298 * virtual_context_enter / virtual_context_exit.
2299 */
2300 virtual_xfer_context(ve, engine);
2301 GEM_BUG_ON(ve->siblings[0] != engine)((void)0);
2302
2303 submit = true1;
2304 last = rq;
2305 }
2306 i915_request_put(rq);
2307
2308 /*
2309 * Hmm, we have a bunch of virtual engine requests,
2310 * but the first one was already completed (thanks
2311 * preempt-to-busy!). Keep looking at the veng queue
2312 * until we have no more relevant requests (i.e.
2313 * the normal submit queue has higher priority).
2314 */
2315 if (!submit) {
2316 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
2317 rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists
->virtual)->rb_root), -1)
;
2318 continue;
2319 }
2320 }
2321
2322 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
2323 break;
2324 }
2325
2326 while ((rb = rb_first_cached(&execlists->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists
->queue)->rb_root), -1)
)) {
2327 struct i915_priolist *p = to_priolist(rb);
2328 struct i915_request *rq, *rn;
2329 int i;
2330
2331 priolist_for_each_request_consume(rq, rn, p, i)for (; (p)->used ? (i = __builtin_ctzl((p)->used)), 1 :
0; (p)->used &= ~(1UL << (i))) for (rq = ({ const
__typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = ((
&(p)->requests[i])->next); (__typeof(*rq) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );}
), rn = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = (rq->sched.link.next); (__typeof(*rq) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );}
); &rq->sched.link != (&(p)->requests[i]); rq =
rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->sched.link
) *__mptr = (rn->sched.link.next); (__typeof(*rn) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*rn), sched.link) );}
))
{
2332 bool_Bool merge = true1;
2333
2334 /*
2335 * Can we combine this request with the current port?
2336 * It has to be the same context/ringbuffer and not
2337 * have any exceptions (e.g. GVT saying never to
2338 * combine contexts).
2339 *
2340 * If we can combine the requests, we can execute both
2341 * by updating the RING_TAIL to point to the end of the
2342 * second request, and so we never need to tell the
2343 * hardware about the first.
2344 */
2345 if (last && !can_merge_rq(last, rq)) {
2346 /*
2347 * If we are on the second port and cannot
2348 * combine this request with the last, then we
2349 * are done.
2350 */
2351 if (port == last_port)
2352 goto done;
2353
2354 /*
2355 * We must not populate both ELSP[] with the
2356 * same LRCA, i.e. we must submit 2 different
2357 * contexts if we submit 2 ELSP.
2358 */
2359 if (last->context == rq->context)
2360 goto done;
2361
2362 if (i915_request_has_sentinel(last))
2363 goto done;
2364
2365 /*
2366 * If GVT overrides us we only ever submit
2367 * port[0], leaving port[1] empty. Note that we
2368 * also have to be careful that we don't queue
2369 * the same context (even though a different
2370 * request) to the second port.
2371 */
2372 if (ctx_single_port_submission(last->context) ||
2373 ctx_single_port_submission(rq->context))
2374 goto done;
2375
2376 merge = false0;
2377 }
2378
2379 if (__i915_request_submit(rq)) {
2380 if (!merge) {
2381 *port = execlists_schedule_in(last, port - execlists->pending);
2382 port++;
2383 last = NULL((void *)0);
2384 }
2385
2386 GEM_BUG_ON(last &&((void)0)
2387 !can_merge_ctx(last->context,((void)0)
2388 rq->context))((void)0);
2389 GEM_BUG_ON(last &&((void)0)
2390 i915_seqno_passed(last->fence.seqno,((void)0)
2391 rq->fence.seqno))((void)0);
2392
2393 submit = true1;
2394 last = rq;
2395 }
2396 }
2397
2398 rb_erase_cached(&p->node, &execlists->queue)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists
->queue)->rb_root), (&p->node))
;
2399 i915_priolist_free(p);
2400 }
2401
2402done:
2403 /*
2404 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
2405 *
2406 * We choose the priority hint such that if we add a request of greater
2407 * priority than this, we kick the submission tasklet to decide on
2408 * the right order of submitting the requests to hardware. We must
2409 * also be prepared to reorder requests as they are in-flight on the
2410 * HW. We derive the priority hint then as the first "hole" in
2411 * the HW submission ports and if there are no available slots,
2412 * the priority of the lowest executing request, i.e. last.
2413 *
2414 * When we do receive a higher priority request ready to run from the
2415 * user, see queue_request(), the priority hint is bumped to that
2416 * request triggering preemption on the next dequeue (or subsequent
2417 * interrupt for secondary ports).
2418 */
2419 execlists->queue_priority_hint = queue_prio(execlists);
2420
2421 if (submit) {
2422 *port = execlists_schedule_in(last, port - execlists->pending);
2423 execlists->switch_priority_hint =
2424 switch_prio(engine, *execlists->pending);
2425
2426 /*
2427 * Skip if we ended up with exactly the same set of requests,
2428 * e.g. trying to timeslice a pair of ordered contexts
2429 */
2430 if (!memcmp(active, execlists->pending,__builtin_memcmp((active), (execlists->pending), ((port - execlists
->pending + 1) * sizeof(*port)))
2431 (port - execlists->pending + 1) * sizeof(*port))__builtin_memcmp((active), (execlists->pending), ((port - execlists
->pending + 1) * sizeof(*port)))
) {
2432 do
2433 execlists_schedule_out(fetch_and_zero(port)({ typeof(*port) __T = *(port); *(port) = (typeof(*port))0; __T
; })
);
2434 while (port-- != execlists->pending);
2435
2436 goto skip_submit;
2437 }
2438 clear_ports(port + 1, last_port - port);
2439
2440 WRITE_ONCE(execlists->yield, -1)({ typeof(execlists->yield) __tmp = (-1); *(volatile typeof
(execlists->yield) *)&(execlists->yield) = __tmp; __tmp
; })
;
2441 set_preempt_timeout(engine, *active);
2442 execlists_submit_ports(engine);
2443 } else {
2444 start_timeslice(engine, execlists->queue_priority_hint);
2445skip_submit:
2446 ring_set_paused(engine, 0);
2447 }
2448}
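
The dequeue loop above fills at most two ELSP ports, merging consecutive requests from the same context so that a single RING_TAIL update covers them all. A toy reduction of that port-filling policy, ignoring priorities, sentinels and virtual engines; the data model is purely illustrative.

/*
 * Toy port-filling policy: merge runs of the same context, stop when a
 * third context would be needed. Data model is purely illustrative.
 */
#include <stdio.h>

struct req { int ctx; int seqno; };

int main(void)
{
    struct req queue[] = {
        { .ctx = 1, .seqno = 1 }, { .ctx = 1, .seqno = 2 },
        { .ctx = 2, .seqno = 5 }, { .ctx = 3, .seqno = 9 },
    };
    const int nreq = sizeof(queue) / sizeof(queue[0]);
    struct req *ports[2] = { 0 };       /* last request submitted per port */
    int port = 0;

    for (int i = 0; i < nreq; i++) {
        struct req *rq = &queue[i];

        if (ports[port] && ports[port]->ctx != rq->ctx) {
            if (port == 1)              /* both ports occupied: done */
                break;
            port++;                     /* start filling the second port */
        }
        ports[port] = rq;               /* merge: only the last tail matters */
    }

    for (int p = 0; p <= port; p++)
        printf("port%d: ctx %d, tail at seqno %d\n",
               p, ports[p]->ctx, ports[p]->seqno);
    return 0;
}
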
2449
2450static void
2451cancel_port_requests(struct intel_engine_execlists * const execlists)
2452{
2453 struct i915_request * const *port;
2454
2455 for (port = execlists->pending; *port; port++)
2456 execlists_schedule_out(*port);
2457 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)(sizeof((execlists->pending)) / sizeof((execlists->pending
)[0]))
);
2458
2459 /* Mark the end of active before we overwrite *active */
2460 for (port = xchg(&execlists->active, execlists->pending)__sync_lock_test_and_set(&execlists->active, execlists
->pending)
; *port; port++)
2461 execlists_schedule_out(*port);
2462 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)(sizeof((execlists->inflight)) / sizeof((execlists->inflight
)[0]))
);
2463
2464 smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* complete the seqlock for execlists_active() */
2465 WRITE_ONCE(execlists->active, execlists->inflight)({ typeof(execlists->active) __tmp = (execlists->inflight
); *(volatile typeof(execlists->active) *)&(execlists->
active) = __tmp; __tmp; })
;
2466}
2467
2468static inline void
2469invalidate_csb_entries(const u64 *first, const u64 *last)
2470{
2471 clflush((vaddr_t)first);
2472 clflush((vaddr_t)last);
2473}
2474
2475/*
2476 * Starting with Gen12, the status has a new format:
2477 *
2478 * bit 0: switched to new queue
2479 * bit 1: reserved
2480 * bit 2: semaphore wait mode (poll or signal), only valid when
2481 * switch detail is set to "wait on semaphore"
2482 * bits 3-5: engine class
2483 * bits 6-11: engine instance
2484 * bits 12-14: reserved
2485 * bits 15-25: sw context id of the lrc the GT switched to
2486 * bits 26-31: sw counter of the lrc the GT switched to
2487 * bits 32-35: context switch detail
2488 * - 0: ctx complete
2489 * - 1: wait on sync flip
2490 * - 2: wait on vblank
2491 * - 3: wait on scanline
2492 * - 4: wait on semaphore
2493 * - 5: context preempted (not on SEMAPHORE_WAIT or
2494 * WAIT_FOR_EVENT)
2495 * bit 36: reserved
2496 * bits 37-43: wait detail (for switch detail 1 to 4)
2497 * bits 44-46: reserved
2498 * bits 47-57: sw context id of the lrc the GT switched away from
2499 * bits 58-63: sw counter of the lrc the GT switched away from
2500 */
2501static inline bool_Bool gen12_csb_parse(const u64 *csb)
2502{
2503 bool_Bool ctx_away_valid;
2504 bool_Bool new_queue;
2505 u64 entry;
2506
2507 /* HSD#22011248461 */
2508 entry = READ_ONCE(*csb)({ typeof(*csb) __tmp = *(volatile typeof(*csb) *)&(*csb)
; membar_datadep_consumer(); __tmp; })
;
2509 if (unlikely(entry == -1)__builtin_expect(!!(entry == -1), 0)) {
2510 preempt_disable();
2511 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)({ extern char _ctassert[(!(!__builtin_constant_p(50))) ? 1 :
-1 ] __attribute__((__unused__)); extern char _ctassert[(!((
50) > 50000)) ? 1 : -1 ] __attribute__((__unused__)); ({ int
cpu, ret, timeout = ((50)) * 1000; u64 base; do { } while (0
); if (!(1)) { ; cpu = (({struct cpu_info *__ci; asm volatile
("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct
cpu_info, ci_self))); __ci;})->ci_cpuid); } base = local_clock
(); for (;;) { u64 now = local_clock(); if (!(1)) ; __asm volatile
("" : : : "memory"); if (((entry = ({ typeof(*csb) __tmp = *(
volatile typeof(*csb) *)&(*csb); membar_datadep_consumer(
); __tmp; })) != -1)) { ret = 0; break; } if (now - base >=
timeout) { ret = -60; break; } cpu_relax(); if (!(1)) { ; if
(__builtin_expect(!!(cpu != (({struct cpu_info *__ci; asm volatile
("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct
cpu_info, ci_self))); __ci;})->ci_cpuid)), 0)) { timeout -=
now - base; cpu = (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0"
: "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_cpuid); base = local_clock(); } } } ret; }
); })
)
2512 GEM_WARN_ON("50us CSB timeout")({ __builtin_expect(!!(!!("50us CSB timeout")), 0); });
2513 preempt_enable();
2514 }
2515 WRITE_ONCE(*(u64 *)csb, -1)({ typeof(*(u64 *)csb) __tmp = (-1); *(volatile typeof(*(u64 *
)csb) *)&(*(u64 *)csb) = __tmp; __tmp; })
;
2516
2517 ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry))(((typeof((((~0UL) >> (64 - (25) - 1)) & ((~0UL) <<
(15)))))(((((u32)(((entry) >> 16) >> 16))) &
((((~0UL) >> (64 - (25) - 1)) & ((~0UL) << (
15))))) >> (__builtin_ffsll((((~0UL) >> (64 - (25
) - 1)) & ((~0UL) << (15)))) - 1))) != 0x7FF)
;
2518 new_queue =
2519 lower_32_bits(entry)((u32)(entry)) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE(0x1);
2520
2521 /*
2522 * The context switch detail is not guaranteed to be 5 when a preemption
2523 * occurs, so we can't just check for that. The check below works for
2524 * all the cases we care about, including preemptions of WAIT
2525 * instructions and lite-restore. Preempt-to-idle via the CTRL register
2526 * would require some extra handling, but we don't support that.
2527 */
2528 if (!ctx_away_valid || new_queue) {
2529 GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)))((void)0);
2530 return true1;
2531 }
2532
2533 /*
2534 * switch detail = 5 is covered by the case above and we do not expect a
2535 * context switch on an unsuccessful wait instruction since we always
2536 * use polling mode.
2537 */
2538 GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)))((void)0);
2539 return false0;
2540}
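
Given the Gen12 CSB layout documented above, the promote decision reduces to two fields: the switched-to-new-queue bit in the low dword and the 11-bit software context id (0x7FF meaning invalid) in bits 15-25 of the high dword. A standalone decode following that comment; the helper names are assumptions of this sketch.

/*
 * Decode of the documented Gen12 CSB fields; helper names are assumptions
 * of this sketch.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CSB_NEW_QUEUE   (1u << 0)               /* low dword, bit 0 */
#define CSB_CTX_ID(dw)  (((dw) >> 15) & 0x7ffu) /* bits 15-25 of a dword */
#define CSB_CTX_INVALID 0x7ffu                  /* "no valid context" */

static bool csb_promote(uint64_t entry)
{
    uint32_t lo = (uint32_t)entry;
    uint32_t hi = (uint32_t)(entry >> 32);
    bool ctx_away_valid = CSB_CTX_ID(hi) != CSB_CTX_INVALID;
    bool new_queue = lo & CSB_NEW_QUEUE;

    /* Promote pending -> active when nothing valid was switched away
     * from, or when the event reports a switch to a new queue. */
    return !ctx_away_valid || new_queue;
}

int main(void)
{
    uint64_t idle_to_ctx = ((uint64_t)CSB_CTX_INVALID << (32 + 15)) | CSB_NEW_QUEUE;
    uint64_t ctx_complete = (uint64_t)5 << (32 + 15);  /* away id 5, no new queue */

    printf("promote? %d\n", csb_promote(idle_to_ctx));  /* 1 */
    printf("promote? %d\n", csb_promote(ctx_complete)); /* 0 */
    return 0;
}
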
2541
2542static inline bool_Bool gen8_csb_parse(const u64 *csb)
2543{
2544 return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE(1 << 0) | GEN8_CTX_STATUS_PREEMPTED(1 << 1));
2545}
2546
2547static void process_csb(struct intel_engine_cs *engine)
2548{
2549 struct intel_engine_execlists * const execlists = &engine->execlists;
2550 const u64 * const buf = execlists->csb_status;
2551 const u8 num_entries = execlists->csb_size;
2552 u8 head, tail;
2553
2554 /*
2555 * As we modify our execlists state tracking we require exclusive
2556 * access. Either we are inside the tasklet, or the tasklet is disabled
2557 * and we assume that is only inside the reset paths and so serialised.
2558 */
2559 GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&((void)0)
2560 !reset_in_progress(execlists))((void)0);
2561 GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine))((void)0);
2562
2563 /*
2564 * Note that csb_write, csb_status may be either in HWSP or mmio.
2565 * When reading from the csb_write mmio register, we have to be
2566 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
2567 * the low 4bits. As it happens we know the next 4bits are always
2568 * zero and so we can simply mask off the low u8 of the register
2569 * and treat it identically to reading from the HWSP (without having
2570 * to use explicit shifting and masking, and probably bifurcating
2571 * the code to handle the legacy mmio read).
2572 */
2573 head = execlists->csb_head;
2574 tail = READ_ONCE(*execlists->csb_write)({ typeof(*execlists->csb_write) __tmp = *(volatile typeof
(*execlists->csb_write) *)&(*execlists->csb_write);
membar_datadep_consumer(); __tmp; })
;
2575 if (unlikely(head == tail)__builtin_expect(!!(head == tail), 0))
2576 return;
2577
2578 /*
2579 * We will consume all events from HW, or at least pretend to.
2580 *
2581 * The sequence of events from the HW is deterministic, and derived
2582 * from our writes to the ELSP, with a smidgen of variability for
2583 * the arrival of the asynchronous requests wrt the inflight
2584 * execution. If the HW sends an event that does not correspond with
2585 * the one we are expecting, we have to abandon all hope as we lose
2586 * all tracking of what the engine is actually executing. We will
2587 * only detect we are out of sequence with the HW when we get an
2588 * 'impossible' event because we have already drained our own
2589 * preemption/promotion queue. If this occurs, we know that we likely
2590 * lost track of execution earlier and must unwind and restart, the
2591 * simplest way is to stop processing the event queue and force the
2592 * engine to reset.
2593 */
2594 execlists->csb_head = tail;
2595 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2596
2597 /*
2598 * Hopefully paired with a wmb() in HW!
2599 *
2600 * We must complete the read of the write pointer before any reads
2601 * from the CSB, so that we do not see stale values. Without an rmb
2602 * (lfence) the HW may speculatively perform the CSB[] reads *before*
2603 * we perform the READ_ONCE(*csb_write).
2604 */
2605 rmb()do { __asm volatile("lfence" ::: "memory"); } while (0);
2606 do {
2607 bool_Bool promote;
2608
2609 if (++head == num_entries)
2610 head = 0;
2611
2612 /*
2613 * We are flying near dragons again.
2614 *
2615 * We hold a reference to the request in execlist_port[]
2616 * but no more than that. We are operating in softirq
2617 * context and so cannot hold any mutex or sleep. That
2618 * prevents us stopping the requests we are processing
2619 * in port[] from being retired simultaneously (the
2620 * breadcrumb will be complete before we see the
2621 * context-switch). As we only hold the reference to the
2622 * request, any pointer chasing underneath the request
2623 * is subject to a potential use-after-free. Thus we
2624 * store all of the bookkeeping within port[] as
2625 * required, and avoid using unguarded pointers beneath
2626 * request itself. The same applies to the atomic
2627 * status notifier.
2628 */
2629
2630 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2631 head,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2632 upper_32_bits(buf[head]),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2633 lower_32_bits(buf[head]))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2634
2635 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
2636 promote = gen12_csb_parse(buf + head);
2637 else
2638 promote = gen8_csb_parse(buf + head);
2639 if (promote) {
2640 struct i915_request * const *old = execlists->active;
2641
2642 if (GEM_WARN_ON(!*execlists->pending)({ __builtin_expect(!!(!!(!*execlists->pending)), 0); })) {
2643 execlists->error_interrupt |= ERROR_CSB(1UL << (31));
2644 break;
2645 }
2646
2647 ring_set_paused(engine, 0);
2648
2649 /* Point active to the new ELSP; prevent overwriting */
2650 WRITE_ONCE(execlists->active, execlists->pending)({ typeof(execlists->active) __tmp = (execlists->pending
); *(volatile typeof(execlists->active) *)&(execlists->
active) = __tmp; __tmp; })
;
2651 smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* notify execlists_active() */
2652
2653 /* cancel old inflight, prepare for switch */
2654 trace_ports(execlists, "preempted", old);
2655 while (*old)
2656 execlists_schedule_out(*old++);
2657
2658 /* switch pending to inflight */
2659 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"))((void)0);
2660 copy_ports(execlists->inflight,
2661 execlists->pending,
2662 execlists_num_ports(execlists));
2663 smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* complete the seqlock */
2664 WRITE_ONCE(execlists->active, execlists->inflight)({ typeof(execlists->active) __tmp = (execlists->inflight
); *(volatile typeof(execlists->active) *)&(execlists->
active) = __tmp; __tmp; })
;
2665
2666 /* XXX Magic delay for tgl */
2667 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x3a0) })))
;
2668
2669 WRITE_ONCE(execlists->pending[0], NULL)({ typeof(execlists->pending[0]) __tmp = (((void *)0)); *(
volatile typeof(execlists->pending[0]) *)&(execlists->
pending[0]) = __tmp; __tmp; })
;
2670 } else {
2671 if (GEM_WARN_ON(!*execlists->active)({ __builtin_expect(!!(!!(!*execlists->active)), 0); })) {
2672 execlists->error_interrupt |= ERROR_CSB(1UL << (31));
2673 break;
2674 }
2675
2676 /* port0 completed, advanced to port1 */
2677 trace_ports(execlists, "completed", execlists->active);
2678
2679 /*
2680 * We rely on the hardware being strongly
2681 * ordered, that the breadcrumb write is
2682 * coherent (visible from the CPU) before the
2683 * user interrupt is processed. One might assume
2684 * that the breadcrumb write, being before the
2685 * user interrupt and the CS event for the context
2686 * switch, would therefore be visible before the
2687 * CS event itself...
2688 */
2689 if (GEM_SHOW_DEBUG()(0) &&
2690 !i915_request_completed(*execlists->active)) {
2691 struct i915_request *rq = *execlists->active;
2692 const u32 *regs __maybe_unused__attribute__((__unused__)) =
2693 rq->context->lrc_reg_state;
2694
2695 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2696 "context completed before request!\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2697 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2698 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2699 ENGINE_READ(engine, RING_START),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2700 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2701 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2702 ENGINE_READ(engine, RING_CTL),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2703 ENGINE_READ(engine, RING_MI_MODE))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2704 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2705 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2706 i915_ggtt_offset(rq->ring->vma),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2707 rq->head, rq->tail,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2708 rq->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2709 lower_32_bits(rq->fence.seqno),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2710 hwsp_seqno(rq))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2711 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2712 "ctx:{start:%08x, head:%04x, tail:%04x}, ",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2713 regs[CTX_RING_START],do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2714 regs[CTX_RING_HEAD],do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2715 regs[CTX_RING_TAIL])do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2716 }
2717
2718 execlists_schedule_out(*execlists->active++);
2719
2720 GEM_BUG_ON(execlists->active - execlists->inflight >((void)0)
2721 execlists_num_ports(execlists))((void)0);
2722 }
2723 } while (head != tail);
2724
2725 set_timeslice(engine);
2726
2727 /*
2728 * Gen11 has proven to fail wrt global observation point between
2729 * entry and tail update, failing on the ordering and thus
2730 * we see an old entry in the context status buffer.
2731 *
2732 * Forcibly evict out entries for the next gpu csb update,
2733 * to increase the odds that we get fresh entries with non-working
2734 * hardware. The cost of doing so comes out mostly in the wash, as
2735 * hardware, working or not, will need to do the invalidation
2736 * beforehand.
2737 */
2738 invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
2739}
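
As an aside, the consumption pattern above (snapshot the write pointer once, order the buffer reads after it, then walk the cached head forward with wraparound) can be modelled in a few lines of plain C. The sketch below is illustrative only: it is not driver code, the struct and the entry count are invented, and a C11 acquire load stands in for the READ_ONCE()/rmb() pair.

/* Minimal userspace model of the CSB consumption loop above. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_ENTRIES 12			/* stands in for execlists->csb_size */

struct csb_model {
	uint64_t buf[NUM_ENTRIES];	/* status entries written by "HW" */
	_Atomic uint8_t write;		/* write pointer advanced by "HW" */
	uint8_t head;			/* last entry the "driver" consumed */
};

static void process_csb_model(struct csb_model *cs)
{
	/* One snapshot of the write pointer, as READ_ONCE() above. */
	uint8_t tail = atomic_load_explicit(&cs->write, memory_order_acquire);
	uint8_t head = cs->head;

	if (head == tail)
		return;			/* nothing new */

	cs->head = tail;		/* claim everything up to the snapshot */

	/* The acquire load plays the role of the rmb() before buf[] reads. */
	do {
		if (++head == NUM_ENTRIES)
			head = 0;	/* wrap, exactly like the driver loop */
		printf("csb[%u] = 0x%016llx\n", (unsigned int)head,
		       (unsigned long long)cs->buf[head]);
	} while (head != tail);
}

int main(void)
{
	struct csb_model cs = { .head = NUM_ENTRIES - 1 };

	cs.buf[0] = 0x8002;		/* pretend HW wrote two events... */
	cs.buf[1] = 0x8001;
	atomic_store(&cs.write, 1);	/* ...and advanced the write pointer */

	process_csb_model(&cs);		/* prints csb[0] and csb[1] */
	return 0;
}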
2740
2741static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
2742{
2743 lockdep_assert_held(&engine->active.lock)do { (void)(&engine->active.lock); } while(0);
2744 if (!READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile
typeof(engine->execlists.pending[0]) *)&(engine->execlists
.pending[0]); membar_datadep_consumer(); __tmp; })
) {
2745 rcu_read_lock(); /* protect peeking at execlists->active */
2746 execlists_dequeue(engine);
2747 rcu_read_unlock();
2748 }
2749}
2750
2751static void __execlists_hold(struct i915_request *rq)
2752{
2753 DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) };
2754
2755 do {
2756 struct i915_dependency *p;
2757
2758 if (i915_request_is_active(rq))
2759 __i915_request_unsubmit(rq);
2760
2761 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2762 list_move_tail(&rq->sched.link, &rq->engine->active.hold);
2763 i915_request_set_hold(rq);
2764 RQ_TRACE(rq, "on hold\n")do { const struct i915_request *rq__ = (rq); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (rq__->
engine); do { } while (0); } while (0); } while (0)
;
2765
2766 for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link
) *__mptr = ((&(rq)->sched.waiters_list)->next); (
__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), wait_link) );}); &p->wait_link != (&(rq)->
sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *)
0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof
(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link
) );}))
{
2767 struct i915_request *w =
2768 container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p
->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof
(typeof(*w), sched) );})
;
2769
2770 /* Leave semaphores spinning on the other engines */
2771 if (w->engine != rq->engine)
2772 continue;
2773
2774 if (!i915_request_is_ready(w))
2775 continue;
2776
2777 if (i915_request_completed(w))
2778 continue;
2779
2780 if (i915_request_on_hold(w))
2781 continue;
2782
2783 list_move_tail(&w->sched.link, &list);
2784 }
2785
2786 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof
(*rq) *)0)->sched.link ) *__mptr = ((&list)->next);
(typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof(
*rq), sched.link) );}))
;
2787 } while (rq);
2788}
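
Both __execlists_hold() above and __execlists_unhold() below avoid recursion while walking the dependency graph in softirq context: requests to transition are parked on a local list and popped one at a time. A minimal userspace sketch of that work-list idiom, with invented types, a fixed-size waiter array instead of the driver's linked lists, and the simplifying assumption that the graph is a tree:

#include <stdbool.h>
#include <stddef.h>

struct node {
	bool on_hold;
	struct node *waiters[4];	/* dependents, NULL-terminated */
	struct node *next;		/* work-list linkage */
};

static void hold_subtree(struct node *n)
{
	struct node *list = NULL;	/* local work list, like DRM_LIST_HEAD(list) */

	do {
		n->on_hold = true;

		/* Queue dependents that still need the transition. */
		for (size_t i = 0; i < 4 && n->waiters[i]; i++) {
			struct node *w = n->waiters[i];

			if (w->on_hold)
				continue;	/* already transferred */
			w->next = list;
			list = w;
		}

		/* Pop the next request, mirroring list_first_entry_or_null(). */
		n = list;
		if (n)
			list = n->next;
	} while (n);
}

int main(void)
{
	struct node leaf = { 0 };
	struct node mid = { .waiters = { &leaf } };
	struct node root = { .waiters = { &mid } };

	hold_subtree(&root);
	return (root.on_hold && mid.on_hold && leaf.on_hold) ? 0 : 1;
}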
2789
2790static bool_Bool execlists_hold(struct intel_engine_cs *engine,
2791 struct i915_request *rq)
2792{
2793 if (i915_request_on_hold(rq))
2794 return false0;
2795
2796 spin_lock_irq(&engine->active.lock)mtx_enter(&engine->active.lock);
2797
2798 if (i915_request_completed(rq)) { /* too late! */
2799 rq = NULL((void *)0);
2800 goto unlock;
2801 }
2802
2803 if (rq->engine != engine) { /* preempted virtual engine */
2804 struct virtual_engine *ve = to_virtual_engine(rq->engine);
2805
2806 /*
2807 * intel_context_inflight() is only protected by virtue
2808 * of process_csb() being called only by the tasklet (or
2809 * directly from inside reset while the tasklet is suspended).
2810 * Assert that neither of those is allowed to run while we
2811 * poke at the request queues.
2812 */
2813 GEM_BUG_ON(!reset_in_progress(&engine->execlists))((void)0);
2814
2815 /*
2816 * An unsubmitted request along a virtual engine will
2817 * remain on the active (this) engine until we are able
2818 * to process the context switch away (and so mark the
2819 * context as no longer in flight). That cannot have happened
2820 * yet, otherwise we would not be hanging!
2821 */
2822 spin_lock(&ve->base.active.lock)mtx_enter(&ve->base.active.lock);
2823 GEM_BUG_ON(intel_context_inflight(rq->context) != engine)((void)0);
2824 GEM_BUG_ON(ve->request != rq)((void)0);
2825 ve->request = NULL((void *)0);
2826 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
2827 i915_request_put(rq);
2828
2829 rq->engine = engine;
2830 }
2831
2832 /*
2833 * Transfer this request onto the hold queue to prevent it
2834 * being resubmitted to HW (and potentially completed) before we have
2835 * released it. Since we may have already submitted following
2836 * requests, we need to remove those as well.
2837 */
2838 GEM_BUG_ON(i915_request_on_hold(rq))((void)0);
2839 GEM_BUG_ON(rq->engine != engine)((void)0);
2840 __execlists_hold(rq);
2841 GEM_BUG_ON(list_empty(&engine->active.hold))((void)0);
2842
2843unlock:
2844 spin_unlock_irq(&engine->active.lock)mtx_leave(&engine->active.lock);
2845 return rq;
2846}
2847
2848static bool_Bool hold_request(const struct i915_request *rq)
2849{
2850 struct i915_dependency *p;
2851 bool_Bool result = false0;
2852
2853 /*
2854 * If one of our ancestors is on hold, we must also be on hold,
2855 * otherwise we will bypass it and execute before it.
2856 */
2857 rcu_read_lock();
2858 for_each_signaler(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->signal_link
) *__mptr = ((&(rq)->sched.signalers_list)->next);
(__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), signal_link) );}); &p->signal_link != (&(rq)
->sched.signalers_list); p = ({ const __typeof( ((__typeof
(*p) *)0)->signal_link ) *__mptr = (p->signal_link.next
); (__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), signal_link) );}))
{
2859 const struct i915_request *s =
2860 container_of(p->signaler, typeof(*s), sched)({ const __typeof( ((typeof(*s) *)0)->sched ) *__mptr = (p
->signaler); (typeof(*s) *)( (char *)__mptr - __builtin_offsetof
(typeof(*s), sched) );})
;
2861
2862 if (s->engine != rq->engine)
2863 continue;
2864
2865 result = i915_request_on_hold(s);
2866 if (result)
2867 break;
2868 }
2869 rcu_read_unlock();
2870
2871 return result;
2872}
2873
2874static void __execlists_unhold(struct i915_request *rq)
2875{
2876 DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) };
2877
2878 do {
2879 struct i915_dependency *p;
2880
2881 RQ_TRACE(rq, "hold release\n")do { const struct i915_request *rq__ = (rq); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (rq__->
engine); do { } while (0); } while (0); } while (0)
;
2882
2883 GEM_BUG_ON(!i915_request_on_hold(rq))((void)0);
2884 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit))((void)0);
2885
2886 i915_request_clear_hold(rq);
2887 list_move_tail(&rq->sched.link,
2888 i915_sched_lookup_priolist(rq->engine,
2889 rq_prio(rq)));
2890 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2891
2892 /* Also release any children on this engine that are ready */
2893 for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link
) *__mptr = ((&(rq)->sched.waiters_list)->next); (
__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), wait_link) );}); &p->wait_link != (&(rq)->
sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *)
0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof
(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link
) );}))
{
2894 struct i915_request *w =
2895 container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p
->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof
(typeof(*w), sched) );})
;
2896
2897 /* Propagate any change in error status */
2898 if (rq->fence.error)
2899 i915_request_set_error_once(w, rq->fence.error);
2900
2901 if (w->engine != rq->engine)
2902 continue;
2903
2904 if (!i915_request_on_hold(w))
2905 continue;
2906
2907 /* Check that no other parents are also on hold */
2908 if (hold_request(w))
2909 continue;
2910
2911 list_move_tail(&w->sched.link, &list);
2912 }
2913
2914 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof
(*rq) *)0)->sched.link ) *__mptr = ((&list)->next);
(typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof(
*rq), sched.link) );}))
;
2915 } while (rq);
2916}
2917
2918static void execlists_unhold(struct intel_engine_cs *engine,
2919 struct i915_request *rq)
2920{
2921 spin_lock_irq(&engine->active.lock)mtx_enter(&engine->active.lock);
2922
2923 /*
2924 * Move this request back to the priority queue, and all of its
2925 * children and grandchildren that were suspended along with it.
2926 */
2927 __execlists_unhold(rq);
2928
2929 if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
2930 engine->execlists.queue_priority_hint = rq_prio(rq);
2931 tasklet_hi_schedule(&engine->execlists.tasklet);
2932 }
2933
2934 spin_unlock_irq(&engine->active.lock)mtx_leave(&engine->active.lock);
2935}
2936
2937struct execlists_capture {
2938 struct work_struct work;
2939 struct i915_request *rq;
2940 struct i915_gpu_coredump *error;
2941};
2942
2943static void execlists_capture_work(struct work_struct *work)
2944{
2945 struct execlists_capture *cap = container_of(work, typeof(*cap), work)({ const __typeof( ((typeof(*cap) *)0)->work ) *__mptr = (
work); (typeof(*cap) *)( (char *)__mptr - __builtin_offsetof(
typeof(*cap), work) );})
;
2946 const gfp_t gfp = GFP_KERNEL(0x0001 | 0x0004) | __GFP_RETRY_MAYFAIL0 | __GFP_NOWARN0;
2947 struct intel_engine_cs *engine = cap->rq->engine;
2948 struct intel_gt_coredump *gt = cap->error->gt;
2949 struct intel_engine_capture_vma *vma;
2950
2951 /* Compress all the objects attached to the request, slow! */
2952 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2953 if (vma) {
2954 struct i915_vma_compress *compress =
2955 i915_vma_capture_prepare(gt);
2956
2957 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2958 i915_vma_capture_finish(gt, compress);
2959 }
2960
2961 gt->simulated = gt->engine->simulated;
2962 cap->error->simulated = gt->simulated;
2963
2964 /* Publish the error state, and announce it to the world */
2965 i915_error_state_store(cap->error);
2966 i915_gpu_coredump_put(cap->error);
2967
2968 /* Return this request and all that depend upon it for signaling */
2969 execlists_unhold(engine, cap->rq);
2970 i915_request_put(cap->rq);
2971
2972 kfree(cap);
2973}
2974
2975static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2976{
2977 const gfp_t gfp = GFP_ATOMIC0x0002 | __GFP_NOWARN0;
2978 struct execlists_capture *cap;
2979
2980 cap = kmalloc(sizeof(*cap), gfp);
2981 if (!cap)
2982 return NULL((void *)0);
2983
2984 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2985 if (!cap->error)
2986 goto err_cap;
2987
2988 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
2989 if (!cap->error->gt)
2990 goto err_gpu;
2991
2992 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
2993 if (!cap->error->gt->engine)
2994 goto err_gt;
2995
2996 return cap;
2997
2998err_gt:
2999 kfree(cap->error->gt);
3000err_gpu:
3001 kfree(cap->error);
3002err_cap:
3003 kfree(cap);
3004 return NULL((void *)0);
3005}
3006
3007static struct i915_request *
3008active_context(struct intel_engine_cs *engine, u32 ccid)
3009{
3010 const struct intel_engine_execlists * const el = &engine->execlists;
3011 struct i915_request * const *port, *rq;
3012
3013 /*
3014 * Use the most recent result from process_csb(), but just in case
3015 * we trigger an error (via interrupt) before the first CS event has
3016 * been written, peek at the next submission.
3017 */
3018
3019 for (port = el->active; (rq = *port); port++) {
3020 if (rq->context->lrc.ccid == ccid) {
3021 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3022 "ccid found at active:%zd\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3023 port - el->active)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3024 return rq;
3025 }
3026 }
3027
3028 for (port = el->pending; (rq = *port); port++) {
3029 if (rq->context->lrc.ccid == ccid) {
3030 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3031 "ccid found at pending:%zd\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3032 port - el->pending)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3033 return rq;
3034 }
3035 }
3036
3037 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3038 return NULL((void *)0);
3039}
3040
3041static u32 active_ccid(struct intel_engine_cs *engine)
3042{
3043 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x234 + 4) }))
;
3044}
3045
3046static void execlists_capture(struct intel_engine_cs *engine)
3047{
3048 struct execlists_capture *cap;
3049
3050 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1)
3051 return;
3052
3053 /*
3054 * We need to _quickly_ capture the engine state before we reset.
3055 * We are inside an atomic section (softirq) here and we are delaying
3056 * the forced preemption event.
3057 */
3058 cap = capture_regs(engine);
3059 if (!cap)
3060 return;
3061
3062 spin_lock_irq(&engine->active.lock)mtx_enter(&engine->active.lock);
3063 cap->rq = active_context(engine, active_ccid(engine));
3064 if (cap->rq) {
3065 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
3066 cap->rq = i915_request_get_rcu(cap->rq);
3067 }
3068 spin_unlock_irq(&engine->active.lock)mtx_leave(&engine->active.lock);
3069 if (!cap->rq)
3070 goto err_free;
3071
3072 /*
3073 * Remove the request from the execlists queue, and take ownership
3074 * of the request. We pass it to our worker who will _slowly_ compress
3075 * all the pages the _user_ requested for debugging their batch, after
3076 * which we return it to the queue for signaling.
3077 *
3078 * By removing them from the execlists queue, we also remove the
3079 * requests from being processed by __unwind_incomplete_requests()
3080 * during the intel_engine_reset(), and so they will *not* be replayed
3081 * afterwards.
3082 *
3083 * Note that because we have not yet reset the engine at this point,
3084 * it is possible that the request we have identified as being
3085 * guilty did in fact complete, and we will then hit an arbitration
3086 * point allowing the outstanding preemption to succeed. The likelihood
3087 * of that is very low (as capturing of the engine registers should be
3088 * fast enough to run inside an irq-off atomic section!), so we will
3089 * simply hold that request accountable for being non-preemptible
3090 * long enough to force the reset.
3091 */
3092 if (!execlists_hold(engine, cap->rq))
3093 goto err_rq;
3094
3095 INIT_WORK(&cap->work, execlists_capture_work);
3096 schedule_work(&cap->work);
3097 return;
3098
3099err_rq:
3100 i915_request_put(cap->rq);
3101err_free:
3102 i915_gpu_coredump_put(cap->error);
3103 kfree(cap);
3104}
3105
3106static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
3107{
3108 const unsigned int bit = I915_RESET_ENGINE2 + engine->id;
3109 unsigned long *lock = &engine->gt->reset.flags;
3110
3111 if (!intel_has_reset_engine(engine->gt))
3112 return;
3113
3114 if (test_and_set_bit(bit, lock))
3115 return;
3116
3117 ENGINE_TRACE(engine, "reset for %s\n", msg)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3118
3119 /* Mark this tasklet as disabled to avoid waiting for it to complete */
3120 tasklet_disable_nosync(&engine->execlists.tasklet);
3121
3122 ring_set_paused(engine, 1); /* Freeze the current request in place */
3123 execlists_capture(engine);
3124 intel_engine_reset(engine, msg);
3125
3126 tasklet_enable(&engine->execlists.tasklet);
3127 clear_and_wake_up_bit(bit, lock);
3128}
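
execlists_reset() above relies on test_and_set_bit() on the per-gt reset flags as a once-only guard, so that concurrent callers cannot start the same engine reset twice; clear_and_wake_up_bit() releases it. As a rough, userspace-only equivalent (not the kernel API), the same guard can be sketched with C11 atomics and an invented bit layout:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_ulong reset_flags;	/* one bit per engine, all clear */

static bool try_begin_reset(unsigned int engine_bit)
{
	unsigned long mask = 1UL << engine_bit;
	unsigned long old = atomic_fetch_or(&reset_flags, mask);

	return !(old & mask);		/* true only for the first caller */
}

static void end_reset(unsigned int engine_bit)
{
	atomic_fetch_and(&reset_flags, ~(1UL << engine_bit));
}

int main(void)
{
	printf("first:  %d\n", try_begin_reset(2));	/* 1: wins the race */
	printf("second: %d\n", try_begin_reset(2));	/* 0: reset already running */
	end_reset(2);
	return 0;
}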
3129
3130static bool_Bool preempt_timeout(const struct intel_engine_cs *const engine)
3131{
3132 const struct timeout *t = &engine->execlists.preempt;
3133
3134 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT640)
3135 return false0;
3136
3137 if (!timer_expired(t))
3138 return false0;
3139
3140 return READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile
typeof(engine->execlists.pending[0]) *)&(engine->execlists
.pending[0]); membar_datadep_consumer(); __tmp; })
;
3141}
3142
3143/*
3144 * Check the unread Context Status Buffers and manage the submission of new
3145 * contexts to the ELSP accordingly.
3146 */
3147static void execlists_submission_tasklet(unsigned long data)
3148{
3149 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
3150 bool_Bool timeout = preempt_timeout(engine);
3151
3152 process_csb(engine);
3153
3154 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))__builtin_expect(!!(({ typeof(engine->execlists.error_interrupt
) __tmp = *(volatile typeof(engine->execlists.error_interrupt
) *)&(engine->execlists.error_interrupt); membar_datadep_consumer
(); __tmp; })), 0)
) {
3155 const char *msg;
3156
3157 /* Generate the error message in priority wrt to the user! */
3158 if (engine->execlists.error_interrupt & GENMASK(15, 0)(((~0UL) >> (64 - (15) - 1)) & ((~0UL) << (0)
))
)
3159 msg = "CS error"; /* thrown by a user payload */
3160 else if (engine->execlists.error_interrupt & ERROR_CSB(1UL << (31)))
3161 msg = "invalid CSB event";
3162 else
3163 msg = "internal error";
3164
3165 engine->execlists.error_interrupt = 0;
3166 execlists_reset(engine, msg);
3167 }
3168
3169 if (!READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile
typeof(engine->execlists.pending[0]) *)&(engine->execlists
.pending[0]); membar_datadep_consumer(); __tmp; })
|| timeout) {
3170 unsigned long flags;
3171
3172 spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while
(0)
;
3173 __execlists_submission_tasklet(engine);
3174 spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); }
while (0)
;
3175
3176 /* Recheck after serialising with direct-submission */
3177 if (unlikely(timeout && preempt_timeout(engine))__builtin_expect(!!(timeout && preempt_timeout(engine
)), 0)
) {
3178 cancel_timer(&engine->execlists.preempt);
3179 execlists_reset(engine, "preemption time out");
3180 }
3181 }
3182}
3183
3184static void __execlists_kick(struct intel_engine_execlists *execlists)
3185{
3186 /* Kick the tasklet for some interrupt coalescing and reset handling */
3187 tasklet_hi_schedule(&execlists->tasklet);
3188}
3189
3190#define execlists_kick(t, member)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->member ) *__mptr = (t); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, member) );}))
\
3191 __execlists_kick(container_of(t, struct intel_engine_execlists, member)({ const __typeof( ((struct intel_engine_execlists *)0)->member
) *__mptr = (t); (struct intel_engine_execlists *)( (char *)
__mptr - __builtin_offsetof(struct intel_engine_execlists, member
) );})
)
3192
3193#ifdef __linux__
3194
3195static void execlists_timeslice(struct timer_list *timer)
3196{
3197 execlists_kick(timer, timer)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->timer ) *__mptr = (timer); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, timer) );}))
;
3198}
3199
3200static void execlists_preempt(struct timer_list *timer)
3201{
3202 execlists_kick(timer, preempt)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->preempt ) *__mptr = (timer); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, preempt) );}))
;
3203}
3204
3205#else
3206
3207static void execlists_timeslice(void *arg)
3208{
3209 struct timeout *timer = arg;
3210 execlists_kick(timer, timer)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->timer ) *__mptr = (timer); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, timer) );}))
;
3211}
3212
3213static void execlists_preempt(void *arg)
3214{
3215 struct timeout *timer = arg;
3216 execlists_kick(timer, preempt)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->preempt ) *__mptr = (timer); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, preempt) );}))
;
3217}
3218
3219#endif
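
The execlists_kick() wrapper used by both timer callbacks above recovers the struct intel_engine_execlists from a pointer to one of its embedded members (timer or preempt) by subtracting the member offset, i.e. the standard container_of() idiom. A self-contained sketch of that idiom, with invented types and field names:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct execlists_model {
	int queue_priority_hint;
	int timer;			/* stands in for the timer member */
	int preempt;			/* stands in for the preempt member */
};

int main(void)
{
	struct execlists_model el = { .queue_priority_hint = 7 };
	int *timer = &el.timer;		/* what a timeout callback would receive */

	/* Recover the owner, as __execlists_kick(container_of(...)) does. */
	struct execlists_model *owner =
		container_of(timer, struct execlists_model, timer);

	printf("hint = %d\n", owner->queue_priority_hint);	/* prints 7 */
	return 0;
}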
3220
3221static void queue_request(struct intel_engine_cs *engine,
3222 struct i915_request *rq)
3223{
3224 GEM_BUG_ON(!list_empty(&rq->sched.link))((void)0);
3225 list_add_tail(&rq->sched.link,
3226 i915_sched_lookup_priolist(engine, rq_prio(rq)));
3227 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3228}
3229
3230static void __submit_queue_imm(struct intel_engine_cs *engine)
3231{
3232 struct intel_engine_execlists * const execlists = &engine->execlists;
3233
3234 if (reset_in_progress(execlists))
3235 return; /* defer until we restart the engine following reset */
3236
3237 __execlists_submission_tasklet(engine);
3238}
3239
3240static void submit_queue(struct intel_engine_cs *engine,
3241 const struct i915_request *rq)
3242{
3243 struct intel_engine_execlists *execlists = &engine->execlists;
3244
3245 if (rq_prio(rq) <= execlists->queue_priority_hint)
3246 return;
3247
3248 execlists->queue_priority_hint = rq_prio(rq);
3249 __submit_queue_imm(engine);
3250}
3251
3252static bool_Bool ancestor_on_hold(const struct intel_engine_cs *engine,
3253 const struct i915_request *rq)
3254{
3255 GEM_BUG_ON(i915_request_on_hold(rq))((void)0);
3256 return !list_empty(&engine->active.hold) && hold_request(rq);
3257}
3258
3259static void flush_csb(struct intel_engine_cs *engine)
3260{
3261 struct intel_engine_execlists *el = &engine->execlists;
3262
3263 if (READ_ONCE(el->pending[0])({ typeof(el->pending[0]) __tmp = *(volatile typeof(el->
pending[0]) *)&(el->pending[0]); membar_datadep_consumer
(); __tmp; })
&& tasklet_trylock(&el->tasklet)) {
3264 if (!reset_in_progress(el))
3265 process_csb(engine);
3266 tasklet_unlock(&el->tasklet);
3267 }
3268}
3269
3270static void execlists_submit_request(struct i915_request *request)
3271{
3272 struct intel_engine_cs *engine = request->engine;
3273 unsigned long flags;
3274
3275 /* Hopefully we clear execlists->pending[] to let us through */
3276 flush_csb(engine);
3277
3278 /* Will be called from irq-context when using foreign fences. */
3279 spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while
(0)
;
3280
3281 if (unlikely(ancestor_on_hold(engine, request))__builtin_expect(!!(ancestor_on_hold(engine, request)), 0)) {
3282 RQ_TRACE(request, "ancestor on hold\n")do { const struct i915_request *rq__ = (request); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (rq__->
engine); do { } while (0); } while (0); } while (0)
;
3283 list_add_tail(&request->sched.link, &engine->active.hold);
3284 i915_request_set_hold(request);
3285 } else {
3286 queue_request(engine, request);
3287
3288 GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))((void)0);
3289 GEM_BUG_ON(list_empty(&request->sched.link))((void)0);
3290
3291 submit_queue(engine, request);
3292 }
3293
3294 spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); }
while (0)
;
3295}
3296
3297static void __execlists_context_fini(struct intel_context *ce)
3298{
3299 intel_ring_put(ce->ring);
3300 i915_vma_put(ce->state);
3301}
3302
3303static void execlists_context_destroy(struct kref *kref)
3304{
3305 struct intel_context *ce = container_of(kref, typeof(*ce), ref)({ const __typeof( ((typeof(*ce) *)0)->ref ) *__mptr = (kref
); (typeof(*ce) *)( (char *)__mptr - __builtin_offsetof(typeof
(*ce), ref) );})
;
3306
3307 GEM_BUG_ON(!i915_active_is_idle(&ce->active))((void)0);
3308 GEM_BUG_ON(intel_context_is_pinned(ce))((void)0);
3309
3310 if (ce->state)
3311 __execlists_context_fini(ce);
3312
3313 intel_context_fini(ce);
3314 intel_context_free(ce);
3315}
3316
3317static void
3318set_redzone(void *vaddr, const struct intel_engine_cs *engine)
3319{
3320 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
3321 return;
3322
3323 vaddr += engine->context_size;
3324
3325 memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)__builtin_memset((vaddr), (0xdb), ((1ULL << (12))));
3326}
3327
3328static void
3329check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
3330{
3331 if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
3332 return;
3333
3334 vaddr += engine->context_size;
3335
3336 if (memchr_inv(vaddr, CONTEXT_REDZONE0xdb, I915_GTT_PAGE_SIZE(1ULL << (12))))
3337 drm_err_once(&engine->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "%s context redzone overwritten!\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , engine
->name)
3338 "%s context redzone overwritten!\n",printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "%s context redzone overwritten!\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , engine
->name)
3339 engine->name)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "%s context redzone overwritten!\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , engine
->name)
;
3340}
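
set_redzone()/check_redzone() above implement a simple overrun canary: a sentinel byte (0xdb) is written into the page following the context image and verified again when the context is unpinned; any byte that no longer matches means the image was overrun. A userspace sketch of the same idea, with invented sizes and names (a plain loop replaces memchr_inv()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REDZONE_BYTE	0xdb
#define REDZONE_SIZE	64		/* the driver guards a full GTT page */

struct ctx_model {
	uint8_t image[256];		/* stands in for the context image */
	uint8_t redzone[REDZONE_SIZE];	/* guard area after it */
};

static void arm_redzone(struct ctx_model *c)
{
	memset(c->redzone, REDZONE_BYTE, sizeof(c->redzone));
}

static int redzone_intact(const struct ctx_model *c)
{
	for (size_t i = 0; i < sizeof(c->redzone); i++)
		if (c->redzone[i] != REDZONE_BYTE)
			return 0;	/* like memchr_inv() finding a mismatch */
	return 1;
}

int main(void)
{
	struct ctx_model c;

	arm_redzone(&c);
	printf("before: %s\n", redzone_intact(&c) ? "intact" : "overwritten");

	c.redzone[0] = 0;		/* simulate a one-byte overrun */
	printf("after:  %s\n", redzone_intact(&c) ? "intact" : "overwritten");
	return 0;
}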
3341
3342static void execlists_context_unpin(struct intel_context *ce)
3343{
3344 check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)),
3345 ce->engine);
3346}
3347
3348static void execlists_context_post_unpin(struct intel_context *ce)
3349{
3350 i915_gem_object_unpin_map(ce->state->obj);
3351}
3352
3353static u32 *
3354gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3355{
3356 *cs++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) |
3357 MI_SRM_LRM_GLOBAL_GTT(1<<22) |
3358 MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
3359 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) }));
3360 *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)) +
3361 CTX_TIMESTAMP(0x22 + 1) * sizeof(u32);
3362 *cs++ = 0;
3363
3364 *cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) | (1)) |
3365 MI_LRR_SOURCE_CS_MMIO((u32)((1UL << (18)) + 0)) |
3366 MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
3367 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) }));
3368 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)((const i915_reg_t){ .reg = ((0) + 0x3a8) }));
3369
3370 *cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) | (1)) |
3371 MI_LRR_SOURCE_CS_MMIO((u32)((1UL << (18)) + 0)) |
3372 MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
3373 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) }));
3374 *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)((const i915_reg_t){ .reg = ((0) + 0x3a8) }));
3375
3376 return cs;
3377}
3378
3379static u32 *
3380gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3381{
3382 GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1)((void)0);
3383
3384 *cs++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) |
3385 MI_SRM_LRM_GLOBAL_GTT(1<<22) |
3386 MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
3387 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) }));
3388 *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)) +
3389 (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3390 *cs++ = 0;
3391
3392 return cs;
3393}
3394
3395static u32 *
3396gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
3397{
3398 GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1)((void)0);
3399
3400 *cs++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) |
3401 MI_SRM_LRM_GLOBAL_GTT(1<<22) |
3402 MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
3403 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) }));
3404 *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)) +
3405 (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
3406 *cs++ = 0;
3407
3408 *cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) | (1)) |
3409 MI_LRR_SOURCE_CS_MMIO((u32)((1UL << (18)) + 0)) |
3410 MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0));
3411 *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) }));
3412 *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)((const i915_reg_t){ .reg = ((0) + 0x84) }));
3413
3414 return cs;
3415}
3416
3417static u32 *
3418gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
3419{
3420 cs = gen12_emit_timestamp_wa(ce, cs);
3421 cs = gen12_emit_cmd_buf_wa(ce, cs);
3422 cs = gen12_emit_restore_scratch(ce, cs);
3423
3424 return cs;
3425}
3426
3427static u32 *
3428gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3429{
3430 cs = gen12_emit_timestamp_wa(ce, cs);
3431 cs = gen12_emit_restore_scratch(ce, cs);
3432
3433 return cs;
3434}
3435
3436static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3437{
3438 return PAGE_SIZE(1 << 12) * ce->wa_bb_page;
3439}
3440
3441static u32 *context_indirect_bb(const struct intel_context *ce)
3442{
3443 void *ptr;
3444
3445 GEM_BUG_ON(!ce->wa_bb_page)((void)0);
3446
3447 ptr = ce->lrc_reg_state;
3448 ptr -= LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)); /* back to start of context image */
3449 ptr += context_wa_bb_offset(ce);
3450
3451 return ptr;
3452}
3453
3454static void
3455setup_indirect_ctx_bb(const struct intel_context *ce,
3456 const struct intel_engine_cs *engine,
3457 u32 *(*emit)(const struct intel_context *, u32 *))
3458{
3459 u32 * const start = context_indirect_bb(ce);
3460 u32 *cs;
3461
3462 cs = emit(ce, start);
3463 GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs))((void)0);
3464 while ((unsigned long)cs % CACHELINE_BYTES64)
3465 *cs++ = MI_NOOP(((0) << 23) | (0));
3466
3467 lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3468 i915_ggtt_offset(ce->state) +
3469 context_wa_bb_offset(ce),
3470 (cs - start) * sizeof(*cs));
3471}
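
setup_indirect_ctx_bb() pads the emitted command stream with MI_NOOP (which encodes as an all-zero dword) until the write pointer reaches a cacheline (64-byte) boundary; the *_init_indirectctx_bb() helpers further down use the same trick. A small illustrative userspace version of that padding, with an invented toy batch:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define CACHELINE	64u
#define NOOP		0u		/* MI_NOOP is an all-zero dword */

static uint32_t *pad_to_cacheline(uint32_t *cs)
{
	while ((uintptr_t)cs % CACHELINE)
		*cs++ = NOOP;
	return cs;
}

int main(void)
{
	/* 64-byte aligned backing store for a toy batch. */
	static uint32_t batch[32] __attribute__((aligned(64)));
	uint32_t *cs = batch;

	*cs++ = 0x11000000;		/* three pretend commands... */
	*cs++ = 0x22000000;
	*cs++ = 0x33000000;

	cs = pad_to_cacheline(cs);	/* ...padded out to a full cacheline */
	printf("emitted %zu dwords\n", (size_t)(cs - batch));	/* 16 */
	return 0;
}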
3472
3473static void
3474__execlists_update_reg_state(const struct intel_context *ce,
3475 const struct intel_engine_cs *engine,
3476 u32 head)
3477{
3478 struct intel_ring *ring = ce->ring;
3479 u32 *regs = ce->lrc_reg_state;
3480
3481 GEM_BUG_ON(!intel_ring_offset_valid(ring, head))((void)0);
3482 GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail))((void)0);
3483
3484 regs[CTX_RING_START(0x08 + 1)] = i915_ggtt_offset(ring->vma);
3485 regs[CTX_RING_HEAD(0x04 + 1)] = head;
3486 regs[CTX_RING_TAIL(0x06 + 1)] = ring->tail;
3487 regs[CTX_RING_CTL(0x0a + 1)] = RING_CTL_SIZE(ring->size)((ring->size) - (1 << 12)) | RING_VALID0x00000001;
3488
3489 /* RPCS */
3490 if (engine->class == RENDER_CLASS0) {
3491 regs[CTX_R_PWR_CLK_STATE(0x42 + 1)] =
3492 intel_sseu_make_rpcs(engine->gt, &ce->sseu);
3493
3494 i915_oa_init_reg_state(ce, engine);
3495 }
3496
3497 if (ce->wa_bb_page) {
3498 u32 *(*fn)(const struct intel_context *ce, u32 *cs);
3499
3500 fn = gen12_emit_indirect_ctx_xcs;
3501 if (ce->engine->class == RENDER_CLASS0)
3502 fn = gen12_emit_indirect_ctx_rcs;
3503
3504 /* Mutually exclusive wrt to global indirect bb */
3505 GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size)((void)0);
3506 setup_indirect_ctx_bb(ce, engine, fn);
3507 }
3508}
3509
3510static int
3511execlists_context_pre_pin(struct intel_context *ce,
3512 struct i915_gem_ww_ctx *ww, void **vaddr)
3513{
3514 GEM_BUG_ON(!ce->state)((void)0);
3515 GEM_BUG_ON(!i915_vma_is_pinned(ce->state))((void)0);
3516
3517 *vaddr = i915_gem_object_pin_map(ce->state->obj,
3518 i915_coherent_map_type(ce->engine->i915) |
3519 I915_MAP_OVERRIDE(1UL << (31)));
3520
3521 return PTR_ERR_OR_ZERO(*vaddr);
3522}
3523
3524static int
3525__execlists_context_pin(struct intel_context *ce,
3526 struct intel_engine_cs *engine,
3527 void *vaddr)
3528{
3529 ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE(1ULL << (2));
3530 ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12));
3531 __execlists_update_reg_state(ce, engine, ce->ring->tail);
3532
3533 return 0;
3534}
3535
3536static int execlists_context_pin(struct intel_context *ce, void *vaddr)
3537{
3538 return __execlists_context_pin(ce, ce->engine, vaddr);
3539}
3540
3541static int execlists_context_alloc(struct intel_context *ce)
3542{
3543 return __execlists_context_alloc(ce, ce->engine);
3544}
3545
3546static void execlists_context_reset(struct intel_context *ce)
3547{
3548 CE_TRACE(ce, "reset\n")do { const struct intel_context *ce__ = (ce); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (ce__->
engine); do { } while (0); } while (0); } while (0)
;
3549 GEM_BUG_ON(!intel_context_is_pinned(ce))((void)0);
3550
3551 intel_ring_reset(ce->ring, ce->ring->emit);
3552
3553 /* Scrub away the garbage */
3554 execlists_init_reg_state(ce->lrc_reg_state,
3555 ce, ce->engine, ce->ring, true1);
3556 __execlists_update_reg_state(ce, ce->engine, ce->ring->tail);
3557
3558 ce->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2));
3559}
3560
3561static const struct intel_context_ops execlists_context_ops = {
3562 .alloc = execlists_context_alloc,
3563
3564 .pre_pin = execlists_context_pre_pin,
3565 .pin = execlists_context_pin,
3566 .unpin = execlists_context_unpin,
3567 .post_unpin = execlists_context_post_unpin,
3568
3569 .enter = intel_context_enter_engine,
3570 .exit = intel_context_exit_engine,
3571
3572 .reset = execlists_context_reset,
3573 .destroy = execlists_context_destroy,
3574};
3575
3576static u32 hwsp_offset(const struct i915_request *rq)
3577{
3578 const struct intel_timeline_cacheline *cl;
3579
3580 /* Before the request is executed, the timeline/cacheline is fixed */
3581
3582 cl = rcu_dereference_protected(rq->hwsp_cacheline, 1)(rq->hwsp_cacheline);
3583 if (cl)
3584 return cl->ggtt_offset;
3585
3586 return rcu_dereference_protected(rq->timeline, 1)(rq->timeline)->hwsp_offset;
3587}
3588
3589static int gen8_emit_init_breadcrumb(struct i915_request *rq)
3590{
3591 u32 *cs;
3592
3593 GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq))((void)0);
3594 if (!i915_request_timeline(rq)->has_initial_breadcrumb)
3595 return 0;
3596
3597 cs = intel_ring_begin(rq, 6);
3598 if (IS_ERR(cs))
3599 return PTR_ERR(cs);
3600
3601 /*
3602 * Check if we have been preempted before we even get started.
3603 *
3604 * After this point i915_request_started() reports true, even if
3605 * we get preempted and so are no longer running.
3606 */
3607 *cs++ = MI_ARB_CHECK(((0x05) << 23) | (0));
3608 *cs++ = MI_NOOP(((0) << 23) | (0));
3609
3610 *cs++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2)) | MI_USE_GGTT(1 << 22);
3611 *cs++ = hwsp_offset(rq);
3612 *cs++ = 0;
3613 *cs++ = rq->fence.seqno - 1;
3614
3615 intel_ring_advance(rq, cs);
3616
3617 /* Record the updated position of the request's payload */
3618 rq->infix = intel_ring_offset(rq, cs);
3619
3620 __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
3621
3622 return 0;
3623}
3624
3625static int emit_pdps(struct i915_request *rq)
3626{
3627 const struct intel_engine_cs * const engine = rq->engine;
3628 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
3629 int err, i;
3630 u32 *cs;
3631
3632 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915))((void)0);
3633
3634 /*
3635 * Beware ye of the dragons, this sequence is magic!
3636 *
3637 * Small changes to this sequence can cause anything from
3638 * GPU hangs to forcewake errors and machine lockups!
3639 */
3640
3641 /* Flush any residual operations from the context load */
3642 err = engine->emit_flush(rq, EMIT_FLUSH(1UL << (1)));
3643 if (err)
3644 return err;
3645
3646 /* Magic required to prevent forcewake errors! */
3647 err = engine->emit_flush(rq, EMIT_INVALIDATE(1UL << (0)));
3648 if (err)
3649 return err;
3650
3651 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES4 + 2);
3652 if (IS_ERR(cs))
3653 return PTR_ERR(cs);
3654
3655 /* Ensure the LRI have landed before we invalidate & continue */
3656 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES)(((0x22) << 23) | (2*(2 * 4)-1)) | MI_LRI_FORCE_POSTED(1<<12);
3657 for (i = GEN8_3LVL_PDPES4; i--; ) {
3658 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
3659 u32 base = engine->mmio_base;
3660
3661 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)((const i915_reg_t){ .reg = ((base) + 0x270 + (i) * 8 + 4) }));
3662 *cs++ = upper_32_bits(pd_daddr)((u32)(((pd_daddr) >> 16) >> 16));
3663 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)((const i915_reg_t){ .reg = ((base) + 0x270 + (i) * 8) }));
3664 *cs++ = lower_32_bits(pd_daddr)((u32)(pd_daddr));
3665 }
3666 *cs++ = MI_NOOP(((0) << 23) | (0));
3667
3668 intel_ring_advance(rq, cs);
3669
3670 return 0;
3671}
3672
3673static int execlists_request_alloc(struct i915_request *request)
3674{
3675 int ret;
3676
3677 GEM_BUG_ON(!intel_context_is_pinned(request->context))((void)0);
3678
3679 /*
3680 * Flush enough space to reduce the likelihood of waiting after
3681 * we start building the request - in which case we will just
3682 * have to repeat work.
3683 */
3684 request->reserved_space += EXECLISTS_REQUEST_SIZE64;
3685
3686 /*
3687 * Note that after this point, we have committed to using
3688 * this request as it is being used to both track the
3689 * state of engine initialisation and liveness of the
3690 * golden renderstate above. Think twice before you try
3691 * to cancel/unwind this request now.
3692 */
3693
3694 if (!i915_vm_is_4lvl(request->context->vm)) {
3695 ret = emit_pdps(request);
3696 if (ret)
3697 return ret;
3698 }
3699
3700 /* Unconditionally invalidate GPU caches and TLBs. */
3701 ret = request->engine->emit_flush(request, EMIT_INVALIDATE(1UL << (0)));
3702 if (ret)
3703 return ret;
3704
3705 request->reserved_space -= EXECLISTS_REQUEST_SIZE64;
3706 return 0;
3707}
3708
3709/*
3710 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
3711 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
3712 * but there is a slight complication as this is applied in WA batch where the
3713 * values are only initialized once so we cannot take register value at the
3714 * beginning and reuse it further; hence we save its value to memory, upload a
3715 * constant value with bit21 set and then we restore it back with the saved value.
3716 * To simplify the WA, a constant value is formed by using the default value
3717 * of this register. This shouldn't be a problem because we are only modifying
3718 * it for a short period and this batch is non-preemptible. We can of course
3719 * use additional instructions that read the actual value of the register
3720 * at that time and set our bit of interest but it makes the WA complicated.
3721 *
3722 * This WA is also required for Gen9 so extracting as a function avoids
3723 * code duplication.
3724 */
3725static u32 *
3726gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
3727{
3728 /* NB no one else is allowed to scribble over scratch + 256! */
3729 *batch++ = MI_STORE_REGISTER_MEM_GEN8(((0x24) << 23) | (2)) | MI_SRM_LRM_GLOBAL_GTT(1<<22);
3730 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4((const i915_reg_t){ .reg = (0xb118) }));
3731 *batch++ = intel_gt_scratch_offset(engine->gt,
3732 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3733 *batch++ = 0;
3734
3735 *batch++ = MI_LOAD_REGISTER_IMM(1)(((0x22) << 23) | (2*(1)-1));
3736 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4((const i915_reg_t){ .reg = (0xb118) }));
3737 *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES(1 << 21);
3738
3739 batch = gen8_emit_pipe_control(batch,
3740 PIPE_CONTROL_CS_STALL(1<<20) |
3741 PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5),
3742 0);
3743
3744 *batch++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) | MI_SRM_LRM_GLOBAL_GTT(1<<22);
3745 *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4((const i915_reg_t){ .reg = (0xb118) }));
3746 *batch++ = intel_gt_scratch_offset(engine->gt,
3747 INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
3748 *batch++ = 0;
3749
3750 return batch;
3751}
3752
3753/*
3754 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
3755 * initialized at the beginning and shared across all contexts but this field
3756 * helps us to have multiple batches at different offsets and select them based
3757 * on a criterion. At the moment this batch always starts at the beginning of the page
3758 * and at this point we don't have multiple wa_ctx batch buffers.
3759 *
3760 * The number of WAs applied is not known at the beginning; we use this field
3761 * to return the number of DWORDs written.
3762 *
3763 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
3764 * so it adds NOOPs as padding to make it cacheline aligned.
3765 * MI_BATCH_BUFFER_END will be added to the per-ctx batch and both of them together
3766 * make a complete batch buffer.
3767 */
3768static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3769{
3770 /* WaDisableCtxRestoreArbitration:bdw,chv */
3771 *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0);
3772
3773 /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
3774 if (IS_BROADWELL(engine->i915)IS_PLATFORM(engine->i915, INTEL_BROADWELL))
3775 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3776
3777 /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
3778 /* Actual scratch location is at 128 bytes offset */
3779 batch = gen8_emit_pipe_control(batch,
3780 PIPE_CONTROL_FLUSH_L3(1<<27) |
3781 PIPE_CONTROL_STORE_DATA_INDEX(1<<21) |
3782 PIPE_CONTROL_CS_STALL(1<<20) |
3783 PIPE_CONTROL_QW_WRITE(1<<14),
3784 LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
3785
3786 *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0);
3787
3788 /* Pad to end of cacheline */
3789 while ((unsigned long)batch % CACHELINE_BYTES64)
3790 *batch++ = MI_NOOP(((0) << 23) | (0));
3791
3792 /*
3793 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
3794 * execution depends on the length specified in terms of cache lines
3795 * in the register CTX_RCS_INDIRECT_CTX
3796 */
3797
3798 return batch;
3799}
3800
3801struct lri {
3802 i915_reg_t reg;
3803 u32 value;
3804};
3805
3806static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
3807{
3808 GEM_BUG_ON(!count || count > 63)((void)0);
3809
3810 *batch++ = MI_LOAD_REGISTER_IMM(count)(((0x22) << 23) | (2*(count)-1));
3811 do {
3812 *batch++ = i915_mmio_reg_offset(lri->reg);
3813 *batch++ = lri->value;
3814 } while (lri++, --count);
3815 *batch++ = MI_NOOP(((0) << 23) | (0));
3816
3817 return batch;
3818}
3819
3820static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3821{
3822 static const struct lri lri[] = {
3823 /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
3824 {
3825 COMMON_SLICE_CHICKEN2((const i915_reg_t){ .reg = (0x7014) }),
3826 __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,(((1 << 12)) << 16 | (0))
3827 0)(((1 << 12)) << 16 | (0)),
3828 },
3829
3830 /* BSpec: 11391 */
3831 {
3832 FF_SLICE_CHICKEN((const i915_reg_t){ .reg = (0x2088) }),
3833 __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,(((1 << 1)) << 16 | ((1 << 1)))
3834 FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX)(((1 << 1)) << 16 | ((1 << 1))),
3835 },
3836
3837 /* BSpec: 11299 */
3838 {
3839 _3D_CHICKEN3((const i915_reg_t){ .reg = (0x2090) }),
3840 __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,(((1 << 12)) << 16 | ((1 << 12)))
3841 _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX)(((1 << 12)) << 16 | ((1 << 12))),
3842 }
3843 };
3844
3845 *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0);
3846
3847 /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
3848 batch = gen8_emit_flush_coherentl3_wa(engine, batch);
3849
3850 /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
3851 batch = gen8_emit_pipe_control(batch,
3852 PIPE_CONTROL_FLUSH_L3(1<<27) |
3853 PIPE_CONTROL_STORE_DATA_INDEX(1<<21) |
3854 PIPE_CONTROL_CS_STALL(1<<20) |
3855 PIPE_CONTROL_QW_WRITE(1<<14),
3856 LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
3857
3858 batch = emit_lri(batch, lri, ARRAY_SIZE(lri)(sizeof((lri)) / sizeof((lri)[0])));
3859
3860 /* WaMediaPoolStateCmdInWABB:bxt,glk */
3861 if (HAS_POOLED_EU(engine->i915)((&(engine->i915)->__info)->has_pooled_eu)) {
3862 /*
3863 * EU pool configuration is setup along with golden context
3864 * during context initialization. This value depends on
3865 * device type (2x6 or 3x6) and needs to be updated based
3866 * on which subslice is disabled, especially for 2x6
3867 * devices; however, it is safe to load the default
3868 * configuration of a 3x6 device instead of masking off
3869 * the corresponding bits because HW ignores bits of a disabled
3870 * subslice and drops down to appropriate config. Please
3871 * see render_state_setup() in i915_gem_render_state.c for
3872 * possible configurations, to avoid duplication they are
3873 * not shown here again.
3874 */
3875 *batch++ = GEN9_MEDIA_POOL_STATE((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4
)
;
3876 *batch++ = GEN9_MEDIA_POOL_ENABLE(1 << 31);
3877 *batch++ = 0x00777000;
3878 *batch++ = 0;
3879 *batch++ = 0;
3880 *batch++ = 0;
3881 }
3882
3883 *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0);
3884
3885 /* Pad to end of cacheline */
3886 while ((unsigned long)batch % CACHELINE_BYTES64)
3887 *batch++ = MI_NOOP(((0) << 23) | (0));
3888
3889 return batch;
3890}
3891
3892static u32 *
3893gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
3894{
3895 int i;
3896
3897 /*
3898 * WaPipeControlBefore3DStateSamplePattern: cnl
3899 *
3900 * Ensure the engine is idle prior to programming a
3901 * 3DSTATE_SAMPLE_PATTERN during a context restore.
3902 */
3903 batch = gen8_emit_pipe_control(batch,
3904 PIPE_CONTROL_CS_STALL(1<<20),
3905 0);
3906 /*
3907 * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
3908 * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
3909 * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
3910 * confusing. Since gen8_emit_pipe_control() already advances the
3911 * batch by 6 dwords, we advance the other 10 here, completing a
3912 * cacheline. It's not clear if the workaround requires this padding
3913 * before other commands, or if it's just the regular padding we would
3914 * already have for the workaround bb, so leave it here for now.
3915 */
3916 for (i = 0; i < 10; i++)
3917 *batch++ = MI_NOOP(((0) << 23) | (0));
3918
3919 /* Pad to end of cacheline */
3920 while ((unsigned long)batch % CACHELINE_BYTES64)
3921 *batch++ = MI_NOOP(((0) << 23) | (0));
3922
3923 return batch;
3924}
3925
3926#define CTX_WA_BB_OBJ_SIZE((1 << 12)) (PAGE_SIZE(1 << 12))
3927
3928static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
3929{
3930 struct drm_i915_gem_object *obj;
3931 struct i915_vma *vma;
3932 int err;
3933
3934 obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE((1 << 12)));
3935 if (IS_ERR(obj))
3936 return PTR_ERR(obj);
3937
3938 vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL((void *)0));
3939 if (IS_ERR(vma)) {
3940 err = PTR_ERR(vma);
3941 goto err;
3942 }
3943
3944 err = i915_ggtt_pin(vma, NULL((void *)0), 0, PIN_HIGH(1ULL << (5)));
3945 if (err)
3946 goto err;
3947
3948 engine->wa_ctx.vma = vma;
3949 return 0;
3950
3951err:
3952 i915_gem_object_put(obj);
3953 return err;
3954}
3955
3956static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
3957{
3958 i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
3959
3960 /* Called on error unwind, clear all flags to prevent further use */
3961 memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx))__builtin_memset((&engine->wa_ctx), (0), (sizeof(engine
->wa_ctx)))
;
3962}
3963
3964typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
3965
3966static int intel_init_workaround_bb(struct intel_engine_cs *engine)
3967{
3968 struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
3969 struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
3970 &wa_ctx->per_ctx };
3971 wa_bb_func_t wa_bb_fn[2];
3972 void *batch, *batch_ptr;
3973 unsigned int i;
3974 int ret;
3975
3976 if (engine->class != RENDER_CLASS0)
3977 return 0;
3978
3979 switch (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen)) {
3980 case 12:
3981 case 11:
3982 return 0;
3983 case 10:
3984 wa_bb_fn[0] = gen10_init_indirectctx_bb;
3985 wa_bb_fn[1] = NULL((void *)0);
3986 break;
3987 case 9:
3988 wa_bb_fn[0] = gen9_init_indirectctx_bb;
3989 wa_bb_fn[1] = NULL((void *)0);
3990 break;
3991 case 8:
3992 wa_bb_fn[0] = gen8_init_indirectctx_bb;
3993 wa_bb_fn[1] = NULL((void *)0);
3994 break;
3995 default:
3996 MISSING_CASE(INTEL_GEN(engine->i915));
3997 return 0;
3998 }
3999
4000 ret = lrc_setup_wa_ctx(engine);
4001 if (ret) {
4002 drm_dbg(&engine->i915->drm,
4003 "Failed to setup context WA page: %d\n", ret);
4004 return ret;
4005 }
4006
4007 batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
4008
4009 /*
4010 * Emit the two workaround batch buffers, recording the offset from the
4011 * start of the workaround batch buffer object for each and their
4012 * respective sizes.
4013 */
4014 batch_ptr = batch;
4015 for (i = 0; i < ARRAY_SIZE(wa_bb_fn)(sizeof((wa_bb_fn)) / sizeof((wa_bb_fn)[0])); i++) {
4016 wa_bb[i]->offset = batch_ptr - batch;
4017 if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
4018 CACHELINE_BYTES))) {
4019 ret = -EINVAL22;
4020 break;
4021 }
4022 if (wa_bb_fn[i])
4023 batch_ptr = wa_bb_fn[i](engine, batch_ptr);
4024 wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
4025 }
4026 GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE)((void)0);
4027
4028 __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
4029 __i915_gem_object_release_map(wa_ctx->vma->obj);
4030 if (ret)
4031 lrc_destroy_wa_ctx(engine);
4032
4033 return ret;
4034}
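
For orientation, a sketch of the layout the loop above produces inside the wa_ctx object (offsets depend on what each wa_bb_fn emits; only the cacheline alignment is checked, and wa_bb_fn[1] is NULL for the gens handled here):

/* +0                     indirect_ctx batch (wa_bb_fn[0], padded to a cacheline)
 * +indirect_ctx.size     per_ctx batch (empty here, size 0)
 * ...                    unused up to CTX_WA_BB_OBJ_SIZE (one page)
 */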
4035
4036static void reset_csb_pointers(struct intel_engine_cs *engine)
4037{
4038 struct intel_engine_execlists * const execlists = &engine->execlists;
4039 const unsigned int reset_value = execlists->csb_size - 1;
4040
4041 ring_set_paused(engine, 0);
4042
4043 /*
4044 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
4045 * Bludgeon them with a mmio update to be sure.
4046 */
4047 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4048 0xffff << 16 | reset_value << 8 | reset_value);
4049 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4050
4051 /*
4052 * After a reset, the HW starts writing into CSB entry [0]. We
4053 * therefore have to set our HEAD pointer back one entry so that
4054 * the *first* entry we check is entry 0. To complicate this further,
4055 * as we don't wait for the first interrupt after reset, we have to
4056 * fake the HW write to point back to the last entry so that our
4057 * inline comparison of our cached head position against the last HW
4058 * write works even before the first interrupt.
4059 */
4060 execlists->csb_head = reset_value;
4061 WRITE_ONCE(*execlists->csb_write, reset_value);
4062 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0); /* Make sure this is visible to HW (paranoia?) */
4063
4064 /* Check that the GPU does indeed update the CSB entries! */
4065 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64));
4066 invalidate_csb_entries(&execlists->csb_status[0],
4067 &execlists->csb_status[reset_value]);
4068
4069 /* Once more for luck and our trusty paranoia */
4070 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
4071 0xffff << 16 | reset_value << 8 | reset_value);
4072 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
4073
4074 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value)((void)0);
4075}
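
A minimal sketch of the wrap-around the comment above relies on; csb_next() is a hypothetical helper, not part of the driver, and assumes csb_size entries indexed 0..csb_size-1:

static inline unsigned int csb_next(unsigned int head, unsigned int csb_size)
{
	return (head + 1) % csb_size; /* (csb_size - 1) + 1 wraps back to entry 0 */
}

Starting csb_head at csb_size - 1 therefore makes entry 0, the first slot the HW writes after a reset, the first entry the driver examines.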
4076
4077static void execlists_sanitize(struct intel_engine_cs *engine)
4078{
4079 /*
4080 * Poison residual state on resume, in case the suspend didn't!
4081 *
4082 * We have to assume that across suspend/resume (or other loss
4083 * of control) that the contents of our pinned buffers has been
4084 * lost, replaced by garbage. Since this doesn't always happen,
4085 * let's poison such state so that we more quickly spot when
4086 * we falsely assume it has been preserved.
4087 */
4088 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
4089 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
4090
4091 reset_csb_pointers(engine);
4092
4093 /*
4094 * The kernel_context HWSP is stored in the status_page. As above,
4095 * that may be lost on resume/initialisation, and so we need to
4096 * reset the value in the HWSP.
4097 */
4098 intel_timeline_reset_seqno(engine->kernel_context->timeline);
4099
4100 /* And scrub the dirty cachelines for the HWSP */
4101 clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
4102}
4103
4104static void enable_error_interrupt(struct intel_engine_cs *engine)
4105{
4106 u32 status;
4107
4108 engine->execlists.error_interrupt = 0;
4109 ENGINE_WRITE(engine, RING_EMR, ~0u);
4110 ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
4111
4112 status = ENGINE_READ(engine, RING_ESR);
4113 if (unlikely(status)__builtin_expect(!!(status), 0)) {
4114 drm_err(&engine->i915->drm,
4115 "engine '%s' resumed still in error: %08x\n",
4116 engine->name, status);
4117 __intel_gt_reset(engine->gt, engine->mask);
4118 }
4119
4120 /*
4121 * On current gen8+, we have 2 signals to play with
4122 *
4123 * - I915_ERROR_INSTRUCTION (bit 0)
4124 *
4125 * Generate an error if the command parser encounters an invalid
4126 * instruction
4127 *
4128 * This is a fatal error.
4129 *
4130 * - CP_PRIV (bit 2)
4131 *
4132 * Generate an error on privilege violation (where the CP replaces
4133 * the instruction with a no-op). This also fires for writes into
4134 * read-only scratch pages.
4135 *
4136 * This is a non-fatal error, parsing continues.
4137 *
4138 * * there are a few others defined for odd HW that we do not use
4139 *
4140 * Since CP_PRIV fires for cases where we have chosen to ignore the
4141 * error (as the HW is validating and suppressing the mistakes), we
4142 * only unmask the instruction error bit.
4143 */
4144 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION);
4145}
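
Illustration of the final mask written above, using the bit values visible in the macro expansions of this report (I915_ERROR_INSTRUCTION is bit 0):

/* RING_EMR = ~I915_ERROR_INSTRUCTION = ~(1 << 0) = 0xfffffffe
 *
 * i.e. only the invalid-instruction error is unmasked; CP_PRIV (bit 2) and
 * everything else stay masked, as the comment explains.
 */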
4146
4147static void enable_execlists(struct intel_engine_cs *engine)
4148{
4149 u32 mode;
4150
4151 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
4152
4153 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
4154
4155 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11)
4156 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
4157 else
4158 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
4159 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
4160
4161 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
4162
4163 ENGINE_WRITE_FW(engine,
4164 RING_HWS_PGA,
4165 i915_ggtt_offset(engine->status_page.vma));
4166 ENGINE_POSTING_READ(engine, RING_HWS_PGA);
4167
4168 enable_error_interrupt(engine);
4169
4170 engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
4171}
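
The RING_MODE and RING_MI_MODE writes above target "masked bit" registers. Per the _MASKED_BIT_ENABLE()/_MASKED_BIT_DISABLE() expansions shown in this report, the upper 16 bits of the written value select which bits may change and the lower 16 bits carry the new value, for example:

/* _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE) == (1 << 15) << 16 | (1 << 15)
 * _MASKED_BIT_DISABLE(STOP_RING)          == (1 << 8)  << 16 | 0
 */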
4172
4173static bool_Bool unexpected_starting_state(struct intel_engine_cs *engine)
4174{
4175 bool_Bool unexpected = false0;
4176
4177 if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
4178 drm_dbg(&engine->i915->drm,
4179 "STOP_RING still set in RING_MI_MODE\n");
4180 unexpected = true1;
4181 }
4182
4183 return unexpected;
4184}
4185
4186static int execlists_resume(struct intel_engine_cs *engine)
4187{
4188 intel_mocs_init_engine(engine);
4189
4190 intel_breadcrumbs_reset(engine->breadcrumbs);
4191
4192 if (GEM_SHOW_DEBUG()(0) && unexpected_starting_state(engine)) {
4193 struct drm_printer p = drm_debug_printer(__func__);
4194
4195 intel_engine_dump(engine, &p, NULL((void *)0));
4196 }
4197
4198 enable_execlists(engine);
4199
4200 return 0;
4201}
4202
4203static void execlists_reset_prepare(struct intel_engine_cs *engine)
4204{
4205 struct intel_engine_execlists * const execlists = &engine->execlists;
4206 unsigned long flags;
4207
4208 ENGINE_TRACE(engine, "depth<-%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
4209 atomic_read(&execlists->tasklet.count))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
4210
4211 /*
4212 * Prevent request submission to the hardware until we have
4213 * completed the reset in i915_gem_reset_finish(). If a request
4214 * is completed by one engine, it may then queue a request
4215 * to a second via its execlists->tasklet *just* as we are
4216 * calling engine->resume() and also writing the ELSP.
4217 * Turning off the execlists->tasklet until the reset is over
4218 * prevents the race.
4219 */
4220 __tasklet_disable_sync_once(&execlists->tasklet);
4221 GEM_BUG_ON(!reset_in_progress(execlists))((void)0);
4222
4223 /* And flush any current direct submission. */
4224 spin_lock_irqsave(&engine->active.lock, flags);
4225 spin_unlock_irqrestore(&engine->active.lock, flags);
4226
4227 /*
4228 * We stop engines, otherwise we might get failed reset and a
4229 * dead gpu (on elk). Also as modern gpu as kbl can suffer
4230 * from system hang if batchbuffer is progressing when
4231 * the reset is issued, regardless of READY_TO_RESET ack.
4232 * Thus assume it is best to stop engines on all gens
4233 * where we have a gpu reset.
4234 *
4235 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
4236 *
4237 * FIXME: Wa for more modern gens needs to be validated
4238 */
4239 ring_set_paused(engine, 1);
4240 intel_engine_stop_cs(engine);
4241
4242 engine->execlists.reset_ccid = active_ccid(engine);
4243}
4244
4245static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
4246{
4247 int x;
4248
4249 x = lrc_ring_mi_mode(engine);
4250 if (x != -1) {
4251 regs[x + 1] &= ~STOP_RING(1 << 8);
4252 regs[x + 1] |= STOP_RING(1 << 8) << 16;
4253 }
4254}
4255
4256static void __execlists_reset_reg_state(const struct intel_context *ce,
4257 const struct intel_engine_cs *engine)
4258{
4259 u32 *regs = ce->lrc_reg_state;
4260
4261 __reset_stop_ring(regs, engine);
4262}
4263
4264static void __execlists_reset(struct intel_engine_cs *engine, bool_Bool stalled)
4265{
4266 struct intel_engine_execlists * const execlists = &engine->execlists;
4267 struct intel_context *ce;
4268 struct i915_request *rq;
4269 u32 head;
4270
4271 mb()do { __asm volatile("mfence" ::: "memory"); } while (0); /* paranoia: read the CSB pointers from after the reset */
4272 clflush((vaddr_t)execlists->csb_write);
4273 mb()do { __asm volatile("mfence" ::: "memory"); } while (0);
4274
4275 process_csb(engine); /* drain preemption events */
4276
4277 /* Following the reset, we need to reload the CSB read/write pointers */
4278 reset_csb_pointers(engine);
4279
4280 /*
4281 * Save the currently executing context, even if we completed
4282 * its request, it was still running at the time of the
4283 * reset and will have been clobbered.
4284 */
4285 rq = active_context(engine, engine->execlists.reset_ccid);
4286 if (!rq)
4287 goto unwind;
4288
4289 ce = rq->context;
4290 GEM_BUG_ON(!i915_vma_is_pinned(ce->state))((void)0);
4291
4292 if (i915_request_completed(rq)) {
4293 /* Idle context; tidy up the ring so we can restart afresh */
4294 head = intel_ring_wrap(ce->ring, rq->tail);
4295 goto out_replay;
4296 }
4297
4298 /* We still have requests in-flight; the engine should be active */
4299 GEM_BUG_ON(!intel_engine_pm_is_awake(engine))((void)0);
4300
4301 /* Context has requests still in-flight; it should not be idle! */
4302 GEM_BUG_ON(i915_active_is_idle(&ce->active))((void)0);
4303
4304 rq = active_request(ce->timeline, rq);
4305 head = intel_ring_wrap(ce->ring, rq->head);
4306 GEM_BUG_ON(head == ce->ring->tail)((void)0);
4307
4308 /*
4309 * If this request hasn't started yet, e.g. it is waiting on a
4310 * semaphore, we need to avoid skipping the request or else we
4311 * break the signaling chain. However, if the context is corrupt
4312 * the request will not restart and we will be stuck with a wedged
4313 * device. It is quite often the case that if we issue a reset
4314 * while the GPU is loading the context image, that the context
4315 * image becomes corrupt.
4316 *
4317 * Otherwise, if we have not started yet, the request should replay
4318 * perfectly and we do not need to flag the result as being erroneous.
4319 */
4320 if (!i915_request_started(rq))
4321 goto out_replay;
4322
4323 /*
4324 * If the request was innocent, we leave the request in the ELSP
4325 * and will try to replay it on restarting. The context image may
4326 * have been corrupted by the reset, in which case we may have
4327 * to service a new GPU hang, but more likely we can continue on
4328 * without impact.
4329 *
4330 * If the request was guilty, we presume the context is corrupt
4331 * and have to at least restore the RING register in the context
4332 * image back to the expected values to skip over the guilty request.
4333 */
4334 __i915_request_reset(rq, stalled);
4335
4336 /*
4337 * We want a simple context + ring to execute the breadcrumb update.
4338 * We cannot rely on the context being intact across the GPU hang,
4339 * so clear it and rebuild just what we need for the breadcrumb.
4340 * All pending requests for this context will be zapped, and any
4341 * future request will be after userspace has had the opportunity
4342 * to recreate its own state.
4343 */
4344out_replay:
4345 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
4346 head, ce->ring->tail)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
4347 __execlists_reset_reg_state(ce, engine);
4348 __execlists_update_reg_state(ce, engine, head);
4349 ce->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2)); /* paranoid: GPU was reset! */
4350
4351unwind:
4352 /* Push back any incomplete requests for replay after the reset. */
4353 cancel_port_requests(execlists);
4354 __unwind_incomplete_requests(engine);
4355}
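
For orientation only, a paraphrase of the replay head chosen by the code above:

/* rq completed           head = wrap(rq->tail)  restart cleanly after it
 * rq not yet started     head = wrap(rq->head)  replay it untouched
 * rq already started     head = wrap(rq->head)  after __i915_request_reset(),
 *                                               skipping it if judged guilty
 *
 * In every out_replay case CTX_DESC_FORCE_RESTORE is set on the context, so
 * the (possibly clobbered) image is reloaded from memory.
 */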
4356
4357static void execlists_reset_rewind(struct intel_engine_cs *engine, bool_Bool stalled)
4358{
4359 unsigned long flags;
4360
4361 ENGINE_TRACE(engine, "\n");
4362
4363 spin_lock_irqsave(&engine->active.lock, flags);
4364
4365 __execlists_reset(engine, stalled);
4366
4367 spin_unlock_irqrestore(&engine->active.lock, flags);
4368}
4369
4370static void nop_submission_tasklet(unsigned long data)
4371{
4372 struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
4373
4374 /* The driver is wedged; don't process any more events. */
4375 WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN);
4376}
4377
4378static void execlists_reset_cancel(struct intel_engine_cs *engine)
4379{
4380 struct intel_engine_execlists * const execlists = &engine->execlists;
4381 struct i915_request *rq, *rn;
4382 struct rb_node *rb;
4383 unsigned long flags;
4384
4385 ENGINE_TRACE(engine, "\n");
4386
4387 /*
4388 * Before we call engine->cancel_requests(), we should have exclusive
4389 * access to the submission state. This is arranged for us by the
4390 * caller disabling the interrupt generation, the tasklet and other
4391 * threads that may then access the same state, giving us a free hand
4392 * to reset state. However, we still need to let lockdep be aware that
4393 * we know this state may be accessed in hardirq context, so we
4394 * disable the irq around this manipulation and we want to keep
4395 * the spinlock focused on its duties and not accidentally conflate
4396 * coverage to the submission's irq state. (Similarly, although we
4397 * shouldn't need to disable irq around the manipulation of the
4398 * submission's irq state, we also wish to remind ourselves that
4399 * it is irq state.)
4400 */
4401 spin_lock_irqsave(&engine->active.lock, flags);
4402
4403 __execlists_reset(engine, true1);
4404
4405 /* Mark all executing requests as skipped. */
4406 list_for_each_entry(rq, &engine->active.requests, sched.link)
4408
4409 /* Flush the queued requests to the timeline list (for retiring). */
4410 while ((rb = rb_first_cached(&execlists->queue))) {
4411 struct i915_priolist *p = to_priolist(rb);
4412 int i;
4413
4414 priolist_for_each_request_consume(rq, rn, p, i) {
4415 mark_eio(rq);
4416 __i915_request_submit(rq);
4417 }
4418
4419 rb_erase_cached(&p->node, &execlists->queue);
4420 i915_priolist_free(p);
4421 }
4422
4423 /* On-hold requests will be flushed to timeline upon their release */
4424 list_for_each_entry(rq, &engine->active.hold, sched.link)
4425 mark_eio(rq);
4426
4427 /* Cancel all attached virtual engines */
4428 while ((rb = rb_first_cached(&execlists->virtual))) {
4429 struct virtual_engine *ve =
4430 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4431
4432 rb_erase_cached(rb, &execlists->virtual);
4433 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
4434
4435 spin_lock(&ve->base.active.lock)mtx_enter(&ve->base.active.lock);
4436 rq = fetch_and_zero(&ve->request);
4437 if (rq) {
4438 mark_eio(rq);
4439
4440 rq->engine = engine;
4441 __i915_request_submit(rq);
4442 i915_request_put(rq);
4443
4444 ve->base.execlists.queue_priority_hint = INT_MIN(-0x7fffffff-1);
4445 }
4446 spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock);
4447 }
4448
4449 /* Remaining _unready_ requests will be nop'ed when submitted */
4450
4451 execlists->queue_priority_hint = INT_MIN(-0x7fffffff-1);
4452 execlists->queue = RB_ROOT_CACHED(struct rb_root_cached) { ((void *)0) };
4453
4454 GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet))((void)0);
4455 execlists->tasklet.func = nop_submission_tasklet;
4456
4457 spin_unlock_irqrestore(&engine->active.lock, flags);
4458}
4459
4460static void execlists_reset_finish(struct intel_engine_cs *engine)
4461{
4462 struct intel_engine_execlists * const execlists = &engine->execlists;
4463
4464 /*
4465 * After a GPU reset, we may have requests to replay. Do so now while
4466 * we still have the forcewake to be sure that the GPU is not allowed
4467 * to sleep before we restart and reload a context.
4468 */
4469 GEM_BUG_ON(!reset_in_progress(execlists))((void)0);
4470 if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
4471 execlists->tasklet.func(execlists->tasklet.data);
4472
4473 if (__tasklet_enable(&execlists->tasklet))
4474 /* And kick in case we missed a new request submission. */
4475 tasklet_hi_schedule(&execlists->tasklet);
4476 ENGINE_TRACE(engine, "depth->%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
4477 atomic_read(&execlists->tasklet.count))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
4478}
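
A rough sketch of how the four hooks above are intended to nest; the callers live in the GT reset code outside this file, so this is only an orientation aid, not a verbatim call sequence:

/* execlists_reset_prepare(engine);          disable the tasklet, stop the CS
 *     ...hardware reset of the engine(s)...
 * execlists_reset_rewind(engine, stalled);  or execlists_reset_cancel() when wedging
 * execlists_reset_finish(engine);           replay the queue, re-enable the tasklet
 */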
4479
4480static int gen8_emit_bb_start_noarb(struct i915_request *rq,
4481 u64 offset, u32 len,
4482 const unsigned int flags)
4483{
4484 u32 *cs;
4485
4486 cs = intel_ring_begin(rq, 4);
4487 if (IS_ERR(cs))
4488 return PTR_ERR(cs);
4489
4490 /*
4491 * WaDisableCtxRestoreArbitration:bdw,chv
4492 *
4493 * We don't need to perform MI_ARB_ENABLE as often as we do (in
4494 * particular all the gen that do not need the w/a at all!), if we
4495 * took care to make sure that on every switch into this context
4496 * (both ordinary and for preemption) that arbitration was enabled
4497 * we would be fine. However, for gen8 there is another w/a that
4498 * requires us to not preempt inside GPGPU execution, so we keep
4499 * arbitration disabled for gen8 batches. Arbitration will be
4500 * re-enabled before we close the request
4501 * (engine->emit_fini_breadcrumb).
4502 */
4503 *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0);
4504
4505 /* FIXME(BDW+): Address space and security selectors. */
4506 *cs++ = MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) | (1)) |
4507 (flags & I915_DISPATCH_SECURE(1UL << (0)) ? 0 : BIT(8)(1UL << (8)));
4508 *cs++ = lower_32_bits(offset)((u32)(offset));
4509 *cs++ = upper_32_bits(offset)((u32)(((offset) >> 16) >> 16));
4510
4511 intel_ring_advance(rq, cs);
4512
4513 return 0;
4514}
4515
4516static int gen8_emit_bb_start(struct i915_request *rq,
4517 u64 offset, u32 len,
4518 const unsigned int flags)
4519{
4520 u32 *cs;
4521
4522 cs = intel_ring_begin(rq, 6);
4523 if (IS_ERR(cs))
4524 return PTR_ERR(cs);
4525
4526 *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0);
4527
4528 *cs++ = MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) | (1)) |
4529 (flags & I915_DISPATCH_SECURE(1UL << (0)) ? 0 : BIT(8)(1UL << (8)));
4530 *cs++ = lower_32_bits(offset)((u32)(offset));
4531 *cs++ = upper_32_bits(offset)((u32)(((offset) >> 16) >> 16));
4532
4533 *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0);
4534 *cs++ = MI_NOOP(((0) << 23) | (0));
4535
4536 intel_ring_advance(rq, cs);
4537
4538 return 0;
4539}
4540
4541static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
4542{
4543 ENGINE_WRITE(engine, RING_IMR,
4544 ~(engine->irq_enable_mask | engine->irq_keep_mask));
4545 ENGINE_POSTING_READ(engine, RING_IMR);
4546}
4547
4548static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
4549{
4550 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
4551}
4552
4553static int gen8_emit_flush(struct i915_request *request, u32 mode)
4554{
4555 u32 cmd, *cs;
4556
4557 cs = intel_ring_begin(request, 4);
4558 if (IS_ERR(cs))
4559 return PTR_ERR(cs);
4560
4561 cmd = MI_FLUSH_DW(((0x26) << 23) | (1)) + 1;
4562
4563 /* We always require a command barrier so that subsequent
4564 * commands, such as breadcrumb interrupts, are strictly ordered
4565 * wrt the contents of the write cache being flushed to memory
4566 * (and thus being coherent from the CPU).
4567 */
4568 cmd |= MI_FLUSH_DW_STORE_INDEX(1<<21) | MI_FLUSH_DW_OP_STOREDW(1<<14);
4569
4570 if (mode & EMIT_INVALIDATE(1UL << (0))) {
4571 cmd |= MI_INVALIDATE_TLB(1<<18);
4572 if (request->engine->class == VIDEO_DECODE_CLASS1)
4573 cmd |= MI_INVALIDATE_BSD(1<<7);
4574 }
4575
4576 *cs++ = cmd;
4577 *cs++ = LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32));
4578 *cs++ = 0; /* upper addr */
4579 *cs++ = 0; /* value */
4580 intel_ring_advance(request, cs);
4581
4582 return 0;
4583}
4584
4585static int gen8_emit_flush_render(struct i915_request *request,
4586 u32 mode)
4587{
4588 bool_Bool vf_flush_wa = false0, dc_flush_wa = false0;
4589 u32 *cs, flags = 0;
4590 int len;
4591
4592 flags |= PIPE_CONTROL_CS_STALL(1<<20);
4593
4594 if (mode & EMIT_FLUSH(1UL << (1))) {
4595 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12);
4596 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0);
4597 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5);
4598 flags |= PIPE_CONTROL_FLUSH_ENABLE(1<<7);
4599 }
4600
4601 if (mode & EMIT_INVALIDATE(1UL << (0))) {
4602 flags |= PIPE_CONTROL_TLB_INVALIDATE(1<<18);
4603 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11);
4604 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10);
4605 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4);
4606 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3);
4607 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2);
4608 flags |= PIPE_CONTROL_QW_WRITE(1<<14);
4609 flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21);
4610
4611 /*
4612 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
4613 * pipe control.
4614 */
4615 if (IS_GEN(request->engine->i915, 9))
4616 vf_flush_wa = true1;
4617
4618 /* WaForGAMHang:kbl */
4619 if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0))
4620 dc_flush_wa = true1;
4621 }
4622
4623 len = 6;
4624
4625 if (vf_flush_wa)
4626 len += 6;
4627
4628 if (dc_flush_wa)
4629 len += 12;
4630
4631 cs = intel_ring_begin(request, len);
4632 if (IS_ERR(cs))
4633 return PTR_ERR(cs);
4634
4635 if (vf_flush_wa)
4636 cs = gen8_emit_pipe_control(cs, 0, 0);
4637
4638 if (dc_flush_wa)
4639 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5),
4640 0);
4641
4642 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
4643
4644 if (dc_flush_wa)
4645 cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL(1<<20), 0);
4646
4647 intel_ring_advance(request, cs);
4648
4649 return 0;
4650}
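
A worked instance of the length arithmetic above, assuming both workarounds apply (a gen9 kbl part needing WaForGAMHang); every gen8_emit_pipe_control() occupies 6 dwords:

/* len = 6                  main PIPE_CONTROL
 *     + 6  (vf_flush_wa)   NULL PIPE_CONTROL before VF_CACHE_INVALIDATE
 *     + 12 (dc_flush_wa)   DC-flush PIPE_CONTROL before + CS-stall PIPE_CONTROL after
 *     = 24 dwords requested from intel_ring_begin()
 */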
4651
4652static int gen11_emit_flush_render(struct i915_request *request,
4653 u32 mode)
4654{
4655 if (mode & EMIT_FLUSH(1UL << (1))) {
4656 u32 *cs;
4657 u32 flags = 0;
4658
4659 flags |= PIPE_CONTROL_CS_STALL(1<<20);
4660
4661 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28);
4662 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12);
4663 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0);
4664 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5);
4665 flags |= PIPE_CONTROL_FLUSH_ENABLE(1<<7);
4666 flags |= PIPE_CONTROL_QW_WRITE(1<<14);
4667 flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21);
4668
4669 cs = intel_ring_begin(request, 6);
4670 if (IS_ERR(cs))
4671 return PTR_ERR(cs);
4672
4673 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
4674 intel_ring_advance(request, cs);
4675 }
4676
4677 if (mode & EMIT_INVALIDATE(1UL << (0))) {
4678 u32 *cs;
4679 u32 flags = 0;
4680
4681 flags |= PIPE_CONTROL_CS_STALL(1<<20);
4682
4683 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE(1<<29);
4684 flags |= PIPE_CONTROL_TLB_INVALIDATE(1<<18);
4685 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11);
4686 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10);
4687 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4);
4688 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3);
4689 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2);
4690 flags |= PIPE_CONTROL_QW_WRITE(1<<14);
4691 flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21);
4692
4693 cs = intel_ring_begin(request, 6);
4694 if (IS_ERR(cs))
4695 return PTR_ERR(cs);
4696
4697 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
4698 intel_ring_advance(request, cs);
4699 }
4700
4701 return 0;
4702}
4703
4704static u32 preparser_disable(bool_Bool state)
4705{
4706 return MI_ARB_CHECK(((0x05) << 23) | (0)) | 1 << 8 | state;
4707}
4708
4709static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
4710{
4711 static const i915_reg_t vd[] = {
4712 GEN12_VD0_AUX_NV((const i915_reg_t){ .reg = (0x4218) }),
4713 GEN12_VD1_AUX_NV((const i915_reg_t){ .reg = (0x4228) }),
4714 GEN12_VD2_AUX_NV((const i915_reg_t){ .reg = (0x4298) }),
4715 GEN12_VD3_AUX_NV((const i915_reg_t){ .reg = (0x42A8) }),
4716 };
4717
4718 static const i915_reg_t ve[] = {
4719 GEN12_VE0_AUX_NV((const i915_reg_t){ .reg = (0x4238) }),
4720 GEN12_VE1_AUX_NV((const i915_reg_t){ .reg = (0x42B8) }),
4721 };
4722
4723 if (engine->class == VIDEO_DECODE_CLASS1)
4724 return vd[engine->instance];
4725
4726 if (engine->class == VIDEO_ENHANCEMENT_CLASS2)
4727 return ve[engine->instance];
4728
4729 GEM_BUG_ON("unknown aux_inv_reg\n")((void)0);
4730
4731 return INVALID_MMIO_REG((const i915_reg_t){ .reg = (0) });
4732}
4733
4734static u32 *
4735gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
4736{
4737 *cs++ = MI_LOAD_REGISTER_IMM(1)(((0x22) << 23) | (2*(1)-1));
4738 *cs++ = i915_mmio_reg_offset(inv_reg);
4739 *cs++ = AUX_INV((u32)((1UL << (0)) + 0));
4740 *cs++ = MI_NOOP(((0) << 23) | (0));
4741
4742 return cs;
4743}
4744
4745static int gen12_emit_flush_render(struct i915_request *request,
4746 u32 mode)
4747{
4748 if (mode & EMIT_FLUSH(1UL << (1))) {
4749 u32 flags = 0;
4750 u32 *cs;
4751
4752 flags |= PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28);
4753 flags |= PIPE_CONTROL_FLUSH_L3(1<<27);
4754 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12);
4755 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0);
4756 /* Wa_1409600907:tgl */
4757 flags |= PIPE_CONTROL_DEPTH_STALL(1<<13);
4758 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5);
4759 flags |= PIPE_CONTROL_FLUSH_ENABLE(1<<7);
4760
4761 flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21);
4762 flags |= PIPE_CONTROL_QW_WRITE(1<<14);
4763
4764 flags |= PIPE_CONTROL_CS_STALL(1<<20);
4765
4766 cs = intel_ring_begin(request, 6);
4767 if (IS_ERR(cs))
4768 return PTR_ERR(cs);
4769
4770 cs = gen12_emit_pipe_control(cs,
4771 PIPE_CONTROL0_HDC_PIPELINE_FLUSH((u32)((1UL << (9)) + 0)),
4772 flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
4773 intel_ring_advance(request, cs);
4774 }
4775
4776 if (mode & EMIT_INVALIDATE(1UL << (0))) {
4777 u32 flags = 0;
4778 u32 *cs;
4779
4780 flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE(1<<29);
4781 flags |= PIPE_CONTROL_TLB_INVALIDATE(1<<18);
4782 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11);
4783 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10);
4784 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4);
4785 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3);
4786 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2);
4787
4788 flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21);
4789 flags |= PIPE_CONTROL_QW_WRITE(1<<14);
4790
4791 flags |= PIPE_CONTROL_CS_STALL(1<<20);
4792
4793 cs = intel_ring_begin(request, 8 + 4);
4794 if (IS_ERR(cs))
4795 return PTR_ERR(cs);
4796
4797 /*
4798 * Prevent the pre-parser from skipping past the TLB
4799 * invalidate and loading a stale page for the batch
4800 * buffer / request payload.
4801 */
4802 *cs++ = preparser_disable(true1);
4803
4804 cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)));
4805
4806 /* hsdes: 1809175790 */
4807 cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV((const i915_reg_t){ .reg = (0x4208) }), cs);
4808
4809 *cs++ = preparser_disable(false0);
4810 intel_ring_advance(request, cs);
4811 }
4812
4813 return 0;
4814}
4815
4816static int gen12_emit_flush(struct i915_request *request, u32 mode)
4817{
4818 intel_engine_mask_t aux_inv = 0;
4819 u32 cmd, *cs;
4820
4821 cmd = 4;
4822 if (mode & EMIT_INVALIDATE(1UL << (0)))
4823 cmd += 2;
4824 if (mode & EMIT_INVALIDATE(1UL << (0)))
4825 aux_inv = request->engine->mask & ~BIT(BCS0)(1UL << (BCS0));
4826 if (aux_inv)
4827 cmd += 2 * hweight8(aux_inv) + 2;
4828
4829 cs = intel_ring_begin(request, cmd);
4830 if (IS_ERR(cs))
4831 return PTR_ERR(cs);
4832
4833 if (mode & EMIT_INVALIDATE(1UL << (0)))
4834 *cs++ = preparser_disable(true1);
4835
4836 cmd = MI_FLUSH_DW(((0x26) << 23) | (1)) + 1;
4837
4838 /* We always require a command barrier so that subsequent
4839 * commands, such as breadcrumb interrupts, are strictly ordered
4840 * wrt the contents of the write cache being flushed to memory
4841 * (and thus being coherent from the CPU).
4842 */
4843 cmd |= MI_FLUSH_DW_STORE_INDEX(1<<21) | MI_FLUSH_DW_OP_STOREDW(1<<14);
4844
4845 if (mode & EMIT_INVALIDATE(1UL << (0))) {
4846 cmd |= MI_INVALIDATE_TLB(1<<18);
4847 if (request->engine->class == VIDEO_DECODE_CLASS1)
4848 cmd |= MI_INVALIDATE_BSD(1<<7);
4849 }
4850
4851 *cs++ = cmd;
4852 *cs++ = LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32));
4853 *cs++ = 0; /* upper addr */
4854 *cs++ = 0; /* value */
4855
4856 if (aux_inv) { /* hsdes: 1809175790 */
4857 struct intel_engine_cs *engine;
4858 unsigned int tmp;
4859
4860 *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv))(((0x22) << 23) | (2*(hweight8(aux_inv))-1));
4861 for_each_engine_masked(engine, request->engine->gt,
4862 aux_inv, tmp) {
4863 *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
4864 *cs++ = AUX_INV((u32)((1UL << (0)) + 0));
4865 }
4866 *cs++ = MI_NOOP(((0) << 23) | (0));
4867 }
4868
4869 if (mode & EMIT_INVALIDATE(1UL << (0)))
4870 *cs++ = preparser_disable(false0);
4871
4872 intel_ring_advance(request, cs);
4873
4874 return 0;
4875}
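
A worked example of the dword count accumulated in 'cmd' above, for a hypothetical non-BCS request with EMIT_INVALIDATE set (the engine mix in aux_inv is illustrative):

/*   4                       MI_FLUSH_DW, address low, address high, value
 * + 2                       preparser_disable(true) and preparser_disable(false)
 * + 2 * hweight8(aux_inv)   one (register, AUX_INV) pair per engine in aux_inv
 * + 2                       the MI_LOAD_REGISTER_IMM header and trailing MI_NOOP
 */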
4876
4877static void assert_request_valid(struct i915_request *rq)
4878{
4879 struct intel_ring *ring __maybe_unused__attribute__((__unused__)) = rq->ring;
4880
4881 /* Can we unwind this request without appearing to go forwards? */
4882 GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0)((void)0);
4883}
4884
4885/*
4886 * Reserve space for 2 NOOPs at the end of each request to be
4887 * used as a workaround for not being allowed to do lite
4888 * restore with HEAD==TAIL (WaIdleLiteRestore).
4889 */
4890static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
4891{
4892 /* Ensure there's always at least one preemption point per-request. */
4893 *cs++ = MI_ARB_CHECK(((0x05) << 23) | (0));
4894 *cs++ = MI_NOOP(((0) << 23) | (0));
4895 request->wa_tail = intel_ring_offset(request, cs);
4896
4897 /* Check that entire request is less than half the ring */
4898 assert_request_valid(request);
4899
4900 return cs;
4901}
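
Sketch of the two tail offsets recorded per request (both are offsets into the ring):

/* rq->tail     end of the real breadcrumb; set by the *_fini_breadcrumb_tail()
 *              helpers below before they call gen8_emit_wa_tail()
 * rq->wa_tail  end of the extra MI_ARB_CHECK + MI_NOOP emitted here
 *
 * The gap between the two is the space the comment above reserves so that a
 * lite restore of the same context never sees HEAD == TAIL
 * (WaIdleLiteRestore).
 */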
4902
4903static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
4904{
4905 *cs++ = MI_SEMAPHORE_WAIT(((0x1c) << 23) | (2)) |
4906 MI_SEMAPHORE_GLOBAL_GTT(1<<22) |
4907 MI_SEMAPHORE_POLL(1 << 15) |
4908 MI_SEMAPHORE_SAD_EQ_SDD(4 << 12);
4909 *cs++ = 0;
4910 *cs++ = intel_hws_preempt_address(request->engine);
4911 *cs++ = 0;
4912
4913 return cs;
4914}
4915
4916static __always_inline__attribute__((__always_inline__)) u32*
4917gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
4918{
4919 *cs++ = MI_USER_INTERRUPT(((0x02) << 23) | (0));
4920
4921 *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0);
4922 if (intel_engine_has_semaphores(request->engine))
4923 cs = emit_preempt_busywait(request, cs);
4924
4925 request->tail = intel_ring_offset(request, cs);
4926 assert_ring_tail_valid(request->ring, request->tail);
4927
4928 return gen8_emit_wa_tail(request, cs);
4929}
4930
4931static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
4932{
4933 return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
4934}
4935
4936static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
4937{
4938 return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4939}
4940
4941static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4942{
4943 cs = gen8_emit_pipe_control(cs,
4944 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12) |
4945 PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0) |
4946 PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5),
4947 0);
4948
4949 /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
4950 cs = gen8_emit_ggtt_write_rcs(cs,
4951 request->fence.seqno,
4952 hwsp_offset(request),
4953 PIPE_CONTROL_FLUSH_ENABLE(1<<7) |
4954 PIPE_CONTROL_CS_STALL(1<<20));
4955
4956 return gen8_emit_fini_breadcrumb_tail(request, cs);
4957}
4958
4959static u32 *
4960gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
4961{
4962 cs = gen8_emit_ggtt_write_rcs(cs,
4963 request->fence.seqno,
4964 hwsp_offset(request),
4965 PIPE_CONTROL_CS_STALL(1<<20) |
4966 PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28) |
4967 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12) |
4968 PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0) |
4969 PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5) |
4970 PIPE_CONTROL_FLUSH_ENABLE(1<<7));
4971
4972 return gen8_emit_fini_breadcrumb_tail(request, cs);
4973}
4974
4975/*
4976 * Note that the CS instruction pre-parser will not stall on the breadcrumb
4977 * flush and will continue pre-fetching the instructions after it before the
4978 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
4979 * BB_START/END instructions, so, even though we might pre-fetch the preamble
4980 * of the next request before the memory has been flushed, we're guaranteed that
4981 * we won't access the batch itself too early.
4982 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
4983 * so, if the current request is modifying an instruction in the next request on
4984 * the same intel_context, we might pre-fetch and then execute the pre-update
4985 * instruction. To avoid this, the users of self-modifying code should either
4986 * disable the parser around the code emitting the memory writes, via a new flag
4987 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
4988 * the in-kernel use-cases we've opted to use a separate context, see
4989 * reloc_gpu() as an example.
4990 * All the above applies only to the instructions themselves. Non-inline data
4991 * used by the instructions is not pre-fetched.
4992 */
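
A minimal sketch of the bracketing the note above asks of self-modifying-code emitters on gen12+, built on the preparser_disable() helper defined earlier in this file. The helper below is hypothetical and not part of the driver; the MI_STORE_DWORD_IMM mention is illustrative:

static u32 *emit_self_modifying_writes(u32 *cs)
{
	/* Hypothetical emitter, sketch only. */
	*cs++ = preparser_disable(true);   /* stop pre-fetching past this point */
	/* ... the MI_STORE_DWORD_IMM writes that patch the next request ... */
	*cs++ = preparser_disable(false);  /* let the pre-parser run again */
	return cs;
}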
4993
4994static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
4995{
4996 *cs++ = MI_SEMAPHORE_WAIT_TOKEN(((0x1c) << 23) | (3)) |
4997 MI_SEMAPHORE_GLOBAL_GTT(1<<22) |
4998 MI_SEMAPHORE_POLL(1 << 15) |
4999 MI_SEMAPHORE_SAD_EQ_SDD(4 << 12);
5000 *cs++ = 0;
5001 *cs++ = intel_hws_preempt_address(request->engine);
5002 *cs++ = 0;
5003 *cs++ = 0;
5004 *cs++ = MI_NOOP(((0) << 23) | (0));
5005
5006 return cs;
5007}
5008
5009static __always_inline__attribute__((__always_inline__)) u32*
5010gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
5011{
5012 *cs++ = MI_USER_INTERRUPT(((0x02) << 23) | (0));
5013
5014 *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0);
5015 if (intel_engine_has_semaphores(request->engine))
5016 cs = gen12_emit_preempt_busywait(request, cs);
5017
5018 request->tail = intel_ring_offset(request, cs);
5019 assert_ring_tail_valid(request->ring, request->tail);
5020
5021 return gen8_emit_wa_tail(request, cs);
5022}
5023
5024static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
5025{
5026 /* XXX Stalling flush before seqno write; post-sync not */
5027 cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
5028 return gen12_emit_fini_breadcrumb_tail(rq, cs);
5029}
5030
5031static u32 *
5032gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
5033{
5034 cs = gen12_emit_ggtt_write_rcs(cs,
5035 request->fence.seqno,
5036 hwsp_offset(request),
5037 PIPE_CONTROL0_HDC_PIPELINE_FLUSH((u32)((1UL << (9)) + 0)),
5038 PIPE_CONTROL_CS_STALL(1<<20) |
5039 PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28) |
5040 PIPE_CONTROL_FLUSH_L3(1<<27) |
5041 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12) |
5042 PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0) |
5043 /* Wa_1409600907:tgl */
5044 PIPE_CONTROL_DEPTH_STALL(1<<13) |
5045 PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5) |
5046 PIPE_CONTROL_FLUSH_ENABLE(1<<7));
5047
5048 return gen12_emit_fini_breadcrumb_tail(request, cs);
5049}
5050
5051static void execlists_park(struct intel_engine_cs *engine)
5052{
5053 cancel_timer(&engine->execlists.timer);
5054 cancel_timer(&engine->execlists.preempt);
5055}
5056
5057void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
5058{
5059 engine->submit_request = execlists_submit_request;
5060 engine->schedule = i915_schedule;
5061 engine->execlists.tasklet.func = execlists_submission_tasklet;
5062
5063 engine->reset.prepare = execlists_reset_prepare;
5064 engine->reset.rewind = execlists_reset_rewind;
5065 engine->reset.cancel = execlists_reset_cancel;
5066 engine->reset.finish = execlists_reset_finish;
5067
5068 engine->park = execlists_park;
5069 engine->unpark = NULL((void *)0);
5070
5071 engine->flags |= I915_ENGINE_SUPPORTS_STATS(1UL << (1));
5072 if (!intel_vgpu_active(engine->i915)) {
5073 engine->flags |= I915_ENGINE_HAS_SEMAPHORES(1UL << (3));
5074 if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) {
5075 engine->flags |= I915_ENGINE_HAS_PREEMPTION(1UL << (2));
5076 if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)((1) != 0))
5077 engine->flags |= I915_ENGINE_HAS_TIMESLICES(1UL << (4));
5078 }
5079 }
5080
5081 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12)
5082 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO(1UL << (7));
5083
5084 if (intel_engine_has_preemption(engine))
5085 engine->emit_bb_start = gen8_emit_bb_start;
5086 else
5087 engine->emit_bb_start = gen8_emit_bb_start_noarb;
5088}
5089
5090static void execlists_shutdown(struct intel_engine_cs *engine)
5091{
5092 /* Synchronise with residual timers and any softirq they raise */
5093 del_timer_sync(&engine->execlists.timer)timeout_del_barrier((&engine->execlists.timer));
5094 del_timer_sync(&engine->execlists.preempt)timeout_del_barrier((&engine->execlists.preempt));
5095 tasklet_kill(&engine->execlists.tasklet);
5096}
5097
5098static void execlists_release(struct intel_engine_cs *engine)
5099{
5100 engine->sanitize = NULL((void *)0); /* no longer in control, nothing to sanitize */
5101
5102 execlists_shutdown(engine);
5103
5104 intel_engine_cleanup_common(engine);
5105 lrc_destroy_wa_ctx(engine);
5106}
5107
5108static void
5109logical_ring_default_vfuncs(struct intel_engine_cs *engine)
5110{
5111 /* Default vfuncs which can be overridden by each engine. */
5112
5113 engine->resume = execlists_resume;
5114
5115 engine->cops = &execlists_context_ops;
5116 engine->request_alloc = execlists_request_alloc;
5117
5118 engine->emit_flush = gen8_emit_flush;
5119 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
5120 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
5121 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) {
5122 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
5123 engine->emit_flush = gen12_emit_flush;
5124 }
5125 engine->set_default_submission = intel_execlists_set_default_submission;
5126
5127 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) < 11) {
5128 engine->irq_enable = gen8_logical_ring_enable_irq;
5129 engine->irq_disable = gen8_logical_ring_disable_irq;
5130 } else {
5131 /*
5132 * TODO: On Gen11 interrupt masks need to be clear
5133 * to allow C6 entry. Keep interrupts enabled
5134 * and take the hit of generating extra interrupts
5135 * until a more refined solution exists.
5136 */
5137 }
5138}
5139
5140static inline void
5141logical_ring_default_irqs(struct intel_engine_cs *engine)
5142{
5143 unsigned int shift = 0;
5144
5145 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) < 11) {
5146 const u8 irq_shifts[] = {
5147 [RCS0] = GEN8_RCS_IRQ_SHIFT0,
5148 [BCS0] = GEN8_BCS_IRQ_SHIFT16,
5149 [VCS0] = GEN8_VCS0_IRQ_SHIFT0,
5150 [VCS1] = GEN8_VCS1_IRQ_SHIFT16,
5151 [VECS0] = GEN8_VECS_IRQ_SHIFT0,
5152 };
5153
5154 shift = irq_shifts[engine->id];
5155 }
5156
5157 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT(1 << 0) << shift;
5158 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT(1 << 8) << shift;
5159 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT((u32)((1UL << (3)) + 0)) << shift;
5160 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT((u32)((1UL << (11)) + 0)) << shift;
5161}
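
A worked example for a pre-gen11 VCS1 engine (shift = 16), using the bit values visible in the expansions of this report:

/* irq_enable_mask = GT_RENDER_USER_INTERRUPT << 16          = bit 16
 * irq_keep_mask   = (GT_CONTEXT_SWITCH_INTERRUPT |
 *                    GT_CS_MASTER_ERROR_INTERRUPT |
 *                    GT_WAIT_SEMAPHORE_INTERRUPT) << 16      = bits 24, 19, 27
 */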
5162
5163static void rcs_submission_override(struct intel_engine_cs *engine)
5164{
5165 switch (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen)) {
5166 case 12:
5167 engine->emit_flush = gen12_emit_flush_render;
5168 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
5169 break;
5170 case 11:
5171 engine->emit_flush = gen11_emit_flush_render;
5172 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
5173 break;
5174 default:
5175 engine->emit_flush = gen8_emit_flush_render;
5176 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
5177 break;
5178 }
5179}
5180
5181int intel_execlists_submission_setup(struct intel_engine_cs *engine)
5182{
5183 struct intel_engine_execlists * const execlists = &engine->execlists;
5184 struct drm_i915_privateinteldrm_softc *i915 = engine->i915;
5185 struct intel_uncore *uncore = engine->uncore;
5186 u32 base = engine->mmio_base;
5187
5188 tasklet_init(&engine->execlists.tasklet,
5189 execlists_submission_tasklet, (unsigned long)engine);
5190#ifdef __linux__
5191 timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
5192 timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
5193#else
5194 timeout_set(&engine->execlists.timer, execlists_timeslice,
5195 &engine->execlists.timer);
5196 timeout_set(&engine->execlists.preempt, execlists_preempt,
5197 &engine->execlists.preempt);
5198#endif
5199
5200 logical_ring_default_vfuncs(engine);
5201 logical_ring_default_irqs(engine);
5202
5203 if (engine->class == RENDER_CLASS0)
5204 rcs_submission_override(engine);
5205
5206 if (intel_init_workaround_bb(engine))
5207 /*
5208 * We continue even if we fail to initialize WA batch
5209 * because we only expect rare glitches but nothing
5210 * critical to prevent us from using GPU
5211 */
5212 drm_err(&i915->drm, "WA batch buffer initialization failed\n");
5213
5214 if (HAS_LOGICAL_RING_ELSQ(i915)((&(i915)->__info)->has_logical_ring_elsq)) {
5215 execlists->submit_reg = uncore->regs +
5216 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)((const i915_reg_t){ .reg = ((base) + 0x510) }));
5217 execlists->ctrl_reg = uncore->regs +
5218 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)((const i915_reg_t){ .reg = ((base) + 0x550) }));
5219 } else {
5220 execlists->submit_reg = uncore->regs +
5221 i915_mmio_reg_offset(RING_ELSP(base)((const i915_reg_t){ .reg = ((base) + 0x230) }));
5222 }
5223
5224 execlists->csb_status =
5225 (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX0x10];
5226
5227 execlists->csb_write =
5228 &engine->status_page.addr[intel_hws_csb_write_index(i915)];
5229
5230 if (INTEL_GEN(i915)((&(i915)->__info)->gen) < 11)
5231 execlists->csb_size = GEN8_CSB_ENTRIES6;
5232 else
5233 execlists->csb_size = GEN11_CSB_ENTRIES12;
5234
5235 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11) {
5236 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT48 - 32);
5237 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT61 - 32);
5238 }
5239
5240 /* Finally, take ownership and responsibility for cleanup! */
5241 engine->sanitize = execlists_sanitize;
5242 engine->release = execlists_release;
5243
5244 return 0;
5245}
5246
5247static void init_common_reg_state(u32 * const regs,
5248 const struct intel_engine_cs *engine,
5249 const struct intel_ring *ring,
5250 bool_Bool inhibit)
5251{
5252 u32 ctl;
5253
5254 ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
5255 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
5256 if (inhibit)
5257 ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT(1 << 0);
5258 if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) < 11)
5259 ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5260 CTX_CTRL_RS_CTX_ENABLE);
5261 regs[CTX_CONTEXT_CONTROL(0x02 + 1)] = ctl;
5262
5263 regs[CTX_RING_CTL(0x0a + 1)] = RING_CTL_SIZE(ring->size)((ring->size) - (1 << 12)) | RING_VALID0x00000001;
5264 regs[CTX_TIMESTAMP(0x22 + 1)] = 0;
5265}
5266
5267static void init_wa_bb_reg_state(u32 * const regs,
5268 const struct intel_engine_cs *engine)
5269{
5270 const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
5271
5272 if (wa_ctx->per_ctx.size) {
5273 const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
5274
5275 GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1)((void)0);
5276 regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
5277 (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
5278 }
5279
5280 if (wa_ctx->indirect_ctx.size) {
5281 lrc_ring_setup_indirect_ctx(regs, engine,
5282 i915_ggtt_offset(wa_ctx->vma) +
5283 wa_ctx->indirect_ctx.offset,
5284 wa_ctx->indirect_ctx.size);
5285 }
5286}
5287
5288static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
5289{
5290 if (i915_vm_is_4lvl(&ppgtt->vm)) {
5291 /* 64b PPGTT (48bit canonical)
5292 * PDP0_DESCRIPTOR contains the base address to PML4 and
5293 * other PDP Descriptors are ignored.
5294 */
5295 ASSIGN_CTX_PML4(ppgtt, regs);
5296 } else {
5297 ASSIGN_CTX_PDP(ppgtt, regs, 3);
5298 ASSIGN_CTX_PDP(ppgtt, regs, 2);
5299 ASSIGN_CTX_PDP(ppgtt, regs, 1);
5300 ASSIGN_CTX_PDP(ppgtt, regs, 0);
5301 }
5302}
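
ASSIGN_CTX_PML4() and ASSIGN_CTX_PDP() each store one 64-bit page-directory DMA address as an upper/lower 32-bit pair of slots in the context image. A rough sketch of that split (hypothetical helper; slot indices are parameters here purely for illustration):

    #include <stdint.h>

    /* Write a 64-bit address into two context slots: the upper-dword
     * (UDW) entry and the lower-dword (LDW) entry. */
    static void assign_ctx_pdp(uint32_t *regs, unsigned int udw, unsigned int ldw,
                               uint64_t addr)
    {
            regs[udw] = (uint32_t)(addr >> 32);     /* upper 32 bits */
            regs[ldw] = (uint32_t)addr;             /* lower 32 bits */
    }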
5303
5304static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
5305{
5306 if (i915_is_ggtt(vm))
5307 return i915_vm_to_ggtt(vm)->alias;
5308 else
5309 return i915_vm_to_ppgtt(vm);
5310}
5311
5312static void execlists_init_reg_state(u32 *regs,
5313 const struct intel_context *ce,
5314 const struct intel_engine_cs *engine,
5315 const struct intel_ring *ring,
5316 bool inhibit)
5317{
5318 /*
5319 * A context is actually a big batch buffer with several
5320 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
5321 * values we are setting here are only for the first context restore:
5322 * on a subsequent save, the GPU will recreate this batchbuffer with new
5323 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
5324 * we are not initializing here).
5325 *
5326 * Must keep consistent with virtual_update_register_offsets().
5327 */
5328 set_offsets(regs, reg_offsets(engine), engine, inhibit);
5329
5330 init_common_reg_state(regs, engine, ring, inhibit);
5331 init_ppgtt_reg_state(regs, vm_alias(ce->vm));
5332
5333 init_wa_bb_reg_state(regs, engine);
5334
5335 __reset_stop_ring(regs, engine);
5336}
5337
5338static int
5339populate_lr_context(struct intel_context *ce,
5340 struct drm_i915_gem_object *ctx_obj,
5341 struct intel_engine_cs *engine,
5342 struct intel_ring *ring)
5343{
5344 bool inhibit = true;
5345 void *vaddr;
5346
5347 vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
5348 if (IS_ERR(vaddr)) {
5349 drm_dbg(&engine->i915->drm, "Could not map object pages!\n");
5350 return PTR_ERR(vaddr);
5351 }
5352
5353 set_redzone(vaddr, engine);
5354
5355 if (engine->default_state) {
5356#ifdef __linux__
5357 shmem_read(engine->default_state, 0,
5358 vaddr, engine->context_size);
5359#else
5360 uao_read(engine->default_state, 0,
5361 vaddr, engine->context_size);
5362#endif
5363 __set_bit(CONTEXT_VALID_BIT, &ce->flags);
5364 inhibit = false;
5365 }
5366
5367 /* Clear the ppHWSP (inc. per-context counters) */
5368 memset(vaddr, 0, PAGE_SIZE);
5369
5370 /*
5371 * The second page of the context object contains some registers which
5372 * must be set up prior to the first execution.
5373 */
5374 execlists_init_reg_state(vaddr + LRC_STATE_OFFSET,
5375 ce, engine, ring, inhibit);
5376
5377 __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
5378 i915_gem_object_unpin_map(ctx_obj);
5379 return 0;
5380}
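
populate_lr_context() copies the engine's golden default state into the object, clears the first page (the per-process HWSP and its counters) and then writes the register state one page in, at LRC_STATE_OFFSET. A simplified sketch of that layout (hypothetical helper, userspace C, fixed 4 KiB pages):

    #include <stdint.h>
    #include <string.h>

    #define SKETCH_PAGE_SIZE 4096u

    /* Page 0: ppHWSP (cleared). Page 1 onwards: register state. */
    static void populate_context_image(uint8_t *vaddr, size_t context_size,
                                       const uint8_t *default_state)
    {
            if (default_state)                      /* start from the golden image */
                    memcpy(vaddr, default_state, context_size);
            memset(vaddr, 0, SKETCH_PAGE_SIZE);     /* clear ppHWSP + counters */
            /* register state would then be written at vaddr + SKETCH_PAGE_SIZE */
    }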
5381
5382static struct intel_timeline *pinned_timeline(struct intel_context *ce)
5383{
5384 struct intel_timeline *tl = fetch_and_zero(&ce->timeline);
5385
5386 return intel_timeline_create_from_engine(ce->engine,
5387 page_unmask_bits(tl));
5388}
5389
5390static int __execlists_context_alloc(struct intel_context *ce,
5391 struct intel_engine_cs *engine)
5392{
5393 struct drm_i915_gem_object *ctx_obj;
5394 struct intel_ring *ring;
5395 struct i915_vma *vma;
5396 u32 context_size;
5397 int ret;
5398
5399 GEM_BUG_ON(ce->state);
5400 context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
5401
5402 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
5403 context_size += I915_GTT_PAGE_SIZE; /* for redzone */
5404
5405 if (INTEL_GEN(engine->i915) == 12) {
5406 ce->wa_bb_page = context_size / PAGE_SIZE;
5407 context_size += PAGE_SIZE;
5408 }
5409
5410 ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
5411 if (IS_ERR(ctx_obj))
5412 return PTR_ERR(ctx_obj);
5413
5414 vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
5415 if (IS_ERR(vma)) {
5416 ret = PTR_ERR(vma);
5417 goto error_deref_obj;
5418 }
5419
5420 if (!page_mask_bits(ce->timeline)) {
5421 struct intel_timeline *tl;
5422
5423 /*
5424 * Use the static global HWSP for the kernel context, and
5425 * a dynamically allocated cacheline for everyone else.
5426 */
5427 if (unlikely(ce->timeline))
5428 tl = pinned_timeline(ce);
5429 else
5430 tl = intel_timeline_create(engine->gt);
5431 if (IS_ERR(tl)) {
5432 ret = PTR_ERR(tl);
5433 goto error_deref_obj;
5434 }
5435
5436 ce->timeline = tl;
5437 }
5438
5439 ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
5440 if (IS_ERR(ring)) {
5441 ret = PTR_ERR(ring);
5442 goto error_deref_obj;
5443 }
5444
5445 ret = populate_lr_context(ce, ctx_obj, engine, ring);
5446 if (ret) {
5447 drm_dbg(&engine->i915->drm,
5448 "Failed to populate LRC: %d\n", ret);
5449 goto error_ring_free;
5450 }
5451
5452 ce->ring = ring;
5453 ce->state = vma;
5454
5455 return 0;
5456
5457error_ring_free:
5458 intel_ring_put(ring);
5459error_deref_obj:
5460 i915_gem_object_put(ctx_obj);
5461 return ret;
5462}
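
The sizing in __execlists_context_alloc() rounds the engine's context size up to a GTT page, then appends a page for the debug redzone (when that config is enabled) and, on Gen12, one more page for the per-context workaround batch recorded in ce->wa_bb_page. A small sketch of that arithmetic (hypothetical names; assumes the 4 KiB page size used throughout this file):

    #include <stdbool.h>
    #include <stdint.h>

    #define SKETCH_GTT_PAGE 4096ull

    static uint64_t round_up_page(uint64_t x)
    {
            return (x + SKETCH_GTT_PAGE - 1) / SKETCH_GTT_PAGE * SKETCH_GTT_PAGE;
    }

    static uint64_t lrc_object_size(uint64_t context_size, bool debug_redzone,
                                    bool gen12_wa_bb)
    {
            uint64_t size = round_up_page(context_size);

            if (debug_redzone)
                    size += SKETCH_GTT_PAGE;        /* trailing redzone page */
            if (gen12_wa_bb)
                    size += SKETCH_GTT_PAGE;        /* wa_bb page appended at the tail */
            return size;
    }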
5463
5464static struct list_head *virtual_queue(struct virtual_engine *ve)
5465{
5466 return &ve->base.execlists.default_priolist.requests[0];
5467}
5468
5469static void rcu_virtual_context_destroy(struct work_struct *wrk)
5470{
5471 struct virtual_engine *ve =
5472 container_of(wrk, typeof(*ve), rcu.work);
5473 unsigned int n;
5474
5475 GEM_BUG_ON(ve->context.inflight);
5476
5477 /* Preempt-to-busy may leave a stale request behind. */
5478 if (unlikely(ve->request)) {
5479 struct i915_request *old;
5480
5481 spin_lock_irq(&ve->base.active.lock);
5482
5483 old = fetch_and_zero(&ve->request);
5484 if (old) {
5485 GEM_BUG_ON(!i915_request_completed(old));
5486 __i915_request_submit(old);
5487 i915_request_put(old);
5488 }
5489
5490 spin_unlock_irq(&ve->base.active.lock);
5491 }
5492
5493 /*
5494 * Flush the tasklet in case it is still running on another core.
5495 *
5496 * This needs to be done before we remove ourselves from the siblings'
5497 * rbtrees as in the case it is running in parallel, it may reinsert
5498 * the rb_node into a sibling.
5499 */
5500 tasklet_kill(&ve->base.execlists.tasklet);
5501
5502 /* Decouple ourselves from the siblings, no more access allowed. */
5503 for (n = 0; n < ve->num_siblings; n++) {
5504 struct intel_engine_cs *sibling = ve->siblings[n];
5505 struct rb_node *node = &ve->nodes[sibling->id].rb;
5506
5507 if (RB_EMPTY_NODE(node))
5508 continue;
5509
5510 spin_lock_irq(&sibling->active.lock);
5511
5512 /* Detachment is lazily performed in the execlists tasklet */
5513 if (!RB_EMPTY_NODE(node))
5514 rb_erase_cached(node, &sibling->execlists.virtual);
5515
5516 spin_unlock_irq(&sibling->active.lock);
5517 }
5518 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
5519 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5520
5521 if (ve->context.state)
5522 __execlists_context_fini(&ve->context);
5523 intel_context_fini(&ve->context);
5524
5525 intel_breadcrumbs_free(ve->base.breadcrumbs);
5526 intel_engine_free_request_pool(&ve->base);
5527
5528 kfree(ve->bonds);
5529 kfree(ve);
5530}
5531
5532static void virtual_context_destroy(struct kref *kref)
5533{
5534 struct virtual_engine *ve =
5535 container_of(kref, typeof(*ve), context.ref);
5536
5537 GEM_BUG_ON(!list_empty(&ve->context.signals));
5538
5539 /*
5540 * When destroying the virtual engine, we have to be aware that
5541 * it may still be in use from a hardirq/softirq context causing
5542 * the resubmission of a completed request (background completion
5543 * due to preempt-to-busy). Before we can free the engine, we need
5544 * to flush the submission code and tasklets that are still potentially
5545 * accessing the engine. Flushing the tasklets requires process context,
5546 * and since we can guard the resubmit onto the engine with an RCU read
5547 * lock, we can delegate the free of the engine to an RCU worker.
5548 */
5549 INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
5550 queue_rcu_work(system_wq, &ve->rcu);
5551}
5552
5553static void virtual_engine_initial_hint(struct virtual_engine *ve)
5554{
5555 int swp;
5556
5557 /*
5558 * Pick a random sibling on starting to help spread the load around.
5559 *
5560 * New contexts are typically created with exactly the same order
5561 * of siblings, and often started in batches. Due to the way we iterate
5562 * the array of siblings when submitting requests, sibling[0] is
5563 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
5564 * randomised across the system, we also help spread the load by the
5565 * first engine we inspect being different each time.
5566 *
5567 * NB This does not force us to execute on this engine, it will just
5568 * typically be the first we inspect for submission.
5569 */
5570 swp = prandom_u32_max(ve->num_siblings);
5571 if (swp)
5572 swap(ve->siblings[swp], ve->siblings[0]);
5573}
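
virtual_engine_initial_hint() simply promotes a randomly chosen sibling to slot 0 so that different virtual engines probe the physical engines in different orders. The same idea in a standalone sketch (rand() stands in for prandom_u32_max(); the engine type is hypothetical):

    #include <stdlib.h>

    struct sketch_engine;

    static void initial_hint(struct sketch_engine **siblings, unsigned int count)
    {
            unsigned int swp = (unsigned int)rand() % count;        /* [0, count) */

            if (swp) {
                    struct sketch_engine *tmp = siblings[0];

                    siblings[0] = siblings[swp];
                    siblings[swp] = tmp;
            }
    }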
5574
5575static int virtual_context_alloc(struct intel_context *ce)
5576{
5577 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5578
5579 return __execlists_context_alloc(ce, ve->siblings[0]);
5580}
5581
5582static int virtual_context_pin(struct intel_context *ce, void *vaddr)
5583{
5584 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5585
5586 /* Note: we must use a real engine class for setting up reg state */
5587 return __execlists_context_pin(ce, ve->siblings[0], vaddr);
5588}
5589
5590static void virtual_context_enter(struct intel_context *ce)
5591{
5592 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5593 unsigned int n;
5594
5595 for (n = 0; n < ve->num_siblings; n++)
5596 intel_engine_pm_get(ve->siblings[n]);
5597
5598 intel_timeline_enter(ce->timeline);
5599}
5600
5601static void virtual_context_exit(struct intel_context *ce)
5602{
5603 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
5604 unsigned int n;
5605
5606 intel_timeline_exit(ce->timeline);
5607
5608 for (n = 0; n < ve->num_siblings; n++)
5609 intel_engine_pm_put(ve->siblings[n]);
5610}
5611
5612static const struct intel_context_ops virtual_context_ops = {
5613 .alloc = virtual_context_alloc,
5614
5615 .pre_pin = execlists_context_pre_pin,
5616 .pin = virtual_context_pin,
5617 .unpin = execlists_context_unpin,
5618 .post_unpin = execlists_context_post_unpin,
5619
5620 .enter = virtual_context_enter,
5621 .exit = virtual_context_exit,
5622
5623 .destroy = virtual_context_destroy,
5624};
5625
5626static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
5627{
5628 struct i915_request *rq;
5629 intel_engine_mask_t mask;
5630
5631 rq = READ_ONCE(ve->request);
5632 if (!rq)
5633 return 0;
5634
5635 /* The rq is ready for submission; rq->execution_mask is now stable. */
5636 mask = rq->execution_mask;
5637 if (unlikely(!mask)) {
5638 /* Invalid selection, submit to a random engine in error */
5639 i915_request_set_error_once(rq, -ENODEV);
5640 mask = ve->siblings[0]->mask;
5641 }
5642
5643 ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5644 rq->fence.context, rq->fence.seqno,
5645 mask, ve->base.execlists.queue_priority_hint);
5646
5647 return mask;
5648}
5649
5650static void virtual_submission_tasklet(unsigned long data)
5651{
5652 struct virtual_engine * const ve = (struct virtual_engine *)data;
5653 const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint);
5654 intel_engine_mask_t mask;
5655 unsigned int n;
5656
5657 rcu_read_lock();
5658 mask = virtual_submission_mask(ve);
5659 rcu_read_unlock();
5660 if (unlikely(!mask))
5661 return;
5662
5663 local_irq_disable();
5664 for (n = 0; n < ve->num_siblings; n++) {
5665 struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
5666 struct ve_node * const node = &ve->nodes[sibling->id];
5667 struct rb_node **parent, *rb;
5668 bool first;
5669
5670 if (!READ_ONCE(ve->request))
5671 break; /* already handled by a sibling's tasklet */
5672
5673 if (unlikely(!(mask & sibling->mask))) {
5674 if (!RB_EMPTY_NODE(&node->rb)) {
5675 spin_lock(&sibling->active.lock);
5676 rb_erase_cached(&node->rb,
5677 &sibling->execlists.virtual);
5678 RB_CLEAR_NODE(&node->rb);
5679 spin_unlock(&sibling->active.lock);
5680 }
5681 continue;
5682 }
5683
5684 spin_lock(&sibling->active.lock);
5685
5686 if (!RB_EMPTY_NODE(&node->rb)) {
5687 /*
5688 * Cheat and avoid rebalancing the tree if we can
5689 * reuse this node in situ.
5690 */
5691 first = rb_first_cached(&sibling->execlists.virtual) ==
5692 &node->rb;
5693 if (prio == node->prio || (prio > node->prio && first))
5694 goto submit_engine;
5695
5696 rb_erase_cached(&node->rb, &sibling->execlists.virtual);
5697 }
5698
5699 rb = NULL;
5700 first = true;
5701 parent = &sibling->execlists.virtual.rb_root.rb_node;
5702 while (*parent) {
5703 struct ve_node *other;
5704
5705 rb = *parent;
5706 other = rb_entry(rb, typeof(*other), rb);
5707 if (prio > other->prio) {
5708 parent = &rb->rb_left;
5709 } else {
5710 parent = &rb->rb_right;
5711 first = false;
5712 }
5713 }
5714
5715 rb_link_node(&node->rb, rb, parent);
5716 rb_insert_color_cached(&node->rb,
5717 &sibling->execlists.virtual,
5718 first);
5719
5720submit_engine:
5721 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
5722 node->prio = prio;
5723 if (first && prio > sibling->execlists.queue_priority_hint)
5724 tasklet_hi_schedule(&sibling->execlists.tasklet);
5725
5726 spin_unlock(&sibling->active.lock);
5727 }
5728 local_irq_enable();
5729}
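
The per-sibling queueing in virtual_submission_tasklet() keeps each sibling's tree of virtual-engine nodes ordered by priority, remembers whether the node became the highest-priority entry, and only kicks the sibling when it did and its priority beats the sibling's current queue_priority_hint. A simplified sketch of that ordering logic, with a sorted list standing in for the rbtree (hypothetical types, not the kernel rbtree API):

    #include <stdbool.h>

    struct sketch_node {
            int prio;
            struct sketch_node *next;
    };

    /* Insert 'n' keeping the list sorted by descending priority; return true
     * if it became the head, i.e. it would be dequeued first. */
    static bool insert_by_prio(struct sketch_node **head, struct sketch_node *n)
    {
            struct sketch_node **link = head;
            bool first = true;

            while (*link && (*link)->prio >= n->prio) {
                    link = &(*link)->next;
                    first = false;
            }
            n->next = *link;
            *link = n;
            return first;
    }

Only when the insertion returns true and the new priority exceeds the sibling's queue_priority_hint would the sibling's tasklet need to be scheduled, mirroring the check at line 5723.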
5730
5731static void virtual_submit_request(struct i915_request *rq)
5732{
5733 struct virtual_engine *ve = to_virtual_engine(rq->engine);
5734 struct i915_request *old;
5735 unsigned long flags;
5736
5737 ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5738 rq->fence.context,
5739 rq->fence.seqno);
5740
5741 GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
5742
5743 spin_lock_irqsave(&ve->base.active.lock, flags);
5744
5745 old = ve->request;
5746 if (old) { /* background completion event from preempt-to-busy */
5747 GEM_BUG_ON(!i915_request_completed(old));
5748 __i915_request_submit(old);
5749 i915_request_put(old);
5750 }
5751
5752 if (i915_request_completed(rq)) {
5753 __i915_request_submit(rq);
5754
5755 ve->base.execlists.queue_priority_hint = INT_MIN;
5756 ve->request = NULL;
5757 } else {
5758 ve->base.execlists.queue_priority_hint = rq_prio(rq);
5759 ve->request = i915_request_get(rq);
5760
5761 GEM_BUG_ON(!list_empty(virtual_queue(ve)));
5762 list_move_tail(&rq->sched.link, virtual_queue(ve));
5763
5764 tasklet_hi_schedule(&ve->base.execlists.tasklet);
5765 }
5766
5767 spin_unlock_irqrestore(&ve->base.active.lock, flags);
5768}
5769
5770static struct ve_bond *
5771virtual_find_bond(struct virtual_engine *ve,
5772 const struct intel_engine_cs *master)
5773{
5774 int i;
5775
5776 for (i = 0; i < ve->num_bonds; i++) {
5777 if (ve->bonds[i].master == master)
5778 return &ve->bonds[i];
5779 }
5780
5781 return NULL;
5782}
5783
5784static void
5785virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
5786{
5787 struct virtual_engine *ve = to_virtual_engine(rq->engine);
5788 intel_engine_mask_t allowed, exec;
5789 struct ve_bond *bond;
5790
5791 allowed = ~to_request(signal)->engine->mask;
5792
5793 bond = virtual_find_bond(ve, to_request(signal)->engine);
5794 if (bond)
5795 allowed &= bond->sibling_mask;
5796
5797 /* Restrict the bonded request to run on only the available engines */
5798 exec = READ_ONCE(rq->execution_mask);
5799 while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
5800 ;
5801
5802 /* Prevent the master from being re-run on the bonded engines */
5803 to_request(signal)->execution_mask &= ~allowed;
5804}
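
The try_cmpxchg() loop in virtual_bond_execute() atomically narrows rq->execution_mask to the allowed set, retrying with the freshly observed value whenever another writer races the update. A standalone sketch of the same pattern using the GCC/Clang __atomic builtins rather than the kernel helpers:

    #include <stdbool.h>
    #include <stdint.h>

    /* Atomically perform *mask &= allowed, tolerating concurrent writers. */
    static void restrict_execution_mask(uint32_t *mask, uint32_t allowed)
    {
            uint32_t cur = __atomic_load_n(mask, __ATOMIC_RELAXED);

            while (!__atomic_compare_exchange_n(mask, &cur, cur & allowed,
                                                false /* strong CAS */,
                                                __ATOMIC_RELAXED, __ATOMIC_RELAXED))
                    ;       /* 'cur' was refreshed by the failed CAS; retry */
    }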
5805
5806struct intel_context *
5807intel_execlists_create_virtual(struct intel_engine_cs **siblings,
5808 unsigned int count)
5809{
5810 struct virtual_engine *ve;
5811 unsigned int n;
5812 int err;
5813
5814 if (count == 0)
5815 return ERR_PTR(-EINVAL);
5816
5817 if (count == 1)
5818 return intel_context_create(siblings[0]);
5819
5820 ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
5821 if (!ve)
5822 return ERR_PTR(-ENOMEM);
5823
5824 ve->base.i915 = siblings[0]->i915;
5825 ve->base.gt = siblings[0]->gt;
5826 ve->base.uncore = siblings[0]->uncore;
5827 ve->base.id = -1;
5828
5829 ve->base.class = OTHER_CLASS;
5830 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5831 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5832 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
5833
5834 /*
5835 * The decision on whether to submit a request using semaphores
5836 * depends on the saturated state of the engine. We only compute
5837 * this during HW submission of the request, and we need for this
5838 * state to be globally applied to all requests being submitted
5839 * to this engine. Virtual engines encompass more than one physical
5840 * engine and so we cannot accurately tell in advance if one of those
5841 * engines is already saturated and so cannot afford to use a semaphore
5842 * and be pessimized in priority for doing so -- if we are the only
5843 * context using semaphores after all other clients have stopped, we
5844 * will be starved on the saturated system. Such a global switch for
5845 * semaphores is less than ideal, but alas is the current compromise.
5846 */
5847 ve->base.saturated = ALL_ENGINES;
5848
5849 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5850
5851 intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
5852 intel_engine_init_execlists(&ve->base);
5853
5854 ve->base.cops = &virtual_context_ops;
5855 ve->base.request_alloc = execlists_request_alloc;
5856
5857 ve->base.schedule = i915_schedule;
5858 ve->base.submit_request = virtual_submit_request;
5859 ve->base.bond_execute = virtual_bond_execute;
5860
5861 INIT_LIST_HEAD(virtual_queue(ve));
5862 ve->base.execlists.queue_priority_hint = INT_MIN;
5863 tasklet_init(&ve->base.execlists.tasklet,
5864 virtual_submission_tasklet,
5865 (unsigned long)ve);
5866
5867 intel_context_init(&ve->context, &ve->base);
5868
5869 ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
5870 if (!ve->base.breadcrumbs) {
5871 err = -ENOMEM;
5872 goto err_put;
5873 }
5874
5875 for (n = 0; n < count; n++) {
5876 struct intel_engine_cs *sibling = siblings[n];
5877
5878 GEM_BUG_ON(!is_power_of_2(sibling->mask));
5879 if (sibling->mask & ve->base.mask) {
5880 DRM_DEBUG("duplicate %s entry in load balancer\n",
5881 sibling->name);
5882 err = -EINVAL;
5883 goto err_put;
5884 }
5885
5886 /*
5887 * The virtual engine implementation is tightly coupled to
5888 * the execlists backend -- we push requests directly
5889 * into a tree inside each physical engine. We could support
5890 * layering if we handle cloning of the requests and
5891 * submitting a copy into each backend.
5892 */
5893 if (sibling->execlists.tasklet.func !=
5894 execlists_submission_tasklet) {
5895 err = -ENODEV;
5896 goto err_put;
5897 }
5898
5899 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
5900 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
5901
5902 ve->siblings[ve->num_siblings++] = sibling;
5903 ve->base.mask |= sibling->mask;
5904
5905 /*
5906 * All physical engines must be compatible for their emission
5907 * functions (as we build the instructions during request
5908 * construction and do not alter them before submission
5909 * on the physical engine). We use the engine class as a guide
5910 * here, although that could be refined.
5911 */
5912 if (ve->base.class != OTHER_CLASS) {
5913 if (ve->base.class != sibling->class) {
5914 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5915 sibling->class, ve->base.class);
5916 err = -EINVAL;
5917 goto err_put;
5918 }
5919 continue;
5920 }
5921
5922 ve->base.class = sibling->class;
5923 ve->base.uabi_class = sibling->uabi_class;
5924 snprintf(ve->base.name, sizeof(ve->base.name),
5925 "v%dx%d", ve->base.class, count);
5926 ve->base.context_size = sibling->context_size;
5927
5928 ve->base.emit_bb_start = sibling->emit_bb_start;
5929 ve->base.emit_flush = sibling->emit_flush;
5930 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
5931 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
5932 ve->base.emit_fini_breadcrumb_dw =
5933 sibling->emit_fini_breadcrumb_dw;
5934
5935 ve->base.flags = sibling->flags;
5936 }
5937
5938 ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
5939
5940 virtual_engine_initial_hint(ve);
5941 return &ve->context;
5942
5943err_put:
5944 intel_context_put(&ve->context);
5945 return ERR_PTR(err);
5946}
5947
5948struct intel_context *
5949intel_execlists_clone_virtual(struct intel_engine_cs *src)
5950{
5951 struct virtual_engine *se = to_virtual_engine(src);
5952 struct intel_context *dst;
5953
5954 dst = intel_execlists_create_virtual(se->siblings,
5955 se->num_siblings);
5956 if (IS_ERR(dst))
5957 return dst;
5958
5959 if (se->num_bonds) {
5960 struct virtual_engine *de = to_virtual_engine(dst->engine);
5961
5962 de->bonds = kmemdup(se->bonds,
5963 sizeof(*se->bonds) * se->num_bonds,
5964 GFP_KERNEL);
5965 if (!de->bonds) {
5966 intel_context_put(dst);
5967 return ERR_PTR(-ENOMEM);
5968 }
5969
5970 de->num_bonds = se->num_bonds;
5971 }
5972
5973 return dst;
5974}
5975
5976int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
5977 const struct intel_engine_cs *master,
5978 const struct intel_engine_cs *sibling)
5979{
5980 struct virtual_engine *ve = to_virtual_engine(engine);
5981 struct ve_bond *bond;
5982 int n;
5983
5984 /* Sanity check the sibling is part of the virtual engine */
5985 for (n = 0; n < ve->num_siblings; n++)
5986 if (sibling == ve->siblings[n])
5987 break;
5988 if (n == ve->num_siblings)
5989 return -EINVAL;
5990
5991 bond = virtual_find_bond(ve, master);
5992 if (bond) {
5993 bond->sibling_mask |= sibling->mask;
5994 return 0;
5995 }
5996
5997#ifdef __linux__
5998 bond = krealloc(ve->bonds,
5999 sizeof(*bond) * (ve->num_bonds + 1),
6000 GFP_KERNEL);
6001 if (!bond)
6002 return -ENOMEM;
6003#else
6004 bond = kmalloc(sizeof(*bond) * (ve->num_bonds + 1),
6005 GFP_KERNEL);
6006 if (!bond)
6007 return -ENOMEM;
6008
6009 memcpy(bond, ve->bonds, sizeof(*bond) * ve->num_bonds);
6010 kfree(ve->bonds);
6011#endif
6012
6013 bond[ve->num_bonds].master = master;
6014 bond[ve->num_bonds].sibling_mask = sibling->mask;
6015
6016 ve->bonds = bond;
6017 ve->num_bonds++;
6018
6019 return 0;
6020}
6021
6022struct intel_engine_cs *
6023intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
6024 unsigned int sibling)
6025{
6026 struct virtual_engine *ve = to_virtual_engine(engine);
6027
6028 if (sibling >= ve->num_siblings)
6029 return NULL;
6030
6031 return ve->siblings[sibling];
6032}
6033
6034void intel_execlists_show_requests(struct intel_engine_cs *engine,
6035 struct drm_printer *m,
6036 void (*show_request)(struct drm_printer *m,
6037 struct i915_request *rq,
6038 const char *prefix),
6039 unsigned int max)
6040{
6041 const struct intel_engine_execlists *execlists = &engine->execlists;
6042 struct i915_request *rq, *last;
6043 unsigned long flags;
6044 unsigned int count;
6045 struct rb_node *rb;
6046
6047 spin_lock_irqsave(&engine->active.lock, flags);
6048
6049 last = NULL;
6050 count = 0;
6051 list_for_each_entry(rq, &engine->active.requests, sched.link) {
6052 if (count++ < max - 1)
6053 show_request(m, rq, "\t\tE ");
6054 else
6055 last = rq;
6056 }
6057 if (last) {
6058 if (count > max) {
6059 drm_printf(m,
6060 "\t\t...skipping %d executing requests...\n",
6061 count - max);
6062 }
6063 show_request(m, last, "\t\tE ");
6064 }
6065
6066 if (execlists->switch_priority_hint != INT_MIN)
6067 drm_printf(m, "\t\tSwitch priority hint: %d\n",
6068 READ_ONCE(execlists->switch_priority_hint));
6069 if (execlists->queue_priority_hint != INT_MIN)
6070 drm_printf(m, "\t\tQueue priority hint: %d\n",
6071 READ_ONCE(execlists->queue_priority_hint));
6072
6073 last = NULL;
6074 count = 0;
6075 for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
6076 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
6077 int i;
6078
6079 priolist_for_each_request(rq, p, i) {
6080 if (count++ < max - 1)
6081 show_request(m, rq, "\t\tQ ");
6082 else
6083 last = rq;
6084 }
6085 }
6086 if (last) {
6087 if (count > max) {
6088 drm_printf(m,
6089 "\t\t...skipping %d queued requests...\n",
6090 count - max);
6091 }
6092 show_request(m, last, "\t\tQ ");
6093 }
6094
6095 last = NULL;
6096 count = 0;
6097 for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
6098 struct virtual_engine *ve =
6099 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
6100 struct i915_request *rq = READ_ONCE(ve->request);
6101
6102 if (rq) {
6103 if (count++ < max - 1)
6104 show_request(m, rq, "\t\tV ");
6105 else
6106 last = rq;
6107 }
6108 }
6109 if (last) {
6110 if (count > max) {
6111 drm_printf(m,
6112 "\t\t...skipping %d virtual requests...\n",
6113 count - max);
6114 }
6115 show_request(m, last, "\t\tV ");
6116 }
6117
6118 spin_unlock_irqrestore(&engine->active.lock, flags);
6119}
6120
6121void intel_lr_context_reset(struct intel_engine_cs *engine,
6122 struct intel_context *ce,
6123 u32 head,
6124 bool scrub)
6125{
6126 GEM_BUG_ON(!intel_context_is_pinned(ce));
6127
6128 /*
6129 * We want a simple context + ring to execute the breadcrumb update.
6130 * We cannot rely on the context being intact across the GPU hang,
6131 * so clear it and rebuild just what we need for the breadcrumb.
6132 * All pending requests for this context will be zapped, and any
6133 * future request will be after userspace has had the opportunity
6134 * to recreate its own state.
6135 */
6136 if (scrub)
6137 restore_default_state(ce, engine);
6138
6139 /* Rerun the request; its payload has been neutered (if guilty). */
6140 __execlists_update_reg_state(ce, engine, head);
6141}
6142
6143 bool
6144intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
6145{
6146 return engine->set_default_submission ==
6147 intel_execlists_set_default_submission;
6148}
6149
6150 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
6151#include "selftest_lrc.c"
6152#endif