Bug Summary

File: dev/pci/drm/i915/gt/intel_execlists_submission.c
Warning: line 2681, column 33
Dereference of null pointer

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name intel_execlists_submission.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2014 Intel Corporation
4 */
5
6/**
7 * DOC: Logical Rings, Logical Ring Contexts and Execlists
8 *
9 * Motivation:
10 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
11 * These expanded contexts enable a number of new abilities, especially
12 * "Execlists" (also implemented in this file).
13 *
14 * One of the main differences with the legacy HW contexts is that logical
15 * ring contexts incorporate many more things into the context's state, like
16 * PDPs or ringbuffer control registers:
17 *
18 * The reason why PDPs are included in the context is straightforward: as
19 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
20 * contained there means you don't need to do a ppgtt->switch_mm yourself;
21 * instead, the GPU will do it for you on the context switch.
22 *
23 * But, what about the ringbuffer control registers (head, tail, etc..)?
24 * shouldn't we just need a set of those per engine command streamer? This is
25 * where the name "Logical Rings" starts to make sense: by virtualizing the
26 * rings, the engine cs shifts to a new "ring buffer" with every context
27 * switch. When you want to submit a workload to the GPU you: A) choose your
28 * context, B) find its appropriate virtualized ring, C) write commands to it
29 * and then, finally, D) tell the GPU to switch to that context.
30 *
31 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
32 * to a context is via a context execution list, ergo "Execlists".
33 *
34 * LRC implementation:
35 * Regarding the creation of contexts, we have:
36 *
37 * - One global default context.
38 * - One local default context for each opened fd.
39 * - One local extra context for each context create ioctl call.
40 *
41 * Now that ringbuffers belong per-context (and not per-engine, like before)
42 * and that contexts are uniquely tied to a given engine (and not reusable,
43 * like before) we need:
44 *
45 * - One ringbuffer per-engine inside each context.
46 * - One backing object per-engine inside each context.
47 *
48 * The global default context starts its life with these new objects fully
49 * allocated and populated. The local default context for each opened fd is
50 * more complex, because we don't know at creation time which engine is going
51 * to use them. To handle this, we have implemented a deferred creation of LR
52 * contexts:
53 *
54 * The local context starts its life as a hollow or blank holder, that only
55 * gets populated for a given engine once we receive an execbuffer. If later
56 * on we receive another execbuffer ioctl for the same context but a different
57 * engine, we allocate/populate a new ringbuffer and context backing object and
58 * so on.
59 *
60 * Finally, regarding local contexts created using the ioctl call: as they are
61 * only allowed with the render ring, we can allocate & populate them right
62 * away (no need to defer anything, at least for now).
63 *
64 * Execlists implementation:
65 * Execlists are the new method by which, on gen8+ hardware, workloads are
66 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
67 * This method works as follows:
68 *
69 * When a request is committed, its commands (the BB start and any leading or
70 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
71 * for the appropriate context. The tail pointer in the hardware context is not
72 * updated at this time, but instead, kept by the driver in the ringbuffer
73 * structure. A structure representing this request is added to a request queue
74 * for the appropriate engine: this structure contains a copy of the context's
75 * tail after the request was written to the ring buffer and a pointer to the
76 * context itself.
77 *
78 * If the engine's request queue was empty before the request was added, the
79 * queue is processed immediately. Otherwise the queue will be processed during
80 * a context switch interrupt. In any case, elements on the queue will get sent
81 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
82 * globally unique 20-bit submission ID.
83 *
84 * When execution of a request completes, the GPU updates the context status
85 * buffer with a context complete event and generates a context switch interrupt.
86 * During the interrupt handling, the driver examines the events in the buffer:
87 * for each context complete event, if the announced ID matches that on the head
88 * of the request queue, then that request is retired and removed from the queue.
89 *
90 * After processing, if any requests were retired and the queue is not empty
91 * then a new execution list can be submitted. The two requests at the front of
92 * the queue are next to be submitted but since a context may not occur twice in
93 * an execution list, if subsequent requests have the same ID as the first then
94 * the two requests must be combined. This is done simply by discarding requests
95 * at the head of the queue until either only one request is left (in which case
96 * we use a NULL second context) or the first two requests have unique IDs.
97 *
98 * By always executing the first two requests in the queue the driver ensures
99 * that the GPU is kept as busy as possible. In the case where a single context
100 * completes but a second context is still executing, the request for this second
101 * context will be at the head of the queue when we remove the first one. This
102 * request will then be resubmitted along with a new request for a different context,
103 * which will cause the hardware to continue executing the second request and queue
104 * the new request (the GPU detects the condition of a context getting preempted
105 * with the same context and optimizes the context switch flow by not doing
106 * preemption, but just sampling the new tail pointer).
107 *
108 */
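As a concrete illustration of the pairing rule described above (the ELSP holds at most two distinct contexts, and same-context requests at the head of the queue are folded into the first submission), here is a minimal, self-contained userspace sketch. The names build_elsp_pair, struct req and ctx_id are invented for this illustration and are not part of the driver.

#include <stdio.h>

struct req { int ctx_id; };

/*
 * Fill up to two "ports" from the head of a software queue. Requests
 * that share the context of port 0 are merged behind it (their work is
 * carried by the same context submission) rather than given a port of
 * their own, mirroring the rule in the DOC comment above.
 */
static int build_elsp_pair(const struct req *q, int n,
			   const struct req *ports[2])
{
	int i, filled = 0;

	if (n == 0)
		return 0;

	ports[filled++] = &q[0];
	for (i = 1; i < n && filled < 2; i++) {
		if (q[i].ctx_id == ports[0]->ctx_id)
			continue;	/* same context: merged into port 0 */
		ports[filled++] = &q[i];
	}
	return filled;	/* 1 means a NULL second context is used */
}

int main(void)
{
	const struct req queue[] = { {1}, {1}, {2}, {3} };
	const struct req *ports[2] = { NULL, NULL };
	int n = build_elsp_pair(queue, 4, ports);

	printf("ports filled: %d (ctx %d, ctx %d)\n", n,
	       ports[0]->ctx_id, n > 1 ? ports[1]->ctx_id : -1);
	return 0;
}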
109#include <linux/interrupt.h>
110#include <linux/string_helpers.h>
111
112#include "i915_drv.h"
113#include "i915_trace.h"
114#include "i915_vgpu.h"
115#include "gen8_engine_cs.h"
116#include "intel_breadcrumbs.h"
117#include "intel_context.h"
118#include "intel_engine_heartbeat.h"
119#include "intel_engine_pm.h"
120#include "intel_engine_regs.h"
121#include "intel_engine_stats.h"
122#include "intel_execlists_submission.h"
123#include "intel_gt.h"
124#include "intel_gt_irq.h"
125#include "intel_gt_pm.h"
126#include "intel_gt_regs.h"
127#include "intel_gt_requests.h"
128#include "intel_lrc.h"
129#include "intel_lrc_reg.h"
130#include "intel_mocs.h"
131#include "intel_reset.h"
132#include "intel_ring.h"
133#include "intel_workarounds.h"
134#include "shmem_utils.h"
135
136#define RING_EXECLIST_QFULL		(1 << 0x2)
137#define RING_EXECLIST1_VALID		(1 << 0x3)
138#define RING_EXECLIST0_VALID		(1 << 0x4)
139#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
140#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
141#define RING_EXECLIST0_ACTIVE		(1 << 0x12)
142
143#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
144#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
145#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
146#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
147#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
148#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
149
150#define GEN8_CTX_STATUS_COMPLETED_MASK \
151	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
152
153#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */
154#define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */
155#define GEN12_CSB_SW_CTX_ID_MASK	GENMASK(25, 15)
156#define GEN12_IDLE_CTX_ID		0x7FF
157#define GEN12_CSB_CTX_VALID(csb_dw) \
158	(FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
159
160#define XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	BIT(1) /* upper csb dword */
161#define XEHP_CSB_SW_CTX_ID_MASK		GENMASK(31, 10)
162#define XEHP_IDLE_CTX_ID		0xFFFF
163#define XEHP_CSB_CTX_VALID(csb_dw) \
164	(FIELD_GET(XEHP_CSB_SW_CTX_ID_MASK, csb_dw) != XEHP_IDLE_CTX_ID)
165
166/* Typical size of the average request (2 pipecontrols and a MI_BB) */
167#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
168
169struct virtual_engine {
170 struct intel_engine_cs base;
171 struct intel_context context;
172 struct rcu_work rcu;
173
174 /*
175 * We allow only a single request through the virtual engine at a time
176 * (each request in the timeline waits for the completion fence of
177 * the previous before being submitted). By restricting ourselves to
178 * only submitting a single request, each request is placed on to a
179 * physical engine to maximise load spreading (by virtue of the late greedy
180 * scheduling -- each real engine takes the next available request
181 * upon idling).
182 */
183 struct i915_request *request;
184
185 /*
186 * We keep a rbtree of available virtual engines inside each physical
187 * engine, sorted by priority. Here we preallocate the nodes we need
188 * for the virtual engine, indexed by physical_engine->id.
189 */
190 struct ve_node {
191 struct rb_node rb;
192 int prio;
193 } nodes[I915_NUM_ENGINES];
194
195 /* And finally, which physical engines this virtual engine maps onto. */
196 unsigned int num_siblings;
197 struct intel_engine_cs *siblings[];
198};
199
200static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
201{
202 GEM_BUG_ON(!intel_engine_is_virtual(engine))((void)0);
203 return container_of(engine, struct virtual_engine, base)({ const __typeof( ((struct virtual_engine *)0)->base ) *__mptr
= (engine); (struct virtual_engine *)( (char *)__mptr - __builtin_offsetof
(struct virtual_engine, base) );})
;
204}
205
206static struct intel_context *
207execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
208 unsigned long flags);
209
210static struct i915_request *
211__active_request(const struct intel_timeline * const tl,
212 struct i915_request *rq,
213 int error)
214{
215 struct i915_request *active = rq;
216
217 list_for_each_entry_from_reverse(rq, &tl->requests, link)for (; &rq->link != (&tl->requests); rq = ({ const
__typeof( ((__typeof(*rq) *)0)->link ) *__mptr = (rq->
link.prev); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), link) );}))
{
218 if (__i915_request_is_complete(rq))
219 break;
220
221 if (error) {
222 i915_request_set_error_once(rq, error);
223 __i915_request_skip(rq);
224 }
225 active = rq;
226 }
227
228 return active;
229}
230
231static struct i915_request *
232active_request(const struct intel_timeline * const tl, struct i915_request *rq)
233{
234 return __active_request(tl, rq, 0);
235}
236
237static void ring_set_paused(const struct intel_engine_cs *engine, int state)
238{
239 /*
240 * We inspect HWS_PREEMPT with a semaphore inside
241 * engine->emit_fini_breadcrumb. If the dword is true,
242 * the ring is paused as the semaphore will busywait
243 * until the dword is false.
244 */
245 engine->status_page.addr[I915_GEM_HWS_PREEMPT0x32] = state;
246 if (state)
247 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0);
248}
249
250static struct i915_priolist *to_priolist(struct rb_node *rb)
251{
252 return rb_entry(rb, struct i915_priolist, node)({ const __typeof( ((struct i915_priolist *)0)->node ) *__mptr
= (rb); (struct i915_priolist *)( (char *)__mptr - __builtin_offsetof
(struct i915_priolist, node) );})
;
253}
254
255static int rq_prio(const struct i915_request *rq)
256{
257 return READ_ONCE(rq->sched.attr.priority)({ typeof(rq->sched.attr.priority) __tmp = *(volatile typeof
(rq->sched.attr.priority) *)&(rq->sched.attr.priority
); membar_datadep_consumer(); __tmp; })
;
258}
259
260static int effective_prio(const struct i915_request *rq)
261{
262 int prio = rq_prio(rq);
263
264 /*
265 * If this request is special and must not be interrupted at any
266 * cost, so be it. Note we are only checking the most recent request
267 * in the context and so may be masking an earlier vip request. It
268 * is hoped that under the conditions where nopreempt is used, this
269 * will not matter (i.e. all requests to that context will be
270 * nopreempt for as long as desired).
271 */
272 if (i915_request_has_nopreempt(rq))
273 prio = I915_PRIORITY_UNPREEMPTABLE0x7fffffff;
274
275 return prio;
276}
277
278static int queue_prio(const struct i915_sched_engine *sched_engine)
279{
280 struct rb_node *rb;
281
282 rb = rb_first_cached(&sched_engine->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&sched_engine
->queue)->rb_root), -1)
;
283 if (!rb)
284 return INT_MIN(-0x7fffffff-1);
285
286 return to_priolist(rb)->priority;
287}
288
289static int virtual_prio(const struct intel_engine_execlists *el)
290{
291 struct rb_node *rb = rb_first_cached(&el->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&el->
virtual)->rb_root), -1)
;
292
293 return rb ? rb_entry(rb, struct ve_node, rb)({ const __typeof( ((struct ve_node *)0)->rb ) *__mptr = (
rb); (struct ve_node *)( (char *)__mptr - __builtin_offsetof(
struct ve_node, rb) );})
->prio : INT_MIN(-0x7fffffff-1);
294}
295
296static bool_Bool need_preempt(const struct intel_engine_cs *engine,
297 const struct i915_request *rq)
298{
299 int last_prio;
300
301 if (!intel_engine_has_semaphores(engine))
302 return false0;
303
304 /*
305 * Check if the current priority hint merits a preemption attempt.
306 *
307 * We record the highest value priority we saw during rescheduling
308 * prior to this dequeue, therefore we know that if it is strictly
309 * less than the current tail of ELSP[0], we do not need to force
310 * a preempt-to-idle cycle.
311 *
312 * However, the priority hint is a mere hint that we may need to
313 * preempt. If that hint is stale or we may be trying to preempt
314 * ourselves, ignore the request.
315 *
316 * More naturally we would write
317 * prio >= max(0, last);
318 * except that we wish to prevent triggering preemption at the same
319 * priority level: the task that is running should remain running
320 * to preserve FIFO ordering of dependencies.
321 */
322 last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1)(((effective_prio(rq))>(I915_PRIORITY_NORMAL - 1))?(effective_prio
(rq)):(I915_PRIORITY_NORMAL - 1))
;
323 if (engine->sched_engine->queue_priority_hint <= last_prio)
324 return false0;
325
326 /*
327 * Check against the first request in ELSP[1], it will, thanks to the
328 * power of PI, be the highest priority of that context.
329 */
330 if (!list_is_last(&rq->sched.link, &engine->sched_engine->requests) &&
331 rq_prio(list_next_entry(rq, sched.link)({ const __typeof( ((typeof(*(rq)) *)0)->sched.link ) *__mptr
= (((rq)->sched.link.next)); (typeof(*(rq)) *)( (char *)__mptr
- __builtin_offsetof(typeof(*(rq)), sched.link) );})
) > last_prio)
332 return true1;
333
334 /*
335 * If the inflight context did not trigger the preemption, then maybe
336 * it was the set of queued requests? Pick the highest priority in
337 * the queue (the first active priolist) and see if it deserves to be
338 * running instead of ELSP[0].
339 *
340 * The highest priority request in the queue cannot be either
341 * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same
342 * context, its priority would not exceed ELSP[0] aka last_prio.
343 */
344 return max(virtual_prio(&engine->execlists),(((virtual_prio(&engine->execlists))>(queue_prio(engine
->sched_engine)))?(virtual_prio(&engine->execlists)
):(queue_prio(engine->sched_engine)))
345 queue_prio(engine->sched_engine))(((virtual_prio(&engine->execlists))>(queue_prio(engine
->sched_engine)))?(virtual_prio(&engine->execlists)
):(queue_prio(engine->sched_engine)))
> last_prio;
346}
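For illustration of the threshold computed in need_preempt() above, with made-up values (assuming I915_PRIORITY_NORMAL is 0, as in upstream i915; nothing here is taken from the report itself):

	/* ELSP[0] at normal priority, no I915_FENCE_FLAG_NOPREEMPT:         */
	last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); /* 0  */
	/* queue_priority_hint == 0: <= last_prio, keep FIFO, no preemption  */
	/* queue_priority_hint == 1: >  last_prio, go on to compare against  */
	/*   ELSP[1] and the head of the priority queue                      */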
347
348__maybe_unused__attribute__((__unused__)) static bool_Bool
349assert_priority_queue(const struct i915_request *prev,
350 const struct i915_request *next)
351{
352 /*
353 * Without preemption, the prev may refer to the still active element
354 * which we refuse to let go.
355 *
356 * Even with preemption, there are times when we think it is better not
357 * to preempt and leave an ostensibly lower priority request in flight.
358 */
359 if (i915_request_is_active(prev))
360 return true1;
361
362 return rq_prio(prev) >= rq_prio(next);
363}
364
365static struct i915_request *
366__unwind_incomplete_requests(struct intel_engine_cs *engine)
367{
368 struct i915_request *rq, *rn, *active = NULL((void *)0);
369 struct list_head *pl;
370 int prio = I915_PRIORITY_INVALID((-0x7fffffff-1));
371
372 lockdep_assert_held(&engine->sched_engine->lock)do { (void)(&engine->sched_engine->lock); } while(0
)
;
373
374 list_for_each_entry_safe_reverse(rq, rn,for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&engine->sched_engine->requests)->
prev); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), sched.link) );}), rn = ({ const __typeof( ((__typeof
(*rq) *)0)->sched.link ) *__mptr = ((rq)->sched.link.prev
); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*rq), sched.link) );}); &(rq)->sched.link != (&engine
->sched_engine->requests); rq = rn, rn = ({ const __typeof
( ((__typeof(*rn) *)0)->sched.link ) *__mptr = (rn->sched
.link.prev); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rn), sched.link) );}))
375 &engine->sched_engine->requests,for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&engine->sched_engine->requests)->
prev); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), sched.link) );}), rn = ({ const __typeof( ((__typeof
(*rq) *)0)->sched.link ) *__mptr = ((rq)->sched.link.prev
); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*rq), sched.link) );}); &(rq)->sched.link != (&engine
->sched_engine->requests); rq = rn, rn = ({ const __typeof
( ((__typeof(*rn) *)0)->sched.link ) *__mptr = (rn->sched
.link.prev); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rn), sched.link) );}))
376 sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&engine->sched_engine->requests)->
prev); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), sched.link) );}), rn = ({ const __typeof( ((__typeof
(*rq) *)0)->sched.link ) *__mptr = ((rq)->sched.link.prev
); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*rq), sched.link) );}); &(rq)->sched.link != (&engine
->sched_engine->requests); rq = rn, rn = ({ const __typeof
( ((__typeof(*rn) *)0)->sched.link ) *__mptr = (rn->sched
.link.prev); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rn), sched.link) );}))
{
377 if (__i915_request_is_complete(rq)) {
378 list_del_init(&rq->sched.link);
379 continue;
380 }
381
382 __i915_request_unsubmit(rq);
383
384 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID)((void)0);
385 if (rq_prio(rq) != prio) {
386 prio = rq_prio(rq);
387 pl = i915_sched_lookup_priolist(engine->sched_engine,
388 prio);
389 }
390 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine))((void)0);
391
392 list_move(&rq->sched.link, pl);
393 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
394
395 /* Check in case we rollback so far we wrap [size/2] */
396 if (intel_ring_direction(rq->ring,
397 rq->tail,
398 rq->ring->tail + 8) > 0)
399 rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2));
400
401 active = rq;
402 }
403
404 return active;
405}
406
407struct i915_request *
408execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
409{
410 struct intel_engine_cs *engine =
411 container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr
= (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*engine), execlists) );})
;
412
413 return __unwind_incomplete_requests(engine);
414}
415
416static void
417execlists_context_status_change(struct i915_request *rq, unsigned long status)
418{
419 /*
420 * Only used when GVT-g is enabled now. When GVT-g is disabled,
421 * the compiler should eliminate this function as dead code.
422 */
423 if (!IS_ENABLED(CONFIG_DRM_I915_GVT)0)
424 return;
425
426 STUB()do { printf("%s: stub\n", __func__); } while(0);
427#ifdef notyet
428 atomic_notifier_call_chain(&rq->engine->context_status_notifier,
429 status, rq);
430#endif
431}
432
433static void reset_active(struct i915_request *rq,
434 struct intel_engine_cs *engine)
435{
436 struct intel_context * const ce = rq->context;
437 u32 head;
438
439 /*
440 * The executing context has been cancelled. We want to prevent
441 * further execution along this context and propagate the error on
442 * to anything depending on its results.
443 *
444 * In __i915_request_submit(), we apply the -EIO and remove the
445 * requests' payloads for any banned requests. But first, we must
446 * rewind the context back to the start of the incomplete request so
447 * that we do not jump back into the middle of the batch.
448 *
449 * We preserve the breadcrumbs and semaphores of the incomplete
450 * requests so that inter-timeline dependencies (i.e other timelines)
451 * remain correctly ordered. And we defer to __i915_request_submit()
452 * so that all asynchronous waits are correctly handled.
453 */
454 ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
455 rq->fence.context, rq->fence.seqno)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
456
457 /* On resubmission of the active request, payload will be scrubbed */
458 if (__i915_request_is_complete(rq))
459 head = rq->tail;
460 else
461 head = __active_request(ce->timeline, rq, -EIO5)->head;
462 head = intel_ring_wrap(ce->ring, head);
463
464 /* Scrub the context image to prevent replaying the previous batch */
465 lrc_init_regs(ce, engine, true1);
466
467 /* We've switched away, so this should be a no-op, but intent matters */
468 ce->lrc.lrca = lrc_update_regs(ce, engine, head);
469}
470
471static bool_Bool bad_request(const struct i915_request *rq)
472{
473 return rq->fence.error && i915_request_started(rq);
474}
475
476static struct intel_engine_cs *
477__execlists_schedule_in(struct i915_request *rq)
478{
479 struct intel_engine_cs * const engine = rq->engine;
480 struct intel_context * const ce = rq->context;
481
482 intel_context_get(ce);
483
484 if (unlikely(intel_context_is_closed(ce) &&__builtin_expect(!!(intel_context_is_closed(ce) && !intel_engine_has_heartbeat
(engine)), 0)
485 !intel_engine_has_heartbeat(engine))__builtin_expect(!!(intel_context_is_closed(ce) && !intel_engine_has_heartbeat
(engine)), 0)
)
486 intel_context_set_exiting(ce);
487
488 if (unlikely(!intel_context_is_schedulable(ce) || bad_request(rq))__builtin_expect(!!(!intel_context_is_schedulable(ce) || bad_request
(rq)), 0)
)
489 reset_active(rq, engine);
490
491 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
492 lrc_check_regs(ce, engine, "before");
493
494 if (ce->tag) {
495 /* Use a fixed tag for OA and friends */
496 GEM_BUG_ON(ce->tag <= BITS_PER_LONG)((void)0);
497 ce->lrc.ccid = ce->tag;
498 } else if (GRAPHICS_VER_FULL(engine->i915)(((&(engine->i915)->__runtime)->graphics.ip.ver)
<< 8 | ((&(engine->i915)->__runtime)->graphics
.ip.rel))
>= IP_VER(12, 50)((12) << 8 | (50))) {
499 /* We don't need a strict matching tag, just different values */
500 unsigned int tag = ffs(READ_ONCE(engine->context_tag)({ typeof(engine->context_tag) __tmp = *(volatile typeof(engine
->context_tag) *)&(engine->context_tag); membar_datadep_consumer
(); __tmp; })
);
501
502 GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG)((void)0);
503 clear_bit(tag - 1, &engine->context_tag);
504 ce->lrc.ccid = tag << (XEHP_SW_CTX_ID_SHIFT39 - 32);
505
506 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID)extern char _ctassert[(!(64 > ((1 << 11) - 1))) ? 1 :
-1 ] __attribute__((__unused__))
;
507
508 } else {
509 /* We don't need a strict matching tag, just different values */
510 unsigned int tag = __ffs(engine->context_tag)__builtin_ctzl(engine->context_tag);
511
512 GEM_BUG_ON(tag >= BITS_PER_LONG)((void)0);
513 __clear_bit(tag, &engine->context_tag);
514 ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT37 - 32);
515
516 BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID)extern char _ctassert[(!(64 > ((1 << 11) - 1))) ? 1 :
-1 ] __attribute__((__unused__))
;
517 }
518
519 ce->lrc.ccid |= engine->execlists.ccid;
520
521 __intel_gt_pm_get(engine->gt);
522 if (engine->fw_domain && !engine->fw_active++)
523 intel_uncore_forcewake_get(engine->uncore, engine->fw_domain);
524 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
525 intel_engine_context_in(engine);
526
527 CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid)do { const struct intel_context *ce__ = (ce); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (ce__->
engine); do { } while (0); } while (0); } while (0)
;
528
529 return engine;
530}
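The ccid tag handling above is, at its core, a small free-bitmap allocator: a set bit in engine->context_tag marks a free tag, __execlists_schedule_in() claims the lowest one with ffs()/__ffs(), and __execlists_schedule_out() returns it. A minimal userspace sketch of that pattern follows (illustrative only; get_tag and put_tag are invented names):

#include <stdio.h>

static unsigned long tags = ~0UL;	/* every bit set: all tags free */

static int get_tag(void)
{
	int bit = __builtin_ffsl(tags);	/* 1-based index of lowest set bit */

	if (bit == 0)
		return -1;		/* no free tag */
	tags &= ~(1UL << (bit - 1));	/* claim it */
	return bit - 1;
}

static void put_tag(int tag)
{
	tags |= 1UL << tag;		/* release it */
}

int main(void)
{
	int a = get_tag(), b = get_tag();

	printf("a=%d b=%d\n", a, b);		/* a=0 b=1 */
	put_tag(a);
	printf("reused=%d\n", get_tag());	/* 0 again  */
	return 0;
}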
531
532static void execlists_schedule_in(struct i915_request *rq, int idx)
533{
534 struct intel_context * const ce = rq->context;
535 struct intel_engine_cs *old;
536
537 GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine))((void)0);
538 trace_i915_request_in(rq, idx);
539
540 old = ce->inflight;
541 if (!old)
542 old = __execlists_schedule_in(rq);
543 WRITE_ONCE(ce->inflight, ptr_inc(old))({ typeof(ce->inflight) __tmp = (({ unsigned long __v = (unsigned
long)(old); (typeof(old))(__v + 1); })); *(volatile typeof(ce
->inflight) *)&(ce->inflight) = __tmp; __tmp; })
;
544
545 GEM_BUG_ON(intel_context_inflight(ce) != rq->engine)((void)0);
546}
547
548static void
549resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve)
550{
551 struct intel_engine_cs *engine = rq->engine;
552
553 spin_lock_irq(&engine->sched_engine->lock)mtx_enter(&engine->sched_engine->lock);
554
555 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
556 WRITE_ONCE(rq->engine, &ve->base)({ typeof(rq->engine) __tmp = (&ve->base); *(volatile
typeof(rq->engine) *)&(rq->engine) = __tmp; __tmp;
})
;
557 ve->base.submit_request(rq);
558
559 spin_unlock_irq(&engine->sched_engine->lock)mtx_leave(&engine->sched_engine->lock);
560}
561
562static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
563{
564 struct virtual_engine *ve = container_of(ce, typeof(*ve), context)({ const __typeof( ((typeof(*ve) *)0)->context ) *__mptr =
(ce); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof(typeof
(*ve), context) );})
;
565 struct intel_engine_cs *engine = rq->engine;
566
567 /*
568 * After this point, the rq may be transferred to a new sibling, so
569 * before we clear ce->inflight make sure that the context has been
570 * removed from the b->signalers and furthermore we need to make sure
571 * that the concurrent iterator in signal_irq_work is no longer
572 * following ce->signal_link.
573 */
574 if (!list_empty(&ce->signals))
575 intel_context_remove_breadcrumbs(ce, engine->breadcrumbs);
576
577 /*
578 * This engine is now too busy to run this virtual request, so
579 * see if we can find an alternative engine for it to execute on.
580 * Once a request has become bonded to this engine, we treat it the
581 * same as any other native request.
582 */
583 if (i915_request_in_priority_queue(rq) &&
584 rq->execution_mask != engine->mask)
585 resubmit_virtual_request(rq, ve);
586
587 if (READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request
) *)&(ve->request); membar_datadep_consumer(); __tmp; }
)
)
588 tasklet_hi_schedule(&ve->base.sched_engine->tasklet);
589}
590
591static void __execlists_schedule_out(struct i915_request * const rq,
592 struct intel_context * const ce)
593{
594 struct intel_engine_cs * const engine = rq->engine;
595 unsigned int ccid;
596
597 /*
598 * NB process_csb() is not under the engine->sched_engine->lock and hence
599 * schedule_out can race with schedule_in meaning that we should
600 * refrain from doing non-trivial work here.
601 */
602
603 CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid)do { const struct intel_context *ce__ = (ce); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (ce__->
engine); do { } while (0); } while (0); } while (0)
;
604 GEM_BUG_ON(ce->inflight != engine)((void)0);
605
606 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
607 lrc_check_regs(ce, engine, "after");
608
609 /*
610 * If we have just completed this context, the engine may now be
611 * idle and we want to re-enter powersaving.
612 */
613 if (intel_timeline_is_last(ce->timeline, rq) &&
614 __i915_request_is_complete(rq))
615 intel_engine_add_retire(engine, ce->timeline);
616
617 ccid = ce->lrc.ccid;
618 if (GRAPHICS_VER_FULL(engine->i915)(((&(engine->i915)->__runtime)->graphics.ip.ver)
<< 8 | ((&(engine->i915)->__runtime)->graphics
.ip.rel))
>= IP_VER(12, 50)((12) << 8 | (50))) {
619 ccid >>= XEHP_SW_CTX_ID_SHIFT39 - 32;
620 ccid &= XEHP_MAX_CONTEXT_HW_ID0xFFFF;
621 } else {
622 ccid >>= GEN11_SW_CTX_ID_SHIFT37 - 32;
623 ccid &= GEN12_MAX_CONTEXT_HW_ID((1 << 11) - 1);
624 }
625
626 if (ccid < BITS_PER_LONG64) {
627 GEM_BUG_ON(ccid == 0)((void)0);
628 GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag))((void)0);
629 __set_bit(ccid - 1, &engine->context_tag);
630 }
631 intel_engine_context_out(engine);
632 execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
633 if (engine->fw_domain && !--engine->fw_active)
634 intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
635 intel_gt_pm_put_async(engine->gt);
636
637 /*
638 * If this is part of a virtual engine, its next request may
639 * have been blocked waiting for access to the active context.
640 * We have to kick all the siblings again in case we need to
641 * switch (e.g. the next request is not runnable on this
642 * engine). Hopefully, we will already have submitted the next
643 * request before the tasklet runs and do not need to rebuild
644 * each virtual tree and kick everyone again.
645 */
646 if (ce->engine != engine)
647 kick_siblings(rq, ce);
648
649 WRITE_ONCE(ce->inflight, NULL)({ typeof(ce->inflight) __tmp = (((void *)0)); *(volatile typeof
(ce->inflight) *)&(ce->inflight) = __tmp; __tmp; })
;
650 intel_context_put(ce);
651}
652
653static inline void execlists_schedule_out(struct i915_request *rq)
654{
655 struct intel_context * const ce = rq->context;
656
657 trace_i915_request_out(rq);
658
659 GEM_BUG_ON(!ce->inflight)((void)0);
660 ce->inflight = ptr_dec(ce->inflight)({ unsigned long __v = (unsigned long)(ce->inflight); (typeof
(ce->inflight))(__v - 1); })
;
661 if (!__intel_context_inflight_count(ce->inflight)((unsigned long)(ce->inflight) & ((1UL << (3)) -
1))
)
662 __execlists_schedule_out(rq, ce);
663
664 i915_request_put(rq);
665}
666
667static u32 map_i915_prio_to_lrc_desc_prio(int prio)
668{
669 if (prio > I915_PRIORITY_NORMAL)
670 return GEN12_CTX_PRIORITY_HIGH(((typeof((((~0UL) >> (64 - (10) - 1)) & ((~0UL) <<
(9)))))(2) << (__builtin_ffsll((((~0UL) >> (64 -
(10) - 1)) & ((~0UL) << (9)))) - 1)) & ((((~0UL
) >> (64 - (10) - 1)) & ((~0UL) << (9)))))
;
671 else if (prio < I915_PRIORITY_NORMAL)
672 return GEN12_CTX_PRIORITY_LOW(((typeof((((~0UL) >> (64 - (10) - 1)) & ((~0UL) <<
(9)))))(0) << (__builtin_ffsll((((~0UL) >> (64 -
(10) - 1)) & ((~0UL) << (9)))) - 1)) & ((((~0UL
) >> (64 - (10) - 1)) & ((~0UL) << (9)))))
;
673 else
674 return GEN12_CTX_PRIORITY_NORMAL(((typeof((((~0UL) >> (64 - (10) - 1)) & ((~0UL) <<
(9)))))(1) << (__builtin_ffsll((((~0UL) >> (64 -
(10) - 1)) & ((~0UL) << (9)))) - 1)) & ((((~0UL
) >> (64 - (10) - 1)) & ((~0UL) << (9)))))
;
675}
676
677static u64 execlists_update_context(struct i915_request *rq)
678{
679 struct intel_context *ce = rq->context;
680 u64 desc;
681 u32 tail, prev;
682
683 desc = ce->lrc.desc;
684 if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY(1UL << (10)))
685 desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq));
686
687 /*
688 * WaIdleLiteRestore:bdw,skl
689 *
690 * We should never submit the context with the same RING_TAIL twice
691 * just in case we submit an empty ring, which confuses the HW.
692 *
693 * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
694 * the normal request to be able to always advance the RING_TAIL on
695 * subsequent resubmissions (for lite restore). Should that fail us,
696 * and we try and submit the same tail again, force the context
697 * reload.
698 *
699 * If we need to return to a preempted context, we need to skip the
700 * lite-restore and force it to reload the RING_TAIL. Otherwise, the
701 * HW has a tendency to ignore us rewinding the TAIL to the end of
702 * an earlier request.
703 */
704 GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail)((void)0);
705 prev = rq->ring->tail;
706 tail = intel_ring_set_tail(rq->ring, rq->tail);
707 if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)__builtin_expect(!!(intel_ring_direction(rq->ring, tail, prev
) <= 0), 0)
)
708 desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2));
709 ce->lrc_reg_state[CTX_RING_TAIL(0x06 + 1)] = tail;
710 rq->tail = rq->wa_tail;
711
712 /*
713 * Make sure the context image is complete before we submit it to HW.
714 *
715 * Ostensibly, writes (including the WCB) should be flushed prior to
716 * an uncached write such as our mmio register access, but the empirical
717 * evidence (esp. on Braswell) suggests that the WC write into memory
718 * may not be visible to the HW prior to the completion of the UC
719 * register write and that we may begin execution from the context
720 * before its image is complete leading to invalid PD chasing.
721 */
722 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0);
723
724 ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE(1ULL << (2));
725 return desc;
726}
727
728static void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
729{
730 if (execlists->ctrl_reg) {
731 writel(lower_32_bits(desc), execlists->submit_reg + port * 2)iowrite32(((u32)(desc)), execlists->submit_reg + port * 2);
732 writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1)iowrite32(((u32)(((desc) >> 16) >> 16)), execlists
->submit_reg + port * 2 + 1)
;
733 } else {
734 writel(upper_32_bits(desc), execlists->submit_reg)iowrite32(((u32)(((desc) >> 16) >> 16)), execlists
->submit_reg)
;
735 writel(lower_32_bits(desc), execlists->submit_reg)iowrite32(((u32)(desc)), execlists->submit_reg);
736 }
737}
738
739static __maybe_unused__attribute__((__unused__)) char *
740dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq)
741{
742 if (!rq)
743 return "";
744
745 snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d",
746 prefix,
747 rq->context->lrc.ccid,
748 rq->fence.context, rq->fence.seqno,
749 __i915_request_is_complete(rq) ? "!" :
750 __i915_request_has_started(rq) ? "*" :
751 "",
752 rq_prio(rq));
753
754 return buf;
755}
756
757static __maybe_unused__attribute__((__unused__)) noinline__attribute__((__noinline__)) void
758trace_ports(const struct intel_engine_execlists *execlists,
759 const char *msg,
760 struct i915_request * const *ports)
761{
762 const struct intel_engine_cs *engine =
763 container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr
= (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*engine), execlists) );})
;
764 char __maybe_unused__attribute__((__unused__)) p0[40], p1[40];
765
766 if (!ports[0])
767 return;
768
769 ENGINE_TRACE(engine, "%s { %s%s }\n", msg,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
770 dump_port(p0, sizeof(p0), "", ports[0]),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
771 dump_port(p1, sizeof(p1), ", ", ports[1]))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
772}
773
774static bool_Bool
775reset_in_progress(const struct intel_engine_cs *engine)
776{
777 return unlikely(!__tasklet_is_enabled(&engine->sched_engine->tasklet))__builtin_expect(!!(!__tasklet_is_enabled(&engine->sched_engine
->tasklet)), 0)
;
778}
779
780static __maybe_unused__attribute__((__unused__)) noinline__attribute__((__noinline__)) bool_Bool
781assert_pending_valid(const struct intel_engine_execlists *execlists,
782 const char *msg)
783{
784 struct intel_engine_cs *engine =
785 container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr
= (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*engine), execlists) );})
;
786 struct i915_request * const *port, *rq, *prev = NULL((void *)0);
787 struct intel_context *ce = NULL((void *)0);
788 u32 ccid = -1;
789
790 trace_ports(execlists, msg, execlists->pending);
791
792 /* We may be messing around with the lists during reset, lalala */
793 if (reset_in_progress(engine))
794 return true1;
795
796 if (!execlists->pending[0]) {
797 GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",do { } while (0)
798 engine->name)do { } while (0);
799 return false0;
800 }
801
802 if (execlists->pending[execlists_num_ports(execlists)]) {
803 GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",do { } while (0)
804 engine->name, execlists_num_ports(execlists))do { } while (0);
805 return false0;
806 }
807
808 for (port = execlists->pending; (rq = *port); port++) {
809 unsigned long flags;
810 bool_Bool ok = true1;
811
812 GEM_BUG_ON(!kref_read(&rq->fence.refcount))((void)0);
813 GEM_BUG_ON(!i915_request_is_active(rq))((void)0);
814
815 if (ce == rq->context) {
816 GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",do { } while (0)
817 engine->name,do { } while (0)
818 ce->timeline->fence_context,do { } while (0)
819 port - execlists->pending)do { } while (0);
820 return false0;
821 }
822 ce = rq->context;
823
824 if (ccid == ce->lrc.ccid) {
825 GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",do { } while (0)
826 engine->name,do { } while (0)
827 ccid, ce->timeline->fence_context,do { } while (0)
828 port - execlists->pending)do { } while (0);
829 return false0;
830 }
831 ccid = ce->lrc.ccid;
832
833 /*
834 * Sentinels are supposed to be the last request so they flush
835 * the current execution off the HW. Check that they are the only
836 * request in the pending submission.
837 *
838 * NB: Due to the async nature of preempt-to-busy and request
839 * cancellation we need to handle the case where a request
840 * becomes a sentinel in parallel to CSB processing.
841 */
842 if (prev && i915_request_has_sentinel(prev) &&
843 !READ_ONCE(prev->fence.error)({ typeof(prev->fence.error) __tmp = *(volatile typeof(prev
->fence.error) *)&(prev->fence.error); membar_datadep_consumer
(); __tmp; })
) {
844 GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",do { } while (0)
845 engine->name,do { } while (0)
846 ce->timeline->fence_context,do { } while (0)
847 port - execlists->pending)do { } while (0);
848 return false0;
849 }
850 prev = rq;
851
852 /*
853 * We want virtual requests to only be in the first slot so
854 * that they are never stuck behind a hog and can be immediately
855 * transferred onto the next idle engine.
856 */
857 if (rq->execution_mask != engine->mask &&
858 port != execlists->pending) {
859 GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n",do { } while (0)
860 engine->name,do { } while (0)
861 ce->timeline->fence_context,do { } while (0)
862 port - execlists->pending)do { } while (0);
863 return false0;
864 }
865
866 /* Hold tightly onto the lock to prevent concurrent retires! */
867 if (!spin_trylock_irqsave(&rq->lock, flags)({ (void)(flags); mtx_enter_try(&rq->lock) ? 1 : 0; }))
868 continue;
869
870 if (__i915_request_is_complete(rq))
871 goto unlock;
872
873 if (i915_active_is_idle(&ce->active) &&
874 !intel_context_is_barrier(ce)) {
875 GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",do { } while (0)
876 engine->name,do { } while (0)
877 ce->timeline->fence_context,do { } while (0)
878 port - execlists->pending)do { } while (0);
879 ok = false0;
880 goto unlock;
881 }
882
883 if (!i915_vma_is_pinned(ce->state)) {
884 GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",do { } while (0)
885 engine->name,do { } while (0)
886 ce->timeline->fence_context,do { } while (0)
887 port - execlists->pending)do { } while (0);
888 ok = false0;
889 goto unlock;
890 }
891
892 if (!i915_vma_is_pinned(ce->ring->vma)) {
893 GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",do { } while (0)
894 engine->name,do { } while (0)
895 ce->timeline->fence_context,do { } while (0)
896 port - execlists->pending)do { } while (0);
897 ok = false0;
898 goto unlock;
899 }
900
901unlock:
902 spin_unlock_irqrestore(&rq->lock, flags)do { (void)(flags); mtx_leave(&rq->lock); } while (0);
903 if (!ok)
904 return false0;
905 }
906
907 return ce;
908}
909
910static void execlists_submit_ports(struct intel_engine_cs *engine)
911{
912 struct intel_engine_execlists *execlists = &engine->execlists;
913 unsigned int n;
914
915 GEM_BUG_ON(!assert_pending_valid(execlists, "submit"))((void)0);
916
917 /*
918 * We can skip acquiring intel_runtime_pm_get() here as it was taken
919 * on our behalf by the request (see i915_gem_mark_busy()) and it will
920 * not be relinquished until the device is idle (see
921 * i915_gem_idle_work_handler()). As a precaution, we make sure
922 * that all ELSP are drained i.e. we have processed the CSB,
923 * before allowing ourselves to idle and calling intel_runtime_pm_put().
924 */
925 GEM_BUG_ON(!intel_engine_pm_is_awake(engine))((void)0);
926
927 /*
928 * ELSQ note: the submit queue is not cleared after being submitted
929 * to the HW so we need to make sure we always clean it up. This is
930 * currently ensured by the fact that we always write the same number
931 * of elsq entries, keep this in mind before changing the loop below.
932 */
933 for (n = execlists_num_ports(execlists); n--; ) {
934 struct i915_request *rq = execlists->pending[n];
935
936 write_desc(execlists,
937 rq ? execlists_update_context(rq) : 0,
938 n);
939 }
940
941 /* we need to manually load the submit queue */
942 if (execlists->ctrl_reg)
943 writel(EL_CTRL_LOAD, execlists->ctrl_reg)iowrite32(((u32)((1UL << (0)) + 0)), execlists->ctrl_reg
)
;
944}
945
946static bool_Bool ctx_single_port_submission(const struct intel_context *ce)
947{
948 return (IS_ENABLED(CONFIG_DRM_I915_GVT)0 &&
949 intel_context_force_single_submission(ce));
950}
951
952static bool_Bool can_merge_ctx(const struct intel_context *prev,
953 const struct intel_context *next)
954{
955 if (prev != next)
956 return false0;
957
958 if (ctx_single_port_submission(prev))
959 return false0;
960
961 return true1;
962}
963
964static unsigned long i915_request_flags(const struct i915_request *rq)
965{
966 return READ_ONCE(rq->fence.flags)({ typeof(rq->fence.flags) __tmp = *(volatile typeof(rq->
fence.flags) *)&(rq->fence.flags); membar_datadep_consumer
(); __tmp; })
;
967}
968
969static bool_Bool can_merge_rq(const struct i915_request *prev,
970 const struct i915_request *next)
971{
972 GEM_BUG_ON(prev == next)((void)0);
973 GEM_BUG_ON(!assert_priority_queue(prev, next))((void)0);
974
975 /*
976 * We do not submit known completed requests. Therefore if the next
977 * request is already completed, we can pretend to merge it in
978 * with the previous context (and we will skip updating the ELSP
979 * and tracking). Thus hopefully keeping the ELSP full with active
980 * contexts, despite the best efforts of preempt-to-busy to confuse
981 * us.
982 */
983 if (__i915_request_is_complete(next))
984 return true1;
985
986 if (unlikely((i915_request_flags(prev) | i915_request_flags(next)) &__builtin_expect(!!((i915_request_flags(prev) | i915_request_flags
(next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | (
1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)
987 (BIT(I915_FENCE_FLAG_NOPREEMPT) |__builtin_expect(!!((i915_request_flags(prev) | i915_request_flags
(next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | (
1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)
988 BIT(I915_FENCE_FLAG_SENTINEL)))__builtin_expect(!!((i915_request_flags(prev) | i915_request_flags
(next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | (
1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)
)
989 return false0;
990
991 if (!can_merge_ctx(prev->context, next->context))
992 return false0;
993
994 GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno))((void)0);
995 return true1;
996}
997
998static bool_Bool virtual_matches(const struct virtual_engine *ve,
999 const struct i915_request *rq,
1000 const struct intel_engine_cs *engine)
1001{
1002 const struct intel_engine_cs *inflight;
1003
1004 if (!rq)
1005 return false0;
1006
1007 if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
1008 return false0;
1009
1010 /*
1011 * We track when the HW has completed saving the context image
1012 * (i.e. when we have seen the final CS event switching out of
1013 * the context) and must not overwrite the context image before
1014 * then. This restricts us to only using the active engine
1015 * while the previous virtualized request is inflight (so
1016 * we reuse the register offsets). This is a very small
1017 * hysteresis on the greedy selection algorithm.
1018 */
1019 inflight = intel_context_inflight(&ve->context)({ unsigned long __v = (unsigned long)(({ typeof((&ve->
context)->inflight) __tmp = *(volatile typeof((&ve->
context)->inflight) *)&((&ve->context)->inflight
); membar_datadep_consumer(); __tmp; })); (typeof(({ typeof((
&ve->context)->inflight) __tmp = *(volatile typeof(
(&ve->context)->inflight) *)&((&ve->context
)->inflight); membar_datadep_consumer(); __tmp; })))(__v &
-(1UL << (3))); })
;
1020 if (inflight && inflight != engine)
1021 return false0;
1022
1023 return true1;
1024}
1025
1026static struct virtual_engine *
1027first_virtual_engine(struct intel_engine_cs *engine)
1028{
1029 struct intel_engine_execlists *el = &engine->execlists;
1030 struct rb_node *rb = rb_first_cached(&el->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&el->
virtual)->rb_root), -1)
;
1031
1032 while (rb) {
1033 struct virtual_engine *ve =
1034 rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id
].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof
(typeof(*ve), nodes[engine->id].rb) );})
;
1035 struct i915_request *rq = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request
) *)&(ve->request); membar_datadep_consumer(); __tmp; }
)
;
1036
1037 /* lazily cleanup after another engine handled rq */
1038 if (!rq || !virtual_matches(ve, rq, engine)) {
1039 rb_erase_cached(rb, &el->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&el->
virtual)->rb_root), (rb))
;
1040 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
1041 rb = rb_first_cached(&el->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&el->
virtual)->rb_root), -1)
;
1042 continue;
1043 }
1044
1045 return ve;
1046 }
1047
1048 return NULL((void *)0);
1049}
1050
1051static void virtual_xfer_context(struct virtual_engine *ve,
1052 struct intel_engine_cs *engine)
1053{
1054 unsigned int n;
1055
1056 if (likely(engine == ve->siblings[0])__builtin_expect(!!(engine == ve->siblings[0]), 1))
1057 return;
1058
1059 GEM_BUG_ON(READ_ONCE(ve->context.inflight))((void)0);
1060 if (!intel_engine_has_relative_mmio(engine))
1061 lrc_update_offsets(&ve->context, engine);
1062
1063 /*
1064 * Move the bound engine to the top of the list for
1065 * future execution. We then kick this tasklet first
1066 * before checking others, so that we preferentially
1067 * reuse this set of bound registers.
1068 */
1069 for (n = 1; n < ve->num_siblings; n++) {
1070 if (ve->siblings[n] == engine) {
1071 swap(ve->siblings[n], ve->siblings[0])do { __typeof(ve->siblings[n]) __tmp = (ve->siblings[n]
); (ve->siblings[n]) = (ve->siblings[0]); (ve->siblings
[0]) = __tmp; } while(0)
;
1072 break;
1073 }
1074 }
1075}
1076
1077static void defer_request(struct i915_request *rq, struct list_head * const pl)
1078{
1079 DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) };
1080
1081 /*
1082 * We want to move the interrupted request to the back of
1083 * the round-robin list (i.e. its priority level), but
1084 * in doing so, we must then move all requests that were in
1085 * flight and were waiting for the interrupted request to
1086 * be run after it again.
1087 */
1088 do {
1089 struct i915_dependency *p;
1090
1091 GEM_BUG_ON(i915_request_is_active(rq))((void)0);
1092 list_move_tail(&rq->sched.link, pl);
1093
1094 for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link
) *__mptr = ((&(rq)->sched.waiters_list)->next); (
__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), wait_link) );}); &p->wait_link != (&(rq)->
sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *)
0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof
(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link
) );}))
{
1095 struct i915_request *w =
1096 container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p
->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof
(typeof(*w), sched) );})
;
1097
1098 if (p->flags & I915_DEPENDENCY_WEAK(1UL << (2)))
1099 continue;
1100
1101 /* Leave semaphores spinning on the other engines */
1102 if (w->engine != rq->engine)
1103 continue;
1104
1105 /* No waiter should start before its signaler */
1106 GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&((void)0)
1107 __i915_request_has_started(w) &&((void)0)
1108 !__i915_request_is_complete(rq))((void)0);
1109
1110 if (!i915_request_is_ready(w))
1111 continue;
1112
1113 if (rq_prio(w) < rq_prio(rq))
1114 continue;
1115
1116 GEM_BUG_ON(rq_prio(w) > rq_prio(rq))((void)0);
1117 GEM_BUG_ON(i915_request_is_active(w))((void)0);
1118 list_move_tail(&w->sched.link, &list);
1119 }
1120
1121 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof
(*rq) *)0)->sched.link ) *__mptr = ((&list)->next);
(typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof(
*rq), sched.link) );}))
;
1122 } while (rq);
1123}
1124
1125static void defer_active(struct intel_engine_cs *engine)
1126{
1127 struct i915_request *rq;
1128
1129 rq = __unwind_incomplete_requests(engine);
1130 if (!rq)
1131 return;
1132
1133 defer_request(rq, i915_sched_lookup_priolist(engine->sched_engine,
1134 rq_prio(rq)));
1135}
1136
1137static bool_Bool
1138timeslice_yield(const struct intel_engine_execlists *el,
1139 const struct i915_request *rq)
1140{
1141 /*
1142 * Once bitten, forever smitten!
1143 *
1144 * If the active context ever busy-waited on a semaphore,
1145 * it will be treated as a hog until the end of its timeslice (i.e.
1146 * until it is scheduled out and replaced by a new submission,
1147 * possibly even its own lite-restore). The HW only sends an interrupt
1148 * on the first miss, and we do not know if that semaphore has been
1149 * signaled, or even if it is now stuck on another semaphore. Play
1150 * safe, yield if it might be stuck -- it will be given a fresh
1151 * timeslice in the near future.
1152 */
1153 return rq->context->lrc.ccid == READ_ONCE(el->yield)({ typeof(el->yield) __tmp = *(volatile typeof(el->yield
) *)&(el->yield); membar_datadep_consumer(); __tmp; })
;
1154}
1155
1156static bool_Bool needs_timeslice(const struct intel_engine_cs *engine,
1157 const struct i915_request *rq)
1158{
1159 if (!intel_engine_has_timeslices(engine))
1160 return false0;
1161
1162 /* If not currently active, or about to switch, wait for next event */
1163 if (!rq || __i915_request_is_complete(rq))
1164 return false0;
1165
1166 /* We do not need to start the timeslice until after the ACK */
1167 if (READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile
typeof(engine->execlists.pending[0]) *)&(engine->execlists
.pending[0]); membar_datadep_consumer(); __tmp; })
)
1168 return false0;
1169
1170 /* If ELSP[1] is occupied, always check to see if worth slicing */
1171 if (!list_is_last_rcu(&rq->sched.link,
1172 &engine->sched_engine->requests)) {
1173 ENGINE_TRACE(engine, "timeslice required for second inflight context\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1174 return true1;
1175 }
1176
1177 /* Otherwise, ELSP[0] is by itself, but work may be waiting in the queue */
1178 if (!i915_sched_engine_is_empty(engine->sched_engine)) {
1179 ENGINE_TRACE(engine, "timeslice required for queue\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1180 return true1;
1181 }
1182
1183 if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)((&engine->execlists.virtual.rb_root)->rb_node == (
(void *)0))
) {
1184 ENGINE_TRACE(engine, "timeslice required for virtual\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1185 return true1;
1186 }
1187
1188 return false0;
1189}
1190
1191static bool_Bool
1192timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq)
1193{
1194 const struct intel_engine_execlists *el = &engine->execlists;
1195
1196 if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq))
1197 return false0;
1198
1199 if (!needs_timeslice(engine, rq))
1200 return false0;
1201
1202 return timer_expired(&el->timer) || timeslice_yield(el, rq);
1203}
1204
1205static unsigned long timeslice(const struct intel_engine_cs *engine)
1206{
1207 return READ_ONCE(engine->props.timeslice_duration_ms)({ typeof(engine->props.timeslice_duration_ms) __tmp = *(volatile
typeof(engine->props.timeslice_duration_ms) *)&(engine
->props.timeslice_duration_ms); membar_datadep_consumer();
__tmp; })
;
1208}
1209
1210static void start_timeslice(struct intel_engine_cs *engine)
1211{
1212 struct intel_engine_execlists *el = &engine->execlists;
1213 unsigned long duration;
1214
1215 /* Disable the timer if there is nothing to switch to */
1216 duration = 0;
1217 if (needs_timeslice(engine, *el->active)) {
1218 /* Avoid continually prolonging an active timeslice */
1219 if (timer_active(&el->timer)) {
1220 /*
1221 * If we just submitted a new ELSP after an old
1222 * context, that context may have already consumed
1223 * its timeslice, so recheck.
1224 */
1225 if (!timer_pending(&el->timer)(((&el->timer))->to_flags & 0x02))
1226 tasklet_hi_schedule(&engine->sched_engine->tasklet);
1227 return;
1228 }
1229
1230 duration = timeslice(engine);
1231 }
1232
1233 set_timer_ms(&el->timer, duration);
1234}
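
Taken together, needs_timeslice() and start_timeslice() arm the timer only when a second context could make progress: something already sits in ELSP[1], the priority queue is non-empty, or a virtual-engine request is pending, and never before the pending ELSP write has been acknowledged. A hedged restatement of that decision, with illustrative field names standing in for the real engine state:

#include <stdbool.h>
#include <stdio.h>

/* Simplified view of the state consulted above; names are illustrative. */
struct engine_view {
	bool has_timeslices;	/* feature supported and enabled */
	bool active_complete;	/* no ELSP[0] request, or it already finished */
	bool ack_pending;	/* new ELSP submission not yet ACKed */
	bool second_inflight;	/* a second context occupies ELSP[1] */
	bool queue_nonempty;	/* runnable work in the priority queue */
	bool virtual_pending;	/* virtual-engine rbtree is non-empty */
};

static bool needs_timeslice_model(const struct engine_view *e)
{
	if (!e->has_timeslices || e->active_complete || e->ack_pending)
		return false;
	return e->second_inflight || e->queue_nonempty || e->virtual_pending;
}

int main(void)
{
	struct engine_view e = {
		.has_timeslices = true,
		.queue_nonempty = true,	/* ELSP[0] alone, queue waiting */
	};

	printf("arm timeslice timer: %d\n", needs_timeslice_model(&e));
	return 0;
}
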
1235
1236static void record_preemption(struct intel_engine_execlists *execlists)
1237{
1238 (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++)0;
1239}
1240
1241static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
1242 const struct i915_request *rq)
1243{
1244 if (!rq)
1245 return 0;
1246
1247 /* Only allow ourselves to force reset the currently active context */
1248 engine->execlists.preempt_target = rq;
1249
1250 /* Force a fast reset for terminated contexts (ignoring sysfs!) */
1251 if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq))__builtin_expect(!!(intel_context_is_banned(rq->context) ||
bad_request(rq)), 0)
)
1252 return INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS(1);
1253
1254 return READ_ONCE(engine->props.preempt_timeout_ms)({ typeof(engine->props.preempt_timeout_ms) __tmp = *(volatile
typeof(engine->props.preempt_timeout_ms) *)&(engine->
props.preempt_timeout_ms); membar_datadep_consumer(); __tmp; }
)
;
1255}
1256
1257static void set_preempt_timeout(struct intel_engine_cs *engine,
1258 const struct i915_request *rq)
1259{
1260 if (!intel_engine_has_preempt_reset(engine))
1261 return;
1262
1263 set_timer_ms(&engine->execlists.preempt,
1264 active_preempt_timeout(engine, rq));
1265}
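
In other words, the preempt timer is only armed against a real target: nothing in flight means no timer, a banned or misbehaving context gets the short 1 ms forced-reset timeout (the value INTEL_CONTEXT_BANNED_PREEMPT_TIMEOUT_MS expands to above) regardless of sysfs, and everything else honours the engine's configurable preempt_timeout_ms. A small illustrative sketch of that selection; the 640 used below is simply the value CONFIG_DRM_I915_PREEMPT_TIMEOUT expands to later in this listing:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative helper mirroring active_preempt_timeout()'s policy. */
static unsigned long pick_preempt_timeout_ms(bool has_request, bool banned,
					     unsigned long configured_ms)
{
	if (!has_request)
		return 0;		/* nothing to preempt: disable timer */
	if (banned)
		return 1;		/* fast reset for terminated contexts */
	return configured_ms;		/* honour the configured timeout */
}

int main(void)
{
	printf("idle:%lu banned:%lu normal:%lu\n",
	       pick_preempt_timeout_ms(false, false, 640),
	       pick_preempt_timeout_ms(true, true, 640),
	       pick_preempt_timeout_ms(true, false, 640));
	return 0;
}
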
1266
1267static bool_Bool completed(const struct i915_request *rq)
1268{
1269 if (i915_request_has_sentinel(rq))
1270 return false0;
1271
1272 return __i915_request_is_complete(rq);
1273}
1274
1275static void execlists_dequeue(struct intel_engine_cs *engine)
1276{
1277 struct intel_engine_execlists * const execlists = &engine->execlists;
1278 struct i915_sched_engine * const sched_engine = engine->sched_engine;
1279 struct i915_request **port = execlists->pending;
1280 struct i915_request ** const last_port = port + execlists->port_mask;
1281 struct i915_request *last, * const *active;
1282 struct virtual_engine *ve;
1283 struct rb_node *rb;
1284 bool_Bool submit = false0;
1285
1286 /*
1287 * Hardware submission is through 2 ports. Conceptually each port
1288 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
1289 * static for a context, and unique to each, so we only execute
1290 * requests belonging to a single context from each ring. RING_HEAD
1291 * is maintained by the CS in the context image, it marks the place
1292 * where it got up to last time, and through RING_TAIL we tell the CS
1293 * where we want to execute up to this time.
1294 *
1295 * In this list the requests are in order of execution. Consecutive
1296 * requests from the same context are adjacent in the ringbuffer. We
1297 * can combine these requests into a single RING_TAIL update:
1298 *
1299 * RING_HEAD...req1...req2
1300 *                     ^- RING_TAIL
1301 * since to execute req2 the CS must first execute req1.
1302 *
1303 * Our goal then is to point each port to the end of a consecutive
1304 * sequence of requests as being the optimal (fewest wake ups
1305 * and context switches) submission.
1306 */
1307
1308 spin_lock(&sched_engine->lock)mtx_enter(&sched_engine->lock);
1309
1310 /*
1311 * If the queue is higher priority than the last
1312 * request in the currently active context, submit afresh.
1313 * We will resubmit again afterwards in case we need to split
1314 * the active context to interject the preemption request,
1315 * i.e. we will retrigger preemption following the ack in case
1316 * of trouble.
1317 *
1318 */
1319 active = execlists->active;
1320 while ((last = *active) && completed(last))
1321 active++;
1322
1323 if (last) {
1324 if (need_preempt(engine, last)) {
1325 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1326 "preempting last=%llx:%lld, prio=%d, hint=%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1327 last->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1328 last->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1329 last->sched.attr.priority,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1330 sched_engine->queue_priority_hint)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1331 record_preemption(execlists);
1332
1333 /*
1334 * Don't let the RING_HEAD advance past the breadcrumb
1335 * as we unwind (and until we resubmit) so that we do
1336 * not accidentally tell it to go backwards.
1337 */
1338 ring_set_paused(engine, 1);
1339
1340 /*
1341 * Note that we have not stopped the GPU at this point,
1342 * so we are unwinding the incomplete requests as they
1343 * remain inflight and so by the time we do complete
1344 * the preemption, some of the unwound requests may
1345 * complete!
1346 */
1347 __unwind_incomplete_requests(engine);
1348
1349 last = NULL((void *)0);
1350 } else if (timeslice_expired(engine, last)) {
1351 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1352 "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1353 str_yes_no(timer_expired(&execlists->timer)),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1354 last->fence.context, last->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1355 rq_prio(last),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1356 sched_engine->queue_priority_hint,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1357 str_yes_no(timeslice_yield(execlists, last)))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1358
1359 /*
1360 * Consume this timeslice; ensure we start a new one.
1361 *
1362 * The timeslice expired, and we will unwind the
1363 * running contexts and recompute the next ELSP.
1364 * If that submit will be the same pair of contexts
1365 * (due to dependency ordering), we will skip the
1366 * submission. If we don't cancel the timer now,
1367 * we will see that the timer has expired and
1368 * reschedule the tasklet; continually until the
1369 * next context switch or other preemption event.
1370 *
1371 * Since we have decided to reschedule based on
1372 * consumption of this timeslice, if we submit the
1373 * same context again, grant it a full timeslice.
1374 */
1375 cancel_timer(&execlists->timer);
1376 ring_set_paused(engine, 1);
1377 defer_active(engine);
1378
1379 /*
1380 * Unlike for preemption, if we rewind and continue
1381 * executing the same context as previously active,
1382 * the order of execution will remain the same and
1383 * the tail will only advance. We do not need to
1384 * force a full context restore, as a lite-restore
1385 * is sufficient to resample the monotonic TAIL.
1386 *
1387 * If we switch to any other context, similarly we
1388 * will not rewind TAIL of current context, and
1389 * normal save/restore will preserve state and allow
1390 * us to later continue executing the same request.
1391 */
1392 last = NULL((void *)0);
1393 } else {
1394 /*
1395 * Otherwise if we already have a request pending
1396 * for execution after the current one, we can
1397 * just wait until the next CS event before
1398 * queuing more. In either case we will force a
1399 * lite-restore preemption event, but if we wait
1400 * we hopefully coalesce several updates into a single
1401 * submission.
1402 */
1403 if (active[1]) {
1404 /*
1405 * Even if ELSP[1] is occupied and not worthy
1406 * of timeslices, our queue might be.
1407 */
1408 spin_unlock(&sched_engine->lock)mtx_leave(&sched_engine->lock);
1409 return;
1410 }
1411 }
1412 }
1413
1414 /* XXX virtual is always taking precedence */
1415 while ((ve = first_virtual_engine(engine))) {
1416 struct i915_request *rq;
1417
1418 spin_lock(&ve->base.sched_engine->lock)mtx_enter(&ve->base.sched_engine->lock);
1419
1420 rq = ve->request;
1421 if (unlikely(!virtual_matches(ve, rq, engine))__builtin_expect(!!(!virtual_matches(ve, rq, engine)), 0))
1422 goto unlock; /* lost the race to a sibling */
1423
1424 GEM_BUG_ON(rq->engine != &ve->base)((void)0);
1425 GEM_BUG_ON(rq->context != &ve->context)((void)0);
1426
1427 if (unlikely(rq_prio(rq) < queue_prio(sched_engine))__builtin_expect(!!(rq_prio(rq) < queue_prio(sched_engine)
), 0)
) {
1428 spin_unlock(&ve->base.sched_engine->lock)mtx_leave(&ve->base.sched_engine->lock);
1429 break;
1430 }
1431
1432 if (last && !can_merge_rq(last, rq)) {
1433 spin_unlock(&ve->base.sched_engine->lock)mtx_leave(&ve->base.sched_engine->lock);
1434 spin_unlock(&engine->sched_engine->lock)mtx_leave(&engine->sched_engine->lock);
1435 return; /* leave this for another sibling */
1436 }
1437
1438 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1439 "virtual rq=%llx:%lld%s, new engine? %s\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1440 rq->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1441 rq->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1442 __i915_request_is_complete(rq) ? "!" :do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1443 __i915_request_has_started(rq) ? "*" :do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1444 "",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1445 str_yes_no(engine != ve->siblings[0]))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1446
1447 WRITE_ONCE(ve->request, NULL)({ typeof(ve->request) __tmp = (((void *)0)); *(volatile typeof
(ve->request) *)&(ve->request) = __tmp; __tmp; })
;
1448 WRITE_ONCE(ve->base.sched_engine->queue_priority_hint, INT_MIN)({ typeof(ve->base.sched_engine->queue_priority_hint) __tmp
= ((-0x7fffffff-1)); *(volatile typeof(ve->base.sched_engine
->queue_priority_hint) *)&(ve->base.sched_engine->
queue_priority_hint) = __tmp; __tmp; })
;
1449
1450 rb = &ve->nodes[engine->id].rb;
1451 rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists
->virtual)->rb_root), (rb))
;
1452 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
1453
1454 GEM_BUG_ON(!(rq->execution_mask & engine->mask))((void)0);
1455 WRITE_ONCE(rq->engine, engine)({ typeof(rq->engine) __tmp = (engine); *(volatile typeof(
rq->engine) *)&(rq->engine) = __tmp; __tmp; })
;
1456
1457 if (__i915_request_submit(rq)) {
1458 /*
1459 * Only after we confirm that we will submit
1460 * this request (i.e. it has not already
1461 * completed), do we want to update the context.
1462 *
1463 * This serves two purposes. It avoids
1464 * unnecessary work if we are resubmitting an
1465 * already completed request after timeslicing.
1466 * But more importantly, it prevents us altering
1467 * ve->siblings[] on an idle context, where
1468 * we may be using ve->siblings[] in
1469 * virtual_context_enter / virtual_context_exit.
1470 */
1471 virtual_xfer_context(ve, engine);
1472 GEM_BUG_ON(ve->siblings[0] != engine)((void)0);
1473
1474 submit = true1;
1475 last = rq;
1476 }
1477
1478 i915_request_put(rq);
1479unlock:
1480 spin_unlock(&ve->base.sched_engine->lock)mtx_leave(&ve->base.sched_engine->lock);
1481
1482 /*
1483 * Hmm, we have a bunch of virtual engine requests,
1484 * but the first one was already completed (thanks
1485 * preempt-to-busy!). Keep looking at the veng queue
1486 * until we have no more relevant requests (i.e.
1487 * the normal submit queue has higher priority).
1488 */
1489 if (submit)
1490 break;
1491 }
1492
1493 while ((rb = rb_first_cached(&sched_engine->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&sched_engine
->queue)->rb_root), -1)
)) {
1494 struct i915_priolist *p = to_priolist(rb);
1495 struct i915_request *rq, *rn;
1496
1497 priolist_for_each_request_consume(rq, rn, p)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&(p)->requests)->next); (__typeof(*rq
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}), rn = ({ const __typeof( ((__typeof(*rq) *)0)->
sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}); &rq->sched.link != (&(p)->requests
); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->
sched.link ) *__mptr = (rn->sched.link.next); (__typeof(*rn
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rn), sched
.link) );}))
{
1498 bool_Bool merge = true1;
1499
1500 /*
1501 * Can we combine this request with the current port?
1502 * It has to be the same context/ringbuffer and not
1503 * have any exceptions (e.g. GVT saying never to
1504 * combine contexts).
1505 *
1506 * If we can combine the requests, we can execute both
1507 * by updating the RING_TAIL to point to the end of the
1508 * second request, and so we never need to tell the
1509 * hardware about the first.
1510 */
1511 if (last && !can_merge_rq(last, rq)) {
1512 /*
1513 * If we are on the second port and cannot
1514 * combine this request with the last, then we
1515 * are done.
1516 */
1517 if (port == last_port)
1518 goto done;
1519
1520 /*
1521 * We must not populate both ELSP[] with the
1522 * same LRCA, i.e. we must submit 2 different
1523 * contexts if we submit 2 ELSP.
1524 */
1525 if (last->context == rq->context)
1526 goto done;
1527
1528 if (i915_request_has_sentinel(last))
1529 goto done;
1530
1531 /*
1532 * We avoid submitting virtual requests into
1533 * the secondary ports so that we can migrate
1534 * the request immediately to another engine
1535 * rather than wait for the primary request.
1536 */
1537 if (rq->execution_mask != engine->mask)
1538 goto done;
1539
1540 /*
1541 * If GVT overrides us we only ever submit
1542 * port[0], leaving port[1] empty. Note that we
1543 * also have to be careful that we don't queue
1544 * the same context (even though a different
1545 * request) to the second port.
1546 */
1547 if (ctx_single_port_submission(last->context) ||
1548 ctx_single_port_submission(rq->context))
1549 goto done;
1550
1551 merge = false0;
1552 }
1553
1554 if (__i915_request_submit(rq)) {
1555 if (!merge) {
1556 *port++ = i915_request_get(last);
1557 last = NULL((void *)0);
1558 }
1559
1560 GEM_BUG_ON(last &&((void)0)
1561 !can_merge_ctx(last->context,((void)0)
1562 rq->context))((void)0);
1563 GEM_BUG_ON(last &&((void)0)
1564 i915_seqno_passed(last->fence.seqno,((void)0)
1565 rq->fence.seqno))((void)0);
1566
1567 submit = true1;
1568 last = rq;
1569 }
1570 }
1571
1572 rb_erase_cached(&p->node, &sched_engine->queue)linux_root_RB_REMOVE((struct linux_root *)(&(&sched_engine
->queue)->rb_root), (&p->node))
;
1573 i915_priolist_free(p);
1574 }
1575done:
1576 *port++ = i915_request_get(last);
1577
1578 /*
1579 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
1580 *
1581 * We choose the priority hint such that if we add a request of greater
1582 * priority than this, we kick the submission tasklet to decide on
1583 * the right order of submitting the requests to hardware. We must
1584 * also be prepared to reorder requests as they are in-flight on the
1585 * HW. We derive the priority hint then as the first "hole" in
1586 * the HW submission ports and if there are no available slots,
1587 * the priority of the lowest executing request, i.e. last.
1588 *
1589 * When we do receive a higher priority request ready to run from the
1590 * user, see queue_request(), the priority hint is bumped to that
1591 * request, triggering preemption on the next dequeue (or subsequent
1592 * interrupt for secondary ports).
1593 */
1594 sched_engine->queue_priority_hint = queue_prio(sched_engine);
1595 i915_sched_engine_reset_on_empty(sched_engine);
1596 spin_unlock(&sched_engine->lock)mtx_leave(&sched_engine->lock);
1597
1598 /*
1599 * We can skip poking the HW if we ended up with exactly the same set
1600 * of requests as currently running, e.g. trying to timeslice a pair
1601 * of ordered contexts.
1602 */
1603 if (submit &&
1604 memcmp(active,__builtin_memcmp((active), (execlists->pending), ((port - execlists
->pending) * sizeof(*port)))
1605 execlists->pending,__builtin_memcmp((active), (execlists->pending), ((port - execlists
->pending) * sizeof(*port)))
1606 (port - execlists->pending) * sizeof(*port))__builtin_memcmp((active), (execlists->pending), ((port - execlists
->pending) * sizeof(*port)))
) {
1607 *port = NULL((void *)0);
1608 while (port-- != execlists->pending)
1609 execlists_schedule_in(*port, port - execlists->pending);
1610
1611 WRITE_ONCE(execlists->yield, -1)({ typeof(execlists->yield) __tmp = (-1); *(volatile typeof
(execlists->yield) *)&(execlists->yield) = __tmp; __tmp
; })
;
1612 set_preempt_timeout(engine, *active);
1613 execlists_submit_ports(engine);
1614 } else {
1615 ring_set_paused(engine, 0);
1616 while (port-- != execlists->pending)
1617 i915_request_put(*port);
1618 *execlists->pending = NULL((void *)0);
1619 }
1620}
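
The port-filling rules spelled out in the comments above reduce to a simple coalescing loop: consecutive requests from the same context share a port (only RING_TAIL advances), a new context takes the next port, and once both ELSP ports are occupied dequeuing stops until a CS event frees one. A minimal model of just that coalescing, assuming requests are identified by a context id only:

#include <stdio.h>

#define NPORTS 2	/* hardware submission is through two ELSP ports */

int main(void)
{
	/* Pending requests in execution order, identified by context id. */
	int ctx[] = { 1, 1, 1, 2, 2, 3 };
	int n = sizeof(ctx) / sizeof(ctx[0]);
	int port[NPORTS], used = 0;

	for (int i = 0; i < n; i++) {
		if (used && port[used - 1] == ctx[i])
			continue;	/* same context: only RING_TAIL moves */
		if (used == NPORTS)
			break;		/* both ports busy: wait for a CS event */
		port[used++] = ctx[i];	/* a new context takes the next port */
	}

	for (int i = 0; i < used; i++)
		printf("ELSP[%d] <- context %d\n", i, port[i]);
	/* Context 3 stays queued until a port is released. */
	return 0;
}

The real function additionally refuses to merge across sentinels, single-port (GVT) contexts and virtual requests, as the checks above show.
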
1621
1622static void execlists_dequeue_irq(struct intel_engine_cs *engine)
1623{
1624 local_irq_disable()intr_disable(); /* Suspend interrupts across request submission */
1625 execlists_dequeue(engine);
1626 local_irq_enable()intr_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
1627}
1628
1629static void clear_ports(struct i915_request **ports, int count)
1630{
1631 memset_p((void **)ports, NULL((void *)0), count);
1632}
1633
1634static void
1635copy_ports(struct i915_request **dst, struct i915_request **src, int count)
1636{
1637 /* A memcpy_p() would be very useful here! */
1638 while (count--)
1639 WRITE_ONCE(*dst++, *src++)({ typeof(*dst++) __tmp = (*src++); *(volatile typeof(*dst++)
*)&(*dst++) = __tmp; __tmp; })
; /* avoid write tearing */
1640}
1641
1642static struct i915_request **
1643cancel_port_requests(struct intel_engine_execlists * const execlists,
1644 struct i915_request **inactive)
1645{
1646 struct i915_request * const *port;
1647
1648 for (port = execlists->pending; *port; port++)
1649 *inactive++ = *port;
1650 clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)(sizeof((execlists->pending)) / sizeof((execlists->pending
)[0]))
);
1651
1652 /* Mark the end of active before we overwrite *active */
1653 for (port = xchg(&execlists->active, execlists->pending)__sync_lock_test_and_set(&execlists->active, execlists
->pending)
; *port; port++)
1654 *inactive++ = *port;
1655 clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)(sizeof((execlists->inflight)) / sizeof((execlists->inflight
)[0]))
);
1656
1657 smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* complete the seqlock for execlists_active() */
1658 WRITE_ONCE(execlists->active, execlists->inflight)({ typeof(execlists->active) __tmp = (execlists->inflight
); *(volatile typeof(execlists->active) *)&(execlists->
active) = __tmp; __tmp; })
;
1659
1660 /* Having cancelled all outstanding process_csb(), stop their timers */
1661 GEM_BUG_ON(execlists->pending[0])((void)0);
1662 cancel_timer(&execlists->timer);
1663 cancel_timer(&execlists->preempt);
1664
1665 return inactive;
1666}
1667
1668/*
1669 * Starting with Gen12, the status has a new format:
1670 *
1671 * bit 0: switched to new queue
1672 * bit 1: reserved
1673 * bit 2: semaphore wait mode (poll or signal), only valid when
1674 * switch detail is set to "wait on semaphore"
1675 * bits 3-5: engine class
1676 * bits 6-11: engine instance
1677 * bits 12-14: reserved
1678 * bits 15-25: sw context id of the lrc the GT switched to
1679 * bits 26-31: sw counter of the lrc the GT switched to
1680 * bits 32-35: context switch detail
1681 * - 0: ctx complete
1682 * - 1: wait on sync flip
1683 * - 2: wait on vblank
1684 * - 3: wait on scanline
1685 * - 4: wait on semaphore
1686 * - 5: context preempted (not on SEMAPHORE_WAIT or
1687 * WAIT_FOR_EVENT)
1688 * bit 36: reserved
1689 * bits 37-43: wait detail (for switch detail 1 to 4)
1690 * bits 44-46: reserved
1691 * bits 47-57: sw context id of the lrc the GT switched away from
1692 * bits 58-63: sw counter of the lrc the GT switched away from
1693 *
1694 * Xe_HP csb shuffles things around compared to TGL:
1695 *
1696 * bits 0-3: context switch detail (same possible values as TGL)
1697 * bits 4-9: engine instance
1698 * bits 10-25: sw context id of the lrc the GT switched to
1699 * bits 26-31: sw counter of the lrc the GT switched to
1700 * bit 32: semaphore wait mode (poll or signal), Only valid when
1701 * switch detail is set to "wait on semaphore"
1702 * bit 33: switched to new queue
1703 * bits 34-41: wait detail (for switch detail 1 to 4)
1704 * bits 42-57: sw context id of the lrc the GT switched away from
1705 * bits 58-63: sw counter of the lrc the GT switched away from
1706 */
1707static inline bool_Bool
1708__gen12_csb_parse(bool_Bool ctx_to_valid, bool_Bool ctx_away_valid, bool_Bool new_queue,
1709 u8 switch_detail)
1710{
1711 /*
1712 * The context switch detail is not guaranteed to be 5 when a preemption
1713 * occurs, so we can't just check for that. The check below works for
1714 * all the cases we care about, including preemptions of WAIT
1715 * instructions and lite-restore. Preempt-to-idle via the CTRL register
1716 * would require some extra handling, but we don't support that.
1717 */
1718 if (!ctx_away_valid || new_queue) {
1719 GEM_BUG_ON(!ctx_to_valid)((void)0);
1720 return true1;
1721 }
1722
1723 /*
1724 * switch detail = 5 is covered by the case above and we do not expect a
1725 * context switch on an unsuccessful wait instruction since we always
1726 * use polling mode.
1727 */
1728 GEM_BUG_ON(switch_detail)((void)0);
1729 return false0;
1730}
1731
1732static bool_Bool xehp_csb_parse(const u64 csb)
1733{
1734 return __gen12_csb_parse(XEHP_CSB_CTX_VALID(lower_32_bits(csb))(((typeof((((~0UL) >> (64 - (31) - 1)) & ((~0UL) <<
(10)))))(((((u32)(csb))) & ((((~0UL) >> (64 - (31)
- 1)) & ((~0UL) << (10))))) >> (__builtin_ffsll
((((~0UL) >> (64 - (31) - 1)) & ((~0UL) << (10
)))) - 1))) != 0xFFFF)
, /* cxt to */
1735 XEHP_CSB_CTX_VALID(upper_32_bits(csb))(((typeof((((~0UL) >> (64 - (31) - 1)) & ((~0UL) <<
(10)))))(((((u32)(((csb) >> 16) >> 16))) & (
(((~0UL) >> (64 - (31) - 1)) & ((~0UL) << (10
))))) >> (__builtin_ffsll((((~0UL) >> (64 - (31) -
1)) & ((~0UL) << (10)))) - 1))) != 0xFFFF)
, /* cxt away */
1736 upper_32_bits(csb)((u32)(((csb) >> 16) >> 16)) & XEHP_CTX_STATUS_SWITCHED_TO_NEW_QUEUE(1UL << (1)),
1737 GEN12_CTX_SWITCH_DETAIL(lower_32_bits(csb))((((u32)(csb))) & 0xF));
1738}
1739
1740static bool_Bool gen12_csb_parse(const u64 csb)
1741{
1742 return __gen12_csb_parse(GEN12_CSB_CTX_VALID(lower_32_bits(csb))(((typeof((((~0UL) >> (64 - (25) - 1)) & ((~0UL) <<
(15)))))(((((u32)(csb))) & ((((~0UL) >> (64 - (25)
- 1)) & ((~0UL) << (15))))) >> (__builtin_ffsll
((((~0UL) >> (64 - (25) - 1)) & ((~0UL) << (15
)))) - 1))) != 0x7FF)
, /* cxt to */
1743 GEN12_CSB_CTX_VALID(upper_32_bits(csb))(((typeof((((~0UL) >> (64 - (25) - 1)) & ((~0UL) <<
(15)))))(((((u32)(((csb) >> 16) >> 16))) & (
(((~0UL) >> (64 - (25) - 1)) & ((~0UL) << (15
))))) >> (__builtin_ffsll((((~0UL) >> (64 - (25) -
1)) & ((~0UL) << (15)))) - 1))) != 0x7FF)
, /* cxt away */
1744 lower_32_bits(csb)((u32)(csb)) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE(0x1),
1745 GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))((((u32)(((csb) >> 16) >> 16))) & 0xF));
1746}
1747
1748static bool_Bool gen8_csb_parse(const u64 csb)
1749{
1750 return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE(1 << 0) | GEN8_CTX_STATUS_PREEMPTED(1 << 1));
1751}
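
For the Gen12/TGL layout documented above, the parse helpers boil down to a few shifts and masks: an sw context id field of all ones (0x7ff) marks the slot as invalid, bit 0 of the low word flags a switch to a new queue, and bits 32-35 carry the switch detail. A self-contained decoding sketch using only those documented bit positions (not the driver's macro machinery), applied to a hypothetical CSB value:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Bit positions from the Gen12 (TGL) CSB layout comment above. */
#define CSB_NEW_QUEUE(lo)	((lo) & 0x1)		/* bit 0 */
#define CSB_CTX_TO_ID(lo)	(((lo) >> 15) & 0x7ff)	/* bits 15-25 */
#define CSB_SWITCH_DETAIL(hi)	((hi) & 0xf)		/* bits 32-35 */
#define CSB_CTX_AWAY_ID(hi)	(((hi) >> 15) & 0x7ff)	/* bits 47-57 */

/* An all-ones sw context id means "no context in that slot". */
static bool ctx_valid(uint32_t id)
{
	return id != 0x7ff;
}

int main(void)
{
	uint64_t csb = 0x07ff800000000801ULL;	/* hypothetical entry */
	uint32_t lo = (uint32_t)csb;
	uint32_t hi = (uint32_t)(csb >> 32);
	bool to_valid = ctx_valid(CSB_CTX_TO_ID(lo));
	bool away_valid = ctx_valid(CSB_CTX_AWAY_ID(hi));
	bool new_queue = CSB_NEW_QUEUE(lo);

	printf("to_valid=%d away_valid=%d new_queue=%d detail=%u\n",
	       to_valid, away_valid, new_queue,
	       (unsigned int)CSB_SWITCH_DETAIL(hi));

	/*
	 * Mirrors __gen12_csb_parse(): promote when the outgoing slot is
	 * empty or the hardware switched to a new queue.
	 */
	printf("promote=%d\n", !away_valid || new_queue);
	return 0;
}
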
1752
1753static noinline__attribute__((__noinline__)) u64
1754wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb)
1755{
1756 u64 entry;
1757
1758 /*
1759 * Reading from the HWSP has one particular advantage: we can detect
1760 * a stale entry. Since the write into HWSP is broken, we have no reason
1761 * to trust the HW at all; the mmio entry may equally be unordered, so
1762 * we prefer the path that is self-checking and as a last resort,
1763 * return the mmio value.
1764 *
1765 * tgl,dg1:HSDES#22011327657
1766 */
1767 preempt_disable();
1768 if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)({ extern char _ctassert[(!(!__builtin_constant_p(10))) ? 1 :
-1 ] __attribute__((__unused__)); extern char _ctassert[(!((
10) > 50000)) ? 1 : -1 ] __attribute__((__unused__)); ({ int
cpu, ret, timeout = ((10)) * 1000; u64 base; do { } while (0
); if (!(1)) { preempt_disable(); cpu = (({struct cpu_info *__ci
; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof
(struct cpu_info, ci_self))); __ci;})->ci_cpuid); } base =
local_clock(); for (;;) { u64 now = local_clock(); if (!(1))
preempt_enable(); __asm volatile("" : : : "memory"); if (((entry
= ({ typeof(*csb) __tmp = *(volatile typeof(*csb) *)&(*csb
); membar_datadep_consumer(); __tmp; })) != -1)) { ret = 0; break
; } if (now - base >= timeout) { ret = -60; break; } cpu_relax
(); if (!(1)) { preempt_disable(); if (__builtin_expect(!!(cpu
!= (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0"
: "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_cpuid)), 0)) { timeout -= now - base; cpu
= (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" :
"=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self
))); __ci;})->ci_cpuid); base = local_clock(); } } } ret; }
); })
) {
1769 int idx = csb - engine->execlists.csb_status;
1770 int status;
1771
1772 status = GEN8_EXECLISTS_STATUS_BUF0x370;
1773 if (idx >= 6) {
1774 status = GEN11_EXECLISTS_STATUS_BUF20x3c0;
1775 idx -= 6;
1776 }
1777 status += sizeof(u64) * idx;
1778
1779 entry = intel_uncore_read64(engine->uncore,
1780 _MMIO(engine->mmio_base + status)((const i915_reg_t){ .reg = (engine->mmio_base + status) }
)
);
1781 }
1782 preempt_enable();
1783
1784 return entry;
1785}
1786
1787static u64 csb_read(const struct intel_engine_cs *engine, u64 * const csb)
1788{
1789 u64 entry = READ_ONCE(*csb)({ typeof(*csb) __tmp = *(volatile typeof(*csb) *)&(*csb)
; membar_datadep_consumer(); __tmp; })
;
1790
1791 /*
1792 * Unfortunately, the GPU does not always serialise its write
1793 * of the CSB entries before its write of the CSB pointer, at least
1794 * from the perspective of the CPU, using what is known as a Global
1795 * Observation Point. We may read a new CSB tail pointer, but then
1796 * read the stale CSB entries, causing us to misinterpret the
1797 * context-switch events, and eventually declare the GPU hung.
1798 *
1799 * icl:HSDES#1806554093
1800 * tgl:HSDES#22011248461
1801 */
1802 if (unlikely(entry == -1)__builtin_expect(!!(entry == -1), 0))
1803 entry = wa_csb_read(engine, csb);
1804
1805 /* Consume this entry so that we can spot its future reuse. */
1806 WRITE_ONCE(*csb, -1)({ typeof(*csb) __tmp = (-1); *(volatile typeof(*csb) *)&
(*csb) = __tmp; __tmp; })
;
1807
1808 /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */
1809 return entry;
1810}
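
csb_read() leans on a "consume marker": every entry is overwritten with -1 once it has been read, so seeing -1 again means the hardware has not yet published a fresh value and the workaround path must be taken. A minimal sketch of that pattern, with a bounded poll standing in for the driver's wait_for_atomic_us() and mmio fallback:

#include <stdint.h>
#include <stdio.h>

#define CSB_STALE ((uint64_t)-1)

/* Stand-in for one HWSP-backed CSB slot; illustrative only. */
static volatile uint64_t csb_slot = CSB_STALE;

static uint64_t consume_entry(volatile uint64_t *slot)
{
	uint64_t entry = *slot;
	int retries = 10;

	/*
	 * The stale marker is still there: the producer's write has not
	 * become visible yet, so poll a little (the driver would fall
	 * back to reading the mmio status buffer at this point).
	 */
	while (entry == CSB_STALE && retries--)
		entry = *slot;

	/* Mark the slot consumed so its future reuse can be spotted. */
	*slot = CSB_STALE;
	return entry;
}

int main(void)
{
	csb_slot = 0x1234;	/* pretend the GPU wrote a new event */
	printf("entry=0x%llx\n", (unsigned long long)consume_entry(&csb_slot));
	printf("slot stale again: %d\n", csb_slot == CSB_STALE);
	return 0;
}
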
1811
1812static void new_timeslice(struct intel_engine_execlists *el)
1813{
1814 /* By cancelling, we will start afresh in start_timeslice() */
1815 cancel_timer(&el->timer);
1816}
1817
1818static struct i915_request **
1819process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
1820{
1821 struct intel_engine_execlists * const execlists = &engine->execlists;
1822 u64 * const buf = execlists->csb_status;
1823 const u8 num_entries = execlists->csb_size;
1824 struct i915_request **prev;
1825 u8 head, tail;
1826
1827 /*
1828 * As we modify our execlists state tracking we require exclusive
1829 * access. Either we are inside the tasklet, or the tasklet is disabled
1830 * and we assume that is only inside the reset paths and so serialised.
1831 */
1832 GEM_BUG_ON(!tasklet_is_locked(&engine->sched_engine->tasklet) &&((void)0)
1833 !reset_in_progress(engine))((void)0);
1834
1835 /*
1836 * Note that csb_write, csb_status may be either in HWSP or mmio.
1837 * When reading from the csb_write mmio register, we have to be
1838 * careful to only use the GEN8_CSB_WRITE_PTR portion, which is
1839 * the low 4 bits. As it happens we know the next 4 bits are always
1840 * zero and so we can simply mask off the low u8 of the register
1841 * and treat it identically to reading from the HWSP (without having
1842 * to use explicit shifting and masking, and probably bifurcating
1843 * the code to handle the legacy mmio read).
1844 */
1845 head = execlists->csb_head;
1846 tail = READ_ONCE(*execlists->csb_write)({ typeof(*execlists->csb_write) __tmp = *(volatile typeof
(*execlists->csb_write) *)&(*execlists->csb_write);
membar_datadep_consumer(); __tmp; })
;
1847 if (unlikely(head == tail)__builtin_expect(!!(head == tail), 0))
1848 return inactive;
1849
1850 /*
1851 * We will consume all events from HW, or at least pretend to.
1852 *
1853 * The sequence of events from the HW is deterministic, and derived
1854 * from our writes to the ELSP, with a smidgen of variability for
1855 * the arrival of the asynchronous requests wrt the inflight
1856 * execution. If the HW sends an event that does not correspond with
1857 * the one we are expecting, we have to abandon all hope as we lose
1858 * all tracking of what the engine is actually executing. We will
1859 * only detect we are out of sequence with the HW when we get an
1860 * 'impossible' event because we have already drained our own
1861 * preemption/promotion queue. If this occurs, we know that we likely
1862 * lost track of execution earlier and must unwind and restart; the
1863 * simplest way is to stop processing the event queue and force the
1864 * engine to reset.
1865 */
1866 execlists->csb_head = tail;
1867 ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1868
1869 /*
1870 * Hopefully paired with a wmb() in HW!
1871 *
1872 * We must complete the read of the write pointer before any reads
1873 * from the CSB, so that we do not see stale values. Without an rmb
1874 * (lfence) the HW may speculatively perform the CSB[] reads *before*
1875 * we perform the READ_ONCE(*csb_write).
1876 */
1877 rmb()do { __asm volatile("lfence" ::: "memory"); } while (0);
1878
1879 /* Remember who was last running under the timer */
1880 prev = inactive;
1881 *prev = NULL((void *)0);
1882
1883 do {
1884 bool_Bool promote;
1885 u64 csb;
1886
1887 if (++head == num_entries)
1888 head = 0;
1889
1890 /*
1891 * We are flying near dragons again.
1892 *
1893 * We hold a reference to the request in execlist_port[]
1894 * but no more than that. We are operating in softirq
1895 * context and so cannot hold any mutex or sleep. That
1896 * prevents us from stopping the requests we are processing
1897 * in port[] from being retired simultaneously (the
1898 * breadcrumb will be complete before we see the
1899 * context-switch). As we only hold the reference to the
1900 * request, any pointer chasing underneath the request
1901 * is subject to a potential use-after-free. Thus we
1902 * store all of the bookkeeping within port[] as
1903 * required, and avoid using unguarded pointers beneath
1904 * request itself. The same applies to the atomic
1905 * status notifier.
1906 */
1907
1908 csb = csb_read(engine, buf + head);
1909 ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1910 head, upper_32_bits(csb), lower_32_bits(csb))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1911
1912 if (GRAPHICS_VER_FULL(engine->i915)(((&(engine->i915)->__runtime)->graphics.ip.ver)
<< 8 | ((&(engine->i915)->__runtime)->graphics
.ip.rel))
>= IP_VER(12, 50)((12) << 8 | (50)))
1913 promote = xehp_csb_parse(csb);
1914 else if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) >= 12)
1915 promote = gen12_csb_parse(csb);
1916 else
1917 promote = gen8_csb_parse(csb);
1918 if (promote) {
1919 struct i915_request * const *old = execlists->active;
1920
1921 if (GEM_WARN_ON(!*execlists->pending)({ __builtin_expect(!!(!!(!*execlists->pending)), 0); })) {
1922 execlists->error_interrupt |= ERROR_CSB(1UL << (31));
1923 break;
1924 }
1925
1926 ring_set_paused(engine, 0);
1927
1928 /* Point active to the new ELSP; prevent overwriting */
1929 WRITE_ONCE(execlists->active, execlists->pending)({ typeof(execlists->active) __tmp = (execlists->pending
); *(volatile typeof(execlists->active) *)&(execlists->
active) = __tmp; __tmp; })
;
1930 smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* notify execlists_active() */
1931
1932 /* cancel old inflight, prepare for switch */
1933 trace_ports(execlists, "preempted", old);
1934 while (*old)
1935 *inactive++ = *old++;
1936
1937 /* switch pending to inflight */
1938 GEM_BUG_ON(!assert_pending_valid(execlists, "promote"))((void)0);
1939 copy_ports(execlists->inflight,
1940 execlists->pending,
1941 execlists_num_ports(execlists));
1942 smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* complete the seqlock */
1943 WRITE_ONCE(execlists->active, execlists->inflight)({ typeof(execlists->active) __tmp = (execlists->inflight
); *(volatile typeof(execlists->active) *)&(execlists->
active) = __tmp; __tmp; })
;
1944
1945 /* XXX Magic delay for tgl */
1946 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x3a0) })))
;
1947
1948 WRITE_ONCE(execlists->pending[0], NULL)({ typeof(execlists->pending[0]) __tmp = (((void *)0)); *(
volatile typeof(execlists->pending[0]) *)&(execlists->
pending[0]) = __tmp; __tmp; })
;
1949 } else {
1950 if (GEM_WARN_ON(!*execlists->active)({ __builtin_expect(!!(!!(!*execlists->active)), 0); })) {
1951 execlists->error_interrupt |= ERROR_CSB(1UL << (31));
1952 break;
1953 }
1954
1955 /* port0 completed, advanced to port1 */
1956 trace_ports(execlists, "completed", execlists->active);
1957
1958 /*
1959 * We rely on the hardware being strongly
1960 * ordered, that the breadcrumb write is
1961 * coherent (visible from the CPU) before the
1962 * user interrupt is processed. One might assume
1963 * that the breadcrumb write being before the
1964 * user interrupt and the CS event for the context
1965 * switch would therefore be before the CS event
1966 * itself...
1967 */
1968 if (GEM_SHOW_DEBUG()(0) &&
1969 !__i915_request_is_complete(*execlists->active)) {
1970 struct i915_request *rq = *execlists->active;
1971 const u32 *regs __maybe_unused__attribute__((__unused__)) =
1972 rq->context->lrc_reg_state;
1973
1974 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1975 "context completed before request!\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1976 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1977 "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1978 ENGINE_READ(engine, RING_START),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1979 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1980 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1981 ENGINE_READ(engine, RING_CTL),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1982 ENGINE_READ(engine, RING_MI_MODE))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1983 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1984 "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1985 i915_ggtt_offset(rq->ring->vma),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1986 rq->head, rq->tail,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1987 rq->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1988 lower_32_bits(rq->fence.seqno),do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1989 hwsp_seqno(rq))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1990 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1991 "ctx:{start:%08x, head:%04x, tail:%04x}, ",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1992 regs[CTX_RING_START],do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1993 regs[CTX_RING_HEAD],do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
1994 regs[CTX_RING_TAIL])do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
1995 }
1996
1997 *inactive++ = *execlists->active++;
1998
1999 GEM_BUG_ON(execlists->active - execlists->inflight >((void)0)
2000 execlists_num_ports(execlists))((void)0);
2001 }
2002 } while (head != tail);
2003
2004 /*
2005 * Gen11 has proven to fail wrt global observation point between
2006 * entry and tail update, failing on the ordering and thus
2007 * we see an old entry in the context status buffer.
2008 *
2009 * Forcibly evict the stale entries ahead of the next gpu csb update,
2010 * to increase the odds that we get fresh entries even with
2011 * non-working hardware. The cost of doing so mostly comes out in
2012 * the wash, as hardware, working or not, will need to do the
2013 * invalidation beforehand.
2014 */
2015 drm_clflush_virt_range(&buf[0], num_entries * sizeof(buf[0]));
2016
2017 /*
2018 * We assume that any event reflects a change in context flow
2019 * and merits a fresh timeslice. We reinstall the timer after
2020 * inspecting the queue to see if we need to resubmit.
2021 */
2022 if (*prev != *execlists->active) { /* elide lite-restores */
2023 struct intel_context *prev_ce = NULL((void *)0), *active_ce = NULL((void *)0);
2024
2025 /*
2026 * Note the inherent discrepancy between the HW runtime,
2027 * recorded as part of the context switch, and the CPU
2028 * adjustment for active contexts. We have to hope that
2029 * the delay in processing the CS event is very small
2030 * and consistent. It works to our advantage to have
2031 * the CPU adjustment _undershoot_ (i.e. start later than)
2032 * the CS timestamp so we never overreport the runtime
2033 * and correct ourselves later when updating from HW.
2034 */
2035 if (*prev)
2036 prev_ce = (*prev)->context;
2037 if (*execlists->active)
2038 active_ce = (*execlists->active)->context;
2039 if (prev_ce != active_ce) {
2040 if (prev_ce)
2041 lrc_runtime_stop(prev_ce);
2042 if (active_ce)
2043 lrc_runtime_start(active_ce);
2044 }
2045 new_timeslice(execlists);
2046 }
2047
2048 return inactive;
2049}
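
The event walk at the heart of process_csb() is a plain ring-buffer consume: head is the last-seen position, tail is the hardware's write pointer, and everything in (head, tail] is processed with wrap-around at csb_size. A tiny standalone version of just that loop (12 entries is used here purely as an illustrative buffer size):

#include <stdio.h>

#define NUM_ENTRIES 12	/* illustrative CSB size */

int main(void)
{
	int head = 10, tail = 2;	/* hypothetical pointers that wrap */

	if (head == tail)
		return 0;		/* nothing new from the hardware */

	/* Consume entries in (head, tail], wrapping at NUM_ENTRIES. */
	do {
		if (++head == NUM_ENTRIES)
			head = 0;
		printf("process csb[%d]\n", head);
	} while (head != tail);

	return 0;
}
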
2050
2051static void post_process_csb(struct i915_request **port,
2052 struct i915_request **last)
2053{
2054 while (port != last)
2055 execlists_schedule_out(*port++);
2056}
2057
2058static void __execlists_hold(struct i915_request *rq)
2059{
2060 DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) };
2061
2062 do {
2063 struct i915_dependency *p;
2064
2065 if (i915_request_is_active(rq))
2066 __i915_request_unsubmit(rq);
2067
2068 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2069 list_move_tail(&rq->sched.link,
2070 &rq->engine->sched_engine->hold);
2071 i915_request_set_hold(rq);
2072 RQ_TRACE(rq, "on hold\n")do { const struct i915_request *rq__ = (rq); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (rq__->
engine); do { } while (0); } while (0); } while (0)
;
2073
2074 for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link
) *__mptr = ((&(rq)->sched.waiters_list)->next); (
__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), wait_link) );}); &p->wait_link != (&(rq)->
sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *)
0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof
(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link
) );}))
{
2075 struct i915_request *w =
2076 container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p
->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof
(typeof(*w), sched) );})
;
2077
2078 if (p->flags & I915_DEPENDENCY_WEAK(1UL << (2)))
2079 continue;
2080
2081 /* Leave semaphores spinning on the other engines */
2082 if (w->engine != rq->engine)
2083 continue;
2084
2085 if (!i915_request_is_ready(w))
2086 continue;
2087
2088 if (__i915_request_is_complete(w))
2089 continue;
2090
2091 if (i915_request_on_hold(w))
2092 continue;
2093
2094 list_move_tail(&w->sched.link, &list);
2095 }
2096
2097 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof
(*rq) *)0)->sched.link ) *__mptr = ((&list)->next);
(typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof(
*rq), sched.link) );}))
;
2098 } while (rq);
2099}
2100
2101static bool_Bool execlists_hold(struct intel_engine_cs *engine,
2102 struct i915_request *rq)
2103{
2104 if (i915_request_on_hold(rq))
2105 return false0;
2106
2107 spin_lock_irq(&engine->sched_engine->lock)mtx_enter(&engine->sched_engine->lock);
2108
2109 if (__i915_request_is_complete(rq)) { /* too late! */
2110 rq = NULL((void *)0);
2111 goto unlock;
2112 }
2113
2114 /*
2115 * Transfer this request onto the hold queue to prevent it
2116 * being resubmitted to HW (and potentially completed) before we have
2117 * released it. Since we may have already submitted following
2118 * requests, we need to remove those as well.
2119 */
2120 GEM_BUG_ON(i915_request_on_hold(rq))((void)0);
2121 GEM_BUG_ON(rq->engine != engine)((void)0);
2122 __execlists_hold(rq);
2123 GEM_BUG_ON(list_empty(&engine->sched_engine->hold))((void)0);
2124
2125unlock:
2126 spin_unlock_irq(&engine->sched_engine->lock)mtx_leave(&engine->sched_engine->lock);
2127 return rq;
2128}
2129
2130static bool_Bool hold_request(const struct i915_request *rq)
2131{
2132 struct i915_dependency *p;
2133 bool_Bool result = false0;
2134
2135 /*
2136 * If one of our ancestors is on hold, we must also be on hold,
2137 * otherwise we will bypass it and execute before it.
2138 */
2139 rcu_read_lock();
2140 for_each_signaler(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->signal_link
) *__mptr = ((&(rq)->sched.signalers_list)->next);
(__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), signal_link) );}); &p->signal_link != (&(rq)
->sched.signalers_list); p = ({ const __typeof( ((__typeof
(*p) *)0)->signal_link ) *__mptr = (p->signal_link.next
); (__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), signal_link) );}))
{
2141 const struct i915_request *s =
2142 container_of(p->signaler, typeof(*s), sched)({ const __typeof( ((typeof(*s) *)0)->sched ) *__mptr = (p
->signaler); (typeof(*s) *)( (char *)__mptr - __builtin_offsetof
(typeof(*s), sched) );})
;
2143
2144 if (s->engine != rq->engine)
2145 continue;
2146
2147 result = i915_request_on_hold(s);
2148 if (result)
2149 break;
2150 }
2151 rcu_read_unlock();
2152
2153 return result;
2154}
2155
2156static void __execlists_unhold(struct i915_request *rq)
2157{
2158 DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) };
2159
2160 do {
2161 struct i915_dependency *p;
2162
2163 RQ_TRACE(rq, "hold release\n")do { const struct i915_request *rq__ = (rq); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (rq__->
engine); do { } while (0); } while (0); } while (0)
;
2164
2165 GEM_BUG_ON(!i915_request_on_hold(rq))((void)0);
2166 GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit))((void)0);
2167
2168 i915_request_clear_hold(rq);
2169 list_move_tail(&rq->sched.link,
2170 i915_sched_lookup_priolist(rq->engine->sched_engine,
2171 rq_prio(rq)));
2172 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2173
2174 /* Also release any children on this engine that are ready */
2175 for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link
) *__mptr = ((&(rq)->sched.waiters_list)->next); (
__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*p), wait_link) );}); &p->wait_link != (&(rq)->
sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *)
0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof
(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link
) );}))
{
2176 struct i915_request *w =
2177 container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p
->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof
(typeof(*w), sched) );})
;
2178
2179 if (p->flags & I915_DEPENDENCY_WEAK(1UL << (2)))
2180 continue;
2181
2182 if (w->engine != rq->engine)
2183 continue;
2184
2185 if (!i915_request_on_hold(w))
2186 continue;
2187
2188 /* Check that no other parents are also on hold */
2189 if (hold_request(w))
2190 continue;
2191
2192 list_move_tail(&w->sched.link, &list);
2193 }
2194
2195 rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof
(*rq) *)0)->sched.link ) *__mptr = ((&list)->next);
(typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof(
*rq), sched.link) );}))
;
2196 } while (rq);
2197}
2198
2199static void execlists_unhold(struct intel_engine_cs *engine,
2200 struct i915_request *rq)
2201{
2202 spin_lock_irq(&engine->sched_engine->lock)mtx_enter(&engine->sched_engine->lock);
2203
2204 /*
2205 * Move this request back to the priority queue, and all of its
2206 * children and grandchildren that were suspended along with it.
2207 */
2208 __execlists_unhold(rq);
2209
2210 if (rq_prio(rq) > engine->sched_engine->queue_priority_hint) {
2211 engine->sched_engine->queue_priority_hint = rq_prio(rq);
2212 tasklet_hi_schedule(&engine->sched_engine->tasklet);
2213 }
2214
2215 spin_unlock_irq(&engine->sched_engine->lock)mtx_leave(&engine->sched_engine->lock);
2216}
2217
2218struct execlists_capture {
2219 struct work_struct work;
2220 struct i915_request *rq;
2221 struct i915_gpu_coredump *error;
2222};
2223
2224static void execlists_capture_work(struct work_struct *work)
2225{
2226 struct execlists_capture *cap = container_of(work, typeof(*cap), work)({ const __typeof( ((typeof(*cap) *)0)->work ) *__mptr = (
work); (typeof(*cap) *)( (char *)__mptr - __builtin_offsetof(
typeof(*cap), work) );})
;
2227 const gfp_t gfp = __GFP_KSWAPD_RECLAIM0x0002 | __GFP_RETRY_MAYFAIL0 |
2228 __GFP_NOWARN0;
2229 struct intel_engine_cs *engine = cap->rq->engine;
2230 struct intel_gt_coredump *gt = cap->error->gt;
2231 struct intel_engine_capture_vma *vma;
2232
2233 /* Compress all the objects attached to the request, slow! */
2234 vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
2235 if (vma) {
2236 struct i915_vma_compress *compress =
2237 i915_vma_capture_prepare(gt);
2238
2239 intel_engine_coredump_add_vma(gt->engine, vma, compress);
2240 i915_vma_capture_finish(gt, compress);
2241 }
2242
2243 gt->simulated = gt->engine->simulated;
2244 cap->error->simulated = gt->simulated;
2245
2246 /* Publish the error state, and announce it to the world */
2247 i915_error_state_store(cap->error);
2248 i915_gpu_coredump_put(cap->error);
2249
2250 /* Return this request and all that depend upon it for signaling */
2251 execlists_unhold(engine, cap->rq);
2252 i915_request_put(cap->rq);
2253
2254 kfree(cap);
2255}
2256
2257static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
2258{
2259 const gfp_t gfp = GFP_ATOMIC0x0002 | __GFP_NOWARN0;
2260 struct execlists_capture *cap;
2261
2262 cap = kmalloc(sizeof(*cap), gfp);
2263 if (!cap)
2264 return NULL((void *)0);
2265
2266 cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
2267 if (!cap->error)
2268 goto err_cap;
2269
2270 cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp, CORE_DUMP_FLAG_NONE0x0);
2271 if (!cap->error->gt)
2272 goto err_gpu;
2273
2274 cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp, CORE_DUMP_FLAG_NONE0x0);
2275 if (!cap->error->gt->engine)
2276 goto err_gt;
2277
2278 cap->error->gt->engine->hung = true1;
2279
2280 return cap;
2281
2282err_gt:
2283 kfree(cap->error->gt);
2284err_gpu:
2285 kfree(cap->error);
2286err_cap:
2287 kfree(cap);
2288 return NULL((void *)0);
2289}
2290
2291static struct i915_request *
2292active_context(struct intel_engine_cs *engine, u32 ccid)
2293{
2294 const struct intel_engine_execlists * const el = &engine->execlists;
2295 struct i915_request * const *port, *rq;
2296
2297 /*
2298 * Use the most recent result from process_csb(), but just in case
2299 * we trigger an error (via interrupt) before the first CS event has
2300 * been written, peek at the next submission.
2301 */
2302
2303 for (port = el->active; (rq = *port); port++) {
2304 if (rq->context->lrc.ccid == ccid) {
2305 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2306 "ccid:%x found at active:%zd\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2307 ccid, port - el->active)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2308 return rq;
2309 }
2310 }
2311
2312 for (port = el->pending; (rq = *port); port++) {
2313 if (rq->context->lrc.ccid == ccid) {
2314 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2315 "ccid:%x found at pending:%zd\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2316 ccid, port - el->pending)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2317 return rq;
2318 }
2319 }
2320
2321 ENGINE_TRACE(engine, "ccid:%x not found\n", ccid)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2322 return NULL((void *)0);
2323}
2324
2325static u32 active_ccid(struct intel_engine_cs *engine)
2326{
2327 return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x234 + 4) }))
;
2328}
2329
2330static void execlists_capture(struct intel_engine_cs *engine)
2331{
2332 struct execlists_capture *cap;
2333
2334 if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1)
2335 return;
2336
2337 /*
2338 * We need to _quickly_ capture the engine state before we reset.
2339 * We are inside an atomic section (softirq) here and we are delaying
2340 * the forced preemption event.
2341 */
2342 cap = capture_regs(engine);
2343 if (!cap)
2344 return;
2345
2346 spin_lock_irq(&engine->sched_engine->lock)mtx_enter(&engine->sched_engine->lock);
2347 cap->rq = active_context(engine, active_ccid(engine));
2348 if (cap->rq) {
2349 cap->rq = active_request(cap->rq->context->timeline, cap->rq);
2350 cap->rq = i915_request_get_rcu(cap->rq);
2351 }
2352 spin_unlock_irq(&engine->sched_engine->lock)mtx_leave(&engine->sched_engine->lock);
2353 if (!cap->rq)
2354 goto err_free;
2355
2356 /*
2357 * Remove the request from the execlists queue, and take ownership
2358 * of the request. We pass it to our worker who will _slowly_ compress
2359 * all the pages the _user_ requested for debugging their batch, after
2360 * which we return it to the queue for signaling.
2361 *
2362 * By removing them from the execlists queue, we also remove the
2363 * requests from being processed by __unwind_incomplete_requests()
2364 * during the intel_engine_reset(), and so they will *not* be replayed
2365 * afterwards.
2366 *
2367 * Note that because we have not yet reset the engine at this point,
2368 * it is possible that the request we have identified as being
2369 * guilty did in fact complete, and we will then hit an arbitration
2370 * point allowing the outstanding preemption to succeed. The likelihood
2371 * of that is very low (as capturing of the engine registers should be
2372 * fast enough to run inside an irq-off atomic section!), so we will
2373 * simply hold that request accountable for being non-preemptible
2374 * long enough to force the reset.
2375 */
2376 if (!execlists_hold(engine, cap->rq))
2377 goto err_rq;
2378
2379 INIT_WORK(&cap->work, execlists_capture_work);
2380 schedule_work(&cap->work);
2381 return;
2382
2383err_rq:
2384 i915_request_put(cap->rq);
2385err_free:
2386 i915_gpu_coredump_put(cap->error);
2387 kfree(cap);
2388}
2389
2390static void execlists_reset(struct intel_engine_cs *engine, const char *msg)
2391{
2392 const unsigned int bit = I915_RESET_ENGINE2 + engine->id;
2393 unsigned long *lock = &engine->gt->reset.flags;
2394
2395 if (!intel_has_reset_engine(engine->gt))
2396 return;
2397
2398 if (test_and_set_bit(bit, lock))
2399 return;
2400
2401 ENGINE_TRACE(engine, "reset for %s\n", msg)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2402
2403 /* Mark this tasklet as disabled to avoid waiting for it to complete */
2404 tasklet_disable_nosync(&engine->sched_engine->tasklet);
2405
2406 ring_set_paused(engine, 1); /* Freeze the current request in place */
2407 execlists_capture(engine);
2408 intel_engine_reset(engine, msg);
2409
2410 tasklet_enable(&engine->sched_engine->tasklet);
2411 clear_and_wake_up_bit(bit, lock);
2412}
2413
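The test_and_set_bit()/clear_and_wake_up_bit() pair in execlists_reset() makes the per-engine reset single-owner: whoever flips the bit first performs the reset, everyone else backs off until the bit is cleared again. A sketch of that claim/release idea using C11 atomics rather than the kernel bitops; the function names here are made up:

#include <stdatomic.h>
#include <stdbool.h>

/* Returns true only if the bit was previously clear, mirroring
 * test_and_set_bit()'s "previous value" semantics. */
static bool try_claim_reset(atomic_ulong *flags, unsigned int bit)
{
    unsigned long mask = 1UL << bit;

    return !(atomic_fetch_or(flags, mask) & mask);
}

static void release_reset(atomic_ulong *flags, unsigned int bit)
{
    atomic_fetch_and(flags, ~(1UL << bit));
    /* clear_and_wake_up_bit() additionally wakes anyone waiting on the bit */
}

int main(void)
{
    atomic_ulong flags = 0;

    if (try_claim_reset(&flags, 2)) {
        /* ... capture state and reset the engine ... */
        release_reset(&flags, 2);
    }
    return 0;
}
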
2414static bool_Bool preempt_timeout(const struct intel_engine_cs *const engine)
2415{
2416 const struct timeout *t = &engine->execlists.preempt;
2417
2418 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT640)
2419 return false0;
2420
2421 if (!timer_expired(t))
2422 return false0;
2423
2424 return engine->execlists.pending[0];
2425}
2426
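preempt_timeout() above only reports a forced-preemption timeout when the preempt timer has fired and there is still a pending ELSP submission to blame. A stand-alone sketch of that decision; the wrap-tolerant deadline check merely stands in for the driver's timer_expired() helper and is not the kernel implementation:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool deadline_passed(uint32_t now, uint32_t expires)
{
    return (int32_t)(now - expires) >= 0;   /* tolerant of tick wraparound */
}

/* Timeout only counts if the timer fired *and* something is still pending. */
static bool preempt_timed_out(uint32_t now, uint32_t expires,
                              const void *pending0)
{
    return deadline_passed(now, expires) && pending0 != NULL;
}

For example, preempt_timed_out(1000, 990, rq) is true once the deadline at tick 990 has passed while a request still sits in pending[0].
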
2427/*
2428 * Check the unread Context Status Buffers and manage the submission of new
2429 * contexts to the ELSP accordingly.
2430 */
2431static void execlists_submission_tasklet(struct tasklet_struct *t)
2432{
2433 struct i915_sched_engine *sched_engine =
2434 from_tasklet(sched_engine, t, tasklet)({ const __typeof( ((typeof(*sched_engine) *)0)->tasklet )
*__mptr = (t); (typeof(*sched_engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*sched_engine), tasklet) );})
;
2435 struct intel_engine_cs * const engine = sched_engine->private_data;
2436 struct i915_request *post[2 * EXECLIST_MAX_PORTS2];
2437 struct i915_request **inactive;
2438
2439 rcu_read_lock();
2440 inactive = process_csb(engine, post);
2441 GEM_BUG_ON(inactive - post > ARRAY_SIZE(post))((void)0);
2442
2443 if (unlikely(preempt_timeout(engine))__builtin_expect(!!(preempt_timeout(engine)), 0)) {
2444 const struct i915_request *rq = *engine->execlists.active;
2445
2446 /*
2447 * If after the preempt-timeout expired, we are still on the
2448 * same active request/context as before we initiated the
2449 * preemption, reset the engine.
2450 *
2451 * However, if we have processed a CS event to switch contexts,
2452 * but not yet processed the CS event for the pending
2453 * preemption, reset the timer allowing the new context to
2454 * gracefully exit.
2455 */
2456 cancel_timer(&engine->execlists.preempt);
2457 if (rq == engine->execlists.preempt_target)
2458 engine->execlists.error_interrupt |= ERROR_PREEMPT(1UL << (30));
2459 else
2460 set_timer_ms(&engine->execlists.preempt,
2461 active_preempt_timeout(engine, rq));
2462 }
2463
2464 if (unlikely(READ_ONCE(engine->execlists.error_interrupt))__builtin_expect(!!(({ typeof(engine->execlists.error_interrupt
) __tmp = *(volatile typeof(engine->execlists.error_interrupt
) *)&(engine->execlists.error_interrupt); membar_datadep_consumer
(); __tmp; })), 0)
) {
2465 const char *msg;
2466
2467		/* Generate the error message in priority wrt the user! */
2468 if (engine->execlists.error_interrupt & GENMASK(15, 0)(((~0UL) >> (64 - (15) - 1)) & ((~0UL) << (0)
))
)
2469 msg = "CS error"; /* thrown by a user payload */
2470 else if (engine->execlists.error_interrupt & ERROR_CSB(1UL << (31)))
2471 msg = "invalid CSB event";
2472 else if (engine->execlists.error_interrupt & ERROR_PREEMPT(1UL << (30)))
2473 msg = "preemption time out";
2474 else
2475 msg = "internal error";
2476
2477 engine->execlists.error_interrupt = 0;
2478 execlists_reset(engine, msg);
2479 }
2480
2481 if (!engine->execlists.pending[0]) {
2482 execlists_dequeue_irq(engine);
2483 start_timeslice(engine);
2484 }
2485
2486 post_process_csb(post, inactive);
2487 rcu_read_unlock();
2488}
2489
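The error-message selection inside the tasklet above can be read in isolation: user-visible CS errors (the low 16 bits carrying the EIR value) win over CSB decode errors, which win over a preemption timeout. A sketch with the bit values taken from the expansions shown above:

#include <stdint.h>
#include <stdio.h>

#define ERROR_PREEMPT   (1u << 30)
#define ERROR_CSB       (1u << 31)
#define CS_EIR_MASK     0xffffu     /* GENMASK(15, 0): EIR bits from the IRQ handler */

static const char *error_msg(uint32_t error_interrupt)
{
    if (error_interrupt & CS_EIR_MASK)
        return "CS error";              /* thrown by a user payload */
    if (error_interrupt & ERROR_CSB)
        return "invalid CSB event";
    if (error_interrupt & ERROR_PREEMPT)
        return "preemption time out";
    return "internal error";
}

int main(void)
{
    printf("%s\n", error_msg(ERROR_PREEMPT | 0x1));     /* prints "CS error" */
    return 0;
}
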
2490static void execlists_irq_handler(struct intel_engine_cs *engine, u16 iir)
2491{
2492 bool_Bool tasklet = false0;
2493
2494 if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT)__builtin_expect(!!(iir & ((u32)((1UL << (3)) + 0))
), 0)
) {
2495 u32 eir;
2496
2497 /* Upper 16b are the enabling mask, rsvd for internal errors */
2498 eir = ENGINE_READ(engine, RING_EIR)intel_uncore_read(((engine))->uncore, ((const i915_reg_t){
.reg = (((engine)->mmio_base) + 0xb0) }))
& GENMASK(15, 0)(((~0UL) >> (64 - (15) - 1)) & ((~0UL) << (0)
))
;
2499 ENGINE_TRACE(engine, "CS error: %x\n", eir)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2500
2501 /* Disable the error interrupt until after the reset */
2502 if (likely(eir)__builtin_expect(!!(eir), 1)) {
2503 ENGINE_WRITE(engine, RING_EMR, ~0u)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xb4) }), (~0u))
;
2504 ENGINE_WRITE(engine, RING_EIR, eir)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xb0) }), (eir))
;
2505 WRITE_ONCE(engine->execlists.error_interrupt, eir)({ typeof(engine->execlists.error_interrupt) __tmp = (eir)
; *(volatile typeof(engine->execlists.error_interrupt) *)&
(engine->execlists.error_interrupt) = __tmp; __tmp; })
;
2506 tasklet = true1;
2507 }
2508 }
2509
2510 if (iir & GT_WAIT_SEMAPHORE_INTERRUPT((u32)((1UL << (11)) + 0))) {
2511 WRITE_ONCE(engine->execlists.yield,({ typeof(engine->execlists.yield) __tmp = (__raw_uncore_read32
(((engine))->uncore, ((const i915_reg_t){ .reg = (((engine
)->mmio_base) + 0x234 + 4) }))); *(volatile typeof(engine->
execlists.yield) *)&(engine->execlists.yield) = __tmp;
__tmp; })
2512 ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI))({ typeof(engine->execlists.yield) __tmp = (__raw_uncore_read32
(((engine))->uncore, ((const i915_reg_t){ .reg = (((engine
)->mmio_base) + 0x234 + 4) }))); *(volatile typeof(engine->
execlists.yield) *)&(engine->execlists.yield) = __tmp;
__tmp; })
;
2513 ENGINE_TRACE(engine, "semaphore yield: %08x\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2514 engine->execlists.yield)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2515 if (del_timer(&engine->execlists.timer)timeout_del((&engine->execlists.timer)))
2516 tasklet = true1;
2517 }
2518
2519 if (iir & GT_CONTEXT_SWITCH_INTERRUPT(1 << 8))
2520 tasklet = true1;
2521
2522 if (iir & GT_RENDER_USER_INTERRUPT(1 << 0))
2523 intel_engine_signal_breadcrumbs(engine);
2524
2525 if (tasklet)
2526 tasklet_hi_schedule(&engine->sched_engine->tasklet);
2527}
2528
2529static void __execlists_kick(struct intel_engine_execlists *execlists)
2530{
2531 struct intel_engine_cs *engine =
2532 container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr
= (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*engine), execlists) );})
;
2533
2534 /* Kick the tasklet for some interrupt coalescing and reset handling */
2535 tasklet_hi_schedule(&engine->sched_engine->tasklet);
2536}
2537
2538#define execlists_kick(t, member)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->member ) *__mptr = (t); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, member) );}))
\
2539 __execlists_kick(container_of(t, struct intel_engine_execlists, member)({ const __typeof( ((struct intel_engine_execlists *)0)->member
) *__mptr = (t); (struct intel_engine_execlists *)( (char *)
__mptr - __builtin_offsetof(struct intel_engine_execlists, member
) );})
)
2540
2541static void execlists_timeslice(void *arg)
2542{
2543 struct timeout *timer = (struct timeout *)arg;
2544 execlists_kick(timer, timer)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->timer ) *__mptr = (timer); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, timer) );}))
;
2545}
2546
2547static void execlists_preempt(void *arg)
2548{
2549 struct timeout *timer = (struct timeout *)arg;
2550 execlists_kick(timer, preempt)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists
*)0)->preempt ) *__mptr = (timer); (struct intel_engine_execlists
*)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists
, preempt) );}))
;
2551}
2552
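Both timer callbacks recover the engine state from a pointer to one of the execlists members via execlists_kick(), which is the usual container_of() arithmetic. A stand-alone illustration with made-up types; the int members stand in for the struct timeout fields and the macro omits the type checking the kernel version adds:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct execlists {
    int id;
    int timer;      /* stand-in for the timeslice timer */
    int preempt;    /* stand-in for the preempt timer */
};

static void kick(int *timer)
{
    struct execlists *el = container_of(timer, struct execlists, timer);

    printf("kick engine behind execlists %d\n", el->id);
}

int main(void)
{
    struct execlists el = { .id = 3 };

    kick(&el.timer);
    return 0;
}
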
2553static void queue_request(struct intel_engine_cs *engine,
2554 struct i915_request *rq)
2555{
2556 GEM_BUG_ON(!list_empty(&rq->sched.link))((void)0);
2557 list_add_tail(&rq->sched.link,
2558 i915_sched_lookup_priolist(engine->sched_engine,
2559 rq_prio(rq)));
2560 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
2561}
2562
2563static bool_Bool submit_queue(struct intel_engine_cs *engine,
2564 const struct i915_request *rq)
2565{
2566 struct i915_sched_engine *sched_engine = engine->sched_engine;
2567
2568 if (rq_prio(rq) <= sched_engine->queue_priority_hint)
2569 return false0;
2570
2571 sched_engine->queue_priority_hint = rq_prio(rq);
2572 return true1;
2573}
2574
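submit_queue() shows why queue_priority_hint exists: the tasklet is only kicked when a newly queued request outranks everything the scheduler already knows about. The same decision in a tiny stand-alone form; struct sched is a stand-in, not the driver's i915_sched_engine:

#include <stdbool.h>
#include <stdio.h>

struct sched {
    int queue_priority_hint;
};

static bool submit_needs_kick(struct sched *se, int prio)
{
    if (prio <= se->queue_priority_hint)
        return false;

    se->queue_priority_hint = prio;
    return true;
}

int main(void)
{
    struct sched se = { .queue_priority_hint = 0 };

    /* prints "0 1": only the higher-priority submission kicks the tasklet */
    printf("%d %d\n", submit_needs_kick(&se, -1), submit_needs_kick(&se, 5));
    return 0;
}
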
2575static bool_Bool ancestor_on_hold(const struct intel_engine_cs *engine,
2576 const struct i915_request *rq)
2577{
2578 GEM_BUG_ON(i915_request_on_hold(rq))((void)0);
2579 return !list_empty(&engine->sched_engine->hold) && hold_request(rq);
2580}
2581
2582static void execlists_submit_request(struct i915_request *request)
2583{
2584 struct intel_engine_cs *engine = request->engine;
2585 unsigned long flags;
2586
2587 /* Will be called from irq-context when using foreign fences. */
2588 spin_lock_irqsave(&engine->sched_engine->lock, flags)do { flags = 0; mtx_enter(&engine->sched_engine->lock
); } while (0)
;
2589
2590 if (unlikely(ancestor_on_hold(engine, request))__builtin_expect(!!(ancestor_on_hold(engine, request)), 0)) {
2591 RQ_TRACE(request, "ancestor on hold\n")do { const struct i915_request *rq__ = (request); do { const struct
intel_engine_cs *e__ __attribute__((__unused__)) = (rq__->
engine); do { } while (0); } while (0); } while (0)
;
2592 list_add_tail(&request->sched.link,
2593 &engine->sched_engine->hold);
2594 i915_request_set_hold(request);
2595 } else {
2596 queue_request(engine, request);
2597
2598 GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine))((void)0);
2599 GEM_BUG_ON(list_empty(&request->sched.link))((void)0);
2600
2601 if (submit_queue(engine, request))
2602 __execlists_kick(&engine->execlists);
2603 }
2604
2605 spin_unlock_irqrestore(&engine->sched_engine->lock, flags)do { (void)(flags); mtx_leave(&engine->sched_engine->
lock); } while (0)
;
2606}
2607
2608static int
2609__execlists_context_pre_pin(struct intel_context *ce,
2610 struct intel_engine_cs *engine,
2611 struct i915_gem_ww_ctx *ww, void **vaddr)
2612{
2613 int err;
2614
2615 err = lrc_pre_pin(ce, engine, ww, vaddr);
2616 if (err)
2617 return err;
2618
2619 if (!__test_and_set_bit(CONTEXT_INIT_BIT2, &ce->flags)) {
2620 lrc_init_state(ce, engine, *vaddr);
2621
2622 __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size);
2623 }
2624
2625 return 0;
2626}
2627
2628static int execlists_context_pre_pin(struct intel_context *ce,
2629 struct i915_gem_ww_ctx *ww,
2630 void **vaddr)
2631{
2632 return __execlists_context_pre_pin(ce, ce->engine, ww, vaddr);
2633}
2634
2635static int execlists_context_pin(struct intel_context *ce, void *vaddr)
2636{
2637 return lrc_pin(ce, ce->engine, vaddr);
2638}
2639
2640static int execlists_context_alloc(struct intel_context *ce)
2641{
2642 return lrc_alloc(ce, ce->engine);
2643}
2644
2645static void execlists_context_cancel_request(struct intel_context *ce,
2646 struct i915_request *rq)
2647{
2648 struct intel_engine_cs *engine = NULL((void *)0);
2649
2650 i915_request_active_engine(rq, &engine);
2651
2652 if (engine && intel_engine_pulse(engine))
2653 intel_gt_handle_error(engine->gt, engine->mask, 0,
2654 "request cancellation by %s",
2655 curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
->p_p->ps_comm);
2656}
2657
2658static struct intel_context *
2659execlists_create_parallel(struct intel_engine_cs **engines,
2660 unsigned int num_siblings,
2661 unsigned int width)
2662{
2663 struct intel_context *parent = NULL((void *)0), *ce, *err;
1. 'parent' initialized to a null pointer value
2664 int i;
2665
2666 GEM_BUG_ON(num_siblings != 1)((void)0);
2667
2668 for (i = 0; i < width; ++i) {
2. Assuming 'i' is >= 'width'
3. Loop condition is false. Execution continues on line 2681
2669 ce = intel_context_create(engines[i]);
2670 if (IS_ERR(ce)) {
2671 err = ce;
2672 goto unwind;
2673 }
2674
2675 if (i == 0)
2676 parent = ce;
2677 else
2678 intel_context_bind_parent_child(parent, ce);
2679 }
2680
2681 parent->parallel.fence_context = dma_fence_context_alloc(1);
4. Dereference of null pointer
2682
2683 intel_context_set_nopreempt(parent);
2684 for_each_child(parent, ce)for (ce = ({ const __typeof( ((__typeof(*ce) *)0)->parallel
.child_link ) *__mptr = ((&(parent)->parallel.child_list
)->next); (__typeof(*ce) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*ce), parallel.child_link) );}); &ce->parallel
.child_link != (&(parent)->parallel.child_list); ce = (
{ const __typeof( ((__typeof(*ce) *)0)->parallel.child_link
) *__mptr = (ce->parallel.child_link.next); (__typeof(*ce
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*ce), parallel
.child_link) );}))
2685 intel_context_set_nopreempt(ce);
2686
2687 return parent;
2688
2689unwind:
2690 if (parent)
2691 intel_context_put(parent);
2692 return err;
2693}
2694
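The warning reported at line 2681 follows from the analyzer assuming width may be 0: the loop body then never executes, parent is never assigned, and the store to parent->parallel.fence_context dereferences a null pointer. Below is a stand-alone model of that path with a hypothetical width check that would close it; the guard is not in the driver, whose callers are expected to pass width >= 1, and the parent/child bookkeeping is elided:

#include <stddef.h>
#include <stdlib.h>

struct ctx {
    unsigned long fence_context;
};

static struct ctx *create_parallel(unsigned int width)
{
    struct ctx *parent = NULL;
    unsigned int i;

    if (!width)         /* hypothetical guard for the width == 0 path */
        return NULL;

    for (i = 0; i < width; ++i) {
        struct ctx *ce = calloc(1, sizeof(*ce));

        if (!ce)
            goto unwind;
        if (i == 0)
            parent = ce;
        else
            free(ce);   /* child list handling elided in this sketch */
    }

    /* safe here: width >= 1 guarantees the first iteration set parent */
    parent->fence_context = 1;
    return parent;

unwind:
    free(parent);
    return NULL;
}

int main(void)
{
    free(create_parallel(0));   /* returns NULL, no dereference */
    free(create_parallel(2));
    return 0;
}
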
2695static const struct intel_context_ops execlists_context_ops = {
2696 .flags = COPS_HAS_INFLIGHT(1UL << (0)) | COPS_RUNTIME_CYCLES(1UL << (1)),
2697
2698 .alloc = execlists_context_alloc,
2699
2700 .cancel_request = execlists_context_cancel_request,
2701
2702 .pre_pin = execlists_context_pre_pin,
2703 .pin = execlists_context_pin,
2704 .unpin = lrc_unpin,
2705 .post_unpin = lrc_post_unpin,
2706
2707 .enter = intel_context_enter_engine,
2708 .exit = intel_context_exit_engine,
2709
2710 .reset = lrc_reset,
2711 .destroy = lrc_destroy,
2712
2713 .create_parallel = execlists_create_parallel,
2714 .create_virtual = execlists_create_virtual,
2715};
2716
2717static int emit_pdps(struct i915_request *rq)
2718{
2719 const struct intel_engine_cs * const engine = rq->engine;
2720 struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm);
2721 int err, i;
2722 u32 *cs;
2723
2724 GEM_BUG_ON(intel_vgpu_active(rq->engine->i915))((void)0);
2725
2726 /*
2727 * Beware ye of the dragons, this sequence is magic!
2728 *
2729 * Small changes to this sequence can cause anything from
2730 * GPU hangs to forcewake errors and machine lockups!
2731 */
2732
2733 cs = intel_ring_begin(rq, 2);
2734 if (IS_ERR(cs))
2735 return PTR_ERR(cs);
2736
2737 *cs++ = MI_ARB_ON_OFF(((0x0) << 29) | (0x08) << 23 | (0)) | MI_ARB_DISABLE(0<<0);
2738 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
2739 intel_ring_advance(rq, cs);
2740
2741 /* Flush any residual operations from the context load */
2742 err = engine->emit_flush(rq, EMIT_FLUSH(1UL << (1)));
2743 if (err)
2744 return err;
2745
2746 /* Magic required to prevent forcewake errors! */
2747 err = engine->emit_flush(rq, EMIT_INVALIDATE(1UL << (0)));
2748 if (err)
2749 return err;
2750
2751 cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES4 + 2);
2752 if (IS_ERR(cs))
2753 return PTR_ERR(cs);
2754
2755 /* Ensure the LRI have landed before we invalidate & continue */
2756 *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES)(((0x0) << 29) | (0x22) << 23 | (2*(2 * 4)-1)) | MI_LRI_FORCE_POSTED(1<<12);
2757 for (i = GEN8_3LVL_PDPES4; i--; ) {
2758 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
2759 u32 base = engine->mmio_base;
2760
2761 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)((const i915_reg_t){ .reg = ((base) + 0x270 + (i) * 8 + 4) }));
2762 *cs++ = upper_32_bits(pd_daddr)((u32)(((pd_daddr) >> 16) >> 16));
2763 *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)((const i915_reg_t){ .reg = ((base) + 0x270 + (i) * 8) }));
2764 *cs++ = lower_32_bits(pd_daddr)((u32)(pd_daddr));
2765 }
2766 *cs++ = MI_ARB_ON_OFF(((0x0) << 29) | (0x08) << 23 | (0)) | MI_ARB_ENABLE(1<<0);
2767 intel_ring_advance(rq, cs);
2768
2769 intel_ring_advance(rq, cs);
2770
2771 return 0;
2772}
2773
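The LRI built by emit_pdps() writes a (register, value) pair per dword of each of the four page-directory pointers, upper dword first. A sketch that just prints those register/value pairs, using the offsets visible in the expansions above (base + 0x270 + n * 8, UDW at +4); the mmio base and dma address below are made up:

#include <stdint.h>
#include <stdio.h>

#define GEN8_3LVL_PDPES 4

static unsigned int pdp_ldw(unsigned int base, int n) { return base + 0x270 + n * 8; }
static unsigned int pdp_udw(unsigned int base, int n) { return base + 0x270 + n * 8 + 4; }

int main(void)
{
    const uint64_t pd_daddr = 0x123456789000ull;    /* made-up dma address */
    const unsigned int base = 0x2000;               /* made-up mmio base */
    int i;

    for (i = GEN8_3LVL_PDPES; i--; ) {
        printf("LRI %#06x <- %#010x\n", pdp_udw(base, i),
               (unsigned int)(pd_daddr >> 32));
        printf("LRI %#06x <- %#010x\n", pdp_ldw(base, i),
               (unsigned int)pd_daddr);
    }
    return 0;
}
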
2774static int execlists_request_alloc(struct i915_request *request)
2775{
2776 int ret;
2777
2778 GEM_BUG_ON(!intel_context_is_pinned(request->context))((void)0);
2779
2780 /*
2781 * Flush enough space to reduce the likelihood of waiting after
2782 * we start building the request - in which case we will just
2783 * have to repeat work.
2784 */
2785 request->reserved_space += EXECLISTS_REQUEST_SIZE64;
2786
2787 /*
2788 * Note that after this point, we have committed to using
2789 * this request as it is being used to both track the
2790 * state of engine initialisation and liveness of the
2791 * golden renderstate above. Think twice before you try
2792 * to cancel/unwind this request now.
2793 */
2794
2795 if (!i915_vm_is_4lvl(request->context->vm)) {
2796 ret = emit_pdps(request);
2797 if (ret)
2798 return ret;
2799 }
2800
2801 /* Unconditionally invalidate GPU caches and TLBs. */
2802 ret = request->engine->emit_flush(request, EMIT_INVALIDATE(1UL << (0)));
2803 if (ret)
2804 return ret;
2805
2806 request->reserved_space -= EXECLISTS_REQUEST_SIZE64;
2807 return 0;
2808}
2809
2810static void reset_csb_pointers(struct intel_engine_cs *engine)
2811{
2812 struct intel_engine_execlists * const execlists = &engine->execlists;
2813 const unsigned int reset_value = execlists->csb_size - 1;
2814
2815 ring_set_paused(engine, 0);
2816
2817 /*
2818 * Sometimes Icelake forgets to reset its pointers on a GPU reset.
2819 * Bludgeon them with a mmio update to be sure.
2820 */
2821 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff <<
16 | reset_value << 8 | reset_value))
2822 0xffff << 16 | reset_value << 8 | reset_value)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff <<
16 | reset_value << 8 | reset_value))
;
2823 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x3a0) })))
;
2824
2825 /*
2826 * After a reset, the HW starts writing into CSB entry [0]. We
2827 * therefore have to set our HEAD pointer back one entry so that
2828 * the *first* entry we check is entry 0. To complicate this further,
2829 * as we don't wait for the first interrupt after reset, we have to
2830 * fake the HW write to point back to the last entry so that our
2831 * inline comparison of our cached head position against the last HW
2832 * write works even before the first interrupt.
2833 */
2834 execlists->csb_head = reset_value;
2835 WRITE_ONCE(*execlists->csb_write, reset_value)({ typeof(*execlists->csb_write) __tmp = (reset_value); *(
volatile typeof(*execlists->csb_write) *)&(*execlists->
csb_write) = __tmp; __tmp; })
;
2836 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0); /* Make sure this is visible to HW (paranoia?) */
2837
2838 /* Check that the GPU does indeed update the CSB entries! */
2839 memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64))__builtin_memset((execlists->csb_status), (-1), ((reset_value
+ 1) * sizeof(u64)))
;
2840 drm_clflush_virt_range(execlists->csb_status,
2841 execlists->csb_size *
2842 sizeof(execlists->csb_status));
2843
2844 /* Once more for luck and our trusty paranoia */
2845 ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff <<
16 | reset_value << 8 | reset_value))
2846 0xffff << 16 | reset_value << 8 | reset_value)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff <<
16 | reset_value << 8 | reset_value))
;
2847 ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x3a0) })))
;
2848
2849 GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value)((void)0);
2850}
2851
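The reason reset_csb_pointers() parks csb_head at csb_size - 1 is that CSB entries are consumed starting from the slot after the cached head, up to the last position the hardware wrote; faking both at the final slot makes entry 0 the first one examined after a reset. A stand-alone model of that walk, not the driver's CSB parser:

#include <stdio.h>

static void consume_csb(unsigned int *head, unsigned int tail, unsigned int size)
{
    while (*head != tail) {
        *head = (*head + 1) % size;
        printf("process CSB[%u]\n", *head);
    }
}

int main(void)
{
    unsigned int size = 12;         /* GEN11_CSB_ENTRIES */
    unsigned int head = size - 1;   /* reset_value */

    consume_csb(&head, 2, size);    /* HW has written entries 0..2 */
    return 0;
}
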
2852static void sanitize_hwsp(struct intel_engine_cs *engine)
2853{
2854 struct intel_timeline *tl;
2855
2856 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)for (tl = ({ const __typeof( ((__typeof(*tl) *)0)->engine_link
) *__mptr = ((&engine->status_page.timelines)->next
); (__typeof(*tl) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tl), engine_link) );}); &tl->engine_link != (&engine
->status_page.timelines); tl = ({ const __typeof( ((__typeof
(*tl) *)0)->engine_link ) *__mptr = (tl->engine_link.next
); (__typeof(*tl) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tl), engine_link) );}))
2857 intel_timeline_reset_seqno(tl);
2858}
2859
2860static void execlists_sanitize(struct intel_engine_cs *engine)
2861{
2862 GEM_BUG_ON(execlists_active(&engine->execlists))((void)0);
2863
2864 /*
2865 * Poison residual state on resume, in case the suspend didn't!
2866 *
2867 * We have to assume that across suspend/resume (or other loss
2868 * of control) that the contents of our pinned buffers has been
2869 * lost, replaced by garbage. Since this doesn't always happen,
2870 * let's poison such state so that we more quickly spot when
2871 * we falsely assume it has been preserved.
2872 */
2873 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
2874 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE)__builtin_memset((engine->status_page.addr), (0xdb), ((1 <<
12)))
;
2875
2876 reset_csb_pointers(engine);
2877
2878 /*
2879 * The kernel_context HWSP is stored in the status_page. As above,
2880 * that may be lost on resume/initialisation, and so we need to
2881 * reset the value in the HWSP.
2882 */
2883 sanitize_hwsp(engine);
2884
2885 /* And scrub the dirty cachelines for the HWSP */
2886 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE(1 << 12));
2887
2888 intel_engine_reset_pinned_contexts(engine);
2889}
2890
2891static void enable_error_interrupt(struct intel_engine_cs *engine)
2892{
2893 u32 status;
2894
2895 engine->execlists.error_interrupt = 0;
2896 ENGINE_WRITE(engine, RING_EMR, ~0u)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xb4) }), (~0u))
;
2897 ENGINE_WRITE(engine, RING_EIR, ~0u)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xb0) }), (~0u))
; /* clear all existing errors */
2898
2899 status = ENGINE_READ(engine, RING_ESR)intel_uncore_read(((engine))->uncore, ((const i915_reg_t){
.reg = (((engine)->mmio_base) + 0xb8) }))
;
2900 if (unlikely(status)__builtin_expect(!!(status), 0)) {
2901 drm_err(&engine->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "engine '%s' resumed still in error: %08x\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , engine
->name, status)
2902 "engine '%s' resumed still in error: %08x\n",printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "engine '%s' resumed still in error: %08x\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , engine
->name, status)
2903 engine->name, status)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "engine '%s' resumed still in error: %08x\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , engine
->name, status)
;
2904 __intel_gt_reset(engine->gt, engine->mask);
2905 }
2906
2907 /*
2908 * On current gen8+, we have 2 signals to play with
2909 *
2910	 * - I915_ERROR_INSTRUCTION (bit 0)
2911 *
2912 * Generate an error if the command parser encounters an invalid
2913 * instruction
2914 *
2915 * This is a fatal error.
2916 *
2917 * - CP_PRIV (bit 2)
2918 *
2919 * Generate an error on privilege violation (where the CP replaces
2920 * the instruction with a no-op). This also fires for writes into
2921 * read-only scratch pages.
2922 *
2923 * This is a non-fatal error, parsing continues.
2924 *
2925 * * there are a few others defined for odd HW that we do not use
2926 *
2927 * Since CP_PRIV fires for cases where we have chosen to ignore the
2928 * error (as the HW is validating and suppressing the mistakes), we
2929 * only unmask the instruction error bit.
2930 */
2931 ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xb4) }), (~(1 << 0
)))
;
2932}
2933
2934static void enable_execlists(struct intel_engine_cs *engine)
2935{
2936 u32 mode;
2937
2938 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
2939
2940 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
2941
2942 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) >= 11)
2943 mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)({ typeof((1 << 3)) _a = ((1 << 3)); ({ if (__builtin_constant_p
(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while
(0); if (__builtin_constant_p(_a) && __builtin_constant_p
(_a)) do { } while (0); ((_a) << 16 | (_a)); }); })
;
2944 else
2945 mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)({ typeof((1 << 15)) _a = ((1 << 15)); ({ if (__builtin_constant_p
(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while
(0); if (__builtin_constant_p(_a) && __builtin_constant_p
(_a)) do { } while (0); ((_a) << 16 | (_a)); }); })
;
2946 ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode)__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x29c) }), (mode))
;
2947
2948 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x9c) }), ((({ if (__builtin_constant_p
((((u32)((1UL << (8)) + 0))))) do { } while (0); if (__builtin_constant_p
(0)) do { } while (0); if (__builtin_constant_p((((u32)((1UL <<
(8)) + 0)))) && __builtin_constant_p(0)) do { } while
(0); (((((u32)((1UL << (8)) + 0)))) << 16 | (0))
; }))))
;
2949
2950 ENGINE_WRITE_FW(engine,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset
(engine->status_page.vma)))
2951 RING_HWS_PGA,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset
(engine->status_page.vma)))
2952 i915_ggtt_offset(engine->status_page.vma))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset
(engine->status_page.vma)))
;
2953 ENGINE_POSTING_READ(engine, RING_HWS_PGA)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) })))
;
2954
2955 enable_error_interrupt(engine);
2956}
2957
2958static int execlists_resume(struct intel_engine_cs *engine)
2959{
2960 intel_mocs_init_engine(engine);
2961 intel_breadcrumbs_reset(engine->breadcrumbs);
2962
2963 enable_execlists(engine);
2964
2965 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE(1UL << (11)))
2966 xehp_enable_ccs_engines(engine);
2967
2968 return 0;
2969}
2970
2971static void execlists_reset_prepare(struct intel_engine_cs *engine)
2972{
2973 ENGINE_TRACE(engine, "depth<-%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
2974 atomic_read(&engine->sched_engine->tasklet.count))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
2975
2976 /*
2977 * Prevent request submission to the hardware until we have
2978 * completed the reset in i915_gem_reset_finish(). If a request
2979 * is completed by one engine, it may then queue a request
2980 * to a second via its execlists->tasklet *just* as we are
2981 * calling engine->resume() and also writing the ELSP.
2982 * Turning off the execlists->tasklet until the reset is over
2983 * prevents the race.
2984 */
2985 __tasklet_disable_sync_once(&engine->sched_engine->tasklet);
2986 GEM_BUG_ON(!reset_in_progress(engine))((void)0);
2987
2988 /*
2989	 * We stop the engines, otherwise we might get a failed reset and a
2990	 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
2991	 * from a system hang if a batchbuffer is progressing when
2992	 * the reset is issued, regardless of the READY_TO_RESET ack.
2993 * Thus assume it is best to stop engines on all gens
2994 * where we have a gpu reset.
2995 *
2996 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
2997 *
2998 * FIXME: Wa for more modern gens needs to be validated
2999 */
3000 ring_set_paused(engine, 1);
3001 intel_engine_stop_cs(engine);
3002
3003 /*
3004 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
3005 * to wait for any pending mi force wakeups
3006 */
3007 if (IS_GRAPHICS_VER(engine->i915, 11, 12)(((&(engine->i915)->__runtime)->graphics.ip.ver)
>= (11) && ((&(engine->i915)->__runtime
)->graphics.ip.ver) <= (12))
)
3008 intel_engine_wait_for_pending_mi_fw(engine);
3009
3010 engine->execlists.reset_ccid = active_ccid(engine);
3011}
3012
3013static struct i915_request **
3014reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
3015{
3016 struct intel_engine_execlists * const execlists = &engine->execlists;
3017
3018 drm_clflush_virt_range(execlists->csb_write,
3019 sizeof(execlists->csb_write[0]));
3020
3021 inactive = process_csb(engine, inactive); /* drain preemption events */
3022
3023 /* Following the reset, we need to reload the CSB read/write pointers */
3024 reset_csb_pointers(engine);
3025
3026 return inactive;
3027}
3028
3029static void
3030execlists_reset_active(struct intel_engine_cs *engine, bool_Bool stalled)
3031{
3032 struct intel_context *ce;
3033 struct i915_request *rq;
3034 u32 head;
3035
3036 /*
3037 * Save the currently executing context, even if we completed
3038 * its request, it was still running at the time of the
3039 * reset and will have been clobbered.
3040 */
3041 rq = active_context(engine, engine->execlists.reset_ccid);
3042 if (!rq)
3043 return;
3044
3045 ce = rq->context;
3046 GEM_BUG_ON(!i915_vma_is_pinned(ce->state))((void)0);
3047
3048 if (__i915_request_is_complete(rq)) {
3049 /* Idle context; tidy up the ring so we can restart afresh */
3050 head = intel_ring_wrap(ce->ring, rq->tail);
3051 goto out_replay;
3052 }
3053
3054 /* We still have requests in-flight; the engine should be active */
3055 GEM_BUG_ON(!intel_engine_pm_is_awake(engine))((void)0);
3056
3057 /* Context has requests still in-flight; it should not be idle! */
3058 GEM_BUG_ON(i915_active_is_idle(&ce->active))((void)0);
3059
3060 rq = active_request(ce->timeline, rq);
3061 head = intel_ring_wrap(ce->ring, rq->head);
3062 GEM_BUG_ON(head == ce->ring->tail)((void)0);
3063
3064 /*
3065 * If this request hasn't started yet, e.g. it is waiting on a
3066 * semaphore, we need to avoid skipping the request or else we
3067 * break the signaling chain. However, if the context is corrupt
3068 * the request will not restart and we will be stuck with a wedged
3069 * device. It is quite often the case that if we issue a reset
3070 * while the GPU is loading the context image, that the context
3071 * image becomes corrupt.
3072 *
3073 * Otherwise, if we have not started yet, the request should replay
3074 * perfectly and we do not need to flag the result as being erroneous.
3075 */
3076 if (!__i915_request_has_started(rq))
3077 goto out_replay;
3078
3079 /*
3080 * If the request was innocent, we leave the request in the ELSP
3081 * and will try to replay it on restarting. The context image may
3082 * have been corrupted by the reset, in which case we may have
3083 * to service a new GPU hang, but more likely we can continue on
3084 * without impact.
3085 *
3086 * If the request was guilty, we presume the context is corrupt
3087 * and have to at least restore the RING register in the context
3088 * image back to the expected values to skip over the guilty request.
3089 */
3090 __i915_request_reset(rq, stalled);
3091
3092 /*
3093 * We want a simple context + ring to execute the breadcrumb update.
3094 * We cannot rely on the context being intact across the GPU hang,
3095 * so clear it and rebuild just what we need for the breadcrumb.
3096 * All pending requests for this context will be zapped, and any
3097 * future request will be after userspace has had the opportunity
3098 * to recreate its own state.
3099 */
3100out_replay:
3101 ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3102 head, ce->ring->tail)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3103 lrc_reset_regs(ce, engine);
3104 ce->lrc.lrca = lrc_update_regs(ce, engine, head);
3105}
3106
3107static void execlists_reset_csb(struct intel_engine_cs *engine, bool_Bool stalled)
3108{
3109 struct intel_engine_execlists * const execlists = &engine->execlists;
3110 struct i915_request *post[2 * EXECLIST_MAX_PORTS2];
3111 struct i915_request **inactive;
3112
3113 rcu_read_lock();
3114 inactive = reset_csb(engine, post);
3115
3116 execlists_reset_active(engine, true1);
3117
3118 inactive = cancel_port_requests(execlists, inactive);
3119 post_process_csb(post, inactive);
3120 rcu_read_unlock();
3121}
3122
3123static void execlists_reset_rewind(struct intel_engine_cs *engine, bool_Bool stalled)
3124{
3125 unsigned long flags;
3126
3127 ENGINE_TRACE(engine, "\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3128
3129 /* Process the csb, find the guilty context and throw away */
3130 execlists_reset_csb(engine, stalled);
3131
3132 /* Push back any incomplete requests for replay after the reset. */
3133 rcu_read_lock();
3134 spin_lock_irqsave(&engine->sched_engine->lock, flags)do { flags = 0; mtx_enter(&engine->sched_engine->lock
); } while (0)
;
3135 __unwind_incomplete_requests(engine);
3136 spin_unlock_irqrestore(&engine->sched_engine->lock, flags)do { (void)(flags); mtx_leave(&engine->sched_engine->
lock); } while (0)
;
3137 rcu_read_unlock();
3138}
3139
3140static void nop_submission_tasklet(struct tasklet_struct *t)
3141{
3142 struct i915_sched_engine *sched_engine =
3143 from_tasklet(sched_engine, t, tasklet)({ const __typeof( ((typeof(*sched_engine) *)0)->tasklet )
*__mptr = (t); (typeof(*sched_engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*sched_engine), tasklet) );})
;
3144 struct intel_engine_cs * const engine = sched_engine->private_data;
3145
3146 /* The driver is wedged; don't process any more events. */
3147 WRITE_ONCE(engine->sched_engine->queue_priority_hint, INT_MIN)({ typeof(engine->sched_engine->queue_priority_hint) __tmp
= ((-0x7fffffff-1)); *(volatile typeof(engine->sched_engine
->queue_priority_hint) *)&(engine->sched_engine->
queue_priority_hint) = __tmp; __tmp; })
;
3148}
3149
3150static void execlists_reset_cancel(struct intel_engine_cs *engine)
3151{
3152 struct intel_engine_execlists * const execlists = &engine->execlists;
3153 struct i915_sched_engine * const sched_engine = engine->sched_engine;
3154 struct i915_request *rq, *rn;
3155 struct rb_node *rb;
3156 unsigned long flags;
3157
3158 ENGINE_TRACE(engine, "\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3159
3160 /*
3161 * Before we call engine->cancel_requests(), we should have exclusive
3162 * access to the submission state. This is arranged for us by the
3163 * caller disabling the interrupt generation, the tasklet and other
3164 * threads that may then access the same state, giving us a free hand
3165 * to reset state. However, we still need to let lockdep be aware that
3166 * we know this state may be accessed in hardirq context, so we
3167 * disable the irq around this manipulation and we want to keep
3168 * the spinlock focused on its duties and not accidentally conflate
3169 * coverage to the submission's irq state. (Similarly, although we
3170 * shouldn't need to disable irq around the manipulation of the
3171 * submission's irq state, we also wish to remind ourselves that
3172 * it is irq state.)
3173 */
3174 execlists_reset_csb(engine, true1);
3175
3176 rcu_read_lock();
3177 spin_lock_irqsave(&engine->sched_engine->lock, flags)do { flags = 0; mtx_enter(&engine->sched_engine->lock
); } while (0)
;
3178
3179 /* Mark all executing requests as skipped. */
3180 list_for_each_entry(rq, &engine->sched_engine->requests, sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&engine->sched_engine->requests)->
next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), sched.link) );}); &rq->sched.link != (
&engine->sched_engine->requests); rq = ({ const __typeof
( ((__typeof(*rq) *)0)->sched.link ) *__mptr = (rq->sched
.link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*rq), sched.link) );}))
3181 i915_request_put(i915_request_mark_eio(rq));
3182 intel_engine_signal_breadcrumbs(engine);
3183
3184 /* Flush the queued requests to the timeline list (for retiring). */
3185 while ((rb = rb_first_cached(&sched_engine->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&sched_engine
->queue)->rb_root), -1)
)) {
3186 struct i915_priolist *p = to_priolist(rb);
3187
3188 priolist_for_each_request_consume(rq, rn, p)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&(p)->requests)->next); (__typeof(*rq
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}), rn = ({ const __typeof( ((__typeof(*rq) *)0)->
sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}); &rq->sched.link != (&(p)->requests
); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->
sched.link ) *__mptr = (rn->sched.link.next); (__typeof(*rn
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rn), sched
.link) );}))
{
3189 if (i915_request_mark_eio(rq)) {
3190 __i915_request_submit(rq);
3191 i915_request_put(rq);
3192 }
3193 }
3194
3195 rb_erase_cached(&p->node, &sched_engine->queue)linux_root_RB_REMOVE((struct linux_root *)(&(&sched_engine
->queue)->rb_root), (&p->node))
;
3196 i915_priolist_free(p);
3197 }
3198
3199 /* On-hold requests will be flushed to timeline upon their release */
3200 list_for_each_entry(rq, &sched_engine->hold, sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&sched_engine->hold)->next); (__typeof
(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}); &rq->sched.link != (&sched_engine->
hold); rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched
.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq) *)
( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched.link
) );}))
3201 i915_request_put(i915_request_mark_eio(rq));
3202
3203 /* Cancel all attached virtual engines */
3204 while ((rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists
->virtual)->rb_root), -1)
)) {
3205 struct virtual_engine *ve =
3206 rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id
].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof
(typeof(*ve), nodes[engine->id].rb) );})
;
3207
3208 rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists
->virtual)->rb_root), (rb))
;
3209 RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb));
3210
3211 spin_lock(&ve->base.sched_engine->lock)mtx_enter(&ve->base.sched_engine->lock);
3212 rq = fetch_and_zero(&ve->request)({ typeof(*&ve->request) __T = *(&ve->request);
*(&ve->request) = (typeof(*&ve->request))0; __T
; })
;
3213 if (rq) {
3214 if (i915_request_mark_eio(rq)) {
3215 rq->engine = engine;
3216 __i915_request_submit(rq);
3217 i915_request_put(rq);
3218 }
3219 i915_request_put(rq);
3220
3221 ve->base.sched_engine->queue_priority_hint = INT_MIN(-0x7fffffff-1);
3222 }
3223 spin_unlock(&ve->base.sched_engine->lock)mtx_leave(&ve->base.sched_engine->lock);
3224 }
3225
3226 /* Remaining _unready_ requests will be nop'ed when submitted */
3227
3228 sched_engine->queue_priority_hint = INT_MIN(-0x7fffffff-1);
3229 sched_engine->queue = RB_ROOT_CACHED(struct rb_root_cached) { ((void *)0) };
3230
3231 GEM_BUG_ON(__tasklet_is_enabled(&engine->sched_engine->tasklet))((void)0);
3232 engine->sched_engine->tasklet.callback = nop_submission_tasklet;
3233
3234 spin_unlock_irqrestore(&engine->sched_engine->lock, flags)do { (void)(flags); mtx_leave(&engine->sched_engine->
lock); } while (0)
;
3235 rcu_read_unlock();
3236}
3237
3238static void execlists_reset_finish(struct intel_engine_cs *engine)
3239{
3240 struct intel_engine_execlists * const execlists = &engine->execlists;
3241
3242 /*
3243 * After a GPU reset, we may have requests to replay. Do so now while
3244 * we still have the forcewake to be sure that the GPU is not allowed
3245 * to sleep before we restart and reload a context.
3246 *
3247 * If the GPU reset fails, the engine may still be alive with requests
3248 * inflight. We expect those to complete, or for the device to be
3249 * reset as the next level of recovery, and as a final resort we
3250 * will declare the device wedged.
3251 */
3252 GEM_BUG_ON(!reset_in_progress(engine))((void)0);
3253
3254 /* And kick in case we missed a new request submission. */
3255 if (__tasklet_enable(&engine->sched_engine->tasklet))
3256 __execlists_kick(execlists);
3257
3258 ENGINE_TRACE(engine, "depth->%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3259 atomic_read(&engine->sched_engine->tasklet.count))do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3260}
3261
3262static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine)
3263{
3264 ENGINE_WRITE(engine, RING_IMR,intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xa8) }), (~(engine->
irq_enable_mask | engine->irq_keep_mask)))
3265 ~(engine->irq_enable_mask | engine->irq_keep_mask))intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xa8) }), (~(engine->
irq_enable_mask | engine->irq_keep_mask)))
;
3266 ENGINE_POSTING_READ(engine, RING_IMR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0xa8) })))
;
3267}
3268
3269static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
3270{
3271 ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask)intel_uncore_write(((engine))->uncore, ((const i915_reg_t)
{ .reg = (((engine)->mmio_base) + 0xa8) }), (~engine->irq_keep_mask
))
;
3272}
3273
3274static void execlists_park(struct intel_engine_cs *engine)
3275{
3276 cancel_timer(&engine->execlists.timer);
3277 cancel_timer(&engine->execlists.preempt);
3278}
3279
3280static void add_to_engine(struct i915_request *rq)
3281{
3282 lockdep_assert_held(&rq->engine->sched_engine->lock)do { (void)(&rq->engine->sched_engine->lock); } while
(0)
;
3283 list_move_tail(&rq->sched.link, &rq->engine->sched_engine->requests);
3284}
3285
3286static void remove_from_engine(struct i915_request *rq)
3287{
3288 struct intel_engine_cs *engine, *locked;
3289
3290 /*
3291 * Virtual engines complicate acquiring the engine timeline lock,
3292 * as their rq->engine pointer is not stable until under that
3293 * engine lock. The simple ploy we use is to take the lock then
3294 * check that the rq still belongs to the newly locked engine.
3295 */
3296 locked = READ_ONCE(rq->engine)({ typeof(rq->engine) __tmp = *(volatile typeof(rq->engine
) *)&(rq->engine); membar_datadep_consumer(); __tmp; }
)
;
3297 spin_lock_irq(&locked->sched_engine->lock)mtx_enter(&locked->sched_engine->lock);
3298 while (unlikely(locked != (engine = READ_ONCE(rq->engine)))__builtin_expect(!!(locked != (engine = ({ typeof(rq->engine
) __tmp = *(volatile typeof(rq->engine) *)&(rq->engine
); membar_datadep_consumer(); __tmp; }))), 0)
) {
3299 spin_unlock(&locked->sched_engine->lock)mtx_leave(&locked->sched_engine->lock);
3300 spin_lock(&engine->sched_engine->lock)mtx_enter(&engine->sched_engine->lock);
3301 locked = engine;
3302 }
3303 list_del_init(&rq->sched.link);
3304
3305 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3306 clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
3307
3308 /* Prevent further __await_execution() registering a cb, then flush */
3309 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3310
3311 spin_unlock_irq(&locked->sched_engine->lock)mtx_leave(&locked->sched_engine->lock);
3312
3313 i915_request_notify_execute_cb_imm(rq);
3314}
3315
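The lock-then-recheck ploy in remove_from_engine() can be shown on its own: rq->engine is only stable while holding the lock of the engine it currently points at, so lock, re-read, and retry until both agree. A pthread sketch with made-up types; READ_ONCE() and the irq disabling are elided:

#include <pthread.h>

struct engine {
    pthread_mutex_t lock;
};

struct request {
    struct engine *engine;  /* may be rewritten until its engine's lock is held */
};

/* Returns with the lock of the engine that rq currently belongs to held. */
static struct engine *lock_request_engine(struct request *rq)
{
    struct engine *locked = rq->engine;
    struct engine *engine;

    pthread_mutex_lock(&locked->lock);
    while (locked != (engine = rq->engine)) {
        pthread_mutex_unlock(&locked->lock);
        pthread_mutex_lock(&engine->lock);
        locked = engine;
    }
    return locked;
}

The caller then unlinks the request and releases locked->lock, just as remove_from_engine() does with the sched_engine lock.
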
3316static bool_Bool can_preempt(struct intel_engine_cs *engine)
3317{
3318 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) > 8)
3319 return true1;
3320
3321 /* GPGPU on bdw requires extra w/a; not implemented */
3322 return engine->class != RENDER_CLASS0;
3323}
3324
3325static void kick_execlists(const struct i915_request *rq, int prio)
3326{
3327 struct intel_engine_cs *engine = rq->engine;
3328 struct i915_sched_engine *sched_engine = engine->sched_engine;
3329 const struct i915_request *inflight;
3330
3331 /*
3332 * We only need to kick the tasklet once for the high priority
3333 * new context we add into the queue.
3334 */
3335 if (prio <= sched_engine->queue_priority_hint)
3336 return;
3337
3338 rcu_read_lock();
3339
3340 /* Nothing currently active? We're overdue for a submission! */
3341 inflight = execlists_active(&engine->execlists);
3342 if (!inflight)
3343 goto unlock;
3344
3345 /*
3346 * If we are already the currently executing context, don't
3347 * bother evaluating if we should preempt ourselves.
3348 */
3349 if (inflight->context == rq->context)
3350 goto unlock;
3351
3352 ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3353 "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3354 prio,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3355 rq->fence.context, rq->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3356 inflight->fence.context, inflight->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
3357 inflight->sched.attr.priority)do { const struct intel_engine_cs *e__ __attribute__((__unused__
)) = (engine); do { } while (0); } while (0)
;
3358
3359 sched_engine->queue_priority_hint = prio;
3360
3361 /*
3362 * Allow preemption of low -> normal -> high, but we do
3363 * not allow low priority tasks to preempt other low priority
3364 * tasks under the impression that latency for low priority
3365 * tasks does not matter (as much as background throughput),
3366 * so kiss.
3367 */
3368 if (prio >= max(I915_PRIORITY_NORMAL, rq_prio(inflight))(((I915_PRIORITY_NORMAL)>(rq_prio(inflight)))?(I915_PRIORITY_NORMAL
):(rq_prio(inflight)))
)
3369 tasklet_hi_schedule(&sched_engine->tasklet);
3370
3371unlock:
3372 rcu_read_unlock();
3373}
3374
3375static void execlists_set_default_submission(struct intel_engine_cs *engine)
3376{
3377 engine->submit_request = execlists_submit_request;
3378 engine->sched_engine->schedule = i915_schedule;
3379 engine->sched_engine->kick_backend = kick_execlists;
3380 engine->sched_engine->tasklet.callback = execlists_submission_tasklet;
3381}
3382
3383static void execlists_shutdown(struct intel_engine_cs *engine)
3384{
3385 /* Synchronise with residual timers and any softirq they raise */
3386 del_timer_sync(&engine->execlists.timer)timeout_del_barrier((&engine->execlists.timer));
3387 del_timer_sync(&engine->execlists.preempt)timeout_del_barrier((&engine->execlists.preempt));
3388 tasklet_kill(&engine->sched_engine->tasklet);
3389}
3390
3391static void execlists_release(struct intel_engine_cs *engine)
3392{
3393 engine->sanitize = NULL((void *)0); /* no longer in control, nothing to sanitize */
3394
3395 execlists_shutdown(engine);
3396
3397 intel_engine_cleanup_common(engine);
3398 lrc_fini_wa_ctx(engine);
3399}
3400
3401static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine,
3402 ktime_t *now)
3403{
3404 struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
3405 ktime_t total = stats->total;
3406
3407 /*
3408 * If the engine is executing something at the moment
3409 * add it to the total.
3410 */
3411 *now = ktime_get();
3412 if (READ_ONCE(stats->active)({ typeof(stats->active) __tmp = *(volatile typeof(stats->
active) *)&(stats->active); membar_datadep_consumer();
__tmp; })
)
3413 total = ktime_add(total, ktime_sub(*now, stats->start));
3414
3415 return total;
3416}
3417
3418static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine,
3419 ktime_t *now)
3420{
3421 struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
3422 unsigned int seq;
3423 ktime_t total;
3424
3425 do {
3426 seq = read_seqcount_begin(&stats->lock);
3427 total = __execlists_engine_busyness(engine, now);
3428 } while (read_seqcount_retry(&stats->lock, seq));
3429
3430 return total;
3431}
3432
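execlists_engine_busyness() is the read side of a seqcount: take a snapshot of the stats and redo it whenever the writer was active (odd sequence) or bumped the sequence underneath the read. A stand-alone model of read_seqcount_begin()/read_seqcount_retry() with C11 atomics; the kernel helpers do more than this:

#include <stdatomic.h>
#include <stdint.h>

struct busy_stats {
    _Atomic unsigned int seq;
    uint64_t total;
};

static uint64_t read_busyness(struct busy_stats *s)
{
    unsigned int seq;
    uint64_t total;

    do {
        do {    /* wait out an in-progress writer (odd sequence) */
            seq = atomic_load_explicit(&s->seq, memory_order_acquire);
        } while (seq & 1);

        total = s->total;

        atomic_thread_fence(memory_order_acquire);
    } while (atomic_load_explicit(&s->seq, memory_order_relaxed) != seq);

    return total;
}
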
3433static void
3434logical_ring_default_vfuncs(struct intel_engine_cs *engine)
3435{
3436 /* Default vfuncs which can be overridden by each engine. */
3437
3438 engine->resume = execlists_resume;
3439
3440 engine->cops = &execlists_context_ops;
3441 engine->request_alloc = execlists_request_alloc;
3442 engine->add_active_request = add_to_engine;
3443 engine->remove_active_request = remove_from_engine;
3444
3445 engine->reset.prepare = execlists_reset_prepare;
3446 engine->reset.rewind = execlists_reset_rewind;
3447 engine->reset.cancel = execlists_reset_cancel;
3448 engine->reset.finish = execlists_reset_finish;
3449
3450 engine->park = execlists_park;
3451 engine->unpark = NULL((void *)0);
3452
3453 engine->emit_flush = gen8_emit_flush_xcs;
3454 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
3455 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
3456 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) >= 12) {
3457 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
3458 engine->emit_flush = gen12_emit_flush_xcs;
3459 }
3460 engine->set_default_submission = execlists_set_default_submission;
3461
3462 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) < 11) {
3463 engine->irq_enable = gen8_logical_ring_enable_irq;
3464 engine->irq_disable = gen8_logical_ring_disable_irq;
3465 } else {
3466 /*
3467 * TODO: On Gen11 interrupt masks need to be clear
3468		 * to allow C6 entry. Keep interrupts enabled
3469 * and take the hit of generating extra interrupts
3470 * until a more refined solution exists.
3471 */
3472 }
3473 intel_engine_set_irq_handler(engine, execlists_irq_handler);
3474
3475 engine->flags |= I915_ENGINE_SUPPORTS_STATS(1UL << (1));
3476 if (!intel_vgpu_active(engine->i915)) {
3477 engine->flags |= I915_ENGINE_HAS_SEMAPHORES(1UL << (3));
3478 if (can_preempt(engine)) {
3479 engine->flags |= I915_ENGINE_HAS_PREEMPTION(1UL << (2));
3480 if (CONFIG_DRM_I915_TIMESLICE_DURATION1)
3481 engine->flags |= I915_ENGINE_HAS_TIMESLICES(1UL << (4));
3482 }
3483 }
3484
3485 if (GRAPHICS_VER_FULL(engine->i915)(((&(engine->i915)->__runtime)->graphics.ip.ver)
<< 8 | ((&(engine->i915)->__runtime)->graphics
.ip.rel))
>= IP_VER(12, 50)((12) << 8 | (50))) {
3486 if (intel_engine_has_preemption(engine))
3487 engine->emit_bb_start = gen125_emit_bb_start;
3488 else
3489 engine->emit_bb_start = gen125_emit_bb_start_noarb;
3490 } else {
3491 if (intel_engine_has_preemption(engine))
3492 engine->emit_bb_start = gen8_emit_bb_start;
3493 else
3494 engine->emit_bb_start = gen8_emit_bb_start_noarb;
3495 }
3496
3497 engine->busyness = execlists_engine_busyness;
3498}
3499
3500static void logical_ring_default_irqs(struct intel_engine_cs *engine)
3501{
3502 unsigned int shift = 0;
3503
3504 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) < 11) {
3505 const u8 irq_shifts[] = {
3506 [RCS0] = GEN8_RCS_IRQ_SHIFT0,
3507 [BCS0] = GEN8_BCS_IRQ_SHIFT16,
3508 [VCS0] = GEN8_VCS0_IRQ_SHIFT0,
3509 [VCS1] = GEN8_VCS1_IRQ_SHIFT16,
3510 [VECS0] = GEN8_VECS_IRQ_SHIFT0,
3511 };
3512
3513 shift = irq_shifts[engine->id];
3514 }
3515
3516 engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT(1 << 0) << shift;
3517 engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT(1 << 8) << shift;
3518 engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT((u32)((1UL << (3)) + 0)) << shift;
3519 engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT((u32)((1UL << (11)) + 0)) << shift;
3520}
3521
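On pre-gen11 hardware the shift table above places an engine's interrupt bits either at bit 0 or at bit 16 of its GT interrupt word. This sketch just prints the resulting enable/keep masks for the two placements, with the bit positions taken from the expansions above:

#include <stdint.h>
#include <stdio.h>

#define GT_RENDER_USER_INTERRUPT       (1u << 0)
#define GT_CS_MASTER_ERROR_INTERRUPT   (1u << 3)
#define GT_CONTEXT_SWITCH_INTERRUPT    (1u << 8)
#define GT_WAIT_SEMAPHORE_INTERRUPT    (1u << 11)

int main(void)
{
    const unsigned int shifts[] = { 0, 16 };

    for (unsigned int i = 0; i < 2; i++) {
        uint32_t enable = GT_RENDER_USER_INTERRUPT << shifts[i];
        uint32_t keep = (GT_CONTEXT_SWITCH_INTERRUPT |
                         GT_CS_MASTER_ERROR_INTERRUPT |
                         GT_WAIT_SEMAPHORE_INTERRUPT) << shifts[i];

        printf("shift %2u: enable %#010x keep %#010x\n",
               shifts[i], (unsigned int)enable, (unsigned int)keep);
    }
    return 0;
}
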
3522static void rcs_submission_override(struct intel_engine_cs *engine)
3523{
3524 switch (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver)) {
3525 case 12:
3526 engine->emit_flush = gen12_emit_flush_rcs;
3527 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
3528 break;
3529 case 11:
3530 engine->emit_flush = gen11_emit_flush_rcs;
3531 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
3532 break;
3533 default:
3534 engine->emit_flush = gen8_emit_flush_rcs;
3535 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
3536 break;
3537 }
3538}
3539
3540int intel_execlists_submission_setup(struct intel_engine_cs *engine)
3541{
3542 struct intel_engine_execlists * const execlists = &engine->execlists;
3543 struct drm_i915_privateinteldrm_softc *i915 = engine->i915;
3544 struct intel_uncore *uncore = engine->uncore;
3545 u32 base = engine->mmio_base;
3546
3547 tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
3548#ifdef __linux__
3549 timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
3550 timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
3551#else
3552 timeout_set(&engine->execlists.timer, execlists_timeslice,
3553 &engine->execlists.timer);
3554 timeout_set(&engine->execlists.preempt, execlists_preempt,
3555 &engine->execlists.preempt);
3556#endif
3557
3558 logical_ring_default_vfuncs(engine);
3559 logical_ring_default_irqs(engine);
3560
3561 seqcount_init(&engine->stats.execlists.lock);
3562
3563 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE(1UL << (9)))
3564 rcs_submission_override(engine);
3565
3566 lrc_init_wa_ctx(engine);
3567
3568 if (HAS_LOGICAL_RING_ELSQ(i915)((&(i915)->__info)->has_logical_ring_elsq)) {
3569 execlists->submit_reg = uncore->regs +
3570 i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)((const i915_reg_t){ .reg = ((base) + 0x510) }));
3571 execlists->ctrl_reg = uncore->regs +
3572 i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)((const i915_reg_t){ .reg = ((base) + 0x550) }));
3573
3574 engine->fw_domain = intel_uncore_forcewake_for_reg(engine->uncore,
3575 RING_EXECLIST_CONTROL(engine->mmio_base)((const i915_reg_t){ .reg = ((engine->mmio_base) + 0x550) }
)
,
3576 FW_REG_WRITE(2));
3577 } else {
3578 execlists->submit_reg = uncore->regs +
3579 i915_mmio_reg_offset(RING_ELSP(base)((const i915_reg_t){ .reg = ((base) + 0x230) }));
3580 }
3581
3582 execlists->csb_status =
3583 (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX0x10];
3584
3585 execlists->csb_write =
3586 &engine->status_page.addr[INTEL_HWS_CSB_WRITE_INDEX(i915)(((&(i915)->__runtime)->graphics.ip.ver) >= 11 ?
0x2f : 0x1f)
];
3587
3588 if (GRAPHICS_VER(i915)((&(i915)->__runtime)->graphics.ip.ver) < 11)
3589 execlists->csb_size = GEN8_CSB_ENTRIES6;
3590 else
3591 execlists->csb_size = GEN11_CSB_ENTRIES12;
3592
3593 engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0);
3594 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) >= 11 &&
3595 GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 50)) {
3596 execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT48 - 32);
3597 execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT61 - 32);
3598 }
3599
3600 /* Finally, take ownership and responsibility for cleanup! */
3601 engine->sanitize = execlists_sanitize;
3602 engine->release = execlists_release;
3603
3604 return 0;
3605}
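To make the CSB selection at the end of this function concrete, here is a small standalone sketch (editor's illustration, not driver code) using the constants from this setup path: the CSB write pointer lives at HWSP dword 0x1f before gen11 and 0x2f from gen11 onwards, with 6 versus 12 CSB entries:

#include <stdio.h>

int main(void)
{
	for (int ver = 8; ver <= 12; ver++) {
		unsigned int write_idx = ver >= 11 ? 0x2f : 0x1f; /* INTEL_HWS_CSB_WRITE_INDEX */
		unsigned int entries   = ver >= 11 ? 12 : 6;      /* GEN11_/GEN8_CSB_ENTRIES */

		printf("gen%-2d: csb_write at HWSP[0x%02x], %u entries\n",
		       ver, write_idx, entries);
	}
	return 0;
}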
3606
3607static struct list_head *virtual_queue(struct virtual_engine *ve)
3608{
3609 return &ve->base.sched_engine->default_priolist.requests;
3610}
3611
3612static void rcu_virtual_context_destroy(struct work_struct *wrk)
3613{
3614 struct virtual_engine *ve =
3615 container_of(wrk, typeof(*ve), rcu.work);
3616 unsigned int n;
3617
3618 GEM_BUG_ON(ve->context.inflight)((void)0);
3619
3620 /* Preempt-to-busy may leave a stale request behind. */
3621 if (unlikely(ve->request)__builtin_expect(!!(ve->request), 0)) {
3622 struct i915_request *old;
3623
3624 spin_lock_irq(&ve->base.sched_engine->lock)mtx_enter(&ve->base.sched_engine->lock);
3625
3626 old = fetch_and_zero(&ve->request);
3627 if (old) {
3628 GEM_BUG_ON(!__i915_request_is_complete(old))((void)0);
3629 __i915_request_submit(old);
3630 i915_request_put(old);
3631 }
3632
3633 spin_unlock_irq(&ve->base.sched_engine->lock)mtx_leave(&ve->base.sched_engine->lock);
3634 }
3635
3636 /*
3637 * Flush the tasklet in case it is still running on another core.
3638 *
3639 * This needs to be done before we remove ourselves from the siblings'
3640 * rbtrees as in the case it is running in parallel, it may reinsert
3641 * the rb_node into a sibling.
3642 */
3643 tasklet_kill(&ve->base.sched_engine->tasklet);
3644
3645 /* Decouple ourselves from the siblings, no more access allowed. */
3646 for (n = 0; n < ve->num_siblings; n++) {
3647 struct intel_engine_cs *sibling = ve->siblings[n];
3648 struct rb_node *node = &ve->nodes[sibling->id].rb;
3649
3650 if (RB_EMPTY_NODE(node)((node)->__entry.rbe_parent == node))
3651 continue;
3652
3653 spin_lock_irq(&sibling->sched_engine->lock)mtx_enter(&sibling->sched_engine->lock);
3654
3655 /* Detachment is lazily performed in the sched_engine->tasklet */
3656 if (!RB_EMPTY_NODE(node)((node)->__entry.rbe_parent == node))
3657 rb_erase_cached(node, &sibling->execlists.virtual);
3658
3659 spin_unlock_irq(&sibling->sched_engine->lock)mtx_leave(&sibling->sched_engine->lock);
3660 }
3661 GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.sched_engine->tasklet))((void)0);
3662 GEM_BUG_ON(!list_empty(virtual_queue(ve)))((void)0);
3663
3664 lrc_fini(&ve->context);
3665 intel_context_fini(&ve->context);
3666
3667 if (ve->base.breadcrumbs)
3668 intel_breadcrumbs_put(ve->base.breadcrumbs);
3669 if (ve->base.sched_engine)
3670 i915_sched_engine_put(ve->base.sched_engine);
3671 intel_engine_free_request_pool(&ve->base);
3672
3673 kfree(ve);
3674}
3675
3676static void virtual_context_destroy(struct kref *kref)
3677{
3678 struct virtual_engine *ve =
3679 container_of(kref, typeof(*ve), context.ref);
3680
3681 GEM_BUG_ON(!list_empty(&ve->context.signals))((void)0);
3682
3683 /*
3684 * When destroying the virtual engine, we have to be aware that
3685 * it may still be in use from a hardirq/softirq context causing
3686 * the resubmission of a completed request (background completion
3687 * due to preempt-to-busy). Before we can free the engine, we need
3688 * to flush the submission code and tasklets that are still potentially
3689 * accessing the engine. Flushing the tasklets requires process context,
3690 * and since we can guard the resubmit onto the engine with an RCU read
3691 * lock, we can delegate the free of the engine to an RCU worker.
3692 */
3693 INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy);
3694 queue_rcu_work(system_wq, &ve->rcu);
3695}
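The comment above describes the general "free via an RCU worker" pattern. A minimal sketch of that pattern follows (assumed struct and function names, not driver code): readers access the object under rcu_read_lock(), and the final reference drop queues an rcu_work so the actual free runs in process context only after a grace period has elapsed.

#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_obj {
	struct kref ref;
	struct rcu_work rcu;
	/* ... payload ... */
};

static void my_obj_free(struct work_struct *wrk)
{
	struct my_obj *obj = container_of(to_rcu_work(wrk), struct my_obj, rcu);

	/* Process context: safe to kill tasklets, sleep, etc. before freeing. */
	kfree(obj);
}

static void my_obj_release(struct kref *ref)
{
	struct my_obj *obj = container_of(ref, struct my_obj, ref);

	INIT_RCU_WORK(&obj->rcu, my_obj_free);
	queue_rcu_work(system_wq, &obj->rcu);	/* freed after an RCU grace period */
}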
3696
3697static void virtual_engine_initial_hint(struct virtual_engine *ve)
3698{
3699 int swp;
3700
3701 /*
3702 * Pick a random sibling on starting to help spread the load around.
3703 *
3704 * New contexts are typically created with exactly the same order
3705 * of siblings, and often started in batches. Due to the way we iterate
3706 * the array of siblings when submitting requests, sibling[0] is
3707 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
3708 * randomised across the system, we also help spread the load by the
3709 * first engine we inspect being different each time.
3710 *
3711 * NB This does not force us to execute on this engine, it will just
3712 * typically be the first we inspect for submission.
3713 */
3714 swp = prandom_u32_max(ve->num_siblings);
3715 if (swp)
3716 swap(ve->siblings[swp], ve->siblings[0]);
3717}
3718
3719static int virtual_context_alloc(struct intel_context *ce)
3720{
3721 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3722
3723 return lrc_alloc(ce, ve->siblings[0]);
3724}
3725
3726static int virtual_context_pre_pin(struct intel_context *ce,
3727 struct i915_gem_ww_ctx *ww,
3728 void **vaddr)
3729{
3730 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3731
3732 /* Note: we must use a real engine class for setting up reg state */
3733 return __execlists_context_pre_pin(ce, ve->siblings[0], ww, vaddr);
3734}
3735
3736static int virtual_context_pin(struct intel_context *ce, void *vaddr)
3737{
3738 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3739
3740 return lrc_pin(ce, ve->siblings[0], vaddr);
3741}
3742
3743static void virtual_context_enter(struct intel_context *ce)
3744{
3745 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3746 unsigned int n;
3747
3748 for (n = 0; n < ve->num_siblings; n++)
3749 intel_engine_pm_get(ve->siblings[n]);
3750
3751 intel_timeline_enter(ce->timeline);
3752}
3753
3754static void virtual_context_exit(struct intel_context *ce)
3755{
3756 struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
3757 unsigned int n;
3758
3759 intel_timeline_exit(ce->timeline);
3760
3761 for (n = 0; n < ve->num_siblings; n++)
3762 intel_engine_pm_put(ve->siblings[n]);
3763}
3764
3765static struct intel_engine_cs *
3766virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
3767{
3768 struct virtual_engine *ve = to_virtual_engine(engine);
3769
3770 if (sibling >= ve->num_siblings)
3771 return NULL((void *)0);
3772
3773 return ve->siblings[sibling];
3774}
3775
3776static const struct intel_context_ops virtual_context_ops = {
3777 .flags = COPS_HAS_INFLIGHT(1UL << (0)) | COPS_RUNTIME_CYCLES(1UL << (1)),
3778
3779 .alloc = virtual_context_alloc,
3780
3781 .cancel_request = execlists_context_cancel_request,
3782
3783 .pre_pin = virtual_context_pre_pin,
3784 .pin = virtual_context_pin,
3785 .unpin = lrc_unpin,
3786 .post_unpin = lrc_post_unpin,
3787
3788 .enter = virtual_context_enter,
3789 .exit = virtual_context_exit,
3790
3791 .destroy = virtual_context_destroy,
3792
3793 .get_sibling = virtual_get_sibling,
3794};
3795
3796static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
3797{
3798 struct i915_request *rq;
3799 intel_engine_mask_t mask;
3800
3801 rq = READ_ONCE(ve->request);
3802 if (!rq)
3803 return 0;
3804
3805 /* The rq is ready for submission; rq->execution_mask is now stable. */
3806 mask = rq->execution_mask;
3807 if (unlikely(!mask)__builtin_expect(!!(!mask), 0)) {
3808 /* Invalid selection, submit to a random engine in error */
3809 i915_request_set_error_once(rq, -ENODEV19);
3810 mask = ve->siblings[0]->mask;
3811 }
3812
3813 ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
3814 rq->fence.context, rq->fence.seqno,
3815 mask, ve->base.sched_engine->queue_priority_hint);
3816
3817 return mask;
3818}
3819
3820static void virtual_submission_tasklet(struct tasklet_struct *t)
3821{
3822 struct i915_sched_engine *sched_engine =
3823 from_tasklet(sched_engine, t, tasklet);
3824 struct virtual_engine * const ve =
3825 (struct virtual_engine *)sched_engine->private_data;
3826 const int prio = READ_ONCE(sched_engine->queue_priority_hint);
3827 intel_engine_mask_t mask;
3828 unsigned int n;
3829
3830 rcu_read_lock();
3831 mask = virtual_submission_mask(ve);
3832 rcu_read_unlock();
3833 if (unlikely(!mask)__builtin_expect(!!(!mask), 0))
3834 return;
3835
3836 for (n = 0; n < ve->num_siblings; n++) {
3837 struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
3838 struct ve_node * const node = &ve->nodes[sibling->id];
3839 struct rb_node **parent, *rb;
3840 bool_Bool first;
3841
3842 if (!READ_ONCE(ve->request))
3843 break; /* already handled by a sibling's tasklet */
3844
3845 spin_lock_irq(&sibling->sched_engine->lock)mtx_enter(&sibling->sched_engine->lock);
3846
3847 if (unlikely(!(mask & sibling->mask))__builtin_expect(!!(!(mask & sibling->mask)), 0)) {
3848 if (!RB_EMPTY_NODE(&node->rb)) {
3849 rb_erase_cached(&node->rb,
3850 &sibling->execlists.virtual);
3851 RB_CLEAR_NODE(&node->rb);
3852 }
3853
3854 goto unlock_engine;
3855 }
3856
3857 if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
3858 /*
3859 * Cheat and avoid rebalancing the tree if we can
3860 * reuse this node in situ.
3861 */
3862 first = rb_first_cached(&sibling->execlists.virtual) ==
3863 &node->rb;
3864 if (prio == node->prio || (prio > node->prio && first))
3865 goto submit_engine;
3866
3867 rb_erase_cached(&node->rb, &sibling->execlists.virtual);
3868 }
3869
3870 rb = NULL((void *)0);
3871 first = true1;
3872 parent = &sibling->execlists.virtual.rb_root.rb_node;
3873 while (*parent) {
3874 struct ve_node *other;
3875
3876 rb = *parent;
3877 other = rb_entry(rb, typeof(*other), rb);
3878 if (prio > other->prio) {
3879 parent = &rb->rb_left__entry.rbe_left;
3880 } else {
3881 parent = &rb->rb_right__entry.rbe_right;
3882 first = false0;
3883 }
3884 }
3885
3886 rb_link_node(&node->rb, rb, parent);
3887 rb_insert_color_cached(&node->rb,
3888 &sibling->execlists.virtual,
3889 first);
3890
3891submit_engine:
3892 GEM_BUG_ON(RB_EMPTY_NODE(&node->rb))((void)0);
3893 node->prio = prio;
3894 if (first && prio > sibling->sched_engine->queue_priority_hint)
3895 tasklet_hi_schedule(&sibling->sched_engine->tasklet);
3896
3897unlock_engine:
3898 spin_unlock_irq(&sibling->sched_engine->lock)mtx_leave(&sibling->sched_engine->lock);
3899
3900 if (intel_context_inflight(&ve->context))
3901 break;
3902 }
3903}
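The insertion loop above hand-rolls a descending-priority insert into a cached rbtree. The same shape in isolation, as a sketch with assumed type names (not driver code):

#include <linux/rbtree.h>
#include <linux/types.h>

struct prio_node {
	struct rb_node rb;
	int prio;
};

static void prio_insert(struct rb_root_cached *root, struct prio_node *node)
{
	struct rb_node **parent = &root->rb_root.rb_node;
	struct rb_node *rb = NULL;
	bool first = true;

	while (*parent) {
		struct prio_node *other;

		rb = *parent;
		other = rb_entry(rb, struct prio_node, rb);
		if (node->prio > other->prio) {
			parent = &rb->rb_left;
		} else {
			parent = &rb->rb_right;
			first = false;	/* a higher-priority node sits to our left */
		}
	}

	rb_link_node(&node->rb, rb, parent);
	/* 'first' tells the cached tree whether we became the new leftmost. */
	rb_insert_color_cached(&node->rb, root, first);
}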
3904
3905static void virtual_submit_request(struct i915_request *rq)
3906{
3907 struct virtual_engine *ve = to_virtual_engine(rq->engine);
3908 unsigned long flags;
3909
3910 ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
3911 rq->fence.context,
3912 rq->fence.seqno);
3913
3914 GEM_BUG_ON(ve->base.submit_request != virtual_submit_request)((void)0);
3915
3916 spin_lock_irqsave(&ve->base.sched_engine->lock, flags);
3917
3918 /* By the time we resubmit a request, it may be completed */
3919 if (__i915_request_is_complete(rq)) {
3920 __i915_request_submit(rq);
3921 goto unlock;
3922 }
3923
3924 if (ve->request) { /* background completion from preempt-to-busy */
3925 GEM_BUG_ON(!__i915_request_is_complete(ve->request))((void)0);
3926 __i915_request_submit(ve->request);
3927 i915_request_put(ve->request);
3928 }
3929
3930 ve->base.sched_engine->queue_priority_hint = rq_prio(rq);
3931 ve->request = i915_request_get(rq);
3932
3933 GEM_BUG_ON(!list_empty(virtual_queue(ve)))((void)0);
3934 list_move_tail(&rq->sched.link, virtual_queue(ve));
3935
3936 tasklet_hi_schedule(&ve->base.sched_engine->tasklet);
3937
3938unlock:
3939 spin_unlock_irqrestore(&ve->base.sched_engine->lock, flags);
3940}
3941
3942static struct intel_context *
3943execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
3944 unsigned long flags)
3945{
3946 struct virtual_engine *ve;
3947 unsigned int n;
3948 int err;
3949
3950 ve = kzalloc(struct_size(ve, siblings, count)(sizeof(*(ve)) + ((count) * (sizeof(*(ve)->siblings)))), GFP_KERNEL(0x0001 | 0x0004));
3951 if (!ve)
3952 return ERR_PTR(-ENOMEM12);
3953
3954 ve->base.i915 = siblings[0]->i915;
3955 ve->base.gt = siblings[0]->gt;
3956 ve->base.uncore = siblings[0]->uncore;
3957 ve->base.id = -1;
3958
3959 ve->base.class = OTHER_CLASS4;
3960 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
3961 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL-2;
3962 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL-2;
3963
3964 /*
3965 * The decision on whether to submit a request using semaphores
3966 * depends on the saturated state of the engine. We only compute
3967 * this during HW submission of the request, and we need for this
3968 * state to be globally applied to all requests being submitted
3969 * to this engine. Virtual engines encompass more than one physical
3970 * engine and so we cannot accurately tell in advance if one of those
3971 * engines is already saturated and so cannot afford to use a semaphore
3972 * and be pessimized in priority for doing so -- if we are the only
3973 * context using semaphores after all other clients have stopped, we
3974 * will be starved on the saturated system. Such a global switch for
3975 * semaphores is less than ideal, but alas is the current compromise.
3976 */
3977 ve->base.saturated = ALL_ENGINES((intel_engine_mask_t)~0ul);
3978
3979 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
3980
3981 intel_engine_init_execlists(&ve->base);
3982
3983 ve->base.sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL2);
3984 if (!ve->base.sched_engine) {
3985 err = -ENOMEM12;
3986 goto err_put;
3987 }
3988 ve->base.sched_engine->private_data = &ve->base;
3989
3990 ve->base.cops = &virtual_context_ops;
3991 ve->base.request_alloc = execlists_request_alloc;
3992
3993 ve->base.sched_engine->schedule = i915_schedule;
3994 ve->base.sched_engine->kick_backend = kick_execlists;
3995 ve->base.submit_request = virtual_submit_request;
3996
3997 INIT_LIST_HEAD(virtual_queue(ve));
3998 tasklet_setup(&ve->base.sched_engine->tasklet, virtual_submission_tasklet);
3999
4000 intel_context_init(&ve->context, &ve->base);
4001
4002 ve->base.breadcrumbs = intel_breadcrumbs_create(NULL((void *)0));
4003 if (!ve->base.breadcrumbs) {
4004 err = -ENOMEM12;
4005 goto err_put;
4006 }
4007
4008 for (n = 0; n < count; n++) {
4009 struct intel_engine_cs *sibling = siblings[n];
4010
4011 GEM_BUG_ON(!is_power_of_2(sibling->mask))((void)0);
4012 if (sibling->mask & ve->base.mask) {
4013 DRM_DEBUG("duplicate %s entry in load balancer\n",
4014 sibling->name);
4015 err = -EINVAL22;
4016 goto err_put;
4017 }
4018
4019 /*
4020 * The virtual engine implementation is tightly coupled to
4021 * the execlists backend -- we push requests directly
4022 * into a tree inside each physical engine. We could support
4023 * layering if we handle cloning of the requests and
4024 * submitting a copy into each backend.
4025 */
4026 if (sibling->sched_engine->tasklet.callback !=
4027 execlists_submission_tasklet) {
4028 err = -ENODEV19;
4029 goto err_put;
4030 }
4031
4032 GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb))((void)0);
4033 RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
4034
4035 ve->siblings[ve->num_siblings++] = sibling;
4036 ve->base.mask |= sibling->mask;
4037 ve->base.logical_mask |= sibling->logical_mask;
4038
4039 /*
4040 * All physical engines must be compatible for their emission
4041 * functions (as we build the instructions during request
4042 * construction and do not alter them before submission
4043 * on the physical engine). We use the engine class as a guide
4044 * here, although that could be refined.
4045 */
4046 if (ve->base.class != OTHER_CLASS4) {
4047 if (ve->base.class != sibling->class) {
4048 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
4049 sibling->class, ve->base.class);
4050 err = -EINVAL22;
4051 goto err_put;
4052 }
4053 continue;
4054 }
4055
4056 ve->base.class = sibling->class;
4057 ve->base.uabi_class = sibling->uabi_class;
4058 snprintf(ve->base.name, sizeof(ve->base.name),
4059 "v%dx%d", ve->base.class, count);
4060 ve->base.context_size = sibling->context_size;
4061
4062 ve->base.add_active_request = sibling->add_active_request;
4063 ve->base.remove_active_request = sibling->remove_active_request;
4064 ve->base.emit_bb_start = sibling->emit_bb_start;
4065 ve->base.emit_flush = sibling->emit_flush;
4066 ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
4067 ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
4068 ve->base.emit_fini_breadcrumb_dw =
4069 sibling->emit_fini_breadcrumb_dw;
4070
4071 ve->base.flags = sibling->flags;
4072 }
4073
4074 ve->base.flags |= I915_ENGINE_IS_VIRTUAL(1UL << (5));
4075
4076 virtual_engine_initial_hint(ve);
4077 return &ve->context;
4078
4079err_put:
4080 intel_context_put(&ve->context);
4081 return ERR_PTR(err);
4082}
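A hedged usage sketch (hypothetical caller, simplified error handling) showing how a virtual context built by this function would typically be consumed and released; the final intel_context_put() is what eventually funnels into virtual_context_destroy() above:

static int example_create_virtual(struct intel_engine_cs **siblings,
				  unsigned int count)
{
	struct intel_context *ce;

	ce = execlists_create_virtual(siblings, count, 0);
	if (IS_ERR(ce))
		return PTR_ERR(ce);	/* -ENOMEM, -EINVAL or -ENODEV as above */

	/* ... pin the context and submit requests against it ... */

	intel_context_put(ce);
	return 0;
}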
4083
4084void intel_execlists_show_requests(struct intel_engine_cs *engine,
4085 struct drm_printer *m,
4086 void (*show_request)(struct drm_printer *m,
4087 const struct i915_request *rq,
4088 const char *prefix,
4089 int indent),
4090 unsigned int max)
4091{
4092 const struct intel_engine_execlists *execlists = &engine->execlists;
4093 struct i915_sched_engine *sched_engine = engine->sched_engine;
4094 struct i915_request *rq, *last;
4095 unsigned long flags;
4096 unsigned int count;
4097 struct rb_node *rb;
4098
4099 spin_lock_irqsave(&sched_engine->lock, flags);
4100
4101 last = NULL((void *)0);
4102 count = 0;
4103 list_for_each_entry(rq, &sched_engine->requests, sched.link) {
4104 if (count++ < max - 1)
4105 show_request(m, rq, "\t\t", 0);
4106 else
4107 last = rq;
4108 }
4109 if (last) {
4110 if (count > max) {
4111 drm_printf(m,
4112 "\t\t...skipping %d executing requests...\n",
4113 count - max);
4114 }
4115 show_request(m, last, "\t\t", 0);
4116 }
4117
4118 if (sched_engine->queue_priority_hint != INT_MIN(-0x7fffffff-1))
4119 drm_printf(m, "\t\tQueue priority hint: %d\n",
4120 READ_ONCE(sched_engine->queue_priority_hint));
4121
4122 last = NULL((void *)0);
4123 count = 0;
4124 for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
4125 struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
4126
4127 priolist_for_each_request(rq, p) {
4129 show_request(m, rq, "\t\t", 0);
4130 else
4131 last = rq;
4132 }
4133 }
4134 if (last) {
4135 if (count > max) {
4136 drm_printf(m,
4137 "\t\t...skipping %d queued requests...\n",
4138 count - max);
4139 }
4140 show_request(m, last, "\t\t", 0);
4141 }
4142
4143 last = NULL((void *)0);
4144 count = 0;
4145 for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
4146 struct virtual_engine *ve =
4147 rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
4148 struct i915_request *rq = READ_ONCE(ve->request);
4149
4150 if (rq) {
4151 if (count++ < max - 1)
4152 show_request(m, rq, "\t\t", 0);
4153 else
4154 last = rq;
4155 }
4156 }
4157 if (last) {
4158 if (count > max) {
4159 drm_printf(m,
4160 "\t\t...skipping %d virtual requests...\n",
4161 count - max);
4162 }
4163 show_request(m, last, "\t\t", 0);
4164 }
4165
4166 spin_unlock_irqrestore(&sched_engine->lock, flags);
4167}
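For completeness, a sketch of a show_request callback matching the function pointer taken above (hypothetical, printing only the fence fields already used elsewhere in this file):

static void example_show_request(struct drm_printer *m,
				 const struct i915_request *rq,
				 const char *prefix, int indent)
{
	drm_printf(m, "%s%*srq %llx:%lld\n", prefix, indent, "",
		   rq->fence.context, rq->fence.seqno);
}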
4168
4169static unsigned long list_count(struct list_head *list)
4170{
4171 struct list_head *pos;
4172 unsigned long count = 0;
4173
4174 list_for_each(pos, list)
4175 count++;
4176
4177 return count;
4178}
4179
4180void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
4181 struct i915_request *hung_rq,
4182 struct drm_printer *m)
4183{
4184 unsigned long flags;
4185
4186 spin_lock_irqsave(&engine->sched_engine->lock, flags);
4187
4188 intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
4189
4190 drm_printf(m, "\tOn hold?: %lu\n",
4191 list_count(&engine->sched_engine->hold));
4192
4193 spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
4194}
4195
4196#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0
4197#include "selftest_execlists.c"
4198#endif