File: dev/pci/drm/i915/gt/intel_lrc.c
Warning: line 1584, column 26: Value stored to 'engine' during its initialization is never read
1 | /* |
2 | * Copyright © 2014 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | * Authors: |
24 | * Ben Widawsky <ben@bwidawsk.net> |
25 | * Michel Thierry <michel.thierry@intel.com> |
26 | * Thomas Daniel <thomas.daniel@intel.com> |
27 | * Oscar Mateo <oscar.mateo@intel.com> |
28 | * |
29 | */ |
30 | |
31 | /** |
32 | * DOC: Logical Rings, Logical Ring Contexts and Execlists |
33 | * |
34 | * Motivation: |
35 | * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". |
36 | * These expanded contexts enable a number of new abilities, especially |
37 | * "Execlists" (also implemented in this file). |
38 | * |
39 | * One of the main differences with the legacy HW contexts is that logical |
40 | * ring contexts incorporate many more things to the context's state, like |
41 | * PDPs or ringbuffer control registers: |
42 | * |
43 | * The reason why PDPs are included in the context is straightforward: as |
44 | * PPGTTs (per-process GTTs) are actually per-context, having the PDPs |
45 | * contained there means you don't need to do a ppgtt->switch_mm yourself, |
46 | * instead, the GPU will do it for you on the context switch. |
47 | * |
48 | * But, what about the ringbuffer control registers (head, tail, etc..)? |
49 | * shouldn't we just need a set of those per engine command streamer? This is |
50 | * where the name "Logical Rings" starts to make sense: by virtualizing the |
51 | * rings, the engine cs shifts to a new "ring buffer" with every context |
52 | * switch. When you want to submit a workload to the GPU you: A) choose your |
53 | * context, B) find its appropriate virtualized ring, C) write commands to it |
54 | * and then, finally, D) tell the GPU to switch to that context. |
55 | * |
56 | * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch |
57 | * to a context is via a context execution list, ergo "Execlists". |
58 | * |
59 | * LRC implementation: |
60 | * Regarding the creation of contexts, we have: |
61 | * |
62 | * - One global default context. |
63 | * - One local default context for each opened fd. |
64 | * - One local extra context for each context create ioctl call. |
65 | * |
66 | * Now that ringbuffers belong per-context (and not per-engine, like before) |
67 | * and that contexts are uniquely tied to a given engine (and not reusable, |
68 | * like before) we need: |
69 | * |
70 | * - One ringbuffer per-engine inside each context. |
71 | * - One backing object per-engine inside each context. |
72 | * |
73 | * The global default context starts its life with these new objects fully |
74 | * allocated and populated. The local default context for each opened fd is |
75 | * more complex, because we don't know at creation time which engine is going |
76 | * to use them. To handle this, we have implemented a deferred creation of LR |
77 | * contexts: |
78 | * |
79 | * The local context starts its life as a hollow or blank holder, that only |
80 | * gets populated for a given engine once we receive an execbuffer. If later |
81 | * on we receive another execbuffer ioctl for the same context but a different |
82 | * engine, we allocate/populate a new ringbuffer and context backing object and |
83 | * so on. |
84 | * |
85 | * Finally, regarding local contexts created using the ioctl call: as they are |
86 | * only allowed with the render ring, we can allocate & populate them right |
87 | * away (no need to defer anything, at least for now). |
88 | * |
89 | * Execlists implementation: |
90 | * Execlists are the new method by which, on gen8+ hardware, workloads are |
91 | * submitted for execution (as opposed to the legacy, ringbuffer-based, method). |
92 | * This method works as follows: |
93 | * |
94 | * When a request is committed, its commands (the BB start and any leading or |
95 | * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer |
96 | * for the appropriate context. The tail pointer in the hardware context is not |
97 | * updated at this time, but instead, kept by the driver in the ringbuffer |
98 | * structure. A structure representing this request is added to a request queue |
99 | * for the appropriate engine: this structure contains a copy of the context's |
100 | * tail after the request was written to the ring buffer and a pointer to the |
101 | * context itself. |
102 | * |
103 | * If the engine's request queue was empty before the request was added, the |
104 | * queue is processed immediately. Otherwise the queue will be processed during |
105 | * a context switch interrupt. In any case, elements on the queue will get sent |
106 | * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a |
107 | * globally unique 20-bit submission ID. |
108 | * |
109 | * When execution of a request completes, the GPU updates the context status |
110 | * buffer with a context complete event and generates a context switch interrupt. |
111 | * During the interrupt handling, the driver examines the events in the buffer: |
112 | * for each context complete event, if the announced ID matches that on the head |
113 | * of the request queue, then that request is retired and removed from the queue. |
114 | * |
115 | * After processing, if any requests were retired and the queue is not empty |
116 | * then a new execution list can be submitted. The two requests at the front of |
117 | * the queue are next to be submitted but since a context may not occur twice in |
118 | * an execution list, if subsequent requests have the same ID as the first then |
119 | * the two requests must be combined. This is done simply by discarding requests |
120 | * at the head of the queue until either only one request is left (in which case |
121 | * we use a NULL second context) or the first two requests have unique IDs. |
122 | * |
123 | * By always executing the first two requests in the queue the driver ensures |
124 | * that the GPU is kept as busy as possible. In the case where a single context |
125 | * completes but a second context is still executing, the request for this second |
126 | * context will be at the head of the queue when we remove the first one. This |
127 | * request will then be resubmitted along with a new request for a different context, |
128 | * which will cause the hardware to continue executing the second request and queue |
129 | * the new request (the GPU detects the condition of a context getting preempted |
130 | * with the same context and optimizes the context switch flow by not doing |
131 | * preemption, but just sampling the new tail pointer). |
132 | * |
133 | */ |
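The coalescing rule described above (a context may occupy at most one ELSP port, so back-to-back requests for the same context are folded together) can be made concrete with a small sketch. This is illustrative only and not the driver's dequeue path: the helper name and the plain list it walks are hypothetical, and the real code selects from a priority tree under the engine lock.

static void pick_elsp_pair(struct list_head *queue,
                           struct i915_request **port0,
                           struct i915_request **port1)
{
        struct i915_request *rq;

        *port0 = *port1 = NULL;

        list_for_each_entry(rq, queue, sched.link) {
                if (!*port0 || rq->context == (*port0)->context) {
                        /* same context: keep only the later request, its tail supersedes */
                        *port0 = rq;
                } else {
                        /* second, distinct context fills the other port */
                        *port1 = rq;
                        break;
                }
        }
}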
134 | #include <linux/interrupt.h> |
135 | |
136 | #include "i915_drv.h" |
137 | #include "i915_perf.h" |
138 | #include "i915_trace.h" |
139 | #include "i915_vgpu.h" |
140 | #include "intel_breadcrumbs.h" |
141 | #include "intel_context.h" |
142 | #include "intel_engine_pm.h" |
143 | #include "intel_gt.h" |
144 | #include "intel_gt_pm.h" |
145 | #include "intel_gt_requests.h" |
146 | #include "intel_lrc_reg.h" |
147 | #include "intel_mocs.h" |
148 | #include "intel_reset.h" |
149 | #include "intel_ring.h" |
150 | #include "intel_workarounds.h" |
151 | #include "shmem_utils.h" |
152 | |
153 | #define RING_EXECLIST_QFULL		(1 << 0x2) |
154 | #define RING_EXECLIST1_VALID		(1 << 0x3) |
155 | #define RING_EXECLIST0_VALID		(1 << 0x4) |
156 | #define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE) |
157 | #define RING_EXECLIST1_ACTIVE		(1 << 0x11) |
158 | #define RING_EXECLIST0_ACTIVE		(1 << 0x12) |
159 | |
160 | #define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0) |
161 | #define GEN8_CTX_STATUS_PREEMPTED	(1 << 1) |
162 | #define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2) |
163 | #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3) |
164 | #define GEN8_CTX_STATUS_COMPLETE	(1 << 4) |
165 | #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15) |
166 | |
167 | #define GEN8_CTX_STATUS_COMPLETED_MASK \ |
168 | (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) |
169 | |
170 | #define CTX_DESC_FORCE_RESTORE BIT_ULL(2) |
171 | |
172 | #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE	(0x1) /* lower csb dword */ |
173 | #define GEN12_CTX_SWITCH_DETAIL(csb_dw)	((csb_dw) & 0xF) /* upper csb dword */ |
174 | #define GEN12_CSB_SW_CTX_ID_MASK	GENMASK(25, 15) |
175 | #define GEN12_IDLE_CTX_ID		0x7FF |
176 | #define GEN12_CSB_CTX_VALID(csb_dw) \ |
177 | (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) |
178 | |
179 | /* Typical size of the average request (2 pipecontrols and a MI_BB) */ |
180 | #define EXECLISTS_REQUEST_SIZE	64 /* bytes */ |
181 | |
182 | struct virtual_engine { |
183 | struct intel_engine_cs base; |
184 | struct intel_context context; |
185 | struct rcu_work rcu; |
186 | |
187 | /* |
188 | * We allow only a single request through the virtual engine at a time |
189 | * (each request in the timeline waits for the completion fence of |
190 | * the previous before being submitted). By restricting ourselves to |
191 | * only submitting a single request, each request is placed on to a |
192 | * physical engine to maximise load spreading (by virtue of the late greedy |
193 | * scheduling -- each real engine takes the next available request |
194 | * upon idling). |
195 | */ |
196 | struct i915_request *request; |
197 | |
198 | /* |
199 | * We keep a rbtree of available virtual engines inside each physical |
200 | * engine, sorted by priority. Here we preallocate the nodes we need |
201 | * for the virtual engine, indexed by physical_engine->id. |
202 | */ |
203 | struct ve_node { |
204 | struct rb_node rb; |
205 | int prio; |
206 | } nodes[I915_NUM_ENGINES]; |
207 | |
208 | /* |
209 | * Keep track of bonded pairs -- restrictions upon our selection |
210 | * of physical engines any particular request may be submitted to. |
211 | * If we receive a submit-fence from a master engine, we will only |
212 | * use one of sibling_mask physical engines. |
213 | */ |
214 | struct ve_bond { |
215 | const struct intel_engine_cs *master; |
216 | intel_engine_mask_t sibling_mask; |
217 | } *bonds; |
218 | unsigned int num_bonds; |
219 | |
220 | /* And finally, which physical engines this virtual engine maps onto. */ |
221 | unsigned int num_siblings; |
222 | struct intel_engine_cs *siblings[]; |
223 | }; |
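To make the master/sibling_mask relationship in ve_bond concrete, a lookup could look like the sketch below. The helper is hypothetical and shown for illustration only; the driver performs the equivalent restriction when the submit-fence from the master engine signals.

static intel_engine_mask_t
ve_bonded_siblings(const struct virtual_engine *ve,
                   const struct intel_engine_cs *master)
{
        unsigned int i;

        for (i = 0; i < ve->num_bonds; i++) {
                if (ve->bonds[i].master == master)
                        return ve->bonds[i].sibling_mask;
        }

        /* no bond registered for this master: no extra restriction */
        return ~0u;
}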
224 | |
225 | static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) |
226 | { |
227 | GEM_BUG_ON(!intel_engine_is_virtual(engine)); |
228 | return container_of(engine, struct virtual_engine, base); |
229 | } |
230 | |
231 | static int __execlists_context_alloc(struct intel_context *ce, |
232 | struct intel_engine_cs *engine); |
233 | |
234 | static void execlists_init_reg_state(u32 *reg_state, |
235 | const struct intel_context *ce, |
236 | const struct intel_engine_cs *engine, |
237 | const struct intel_ring *ring, |
238 | bool_Bool close); |
239 | static void |
240 | __execlists_update_reg_state(const struct intel_context *ce, |
241 | const struct intel_engine_cs *engine, |
242 | u32 head); |
243 | |
244 | static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) |
245 | { |
246 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
247 | return 0x60; |
248 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9) |
249 | return 0x54; |
250 | else if (engine->class == RENDER_CLASS0) |
251 | return 0x58; |
252 | else |
253 | return -1; |
254 | } |
255 | |
256 | static int lrc_ring_gpr0(const struct intel_engine_cs *engine) |
257 | { |
258 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
259 | return 0x74; |
260 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9) |
261 | return 0x68; |
262 | else if (engine->class == RENDER_CLASS0) |
263 | return 0xd8; |
264 | else |
265 | return -1; |
266 | } |
267 | |
268 | static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) |
269 | { |
270 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
271 | return 0x12; |
272 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9 || engine->class == RENDER_CLASS0) |
273 | return 0x18; |
274 | else |
275 | return -1; |
276 | } |
277 | |
278 | static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) |
279 | { |
280 | int x; |
281 | |
282 | x = lrc_ring_wa_bb_per_ctx(engine); |
283 | if (x < 0) |
284 | return x; |
285 | |
286 | return x + 2; |
287 | } |
288 | |
289 | static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) |
290 | { |
291 | int x; |
292 | |
293 | x = lrc_ring_indirect_ptr(engine); |
294 | if (x < 0) |
295 | return x; |
296 | |
297 | return x + 2; |
298 | } |
299 | |
300 | static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) |
301 | { |
302 | if (engine->class != RENDER_CLASS0) |
303 | return -1; |
304 | |
305 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
306 | return 0xb6; |
307 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11) |
308 | return 0xaa; |
309 | else |
310 | return -1; |
311 | } |
312 | |
313 | static u32 |
314 | lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) |
315 | { |
316 | switch (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen)) { |
317 | default: |
318 | MISSING_CASE(INTEL_GEN(engine->i915)); |
319 | fallthrough; |
320 | case 12: |
321 | return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0xD; |
322 | case 11: |
323 | return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x1A; |
324 | case 10: |
325 | return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x19; |
326 | case 9: |
327 | return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x26; |
328 | case 8: |
329 | return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT0x17; |
330 | } |
331 | } |
332 | |
333 | static void |
334 | lrc_ring_setup_indirect_ctx(u32 *regs, |
335 | const struct intel_engine_cs *engine, |
336 | u32 ctx_bb_ggtt_addr, |
337 | u32 size) |
338 | { |
339 | GEM_BUG_ON(!size)((void)0); |
340 | GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES))((void)0); |
341 | GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1)((void)0); |
342 | regs[lrc_ring_indirect_ptr(engine) + 1] = |
343 | ctx_bb_ggtt_addr | (size / CACHELINE_BYTES64); |
344 | |
345 | GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1)((void)0); |
346 | regs[lrc_ring_indirect_offset(engine) + 1] = |
347 | lrc_ring_indirect_offset_default(engine) << 6; |
348 | } |
349 | |
350 | static u32 intel_context_get_runtime(const struct intel_context *ce) |
351 | { |
352 | /* |
353 | * We can use either ppHWSP[16] which is recorded before the context |
354 | * switch (and so excludes the cost of context switches) or use the |
355 | * value from the context image itself, which is saved/restored earlier |
356 | * and so includes the cost of the save. |
357 | */ |
358 | return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); |
359 | } |
360 | |
361 | static void mark_eio(struct i915_request *rq) |
362 | { |
363 | if (i915_request_completed(rq)) |
364 | return; |
365 | |
366 | GEM_BUG_ON(i915_request_signaled(rq))((void)0); |
367 | |
368 | i915_request_set_error_once(rq, -EIO5); |
369 | i915_request_mark_complete(rq); |
370 | } |
371 | |
372 | static struct i915_request * |
373 | active_request(const struct intel_timeline * const tl, struct i915_request *rq) |
374 | { |
375 | struct i915_request *active = rq; |
376 | |
377 | rcu_read_lock(); |
378 | list_for_each_entry_continue_reverse(rq, &tl->requests, link) { |
379 | if (i915_request_completed(rq)) |
380 | break; |
381 | |
382 | active = rq; |
383 | } |
384 | rcu_read_unlock(); |
385 | |
386 | return active; |
387 | } |
388 | |
389 | static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) |
390 | { |
391 | return (i915_ggtt_offset(engine->status_page.vma) + |
392 | I915_GEM_HWS_PREEMPT_ADDR(0x32 * sizeof(u32))); |
393 | } |
394 | |
395 | static inline void |
396 | ring_set_paused(const struct intel_engine_cs *engine, int state) |
397 | { |
398 | /* |
399 | * We inspect HWS_PREEMPT with a semaphore inside |
400 | * engine->emit_fini_breadcrumb. If the dword is true, |
401 | * the ring is paused as the semaphore will busywait |
402 | * until the dword is false. |
403 | */ |
404 | engine->status_page.addr[I915_GEM_HWS_PREEMPT0x32] = state; |
405 | if (state) |
406 | wmb(); |
407 | } |
408 | |
409 | static inline struct i915_priolist *to_priolist(struct rb_node *rb) |
410 | { |
411 | return rb_entry(rb, struct i915_priolist, node); |
412 | } |
413 | |
414 | static inline int rq_prio(const struct i915_request *rq) |
415 | { |
416 | return READ_ONCE(rq->sched.attr.priority); |
417 | } |
418 | |
419 | static int effective_prio(const struct i915_request *rq) |
420 | { |
421 | int prio = rq_prio(rq); |
422 | |
423 | /* |
424 | * If this request is special and must not be interrupted at any |
425 | * cost, so be it. Note we are only checking the most recent request |
426 | * in the context and so may be masking an earlier vip request. It |
427 | * is hoped that under the conditions where nopreempt is used, this |
428 | * will not matter (i.e. all requests to that context will be |
429 | * nopreempt for as long as desired). |
430 | */ |
431 | if (i915_request_has_nopreempt(rq)) |
432 | prio = I915_PRIORITY_UNPREEMPTABLE0x7fffffff; |
433 | |
434 | return prio; |
435 | } |
436 | |
437 | static int queue_prio(const struct intel_engine_execlists *execlists) |
438 | { |
439 | struct i915_priolist *p; |
440 | struct rb_node *rb; |
441 | |
442 | rb = rb_first_cached(&execlists->queue); |
443 | if (!rb) |
444 | return INT_MIN(-0x7fffffff-1); |
445 | |
446 | /* |
447 | * As the priolist[] are inverted, with the highest priority in [0], |
448 | * we have to flip the index value to become priority. |
449 | */ |
450 | p = to_priolist(rb); |
451 | if (!I915_USER_PRIORITY_SHIFT0) |
452 | return p->priority; |
453 | |
454 | return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT0) - ffs(p->used); |
455 | } |
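A worked example of the index flip (hypothetical numbers, since I915_USER_PRIORITY_SHIFT is 0 in this build and the early return above is taken): with a shift of 2 there are four internal sub-levels, index 0 being the highest. For p->priority == 1 and p->used == 0x1 (only sub-level 0 populated), ((1 + 1) << 2) - ffs(0x1) = 8 - 1 = 7, i.e. user level 1 at its top sub-priority; with p->used == 0x8 (only the lowest sub-level populated), 8 - 4 = 4, the bottom sub-priority of the same user level.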
456 | |
457 | static inline bool_Bool need_preempt(const struct intel_engine_cs *engine, |
458 | const struct i915_request *rq, |
459 | struct rb_node *rb) |
460 | { |
461 | int last_prio; |
462 | |
463 | if (!intel_engine_has_semaphores(engine)) |
464 | return false0; |
465 | |
466 | /* |
467 | * Check if the current priority hint merits a preemption attempt. |
468 | * |
469 | * We record the highest value priority we saw during rescheduling |
470 | * prior to this dequeue, therefore we know that if it is strictly |
471 | * less than the current tail of ESLP[0], we do not need to force |
472 | * a preempt-to-idle cycle. |
473 | * |
474 | * However, the priority hint is a mere hint that we may need to |
475 | * preempt. If that hint is stale or we may be trying to preempt |
476 | * ourselves, ignore the request. |
477 | * |
478 | * More naturally we would write |
479 | * prio >= max(0, last); |
480 | * except that we wish to prevent triggering preemption at the same |
481 | * priority level: the task that is running should remain running |
482 | * to preserve FIFO ordering of dependencies. |
483 | */ |
484 | last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); |
485 | if (engine->execlists.queue_priority_hint <= last_prio) |
486 | return false0; |
487 | |
488 | /* |
489 | * Check against the first request in ELSP[1], it will, thanks to the |
490 | * power of PI, be the highest priority of that context. |
491 | */ |
492 | if (!list_is_last(&rq->sched.link, &engine->active.requests) && |
493 | rq_prio(list_next_entry(rq, sched.link)) > last_prio) |
494 | return true1; |
495 | |
496 | if (rb) { |
497 | struct virtual_engine *ve = |
498 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb); |
499 | bool_Bool preempt = false0; |
500 | |
501 | if (engine == ve->siblings[0]) { /* only preempt one sibling */ |
502 | struct i915_request *next; |
503 | |
504 | rcu_read_lock(); |
505 | next = READ_ONCE(ve->request); |
506 | if (next) |
507 | preempt = rq_prio(next) > last_prio; |
508 | rcu_read_unlock(); |
509 | } |
510 | |
511 | if (preempt) |
512 | return preempt; |
513 | } |
514 | |
515 | /* |
516 | * If the inflight context did not trigger the preemption, then maybe |
517 | * it was the set of queued requests? Pick the highest priority in |
518 | * the queue (the first active priolist) and see if it deserves to be |
519 | * running instead of ELSP[0]. |
520 | * |
521 | * The highest priority request in the queue can not be either |
522 | * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same |
523 | * context, it's priority would not exceed ELSP[0] aka last_prio. |
524 | */ |
525 | return queue_prio(&engine->execlists) > last_prio; |
526 | } |
527 | |
528 | __maybe_unused__attribute__((__unused__)) static inline bool_Bool |
529 | assert_priority_queue(const struct i915_request *prev, |
530 | const struct i915_request *next) |
531 | { |
532 | /* |
533 | * Without preemption, the prev may refer to the still active element |
534 | * which we refuse to let go. |
535 | * |
536 | * Even with preemption, there are times when we think it is better not |
537 | * to preempt and leave an ostensibly lower priority request in flight. |
538 | */ |
539 | if (i915_request_is_active(prev)) |
540 | return true1; |
541 | |
542 | return rq_prio(prev) >= rq_prio(next); |
543 | } |
544 | |
545 | /* |
546 | * The context descriptor encodes various attributes of a context, |
547 | * including its GTT address and some flags. Because it's fairly |
548 | * expensive to calculate, we'll just do it once and cache the result, |
549 | * which remains valid until the context is unpinned. |
550 | * |
551 | * This is what a descriptor looks like, from LSB to MSB:: |
552 | * |
553 | * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) |
554 | * bits 12-31: LRCA, GTT address of (the HWSP of) this context |
555 | * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) |
556 | * bits 53-54: mbz, reserved for use by hardware |
557 | * bits 55-63: group ID, currently unused and set to 0 |
558 | * |
559 | * Starting from Gen11, the upper dword of the descriptor has a new format: |
560 | * |
561 | * bits 32-36: reserved |
562 | * bits 37-47: SW context ID |
563 | * bits 48:53: engine instance |
564 | * bit 54: mbz, reserved for use by hardware |
565 | * bits 55-60: SW counter |
566 | * bits 61-63: engine class |
567 | * |
568 | * engine info, SW context ID and SW counter need to form a unique number |
569 | * (Context ID) per lrc. |
570 | */ |
571 | static u32 |
572 | lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) |
573 | { |
574 | u32 desc; |
575 | |
576 | desc = INTEL_LEGACY_32B_CONTEXT; |
577 | if (i915_vm_is_4lvl(ce->vm)) |
578 | desc = INTEL_LEGACY_64B_CONTEXT; |
579 | desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT3; |
580 | |
581 | desc |= GEN8_CTX_VALID(1 << 0) | GEN8_CTX_PRIVILEGE(1 << 8); |
582 | if (IS_GEN(engine->i915, 8)(0 + (&(engine->i915)->__info)->gen == (8))) |
583 | desc |= GEN8_CTX_L3LLC_COHERENT(1 << 5); |
584 | |
585 | return i915_ggtt_offset(ce->state) | desc; |
586 | } |
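As a hedged illustration of the Gen11+ upper-dword layout documented above the function (the helper below is hypothetical and the masks are written out by hand from the listed bit ranges; note that lrc_descriptor() itself only builds the lower dword, the ccid half is assembled at schedule-in):

static void describe_gen11_ctx_id(u64 desc)
{
        u32 sw_ctx_id  = (desc >> 37) & 0x7ff; /* bits 37-47 */
        u32 instance   = (desc >> 48) & 0x3f;  /* bits 48-53 */
        u32 sw_counter = (desc >> 55) & 0x3f;  /* bits 55-60 */
        u32 class      = (desc >> 61) & 0x7;   /* bits 61-63 */

        pr_info("ctx id: sw_id=%u class=%u instance=%u counter=%u\n",
                sw_ctx_id, class, instance, sw_counter);
}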
587 | |
588 | static inline unsigned int dword_in_page(void *addr) |
589 | { |
590 | return offset_in_page(addr) / sizeof(u32); |
591 | } |
592 | |
593 | static void set_offsets(u32 *regs, |
594 | const u8 *data, |
595 | const struct intel_engine_cs *engine, |
596 | bool_Bool clear) |
597 | #define NOP(x) (BIT(7) | (x)) |
598 | #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) |
599 | #define POSTED BIT(0) |
600 | #define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) |
601 | #define REG16(x) \ |
602 | (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ |
603 | (((x) >> 2) & 0x7f) |
604 | #define END(total_state_size) 0, (total_state_size) |
605 | { |
606 | const u32 base = engine->mmio_base; |
607 | |
608 | while (*data) { |
609 | u8 count, flags; |
610 | |
611 | if (*data & BIT(7)(1UL << (7))) { /* skip */ |
612 | count = *data++ & ~BIT(7)(1UL << (7)); |
613 | if (clear) |
614 | memset32(regs, MI_NOOP(((0) << 23) | (0)), count); |
615 | regs += count; |
616 | continue; |
617 | } |
618 | |
619 | count = *data & 0x3f; |
620 | flags = *data >> 6; |
621 | data++; |
622 | |
623 | *regs = MI_LOAD_REGISTER_IMM(count)(((0x22) << 23) | (2*(count)-1)); |
624 | if (flags & POSTED(1UL << (0))) |
625 | *regs |= MI_LRI_FORCE_POSTED(1<<12); |
626 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11) |
627 | *regs |= MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
628 | regs++; |
629 | |
630 | GEM_BUG_ON(!count)((void)0); |
631 | do { |
632 | u32 offset = 0; |
633 | u8 v; |
634 | |
635 | do { |
636 | v = *data++; |
637 | offset <<= 7; |
638 | offset |= v & ~BIT(7)(1UL << (7)); |
639 | } while (v & BIT(7)(1UL << (7))); |
640 | |
641 | regs[0] = base + (offset << 2); |
642 | if (clear) |
643 | regs[1] = 0; |
644 | regs += 2; |
645 | } while (--count); |
646 | } |
647 | |
648 | if (clear) { |
649 | u8 count = *++data; |
650 | |
651 | /* Clear past the tail for HW access */ |
652 | GEM_BUG_ON(dword_in_page(regs) > count)((void)0); |
653 | memset32(regs, MI_NOOP(((0) << 23) | (0)), count - dword_in_page(regs)); |
654 | |
655 | /* Close the batch; used mainly by live_lrc_layout() */ |
656 | *regs = MI_BATCH_BUFFER_END(((0x0a) << 23) | (0)); |
657 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 10) |
658 | *regs |= BIT(0)(1UL << (0)); |
659 | } |
660 | } |
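A worked decode of the byte encoding that set_offsets() consumes, using the start of gen8_xcs_offsets below: NOP(1) is the byte 0x81 (bit 7 set, skip one dword); LRI(11, 0) is 0x0b (emit MI_LOAD_REGISTER_IMM(11), not force-posted); REG16(0x244) is the pair 0x81 0x11, a two-byte 7-bits-per-byte dword offset of 0x91, i.e. register 0x244 from mmio_base; REG(0x034) is the single byte 0x0d. The END(80) terminator is a zero byte followed by the total state size in dwords, which the clear path uses to MI_NOOP everything between the last written register and that size.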
661 | |
662 | static const u8 gen8_xcs_offsets[] = { |
663 | NOP(1), |
664 | LRI(11, 0), |
665 | REG16(0x244), |
666 | REG(0x034), |
667 | REG(0x030), |
668 | REG(0x038), |
669 | REG(0x03c), |
670 | REG(0x168), |
671 | REG(0x140), |
672 | REG(0x110), |
673 | REG(0x11c), |
674 | REG(0x114), |
675 | REG(0x118), |
676 | |
677 | NOP(9), |
678 | LRI(9, 0), |
679 | REG16(0x3a8), |
680 | REG16(0x28c), |
681 | REG16(0x288), |
682 | REG16(0x284), |
683 | REG16(0x280), |
684 | REG16(0x27c), |
685 | REG16(0x278), |
686 | REG16(0x274), |
687 | REG16(0x270), |
688 | |
689 | NOP(13), |
690 | LRI(2, 0), |
691 | REG16(0x200), |
692 | REG(0x028), |
693 | |
694 | END(80) |
695 | }; |
696 | |
697 | static const u8 gen9_xcs_offsets[] = { |
698 | NOP(1), |
699 | LRI(14, POSTED(1UL << (0))), |
700 | REG16(0x244), |
701 | REG(0x034), |
702 | REG(0x030), |
703 | REG(0x038), |
704 | REG(0x03c), |
705 | REG(0x168), |
706 | REG(0x140), |
707 | REG(0x110), |
708 | REG(0x11c), |
709 | REG(0x114), |
710 | REG(0x118), |
711 | REG(0x1c0), |
712 | REG(0x1c4), |
713 | REG(0x1c8), |
714 | |
715 | NOP(3), |
716 | LRI(9, POSTED(1UL << (0))), |
717 | REG16(0x3a8), |
718 | REG16(0x28c), |
719 | REG16(0x288), |
720 | REG16(0x284), |
721 | REG16(0x280), |
722 | REG16(0x27c), |
723 | REG16(0x278), |
724 | REG16(0x274), |
725 | REG16(0x270), |
726 | |
727 | NOP(13), |
728 | LRI(1, POSTED(1UL << (0))), |
729 | REG16(0x200), |
730 | |
731 | NOP(13), |
732 | LRI(44, POSTED(1UL << (0))), |
733 | REG(0x028), |
734 | REG(0x09c), |
735 | REG(0x0c0), |
736 | REG(0x178), |
737 | REG(0x17c), |
738 | REG16(0x358), |
739 | REG(0x170), |
740 | REG(0x150), |
741 | REG(0x154), |
742 | REG(0x158), |
743 | REG16(0x41c), |
744 | REG16(0x600), |
745 | REG16(0x604), |
746 | REG16(0x608), |
747 | REG16(0x60c), |
748 | REG16(0x610), |
749 | REG16(0x614), |
750 | REG16(0x618), |
751 | REG16(0x61c), |
752 | REG16(0x620), |
753 | REG16(0x624), |
754 | REG16(0x628), |
755 | REG16(0x62c), |
756 | REG16(0x630), |
757 | REG16(0x634), |
758 | REG16(0x638), |
759 | REG16(0x63c), |
760 | REG16(0x640), |
761 | REG16(0x644), |
762 | REG16(0x648), |
763 | REG16(0x64c), |
764 | REG16(0x650), |
765 | REG16(0x654), |
766 | REG16(0x658), |
767 | REG16(0x65c), |
768 | REG16(0x660), |
769 | REG16(0x664), |
770 | REG16(0x668), |
771 | REG16(0x66c), |
772 | REG16(0x670), |
773 | REG16(0x674), |
774 | REG16(0x678), |
775 | REG16(0x67c), |
776 | REG(0x068), |
777 | |
778 | END(176) |
779 | }; |
780 | |
781 | static const u8 gen12_xcs_offsets[] = { |
782 | NOP(1), |
783 | LRI(13, POSTED(1UL << (0))), |
784 | REG16(0x244), |
785 | REG(0x034), |
786 | REG(0x030), |
787 | REG(0x038), |
788 | REG(0x03c), |
789 | REG(0x168), |
790 | REG(0x140), |
791 | REG(0x110), |
792 | REG(0x1c0), |
793 | REG(0x1c4), |
794 | REG(0x1c8), |
795 | REG(0x180), |
796 | REG16(0x2b4), |
797 | |
798 | NOP(5), |
799 | LRI(9, POSTED(1UL << (0))), |
800 | REG16(0x3a8), |
801 | REG16(0x28c), |
802 | REG16(0x288), |
803 | REG16(0x284), |
804 | REG16(0x280), |
805 | REG16(0x27c), |
806 | REG16(0x278), |
807 | REG16(0x274), |
808 | REG16(0x270), |
809 | |
810 | END(80) |
811 | }; |
812 | |
813 | static const u8 gen8_rcs_offsets[] = { |
814 | NOP(1), |
815 | LRI(14, POSTED(1UL << (0))), |
816 | REG16(0x244), |
817 | REG(0x034), |
818 | REG(0x030), |
819 | REG(0x038), |
820 | REG(0x03c), |
821 | REG(0x168), |
822 | REG(0x140), |
823 | REG(0x110), |
824 | REG(0x11c), |
825 | REG(0x114), |
826 | REG(0x118), |
827 | REG(0x1c0), |
828 | REG(0x1c4), |
829 | REG(0x1c8), |
830 | |
831 | NOP(3), |
832 | LRI(9, POSTED(1UL << (0))), |
833 | REG16(0x3a8), |
834 | REG16(0x28c), |
835 | REG16(0x288), |
836 | REG16(0x284), |
837 | REG16(0x280), |
838 | REG16(0x27c), |
839 | REG16(0x278), |
840 | REG16(0x274), |
841 | REG16(0x270), |
842 | |
843 | NOP(13), |
844 | LRI(1, 0), |
845 | REG(0x0c8), |
846 | |
847 | END(80) |
848 | }; |
849 | |
850 | static const u8 gen9_rcs_offsets[] = { |
851 | NOP(1), |
852 | LRI(14, POSTED(1UL << (0))), |
853 | REG16(0x244), |
854 | REG(0x34), |
855 | REG(0x30), |
856 | REG(0x38), |
857 | REG(0x3c), |
858 | REG(0x168), |
859 | REG(0x140), |
860 | REG(0x110), |
861 | REG(0x11c), |
862 | REG(0x114), |
863 | REG(0x118), |
864 | REG(0x1c0), |
865 | REG(0x1c4), |
866 | REG(0x1c8), |
867 | |
868 | NOP(3), |
869 | LRI(9, POSTED(1UL << (0))), |
870 | REG16(0x3a8), |
871 | REG16(0x28c), |
872 | REG16(0x288), |
873 | REG16(0x284), |
874 | REG16(0x280), |
875 | REG16(0x27c), |
876 | REG16(0x278), |
877 | REG16(0x274), |
878 | REG16(0x270), |
879 | |
880 | NOP(13), |
881 | LRI(1, 0), |
882 | REG(0xc8), |
883 | |
884 | NOP(13), |
885 | LRI(44, POSTED(1UL << (0))), |
886 | REG(0x28), |
887 | REG(0x9c), |
888 | REG(0xc0), |
889 | REG(0x178), |
890 | REG(0x17c), |
891 | REG16(0x358), |
892 | REG(0x170), |
893 | REG(0x150), |
894 | REG(0x154), |
895 | REG(0x158), |
896 | REG16(0x41c), |
897 | REG16(0x600), |
898 | REG16(0x604), |
899 | REG16(0x608), |
900 | REG16(0x60c), |
901 | REG16(0x610), |
902 | REG16(0x614), |
903 | REG16(0x618), |
904 | REG16(0x61c), |
905 | REG16(0x620), |
906 | REG16(0x624), |
907 | REG16(0x628), |
908 | REG16(0x62c), |
909 | REG16(0x630), |
910 | REG16(0x634), |
911 | REG16(0x638), |
912 | REG16(0x63c), |
913 | REG16(0x640), |
914 | REG16(0x644), |
915 | REG16(0x648), |
916 | REG16(0x64c), |
917 | REG16(0x650), |
918 | REG16(0x654), |
919 | REG16(0x658), |
920 | REG16(0x65c), |
921 | REG16(0x660), |
922 | REG16(0x664), |
923 | REG16(0x668), |
924 | REG16(0x66c), |
925 | REG16(0x670), |
926 | REG16(0x674), |
927 | REG16(0x678), |
928 | REG16(0x67c), |
929 | REG(0x68), |
930 | |
931 | END(176) |
932 | }; |
933 | |
934 | static const u8 gen11_rcs_offsets[] = { |
935 | NOP(1), |
936 | LRI(15, POSTED(1UL << (0))), |
937 | REG16(0x244), |
938 | REG(0x034), |
939 | REG(0x030), |
940 | REG(0x038), |
941 | REG(0x03c), |
942 | REG(0x168), |
943 | REG(0x140), |
944 | REG(0x110), |
945 | REG(0x11c), |
946 | REG(0x114), |
947 | REG(0x118), |
948 | REG(0x1c0), |
949 | REG(0x1c4), |
950 | REG(0x1c8), |
951 | REG(0x180), |
952 | |
953 | NOP(1), |
954 | LRI(9, POSTED(1UL << (0))), |
955 | REG16(0x3a8), |
956 | REG16(0x28c), |
957 | REG16(0x288), |
958 | REG16(0x284), |
959 | REG16(0x280), |
960 | REG16(0x27c), |
961 | REG16(0x278), |
962 | REG16(0x274), |
963 | REG16(0x270), |
964 | |
965 | LRI(1, POSTED(1UL << (0))), |
966 | REG(0x1b0), |
967 | |
968 | NOP(10), |
969 | LRI(1, 0), |
970 | REG(0x0c8), |
971 | |
972 | END(80) |
973 | }; |
974 | |
975 | static const u8 gen12_rcs_offsets[] = { |
976 | NOP(1), |
977 | LRI(13, POSTED(1UL << (0))), |
978 | REG16(0x244), |
979 | REG(0x034), |
980 | REG(0x030), |
981 | REG(0x038), |
982 | REG(0x03c), |
983 | REG(0x168), |
984 | REG(0x140), |
985 | REG(0x110), |
986 | REG(0x1c0), |
987 | REG(0x1c4), |
988 | REG(0x1c8), |
989 | REG(0x180), |
990 | REG16(0x2b4), |
991 | |
992 | NOP(5), |
993 | LRI(9, POSTED(1UL << (0))), |
994 | REG16(0x3a8), |
995 | REG16(0x28c), |
996 | REG16(0x288), |
997 | REG16(0x284), |
998 | REG16(0x280), |
999 | REG16(0x27c), |
1000 | REG16(0x278), |
1001 | REG16(0x274), |
1002 | REG16(0x270), |
1003 | |
1004 | LRI(3, POSTED(1UL << (0))), |
1005 | REG(0x1b0), |
1006 | REG16(0x5a8), |
1007 | REG16(0x5ac), |
1008 | |
1009 | NOP(6), |
1010 | LRI(1, 0), |
1011 | REG(0x0c8), |
1012 | NOP(3 + 9 + 1), |
1013 | |
1014 | LRI(51, POSTED(1UL << (0))), |
1015 | REG16(0x588), |
1016 | REG16(0x588), |
1017 | REG16(0x588), |
1018 | REG16(0x588), |
1019 | REG16(0x588), |
1020 | REG16(0x588), |
1021 | REG(0x028), |
1022 | REG(0x09c), |
1023 | REG(0x0c0), |
1024 | REG(0x178), |
1025 | REG(0x17c), |
1026 | REG16(0x358), |
1027 | REG(0x170), |
1028 | REG(0x150), |
1029 | REG(0x154), |
1030 | REG(0x158), |
1031 | REG16(0x41c), |
1032 | REG16(0x600), |
1033 | REG16(0x604), |
1034 | REG16(0x608), |
1035 | REG16(0x60c), |
1036 | REG16(0x610), |
1037 | REG16(0x614), |
1038 | REG16(0x618), |
1039 | REG16(0x61c), |
1040 | REG16(0x620), |
1041 | REG16(0x624), |
1042 | REG16(0x628), |
1043 | REG16(0x62c), |
1044 | REG16(0x630), |
1045 | REG16(0x634), |
1046 | REG16(0x638), |
1047 | REG16(0x63c), |
1048 | REG16(0x640), |
1049 | REG16(0x644), |
1050 | REG16(0x648), |
1051 | REG16(0x64c), |
1052 | REG16(0x650), |
1053 | REG16(0x654), |
1054 | REG16(0x658), |
1055 | REG16(0x65c), |
1056 | REG16(0x660), |
1057 | REG16(0x664), |
1058 | REG16(0x668), |
1059 | REG16(0x66c), |
1060 | REG16(0x670), |
1061 | REG16(0x674), |
1062 | REG16(0x678), |
1063 | REG16(0x67c), |
1064 | REG(0x068), |
1065 | REG(0x084), |
1066 | NOP(1), |
1067 | |
1068 | END(192) |
1069 | }; |
1070 | |
1071 | #undef END |
1072 | #undef REG16 |
1073 | #undef REG |
1074 | #undef LRI |
1075 | #undef NOP |
1076 | |
1077 | static const u8 *reg_offsets(const struct intel_engine_cs *engine) |
1078 | { |
1079 | /* |
1080 | * The gen12+ lists only have the registers we program in the basic |
1081 | * default state. We rely on the context image using relative |
1082 | * addressing to automatically fix up the register state between the |
1083 | * physical engines for virtual engine. |
1084 | */ |
1085 | GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 && |
1086 | !intel_engine_has_relative_mmio(engine)); |
1087 | |
1088 | if (engine->class == RENDER_CLASS0) { |
1089 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
1090 | return gen12_rcs_offsets; |
1091 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11) |
1092 | return gen11_rcs_offsets; |
1093 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9) |
1094 | return gen9_rcs_offsets; |
1095 | else |
1096 | return gen8_rcs_offsets; |
1097 | } else { |
1098 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
1099 | return gen12_xcs_offsets; |
1100 | else if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 9) |
1101 | return gen9_xcs_offsets; |
1102 | else |
1103 | return gen8_xcs_offsets; |
1104 | } |
1105 | } |
1106 | |
1107 | static struct i915_request * |
1108 | __unwind_incomplete_requests(struct intel_engine_cs *engine) |
1109 | { |
1110 | struct i915_request *rq, *rn, *active = NULL((void *)0); |
1111 | struct list_head *pl; |
1112 | int prio = I915_PRIORITY_INVALID((-0x7fffffff-1) | (u8)((1UL << (0)) - 1)); |
1113 | |
1114 | lockdep_assert_held(&engine->active.lock)do { (void)(&engine->active.lock); } while(0); |
1115 | |
1116 | list_for_each_entry_safe_reverse(rq, rn, |
1117 | &engine->active.requests, |
1118 | sched.link) { |
1119 | if (i915_request_completed(rq)) |
1120 | continue; /* XXX */ |
1121 | |
1122 | __i915_request_unsubmit(rq); |
1123 | |
1124 | /* |
1125 | * Push the request back into the queue for later resubmission. |
1126 | * If this request is not native to this physical engine (i.e. |
1127 | * it came from a virtual source), push it back onto the virtual |
1128 | * engine so that it can be moved across onto another physical |
1129 | * engine as load dictates. |
1130 | */ |
1131 | if (likely(rq->execution_mask == engine->mask)__builtin_expect(!!(rq->execution_mask == engine->mask) , 1)) { |
1132 | GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID)((void)0); |
1133 | if (rq_prio(rq) != prio) { |
1134 | prio = rq_prio(rq); |
1135 | pl = i915_sched_lookup_priolist(engine, prio); |
1136 | } |
1137 | GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))((void)0); |
1138 | |
1139 | list_move(&rq->sched.link, pl); |
1140 | set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
1141 | |
1142 | /* Check in case we rollback so far we wrap [size/2] */ |
1143 | if (intel_ring_direction(rq->ring, |
1144 | rq->tail, |
1145 | rq->ring->tail + 8) > 0) |
1146 | rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2)); |
1147 | |
1148 | active = rq; |
1149 | } else { |
1150 | struct intel_engine_cs *owner = rq->context->engine; |
1151 | |
1152 | WRITE_ONCE(rq->engine, owner); |
1153 | owner->submit_request(rq); |
1154 | active = NULL((void *)0); |
1155 | } |
1156 | } |
1157 | |
1158 | return active; |
1159 | } |
1160 | |
1161 | struct i915_request * |
1162 | execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) |
1163 | { |
1164 | struct intel_engine_cs *engine = |
1165 | container_of(execlists, typeof(*engine), execlists); |
1166 | |
1167 | return __unwind_incomplete_requests(engine); |
1168 | } |
1169 | |
1170 | static inline void |
1171 | execlists_context_status_change(struct i915_request *rq, unsigned long status) |
1172 | { |
1173 | /* |
1174 | * Only used when GVT-g is enabled now. When GVT-g is disabled, |
1175 | * The compiler should eliminate this function as dead-code. |
1176 | */ |
1177 | if (!IS_ENABLED(CONFIG_DRM_I915_GVT)0) |
1178 | return; |
1179 | |
1180 | #ifdef notyet |
1181 | atomic_notifier_call_chain(&rq->engine->context_status_notifier, |
1182 | status, rq); |
1183 | #endif |
1184 | } |
1185 | |
1186 | static void intel_engine_context_in(struct intel_engine_cs *engine) |
1187 | { |
1188 | unsigned long flags; |
1189 | |
1190 | if (atomic_add_unless(&engine->stats.active, 1, 0)) |
1191 | return; |
1192 | |
1193 | write_seqlock_irqsave(&engine->stats.lock, flags)do { flags = 0; __write_seqlock_irqsave(&engine->stats .lock); } while (0); |
1194 | if (!atomic_add_unless(&engine->stats.active, 1, 0)) { |
1195 | engine->stats.start = ktime_get(); |
1196 | atomic_inc(&engine->stats.active)__sync_fetch_and_add(&engine->stats.active, 1); |
1197 | } |
1198 | write_sequnlock_irqrestore(&engine->stats.lock, flags)do { (void)(flags); __write_sequnlock_irqrestore(&engine-> stats.lock); } while (0); |
1199 | } |
1200 | |
1201 | static void intel_engine_context_out(struct intel_engine_cs *engine) |
1202 | { |
1203 | unsigned long flags; |
1204 | |
1205 | GEM_BUG_ON(!atomic_read(&engine->stats.active))((void)0); |
1206 | |
1207 | if (atomic_add_unless(&engine->stats.active, -1, 1)) |
1208 | return; |
1209 | |
1210 | write_seqlock_irqsave(&engine->stats.lock, flags)do { flags = 0; __write_seqlock_irqsave(&engine->stats .lock); } while (0); |
1211 | if (atomic_dec_and_test(&engine->stats.active)(__sync_sub_and_fetch((&engine->stats.active), 1) == 0 )) { |
1212 | engine->stats.total = |
1213 | ktime_add(engine->stats.total, |
1214 | ktime_sub(ktime_get(), engine->stats.start)); |
1215 | } |
1216 | write_sequnlock_irqrestore(&engine->stats.lock, flags)do { (void)(flags); __write_sequnlock_irqrestore(&engine-> stats.lock); } while (0); |
1217 | } |
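The context_in/context_out pair above keeps per-engine busyness under engine->stats.lock (a seqlock): the first scheduled-in context records a start timestamp, the last scheduled-out one folds the elapsed interval into stats.total. A reader would sum the accumulated total with the currently open interval and retry on a racing writer; a minimal sketch of such a reader (not the driver's actual accessor, which lives elsewhere):

static ktime_t read_busy_time(struct intel_engine_cs *engine)
{
        ktime_t total;
        unsigned int seq;

        do {
                seq = read_seqbegin(&engine->stats.lock);

                total = engine->stats.total;
                if (atomic_read(&engine->stats.active))
                        total = ktime_add(total,
                                          ktime_sub(ktime_get(),
                                                    engine->stats.start));
        } while (read_seqretry(&engine->stats.lock, seq));

        return total;
}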
1218 | |
1219 | static void |
1220 | execlists_check_context(const struct intel_context *ce, |
1221 | const struct intel_engine_cs *engine) |
1222 | { |
1223 | const struct intel_ring *ring = ce->ring; |
1224 | u32 *regs = ce->lrc_reg_state; |
1225 | bool_Bool valid = true1; |
1226 | int x; |
1227 | |
1228 | if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { |
1229 | pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", |
1230 | engine->name, |
1231 | regs[CTX_RING_START], |
1232 | i915_ggtt_offset(ring->vma)); |
1233 | regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); |
1234 | valid = false; |
1235 | } |
1236 | |
1237 | if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != |
1238 | (RING_CTL_SIZE(ring->size) | RING_VALID)) { |
1239 | pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", |
1240 | engine->name, |
1241 | regs[CTX_RING_CTL], |
1242 | (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); |
1243 | regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; |
1244 | valid = false; |
1245 | } |
1246 | |
1247 | x = lrc_ring_mi_mode(engine); |
1248 | if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { |
1249 | pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", |
1250 | engine->name, regs[x + 1]); |
1251 | regs[x + 1] &= ~STOP_RING; |
1252 | regs[x + 1] |= STOP_RING << 16; |
1253 | valid = false; |
1254 | } |
1255 | |
1256 | WARN_ONCE(!valid, "Invalid lrc state found before submission\n"); |
1257 | } |
1258 | |
1259 | static void restore_default_state(struct intel_context *ce, |
1260 | struct intel_engine_cs *engine) |
1261 | { |
1262 | u32 *regs; |
1263 | |
1264 | regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); |
1265 | execlists_init_reg_state(regs, ce, engine, ce->ring, true1); |
1266 | |
1267 | ce->runtime.last = intel_context_get_runtime(ce); |
1268 | } |
1269 | |
1270 | static void reset_active(struct i915_request *rq, |
1271 | struct intel_engine_cs *engine) |
1272 | { |
1273 | struct intel_context * const ce = rq->context; |
1274 | u32 head; |
1275 | |
1276 | /* |
1277 | * The executing context has been cancelled. We want to prevent |
1278 | * further execution along this context and propagate the error on |
1279 | * to anything depending on its results. |
1280 | * |
1281 | * In __i915_request_submit(), we apply the -EIO and remove the |
1282 | * requests' payloads for any banned requests. But first, we must |
1283 | * rewind the context back to the start of the incomplete request so |
1284 | * that we do not jump back into the middle of the batch. |
1285 | * |
1286 | * We preserve the breadcrumbs and semaphores of the incomplete |
1287 | * requests so that inter-timeline dependencies (i.e other timelines) |
1288 | * remain correctly ordered. And we defer to __i915_request_submit() |
1289 | * so that all asynchronous waits are correctly handled. |
1290 | */ |
1291 | ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", |
1292 | rq->fence.context, rq->fence.seqno); |
1293 | |
1294 | /* On resubmission of the active request, payload will be scrubbed */ |
1295 | if (i915_request_completed(rq)) |
1296 | head = rq->tail; |
1297 | else |
1298 | head = active_request(ce->timeline, rq)->head; |
1299 | head = intel_ring_wrap(ce->ring, head); |
1300 | |
1301 | /* Scrub the context image to prevent replaying the previous batch */ |
1302 | restore_default_state(ce, engine); |
1303 | __execlists_update_reg_state(ce, engine, head); |
1304 | |
1305 | /* We've switched away, so this should be a no-op, but intent matters */ |
1306 | ce->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2)); |
1307 | } |
1308 | |
1309 | static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) |
1310 | { |
1311 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0 |
1312 | ce->runtime.num_underflow += dt < 0; |
1313 | ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); |
1314 | #endif |
1315 | } |
1316 | |
1317 | static void intel_context_update_runtime(struct intel_context *ce) |
1318 | { |
1319 | u32 old; |
1320 | s32 dt; |
1321 | |
1322 | if (intel_context_is_barrier(ce)) |
1323 | return; |
1324 | |
1325 | old = ce->runtime.last; |
1326 | ce->runtime.last = intel_context_get_runtime(ce); |
1327 | dt = ce->runtime.last - old; |
1328 | |
1329 | if (unlikely(dt <= 0)__builtin_expect(!!(dt <= 0), 0)) { |
1330 | CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", |
1331 | old, ce->runtime.last, dt); |
1332 | st_update_runtime_underflow(ce, dt); |
1333 | return; |
1334 | } |
1335 | |
1336 | ewma_runtime_add(&ce->runtime.avg, dt); |
1337 | ce->runtime.total += dt; |
1338 | } |
1339 | |
1340 | static inline struct intel_engine_cs * |
1341 | __execlists_schedule_in(struct i915_request *rq) |
1342 | { |
1343 | struct intel_engine_cs * const engine = rq->engine; |
1344 | struct intel_context * const ce = rq->context; |
1345 | |
1346 | intel_context_get(ce); |
1347 | |
1348 | if (unlikely(intel_context_is_banned(ce))__builtin_expect(!!(intel_context_is_banned(ce)), 0)) |
1349 | reset_active(rq, engine); |
1350 | |
1351 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0) |
1352 | execlists_check_context(ce, engine); |
1353 | |
1354 | if (ce->tag) { |
1355 | /* Use a fixed tag for OA and friends */ |
1356 | GEM_BUG_ON(ce->tag <= BITS_PER_LONG)((void)0); |
1357 | ce->lrc.ccid = ce->tag; |
1358 | } else { |
1359 | /* We don't need a strict matching tag, just different values */ |
1360 | unsigned int tag = ffs(READ_ONCE(engine->context_tag)); |
1361 | |
1362 | GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG)((void)0); |
1363 | clear_bit(tag - 1, &engine->context_tag); |
1364 | ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT37 - 32); |
1365 | |
1366 | BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); |
1367 | } |
1368 | |
1369 | ce->lrc.ccid |= engine->execlists.ccid; |
1370 | |
1371 | __intel_gt_pm_get(engine->gt); |
1372 | if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) |
1373 | intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); |
1374 | execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); |
1375 | intel_engine_context_in(engine); |
1376 | |
1377 | return engine; |
1378 | } |
1379 | |
1380 | static inline struct i915_request * |
1381 | execlists_schedule_in(struct i915_request *rq, int idx) |
1382 | { |
1383 | struct intel_context * const ce = rq->context; |
1384 | struct intel_engine_cs *old; |
1385 | |
1386 | GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine))((void)0); |
1387 | trace_i915_request_in(rq, idx); |
1388 | |
1389 | old = READ_ONCE(ce->inflight); |
1390 | do { |
1391 | if (!old) { |
1392 | WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); |
1393 | break; |
1394 | } |
1395 | } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); |
1396 | |
1397 | GEM_BUG_ON(intel_context_inflight(ce) != rq->engine)((void)0); |
1398 | return i915_request_get(rq); |
1399 | } |
1400 | |
1401 | static void kick_siblings(struct i915_request *rq, struct intel_context *ce) |
1402 | { |
1403 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context); |
1404 | struct i915_request *next = READ_ONCE(ve->request); |
1405 | |
1406 | if (next == rq || (next && next->execution_mask & ~rq->execution_mask)) |
1407 | tasklet_hi_schedule(&ve->base.execlists.tasklet); |
1408 | } |
1409 | |
1410 | static inline void |
1411 | __execlists_schedule_out(struct i915_request *rq, |
1412 | struct intel_engine_cs * const engine, |
1413 | unsigned int ccid) |
1414 | { |
1415 | struct intel_context * const ce = rq->context; |
1416 | |
1417 | /* |
1418 | * NB process_csb() is not under the engine->active.lock and hence |
1419 | * schedule_out can race with schedule_in meaning that we should |
1420 | * refrain from doing non-trivial work here. |
1421 | */ |
1422 | |
1423 | /* |
1424 | * If we have just completed this context, the engine may now be |
1425 | * idle and we want to re-enter powersaving. |
1426 | */ |
1427 | if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && |
1428 | i915_request_completed(rq)) |
1429 | intel_engine_add_retire(engine, ce->timeline); |
1430 | |
1431 | ccid >>= GEN11_SW_CTX_ID_SHIFT37 - 32; |
1432 | ccid &= GEN12_MAX_CONTEXT_HW_ID((1<<11) - 1); |
1433 | if (ccid < BITS_PER_LONG64) { |
1434 | GEM_BUG_ON(ccid == 0)((void)0); |
1435 | GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag))((void)0); |
1436 | set_bit(ccid - 1, &engine->context_tag); |
1437 | } |
1438 | |
1439 | intel_context_update_runtime(ce); |
1440 | intel_engine_context_out(engine); |
1441 | execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); |
1442 | if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)__sync_sub_and_fetch((&engine->fw_active), 1)) |
1443 | intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); |
1444 | intel_gt_pm_put_async(engine->gt); |
1445 | |
1446 | /* |
1447 | * If this is part of a virtual engine, its next request may |
1448 | * have been blocked waiting for access to the active context. |
1449 | * We have to kick all the siblings again in case we need to |
1450 | * switch (e.g. the next request is not runnable on this |
1451 | * engine). Hopefully, we will already have submitted the next |
1452 | * request before the tasklet runs and do not need to rebuild |
1453 | * each virtual tree and kick everyone again. |
1454 | */ |
1455 | if (ce->engine != engine) |
1456 | kick_siblings(rq, ce); |
1457 | |
1458 | intel_context_put(ce); |
1459 | } |
1460 | |
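/*
 * Illustrative sketch only, not part of the driver: the ccid tag handling in
 * __execlists_schedule_in() and __execlists_schedule_out() above is a plain
 * bitmap allocator -- a set bit in engine->context_tag means "free", ffs()
 * picks one on schedule-in and set_bit() returns it on schedule-out. A
 * minimal standalone version, with hypothetical "example_" names:
 */
static inline unsigned int example_tag_get(unsigned long *bitmap)
{
	unsigned int tag = ffs(READ_ONCE(*bitmap)); /* 1-based, 0 when exhausted */

	if (tag)
		clear_bit(tag - 1, bitmap);
	return tag;
}

static inline void example_tag_put(unsigned long *bitmap, unsigned int tag)
{
	set_bit(tag - 1, bitmap);
}
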
1461 | static inline void |
1462 | execlists_schedule_out(struct i915_request *rq) |
1463 | { |
1464 | struct intel_context * const ce = rq->context; |
1465 | struct intel_engine_cs *cur, *old; |
1466 | u32 ccid; |
1467 | |
1468 | trace_i915_request_out(rq); |
1469 | |
1470 | ccid = rq->context->lrc.ccid; |
1471 | old = READ_ONCE(ce->inflight)({ typeof(ce->inflight) __tmp = *(volatile typeof(ce->inflight ) *)&(ce->inflight); membar_datadep_consumer(); __tmp; }); |
1472 | do |
1473 | cur = ptr_unmask_bits(old, 2)((unsigned long)(old) & ((1UL << (2)) - 1)) ? ptr_dec(old)({ unsigned long __v = (unsigned long)(old); (typeof(old))(__v - 1); }) : NULL((void *)0); |
1474 | while (!try_cmpxchg(&ce->inflight, &old, cur)({ __typeof(&ce->inflight) __op = (__typeof((&ce-> inflight)))(&old); __typeof(*(&ce->inflight)) __o = *__op; __typeof(*(&ce->inflight)) __p = __sync_val_compare_and_swap ((&ce->inflight), (__o), (cur)); if (__p != __o) *__op = __p; (__p == __o); })); |
1475 | if (!cur) |
1476 | __execlists_schedule_out(rq, old, ccid); |
1477 | |
1478 | i915_request_put(rq); |
1479 | } |
1480 | |
1481 | static u64 execlists_update_context(struct i915_request *rq) |
1482 | { |
1483 | struct intel_context *ce = rq->context; |
1484 | u64 desc = ce->lrc.desc; |
1485 | u32 tail, prev; |
1486 | |
1487 | /* |
1488 | * WaIdleLiteRestore:bdw,skl |
1489 | * |
1490 | * We should never submit the context with the same RING_TAIL twice |
1491 | * just in case we submit an empty ring, which confuses the HW. |
1492 | * |
1493 | * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of |
1494 | * the normal request to be able to always advance the RING_TAIL on |
1495 | * subsequent resubmissions (for lite restore). Should that fail us, |
1496 | * and we try and submit the same tail again, force the context |
1497 | * reload. |
1498 | * |
1499 | * If we need to return to a preempted context, we need to skip the |
1500 | * lite-restore and force it to reload the RING_TAIL. Otherwise, the |
1501 | * HW has a tendency to ignore us rewinding the TAIL to the end of |
1502 | * an earlier request. |
1503 | */ |
1504 | GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail)((void)0); |
1505 | prev = rq->ring->tail; |
1506 | tail = intel_ring_set_tail(rq->ring, rq->tail); |
1507 | if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)__builtin_expect(!!(intel_ring_direction(rq->ring, tail, prev ) <= 0), 0)) |
1508 | desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2)); |
1509 | ce->lrc_reg_state[CTX_RING_TAIL(0x06 + 1)] = tail; |
1510 | rq->tail = rq->wa_tail; |
1511 | |
1512 | /* |
1513 | * Make sure the context image is complete before we submit it to HW. |
1514 | * |
1515 | * Ostensibly, writes (including the WCB) should be flushed prior to |
1516 | * an uncached write such as our mmio register access, but the empirical
1517 | * evidence (esp. on Braswell) suggests that the WC write into memory |
1518 | * may not be visible to the HW prior to the completion of the UC |
1519 | * register write and that we may begin execution from the context |
1520 | * before its image is complete leading to invalid PD chasing. |
1521 | */ |
1522 | wmb()do { __asm volatile("sfence" ::: "memory"); } while (0); |
1523 | |
1524 | ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE(1ULL << (2)); |
1525 | return desc; |
1526 | } |
1527 | |
1528 | static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) |
1529 | { |
1530 | if (execlists->ctrl_reg) { |
1531 | writel(lower_32_bits(desc), execlists->submit_reg + port * 2)iowrite32(((u32)(desc)), execlists->submit_reg + port * 2); |
1532 | writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1)iowrite32(((u32)(((desc) >> 16) >> 16)), execlists ->submit_reg + port * 2 + 1); |
1533 | } else { |
1534 | writel(upper_32_bits(desc), execlists->submit_reg)iowrite32(((u32)(((desc) >> 16) >> 16)), execlists ->submit_reg); |
1535 | writel(lower_32_bits(desc), execlists->submit_reg)iowrite32(((u32)(desc)), execlists->submit_reg); |
1536 | } |
1537 | } |
1538 | |
1539 | static __maybe_unused__attribute__((__unused__)) char * |
1540 | dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) |
1541 | { |
1542 | if (!rq) |
1543 | return ""; |
1544 | |
1545 | snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", |
1546 | prefix, |
1547 | rq->context->lrc.ccid, |
1548 | rq->fence.context, rq->fence.seqno, |
1549 | i915_request_completed(rq) ? "!" : |
1550 | i915_request_started(rq) ? "*" : |
1551 | "", |
1552 | rq_prio(rq)); |
1553 | |
1554 | return buf; |
1555 | } |
1556 | |
1557 | static __maybe_unused__attribute__((__unused__)) void |
1558 | trace_ports(const struct intel_engine_execlists *execlists, |
1559 | const char *msg, |
1560 | struct i915_request * const *ports) |
1561 | { |
1562 | const struct intel_engine_cs *engine = |
1563 | container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr = (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof (typeof(*engine), execlists) );}); |
1564 | char __maybe_unused__attribute__((__unused__)) p0[40], p1[40]; |
1565 | |
1566 | if (!ports[0]) |
1567 | return; |
1568 | |
1569 | ENGINE_TRACE(engine, "%s { %s%s }\n", msg,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
1570 | dump_port(p0, sizeof(p0), "", ports[0]),do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
1571 | dump_port(p1, sizeof(p1), ", ", ports[1]))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
1572 | } |
1573 | |
1574 | static inline bool_Bool |
1575 | reset_in_progress(const struct intel_engine_execlists *execlists) |
1576 | { |
1577 | return unlikely(!__tasklet_is_enabled(&execlists->tasklet))__builtin_expect(!!(!__tasklet_is_enabled(&execlists-> tasklet)), 0); |
1578 | } |
1579 | |
1580 | static __maybe_unused__attribute__((__unused__)) bool_Bool |
1581 | assert_pending_valid(const struct intel_engine_execlists *execlists, |
1582 | const char *msg) |
1583 | { |
1584 | struct intel_engine_cs *engine = |
Value stored to 'engine' during its initialization is never read | |
1585 | container_of(execlists, typeof(*engine), execlists)({ const __typeof( ((typeof(*engine) *)0)->execlists ) *__mptr = (execlists); (typeof(*engine) *)( (char *)__mptr - __builtin_offsetof (typeof(*engine), execlists) );}); |
1586 | struct i915_request * const *port, *rq; |
1587 | struct intel_context *ce = NULL((void *)0); |
1588 | bool_Bool sentinel = false0; |
1589 | u32 ccid = -1; |
1590 | |
1591 | trace_ports(execlists, msg, execlists->pending); |
1592 | |
1593 | /* We may be messing around with the lists during reset, lalala */ |
1594 | if (reset_in_progress(execlists)) |
1595 | return true1; |
1596 | |
1597 | if (!execlists->pending[0]) { |
1598 | GEM_TRACE_ERR("%s: Nothing pending for promotion!\n",do { } while (0) |
1599 | engine->name)do { } while (0); |
1600 | return false0; |
1601 | } |
1602 | |
1603 | if (execlists->pending[execlists_num_ports(execlists)]) { |
1604 | GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n",do { } while (0) |
1605 | engine->name, execlists_num_ports(execlists))do { } while (0); |
1606 | return false0; |
1607 | } |
1608 | |
1609 | for (port = execlists->pending; (rq = *port); port++) { |
1610 | unsigned long flags; |
1611 | bool_Bool ok = true1; |
1612 | |
1613 | GEM_BUG_ON(!kref_read(&rq->fence.refcount))((void)0); |
1614 | GEM_BUG_ON(!i915_request_is_active(rq))((void)0); |
1615 | |
1616 | if (ce == rq->context) { |
1617 | GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n",do { } while (0) |
1618 | engine->name,do { } while (0) |
1619 | ce->timeline->fence_context,do { } while (0) |
1620 | port - execlists->pending)do { } while (0); |
1621 | return false0; |
1622 | } |
1623 | ce = rq->context; |
1624 | |
1625 | if (ccid == ce->lrc.ccid) { |
1626 | GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n",do { } while (0) |
1627 | engine->name,do { } while (0) |
1628 | ccid, ce->timeline->fence_context,do { } while (0) |
1629 | port - execlists->pending)do { } while (0); |
1630 | return false0; |
1631 | } |
1632 | ccid = ce->lrc.ccid; |
1633 | |
1634 | /* |
1635 | * Sentinels are supposed to be the last request so they flush |
1636 | * the current execution off the HW. Check that they are the only |
1637 | * request in the pending submission. |
1638 | */ |
1639 | if (sentinel) { |
1640 | GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n",do { } while (0) |
1641 | engine->name,do { } while (0) |
1642 | ce->timeline->fence_context,do { } while (0) |
1643 | port - execlists->pending)do { } while (0); |
1644 | return false0; |
1645 | } |
1646 | sentinel = i915_request_has_sentinel(rq); |
1647 | |
1648 | /* Hold tightly onto the lock to prevent concurrent retires! */ |
1649 | if (!spin_trylock_irqsave(&rq->lock, flags)({ (void)(flags); mtx_enter_try(&rq->lock) ? 1 : 0; })) |
1650 | continue; |
1651 | |
1652 | if (i915_request_completed(rq)) |
1653 | goto unlock; |
1654 | |
1655 | if (i915_active_is_idle(&ce->active) && |
1656 | !intel_context_is_barrier(ce)) { |
1657 | GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n",do { } while (0) |
1658 | engine->name,do { } while (0) |
1659 | ce->timeline->fence_context,do { } while (0) |
1660 | port - execlists->pending)do { } while (0); |
1661 | ok = false0; |
1662 | goto unlock; |
1663 | } |
1664 | |
1665 | if (!i915_vma_is_pinned(ce->state)) { |
1666 | GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n",do { } while (0) |
1667 | engine->name,do { } while (0) |
1668 | ce->timeline->fence_context,do { } while (0) |
1669 | port - execlists->pending)do { } while (0); |
1670 | ok = false0; |
1671 | goto unlock; |
1672 | } |
1673 | |
1674 | if (!i915_vma_is_pinned(ce->ring->vma)) { |
1675 | GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n",do { } while (0) |
1676 | engine->name,do { } while (0) |
1677 | ce->timeline->fence_context,do { } while (0) |
1678 | port - execlists->pending)do { } while (0); |
1679 | ok = false0; |
1680 | goto unlock; |
1681 | } |
1682 | |
1683 | unlock: |
1684 | spin_unlock_irqrestore(&rq->lock, flags)do { (void)(flags); mtx_leave(&rq->lock); } while (0); |
1685 | if (!ok) |
1686 | return false0; |
1687 | } |
1688 | |
1689 | return ce; |
1690 | } |
1691 | |
1692 | static void execlists_submit_ports(struct intel_engine_cs *engine) |
1693 | { |
1694 | struct intel_engine_execlists *execlists = &engine->execlists; |
1695 | unsigned int n; |
1696 | |
1697 | GEM_BUG_ON(!assert_pending_valid(execlists, "submit"))((void)0); |
1698 | |
1699 | /* |
1700 | * We can skip acquiring intel_runtime_pm_get() here as it was taken |
1701 | * on our behalf by the request (see i915_gem_mark_busy()) and it will |
1702 | * not be relinquished until the device is idle (see |
1703 | * i915_gem_idle_work_handler()). As a precaution, we make sure |
1704 | * that all ELSP are drained i.e. we have processed the CSB, |
1705 | * before allowing ourselves to idle and calling intel_runtime_pm_put(). |
1706 | */ |
1707 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine))((void)0); |
1708 | |
1709 | /* |
1710 | * ELSQ note: the submit queue is not cleared after being submitted |
1711 | * to the HW so we need to make sure we always clean it up. This is |
1712 | * currently ensured by the fact that we always write the same number |
1713 | * of elsq entries; keep this in mind before changing the loop below.
1714 | */ |
1715 | for (n = execlists_num_ports(execlists); n--; ) { |
1716 | struct i915_request *rq = execlists->pending[n]; |
1717 | |
1718 | write_desc(execlists, |
1719 | rq ? execlists_update_context(rq) : 0, |
1720 | n); |
1721 | } |
1722 | |
1723 | /* we need to manually load the submit queue */ |
1724 | if (execlists->ctrl_reg) |
1725 | writel(EL_CTRL_LOAD, execlists->ctrl_reg)iowrite32((1 << 0), execlists->ctrl_reg); |
1726 | } |
1727 | |
1728 | static bool_Bool ctx_single_port_submission(const struct intel_context *ce) |
1729 | { |
1730 | return (IS_ENABLED(CONFIG_DRM_I915_GVT)0 && |
1731 | intel_context_force_single_submission(ce)); |
1732 | } |
1733 | |
1734 | static bool_Bool can_merge_ctx(const struct intel_context *prev, |
1735 | const struct intel_context *next) |
1736 | { |
1737 | if (prev != next) |
1738 | return false0; |
1739 | |
1740 | if (ctx_single_port_submission(prev)) |
1741 | return false0; |
1742 | |
1743 | return true1; |
1744 | } |
1745 | |
1746 | static unsigned long i915_request_flags(const struct i915_request *rq) |
1747 | { |
1748 | return READ_ONCE(rq->fence.flags)({ typeof(rq->fence.flags) __tmp = *(volatile typeof(rq-> fence.flags) *)&(rq->fence.flags); membar_datadep_consumer (); __tmp; }); |
1749 | } |
1750 | |
1751 | static bool_Bool can_merge_rq(const struct i915_request *prev, |
1752 | const struct i915_request *next) |
1753 | { |
1754 | GEM_BUG_ON(prev == next)((void)0); |
1755 | GEM_BUG_ON(!assert_priority_queue(prev, next))((void)0); |
1756 | |
1757 | /* |
1758 | * We do not submit known completed requests. Therefore if the next |
1759 | * request is already completed, we can pretend to merge it in |
1760 | * with the previous context (and we will skip updating the ELSP |
1761 | * and tracking). Thus hopefully keeping the ELSP full with active |
1762 | * contexts, despite the best efforts of preempt-to-busy to confuse |
1763 | * us. |
1764 | */ |
1765 | if (i915_request_completed(next)) |
1766 | return true1; |
1767 | |
1768 | if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &__builtin_expect(!!((i915_request_flags(prev) ^ i915_request_flags (next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | ( 1UL << (I915_FENCE_FLAG_SENTINEL)))), 0) |
1769 | (BIT(I915_FENCE_FLAG_NOPREEMPT) |__builtin_expect(!!((i915_request_flags(prev) ^ i915_request_flags (next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | ( 1UL << (I915_FENCE_FLAG_SENTINEL)))), 0) |
1770 | BIT(I915_FENCE_FLAG_SENTINEL)))__builtin_expect(!!((i915_request_flags(prev) ^ i915_request_flags (next)) & ((1UL << (I915_FENCE_FLAG_NOPREEMPT)) | ( 1UL << (I915_FENCE_FLAG_SENTINEL)))), 0)) |
1771 | return false0; |
1772 | |
1773 | if (!can_merge_ctx(prev->context, next->context)) |
1774 | return false0; |
1775 | |
1776 | GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno))((void)0); |
1777 | return true1; |
1778 | } |
1779 | |
1780 | static void virtual_update_register_offsets(u32 *regs, |
1781 | struct intel_engine_cs *engine) |
1782 | { |
1783 | set_offsets(regs, reg_offsets(engine), engine, false0); |
1784 | } |
1785 | |
1786 | static bool_Bool virtual_matches(const struct virtual_engine *ve, |
1787 | const struct i915_request *rq, |
1788 | const struct intel_engine_cs *engine) |
1789 | { |
1790 | const struct intel_engine_cs *inflight; |
1791 | |
1792 | if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ |
1793 | return false0; |
1794 | |
1795 | /* |
1796 | * We track when the HW has completed saving the context image |
1797 | * (i.e. when we have seen the final CS event switching out of |
1798 | * the context) and must not overwrite the context image before |
1799 | * then. This restricts us to only using the active engine |
1800 | * while the previous virtualized request is inflight (so |
1801 | * we reuse the register offsets). This is a very small |
1802 | * hysteresis on the greedy selection algorithm.
1803 | */ |
1804 | inflight = intel_context_inflight(&ve->context)({ unsigned long __v = (unsigned long)(({ typeof((&ve-> context)->inflight) __tmp = *(volatile typeof((&ve-> context)->inflight) *)&((&ve->context)->inflight ); membar_datadep_consumer(); __tmp; })); (typeof(({ typeof(( &ve->context)->inflight) __tmp = *(volatile typeof( (&ve->context)->inflight) *)&((&ve->context )->inflight); membar_datadep_consumer(); __tmp; })))(__v & -(1UL << (2))); }); |
1805 | if (inflight && inflight != engine) |
1806 | return false0; |
1807 | |
1808 | return true1; |
1809 | } |
1810 | |
1811 | static void virtual_xfer_context(struct virtual_engine *ve, |
1812 | struct intel_engine_cs *engine) |
1813 | { |
1814 | unsigned int n; |
1815 | |
1816 | if (likely(engine == ve->siblings[0])__builtin_expect(!!(engine == ve->siblings[0]), 1)) |
1817 | return; |
1818 | |
1819 | GEM_BUG_ON(READ_ONCE(ve->context.inflight))((void)0); |
1820 | if (!intel_engine_has_relative_mmio(engine)) |
1821 | virtual_update_register_offsets(ve->context.lrc_reg_state, |
1822 | engine); |
1823 | |
1824 | /* |
1825 | * Move the bound engine to the top of the list for |
1826 | * future execution. We then kick this tasklet first |
1827 | * before checking others, so that we preferentially |
1828 | * reuse this set of bound registers. |
1829 | */ |
1830 | for (n = 1; n < ve->num_siblings; n++) { |
1831 | if (ve->siblings[n] == engine) { |
1832 | swap(ve->siblings[n], ve->siblings[0])do { __typeof(ve->siblings[n]) __tmp = (ve->siblings[n] ); (ve->siblings[n]) = (ve->siblings[0]); (ve->siblings [0]) = __tmp; } while(0); |
1833 | break; |
1834 | } |
1835 | } |
1836 | } |
1837 | |
1838 | #define for_each_waiter(p__, rq__)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link ) *__mptr = ((&(rq__)->sched.waiters_list)->next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );}); &p__->wait_link != (&(rq__ )->sched.waiters_list); p__ = ({ const __typeof( ((__typeof (*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );})) \ |
1839 | list_for_each_entry_lockless(p__, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link ) *__mptr = ((&(rq__)->sched.waiters_list)->next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );}); &p__->wait_link != (&(rq__ )->sched.waiters_list); p__ = ({ const __typeof( ((__typeof (*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );})) |
1840 | &(rq__)->sched.waiters_list, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link ) *__mptr = ((&(rq__)->sched.waiters_list)->next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );}); &p__->wait_link != (&(rq__ )->sched.waiters_list); p__ = ({ const __typeof( ((__typeof (*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );})) |
1841 | wait_link)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->wait_link ) *__mptr = ((&(rq__)->sched.waiters_list)->next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );}); &p__->wait_link != (&(rq__ )->sched.waiters_list); p__ = ({ const __typeof( ((__typeof (*p__) *)0)->wait_link ) *__mptr = (p__->wait_link.next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), wait_link) );})) |
1842 | |
1843 | #define for_each_signaler(p__, rq__)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link ) *__mptr = ((&(rq__)->sched.signalers_list)->next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), signal_link) );}); &p__->signal_link != (& (rq__)->sched.signalers_list); p__ = ({ const __typeof( (( __typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link .next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof (__typeof(*p__), signal_link) );})) \ |
1844 | list_for_each_entry_rcu(p__, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link ) *__mptr = ((&(rq__)->sched.signalers_list)->next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), signal_link) );}); &p__->signal_link != (& (rq__)->sched.signalers_list); p__ = ({ const __typeof( (( __typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link .next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof (__typeof(*p__), signal_link) );})) |
1845 | &(rq__)->sched.signalers_list, \for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link ) *__mptr = ((&(rq__)->sched.signalers_list)->next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), signal_link) );}); &p__->signal_link != (& (rq__)->sched.signalers_list); p__ = ({ const __typeof( (( __typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link .next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof (__typeof(*p__), signal_link) );})) |
1846 | signal_link)for (p__ = ({ const __typeof( ((__typeof(*p__) *)0)->signal_link ) *__mptr = ((&(rq__)->sched.signalers_list)->next ); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p__), signal_link) );}); &p__->signal_link != (& (rq__)->sched.signalers_list); p__ = ({ const __typeof( (( __typeof(*p__) *)0)->signal_link ) *__mptr = (p__->signal_link .next); (__typeof(*p__) *)( (char *)__mptr - __builtin_offsetof (__typeof(*p__), signal_link) );})) |
1847 | |
1848 | static void defer_request(struct i915_request *rq, struct list_head * const pl) |
1849 | { |
1850 | DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) }; |
1851 | |
1852 | /* |
1853 | * We want to move the interrupted request to the back of |
1854 | * the round-robin list (i.e. its priority level), but |
1855 | * in doing so, we must then move all requests that were in |
1856 | * flight and were waiting for the interrupted request to |
1857 | * be run after it again. |
1858 | */ |
1859 | do { |
1860 | struct i915_dependency *p; |
1861 | |
1862 | GEM_BUG_ON(i915_request_is_active(rq))((void)0); |
1863 | list_move_tail(&rq->sched.link, pl); |
1864 | |
1865 | for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link ) *__mptr = ((&(rq)->sched.waiters_list)->next); ( __typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p), wait_link) );}); &p->wait_link != (&(rq)-> sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *) 0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof (*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link ) );})) { |
1866 | struct i915_request *w = |
1867 | container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p ->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof (typeof(*w), sched) );}); |
1868 | |
1869 | if (p->flags & I915_DEPENDENCY_WEAK(1UL << (2))) |
1870 | continue; |
1871 | |
1872 | /* Leave semaphores spinning on the other engines */ |
1873 | if (w->engine != rq->engine) |
1874 | continue; |
1875 | |
1876 | /* No waiter should start before its signaler */ |
1877 | GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) &&((void)0) |
1878 | i915_request_started(w) &&((void)0) |
1879 | !i915_request_completed(rq))((void)0); |
1880 | |
1881 | GEM_BUG_ON(i915_request_is_active(w))((void)0); |
1882 | if (!i915_request_is_ready(w)) |
1883 | continue; |
1884 | |
1885 | if (rq_prio(w) < rq_prio(rq)) |
1886 | continue; |
1887 | |
1888 | GEM_BUG_ON(rq_prio(w) > rq_prio(rq))((void)0); |
1889 | list_move_tail(&w->sched.link, &list); |
1890 | } |
1891 | |
1892 | rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof (*rq) *)0)->sched.link ) *__mptr = ((&list)->next); (typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof( *rq), sched.link) );})); |
1893 | } while (rq); |
1894 | } |
1895 | |
1896 | static void defer_active(struct intel_engine_cs *engine) |
1897 | { |
1898 | struct i915_request *rq; |
1899 | |
1900 | rq = __unwind_incomplete_requests(engine); |
1901 | if (!rq) |
1902 | return; |
1903 | |
1904 | defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); |
1905 | } |
1906 | |
1907 | static bool_Bool |
1908 | need_timeslice(const struct intel_engine_cs *engine, |
1909 | const struct i915_request *rq, |
1910 | const struct rb_node *rb) |
1911 | { |
1912 | int hint; |
1913 | |
1914 | if (!intel_engine_has_timeslices(engine)) |
1915 | return false0; |
1916 | |
1917 | hint = engine->execlists.queue_priority_hint; |
1918 | |
1919 | if (rb) { |
1920 | const struct virtual_engine *ve = |
1921 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id ].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof (typeof(*ve), nodes[engine->id].rb) );}); |
1922 | const struct intel_engine_cs *inflight = |
1923 | intel_context_inflight(&ve->context)({ unsigned long __v = (unsigned long)(({ typeof((&ve-> context)->inflight) __tmp = *(volatile typeof((&ve-> context)->inflight) *)&((&ve->context)->inflight ); membar_datadep_consumer(); __tmp; })); (typeof(({ typeof(( &ve->context)->inflight) __tmp = *(volatile typeof( (&ve->context)->inflight) *)&((&ve->context )->inflight); membar_datadep_consumer(); __tmp; })))(__v & -(1UL << (2))); }); |
1924 | |
1925 | if (!inflight || inflight == engine) { |
1926 | struct i915_request *next; |
1927 | |
1928 | rcu_read_lock(); |
1929 | next = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request ) *)&(ve->request); membar_datadep_consumer(); __tmp; } ); |
1930 | if (next) |
1931 | hint = max(hint, rq_prio(next))(((hint)>(rq_prio(next)))?(hint):(rq_prio(next))); |
1932 | rcu_read_unlock(); |
1933 | } |
1934 | } |
1935 | |
1936 | if (!list_is_last(&rq->sched.link, &engine->active.requests)) |
1937 | hint = max(hint, rq_prio(list_next_entry(rq, sched.link)))(((hint)>(rq_prio(({ const __typeof( ((typeof(*(rq)) *)0)-> sched.link ) *__mptr = (((rq)->sched.link.next)); (typeof( *(rq)) *)( (char *)__mptr - __builtin_offsetof(typeof(*(rq)), sched.link) );}))))?(hint):(rq_prio(({ const __typeof( ((typeof (*(rq)) *)0)->sched.link ) *__mptr = (((rq)->sched.link .next)); (typeof(*(rq)) *)( (char *)__mptr - __builtin_offsetof (typeof(*(rq)), sched.link) );})))); |
1938 | |
1939 | GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE)((void)0); |
1940 | return hint >= effective_prio(rq); |
1941 | } |
1942 | |
1943 | static bool_Bool |
1944 | timeslice_yield(const struct intel_engine_execlists *el, |
1945 | const struct i915_request *rq) |
1946 | { |
1947 | /* |
1948 | * Once bitten, forever smitten! |
1949 | * |
1950 | * If the active context ever busy-waited on a semaphore, |
1951 | * it will be treated as a hog until the end of its timeslice (i.e. |
1952 | * until it is scheduled out and replaced by a new submission, |
1953 | * possibly even its own lite-restore). The HW only sends an interrupt |
1954 | * on the first miss, and we do not know if that semaphore has been
1955 | * signaled, or even if it is now stuck on another semaphore. Play |
1956 | * safe, yield if it might be stuck -- it will be given a fresh |
1957 | * timeslice in the near future. |
1958 | */ |
1959 | return rq->context->lrc.ccid == READ_ONCE(el->yield)({ typeof(el->yield) __tmp = *(volatile typeof(el->yield ) *)&(el->yield); membar_datadep_consumer(); __tmp; }); |
1960 | } |
1961 | |
1962 | static bool_Bool |
1963 | timeslice_expired(const struct intel_engine_execlists *el, |
1964 | const struct i915_request *rq) |
1965 | { |
1966 | return timer_expired(&el->timer) || timeslice_yield(el, rq); |
1967 | } |
1968 | |
1969 | static int |
1970 | switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) |
1971 | { |
1972 | if (list_is_last(&rq->sched.link, &engine->active.requests)) |
1973 | return engine->execlists.queue_priority_hint; |
1974 | |
1975 | return rq_prio(list_next_entry(rq, sched.link)({ const __typeof( ((typeof(*(rq)) *)0)->sched.link ) *__mptr = (((rq)->sched.link.next)); (typeof(*(rq)) *)( (char *)__mptr - __builtin_offsetof(typeof(*(rq)), sched.link) );})); |
1976 | } |
1977 | |
1978 | static inline unsigned long |
1979 | timeslice(const struct intel_engine_cs *engine) |
1980 | { |
1981 | return READ_ONCE(engine->props.timeslice_duration_ms)({ typeof(engine->props.timeslice_duration_ms) __tmp = *(volatile typeof(engine->props.timeslice_duration_ms) *)&(engine ->props.timeslice_duration_ms); membar_datadep_consumer(); __tmp; }); |
1982 | } |
1983 | |
1984 | static unsigned long active_timeslice(const struct intel_engine_cs *engine) |
1985 | { |
1986 | const struct intel_engine_execlists *execlists = &engine->execlists; |
1987 | const struct i915_request *rq = *execlists->active; |
1988 | |
1989 | if (!rq || i915_request_completed(rq)) |
1990 | return 0; |
1991 | |
1992 | if (READ_ONCE(execlists->switch_priority_hint)({ typeof(execlists->switch_priority_hint) __tmp = *(volatile typeof(execlists->switch_priority_hint) *)&(execlists ->switch_priority_hint); membar_datadep_consumer(); __tmp; }) < effective_prio(rq)) |
1993 | return 0; |
1994 | |
1995 | return timeslice(engine); |
1996 | } |
1997 | |
1998 | static void set_timeslice(struct intel_engine_cs *engine) |
1999 | { |
2000 | unsigned long duration; |
2001 | |
2002 | if (!intel_engine_has_timeslices(engine)) |
2003 | return; |
2004 | |
2005 | duration = active_timeslice(engine); |
2006 | ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2007 | |
2008 | set_timer_ms(&engine->execlists.timer, duration); |
2009 | } |
2010 | |
2011 | static void start_timeslice(struct intel_engine_cs *engine, int prio) |
2012 | { |
2013 | struct intel_engine_execlists *execlists = &engine->execlists; |
2014 | unsigned long duration; |
2015 | |
2016 | if (!intel_engine_has_timeslices(engine)) |
2017 | return; |
2018 | |
2019 | WRITE_ONCE(execlists->switch_priority_hint, prio)({ typeof(execlists->switch_priority_hint) __tmp = (prio); *(volatile typeof(execlists->switch_priority_hint) *)& (execlists->switch_priority_hint) = __tmp; __tmp; }); |
2020 | if (prio == INT_MIN(-0x7fffffff-1)) |
2021 | return; |
2022 | |
2023 | if (timer_pending(&execlists->timer)(((&execlists->timer))->to_flags & 0x02)) |
2024 | return; |
2025 | |
2026 | duration = timeslice(engine); |
2027 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2028 | "start timeslicing, prio:%d, interval:%lu",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2029 | prio, duration)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2030 | |
2031 | set_timer_ms(&execlists->timer, duration); |
2032 | } |
2033 | |
2034 | static void record_preemption(struct intel_engine_execlists *execlists) |
2035 | { |
2036 | (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++)0; |
2037 | } |
2038 | |
2039 | static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, |
2040 | const struct i915_request *rq) |
2041 | { |
2042 | if (!rq) |
2043 | return 0; |
2044 | |
2045 | /* Force a fast reset for terminated contexts (ignoring sysfs!) */ |
2046 | if (unlikely(intel_context_is_banned(rq->context))__builtin_expect(!!(intel_context_is_banned(rq->context)), 0)) |
2047 | return 1; |
2048 | |
2049 | return READ_ONCE(engine->props.preempt_timeout_ms)({ typeof(engine->props.preempt_timeout_ms) __tmp = *(volatile typeof(engine->props.preempt_timeout_ms) *)&(engine-> props.preempt_timeout_ms); membar_datadep_consumer(); __tmp; } ); |
2050 | } |
2051 | |
2052 | static void set_preempt_timeout(struct intel_engine_cs *engine, |
2053 | const struct i915_request *rq) |
2054 | { |
2055 | if (!intel_engine_has_preempt_reset(engine)) |
2056 | return; |
2057 | |
2058 | set_timer_ms(&engine->execlists.preempt, |
2059 | active_preempt_timeout(engine, rq)); |
2060 | } |
2061 | |
2062 | static inline void clear_ports(struct i915_request **ports, int count) |
2063 | { |
2064 | memset_p((void **)ports, NULL((void *)0), count); |
2065 | } |
2066 | |
2067 | static inline void |
2068 | copy_ports(struct i915_request **dst, struct i915_request **src, int count) |
2069 | { |
2070 | /* A memcpy_p() would be very useful here! */ |
2071 | while (count--) |
2072 | WRITE_ONCE(*dst++, *src++)({ typeof(*dst++) __tmp = (*src++); *(volatile typeof(*dst++) *)&(*dst++) = __tmp; __tmp; }); /* avoid write tearing */ |
2073 | } |
2074 | |
2075 | static void execlists_dequeue(struct intel_engine_cs *engine) |
2076 | { |
2077 | struct intel_engine_execlists * const execlists = &engine->execlists; |
2078 | struct i915_request **port = execlists->pending; |
2079 | struct i915_request ** const last_port = port + execlists->port_mask; |
2080 | struct i915_request * const *active; |
2081 | struct i915_request *last; |
2082 | struct rb_node *rb; |
2083 | bool_Bool submit = false0; |
2084 | |
2085 | /* |
2086 | * Hardware submission is through 2 ports. Conceptually each port |
2087 | * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is |
2088 | * static for a context, and unique to each, so we only execute |
2089 | * requests belonging to a single context from each ring. RING_HEAD |
2090 | * is maintained by the CS in the context image, it marks the place |
2091 | * where it got up to last time, and through RING_TAIL we tell the CS |
2092 | * where we want to execute up to this time. |
2093 | * |
2094 | * In this list the requests are in order of execution. Consecutive |
2095 | * requests from the same context are adjacent in the ringbuffer. We |
2096 | * can combine these requests into a single RING_TAIL update: |
2097 | * |
2098 | * RING_HEAD...req1...req2 |
2099 | * ^- RING_TAIL |
2100 | * since to execute req2 the CS must first execute req1. |
2101 | * |
2102 | * Our goal then is to point each port to the end of a consecutive |
2103 | * sequence of requests as being the most optimal (fewest wake ups |
2104 | * and context switches) submission. |
2105 | */ |
2106 | |
2107 | for (rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->virtual)->rb_root), -1); rb; ) { |
2108 | struct virtual_engine *ve = |
2109 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id ].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof (typeof(*ve), nodes[engine->id].rb) );}); |
2110 | struct i915_request *rq = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request ) *)&(ve->request); membar_datadep_consumer(); __tmp; } ); |
2111 | |
2112 | if (!rq) { /* lazily cleanup after another engine handled rq */ |
2113 | rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists ->virtual)->rb_root), (rb)); |
2114 | RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb)); |
2115 | rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->virtual)->rb_root), -1); |
2116 | continue; |
2117 | } |
2118 | |
2119 | if (!virtual_matches(ve, rq, engine)) { |
2120 | rb = rb_next(rb)linux_root_RB_NEXT((rb)); |
2121 | continue; |
2122 | } |
2123 | |
2124 | break; |
2125 | } |
2126 | |
2127 | /* |
2128 | * If the queue is higher priority than the last |
2129 | * request in the currently active context, submit afresh. |
2130 | * We will resubmit again afterwards in case we need to split |
2131 | * the active context to interject the preemption request, |
2132 | * i.e. we will retrigger preemption following the ack in case |
2133 | * of trouble. |
2134 | */ |
2135 | active = READ_ONCE(execlists->active)({ typeof(execlists->active) __tmp = *(volatile typeof(execlists ->active) *)&(execlists->active); membar_datadep_consumer (); __tmp; }); |
2136 | |
2137 | /* |
2138 | * In theory we can skip over completed contexts that have not |
2139 | * yet been processed by events (as those events are in flight): |
2140 | * |
2141 | * while ((last = *active) && i915_request_completed(last)) |
2142 | * active++; |
2143 | * |
2144 | * However, the GPU cannot handle this as it will ultimately |
2145 | * find itself trying to jump back into a context it has just |
2146 | * completed and barf. |
2147 | */ |
2148 | |
2149 | if ((last = *active)) { |
2150 | if (need_preempt(engine, last, rb)) { |
2151 | if (i915_request_completed(last)) { |
2152 | tasklet_hi_schedule(&execlists->tasklet); |
2153 | return; |
2154 | } |
2155 | |
2156 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2157 | "preempting last=%llx:%lld, prio=%d, hint=%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2158 | last->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2159 | last->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2160 | last->sched.attr.priority,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2161 | execlists->queue_priority_hint)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2162 | record_preemption(execlists); |
2163 | |
2164 | /* |
2165 | * Don't let the RING_HEAD advance past the breadcrumb |
2166 | * as we unwind (and until we resubmit) so that we do |
2167 | * not accidentally tell it to go backwards. |
2168 | */ |
2169 | ring_set_paused(engine, 1); |
2170 | |
2171 | /* |
2172 | * Note that we have not stopped the GPU at this point, |
2173 | * so we are unwinding the incomplete requests as they |
2174 | * remain inflight and so by the time we do complete |
2175 | * the preemption, some of the unwound requests may |
2176 | * complete! |
2177 | */ |
2178 | __unwind_incomplete_requests(engine); |
2179 | |
2180 | last = NULL((void *)0); |
2181 | } else if (need_timeslice(engine, last, rb) && |
2182 | timeslice_expired(execlists, last)) { |
2183 | if (i915_request_completed(last)) { |
2184 | tasklet_hi_schedule(&execlists->tasklet); |
2185 | return; |
2186 | } |
2187 | |
2188 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2189 | "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2190 | last->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2191 | last->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2192 | last->sched.attr.priority,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2193 | execlists->queue_priority_hint,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2194 | yesno(timeslice_yield(execlists, last)))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2195 | |
2196 | ring_set_paused(engine, 1); |
2197 | defer_active(engine); |
2198 | |
2199 | /* |
2200 | * Unlike for preemption, if we rewind and continue |
2201 | * executing the same context as previously active, |
2202 | * the order of execution will remain the same and |
2203 | * the tail will only advance. We do not need to |
2204 | * force a full context restore, as a lite-restore |
2205 | * is sufficient to resample the monotonic TAIL. |
2206 | * |
2207 | * If we switch to any other context, similarly we |
2208 | * will not rewind TAIL of current context, and |
2209 | * normal save/restore will preserve state and allow |
2210 | * us to later continue executing the same request. |
2211 | */ |
2212 | last = NULL((void *)0); |
2213 | } else { |
2214 | /* |
2215 | * Otherwise if we already have a request pending |
2216 | * for execution after the current one, we can |
2217 | * just wait until the next CS event before |
2218 | * queuing more. In either case we will force a |
2219 | * lite-restore preemption event, but if we wait |
2220 | * we hopefully coalesce several updates into a single |
2221 | * submission. |
2222 | */ |
2223 | if (!list_is_last(&last->sched.link, |
2224 | &engine->active.requests)) { |
2225 | /* |
2226 | * Even if ELSP[1] is occupied and not worthy |
2227 | * of timeslices, our queue might be. |
2228 | */ |
2229 | start_timeslice(engine, queue_prio(execlists)); |
2230 | return; |
2231 | } |
2232 | } |
2233 | } |
2234 | |
2235 | while (rb) { /* XXX virtual is always taking precedence */ |
2236 | struct virtual_engine *ve = |
2237 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id ].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof (typeof(*ve), nodes[engine->id].rb) );}); |
2238 | struct i915_request *rq; |
2239 | |
2240 | spin_lock(&ve->base.active.lock)mtx_enter(&ve->base.active.lock); |
2241 | |
2242 | rq = ve->request; |
2243 | if (unlikely(!rq)__builtin_expect(!!(!rq), 0)) { /* lost the race to a sibling */ |
2244 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
2245 | rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists ->virtual)->rb_root), (rb)); |
2246 | RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb)); |
2247 | rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->virtual)->rb_root), -1); |
2248 | continue; |
2249 | } |
2250 | |
2251 | GEM_BUG_ON(rq != ve->request)((void)0); |
2252 | GEM_BUG_ON(rq->engine != &ve->base)((void)0); |
2253 | GEM_BUG_ON(rq->context != &ve->context)((void)0); |
2254 | |
2255 | if (rq_prio(rq) >= queue_prio(execlists)) { |
2256 | if (!virtual_matches(ve, rq, engine)) { |
2257 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
2258 | rb = rb_next(rb)linux_root_RB_NEXT((rb)); |
2259 | continue; |
2260 | } |
2261 | |
2262 | if (last && !can_merge_rq(last, rq)) { |
2263 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
2264 | start_timeslice(engine, rq_prio(rq)); |
2265 | return; /* leave this for another sibling */ |
2266 | } |
2267 | |
2268 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2269 | "virtual rq=%llx:%lld%s, new engine? %s\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2270 | rq->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2271 | rq->fence.seqno,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2272 | i915_request_completed(rq) ? "!" :do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2273 | i915_request_started(rq) ? "*" :do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2274 | "",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2275 | yesno(engine != ve->siblings[0]))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2276 | |
2277 | WRITE_ONCE(ve->request, NULL)({ typeof(ve->request) __tmp = (((void *)0)); *(volatile typeof (ve->request) *)&(ve->request) = __tmp; __tmp; }); |
2278 | WRITE_ONCE(ve->base.execlists.queue_priority_hint,({ typeof(ve->base.execlists.queue_priority_hint) __tmp = ( (-0x7fffffff-1)); *(volatile typeof(ve->base.execlists.queue_priority_hint ) *)&(ve->base.execlists.queue_priority_hint) = __tmp; __tmp; }) |
2279 | INT_MIN)({ typeof(ve->base.execlists.queue_priority_hint) __tmp = ( (-0x7fffffff-1)); *(volatile typeof(ve->base.execlists.queue_priority_hint ) *)&(ve->base.execlists.queue_priority_hint) = __tmp; __tmp; }); |
2280 | rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists ->virtual)->rb_root), (rb)); |
2281 | RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb)); |
2282 | |
2283 | GEM_BUG_ON(!(rq->execution_mask & engine->mask))((void)0); |
2284 | WRITE_ONCE(rq->engine, engine)({ typeof(rq->engine) __tmp = (engine); *(volatile typeof( rq->engine) *)&(rq->engine) = __tmp; __tmp; }); |
2285 | |
2286 | if (__i915_request_submit(rq)) { |
2287 | /* |
2288 | * Only after we confirm that we will submit |
2289 | * this request (i.e. it has not already |
2290 | * completed), do we want to update the context. |
2291 | * |
2292 | * This serves two purposes. It avoids |
2293 | * unnecessary work if we are resubmitting an |
2294 | * already completed request after timeslicing. |
2295 | * But more importantly, it prevents us altering |
2296 | * ve->siblings[] on an idle context, where |
2297 | * we may be using ve->siblings[] in |
2298 | * virtual_context_enter / virtual_context_exit. |
2299 | */ |
2300 | virtual_xfer_context(ve, engine); |
2301 | GEM_BUG_ON(ve->siblings[0] != engine)((void)0); |
2302 | |
2303 | submit = true1; |
2304 | last = rq; |
2305 | } |
2306 | i915_request_put(rq); |
2307 | |
2308 | /* |
2309 | * Hmm, we have a bunch of virtual engine requests, |
2310 | * but the first one was already completed (thanks |
2311 | * preempt-to-busy!). Keep looking at the veng queue |
2312 | * until we have no more relevant requests (i.e. |
2313 | * the normal submit queue has higher priority). |
2314 | */ |
2315 | if (!submit) { |
2316 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
2317 | rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->virtual)->rb_root), -1); |
2318 | continue; |
2319 | } |
2320 | } |
2321 | |
2322 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
2323 | break; |
2324 | } |
2325 | |
2326 | while ((rb = rb_first_cached(&execlists->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->queue)->rb_root), -1))) { |
2327 | struct i915_priolist *p = to_priolist(rb); |
2328 | struct i915_request *rq, *rn; |
2329 | int i; |
2330 | |
2331 | priolist_for_each_request_consume(rq, rn, p, i)for (; (p)->used ? (i = __builtin_ctzl((p)->used)), 1 : 0; (p)->used &= ~(1UL << (i))) for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = (( &(p)->requests[i])->next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );} ), rn = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );} ); &rq->sched.link != (&(p)->requests[i]); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->sched.link ) *__mptr = (rn->sched.link.next); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rn), sched.link) );} )) { |
2332 | bool_Bool merge = true1; |
2333 | |
2334 | /* |
2335 | * Can we combine this request with the current port? |
2336 | * It has to be the same context/ringbuffer and not |
2337 | * have any exceptions (e.g. GVT saying never to |
2338 | * combine contexts). |
2339 | * |
2340 | * If we can combine the requests, we can execute both |
2341 | * by updating the RING_TAIL to point to the end of the |
2342 | * second request, and so we never need to tell the |
2343 | * hardware about the first. |
2344 | */ |
2345 | if (last && !can_merge_rq(last, rq)) { |
2346 | /* |
2347 | * If we are on the second port and cannot |
2348 | * combine this request with the last, then we |
2349 | * are done. |
2350 | */ |
2351 | if (port == last_port) |
2352 | goto done; |
2353 | |
2354 | /* |
2355 | * We must not populate both ELSP[] with the |
2356 | * same LRCA, i.e. we must submit 2 different |
2357 | * contexts if we submit 2 ELSP. |
2358 | */ |
2359 | if (last->context == rq->context) |
2360 | goto done; |
2361 | |
2362 | if (i915_request_has_sentinel(last)) |
2363 | goto done; |
2364 | |
2365 | /* |
2366 | * If GVT overrides us we only ever submit |
2367 | * port[0], leaving port[1] empty. Note that we |
2368 | * also have to be careful that we don't queue |
2369 | * the same context (even though a different |
2370 | * request) to the second port. |
2371 | */ |
2372 | if (ctx_single_port_submission(last->context) || |
2373 | ctx_single_port_submission(rq->context)) |
2374 | goto done; |
2375 | |
2376 | merge = false0; |
2377 | } |
2378 | |
2379 | if (__i915_request_submit(rq)) { |
2380 | if (!merge) { |
2381 | *port = execlists_schedule_in(last, port - execlists->pending); |
2382 | port++; |
2383 | last = NULL((void *)0); |
2384 | } |
2385 | |
2386 | GEM_BUG_ON(last &&((void)0) |
2387 | !can_merge_ctx(last->context,((void)0) |
2388 | rq->context))((void)0); |
2389 | GEM_BUG_ON(last &&((void)0) |
2390 | i915_seqno_passed(last->fence.seqno,((void)0) |
2391 | rq->fence.seqno))((void)0); |
2392 | |
2393 | submit = true1; |
2394 | last = rq; |
2395 | } |
2396 | } |
2397 | |
2398 | rb_erase_cached(&p->node, &execlists->queue)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists ->queue)->rb_root), (&p->node)); |
2399 | i915_priolist_free(p); |
2400 | } |
2401 | |
2402 | done: |
2403 | /* |
2404 | * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. |
2405 | * |
2406 | * We choose the priority hint such that if we add a request of greater |
2407 | * priority than this, we kick the submission tasklet to decide on |
2408 | * the right order of submitting the requests to hardware. We must |
2409 | * also be prepared to reorder requests as they are in-flight on the |
2410 | * HW. We derive the priority hint then as the first "hole" in |
2411 | * the HW submission ports and if there are no available slots, |
2412 | * the priority of the lowest executing request, i.e. last. |
2413 | * |
2414 | * When we do receive a higher priority request ready to run from the |
2415 | * user, see queue_request(), the priority hint is bumped to that |
2416 | * request triggering preemption on the next dequeue (or subsequent |
2417 | * interrupt for secondary ports). |
2418 | */ |
2419 | execlists->queue_priority_hint = queue_prio(execlists); |
2420 | |
2421 | if (submit) { |
2422 | *port = execlists_schedule_in(last, port - execlists->pending); |
2423 | execlists->switch_priority_hint = |
2424 | switch_prio(engine, *execlists->pending); |
2425 | |
2426 | /* |
2427 | * Skip if we ended up with exactly the same set of requests, |
2428 | * e.g. trying to timeslice a pair of ordered contexts |
2429 | */ |
2430 | if (!memcmp(active, execlists->pending,__builtin_memcmp((active), (execlists->pending), ((port - execlists ->pending + 1) * sizeof(*port))) |
2431 | (port - execlists->pending + 1) * sizeof(*port))__builtin_memcmp((active), (execlists->pending), ((port - execlists ->pending + 1) * sizeof(*port)))) { |
2432 | do |
2433 | execlists_schedule_out(fetch_and_zero(port)({ typeof(*port) __T = *(port); *(port) = (typeof(*port))0; __T ; })); |
2434 | while (port-- != execlists->pending); |
2435 | |
2436 | goto skip_submit; |
2437 | } |
2438 | clear_ports(port + 1, last_port - port); |
2439 | |
2440 | WRITE_ONCE(execlists->yield, -1)({ typeof(execlists->yield) __tmp = (-1); *(volatile typeof (execlists->yield) *)&(execlists->yield) = __tmp; __tmp ; }); |
2441 | set_preempt_timeout(engine, *active); |
2442 | execlists_submit_ports(engine); |
2443 | } else { |
2444 | start_timeslice(engine, execlists->queue_priority_hint); |
2445 | skip_submit: |
2446 | ring_set_paused(engine, 0); |
2447 | } |
2448 | } |
2449 | |
2450 | static void |
2451 | cancel_port_requests(struct intel_engine_execlists * const execlists) |
2452 | { |
2453 | struct i915_request * const *port; |
2454 | |
2455 | for (port = execlists->pending; *port; port++) |
2456 | execlists_schedule_out(*port); |
2457 | clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)(sizeof((execlists->pending)) / sizeof((execlists->pending )[0]))); |
2458 | |
2459 | /* Mark the end of active before we overwrite *active */ |
2460 | for (port = xchg(&execlists->active, execlists->pending)__sync_lock_test_and_set(&execlists->active, execlists ->pending); *port; port++) |
2461 | execlists_schedule_out(*port); |
2462 | clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)(sizeof((execlists->inflight)) / sizeof((execlists->inflight )[0]))); |
2463 | |
2464 | smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* complete the seqlock for execlists_active() */ |
2465 | WRITE_ONCE(execlists->active, execlists->inflight)({ typeof(execlists->active) __tmp = (execlists->inflight ); *(volatile typeof(execlists->active) *)&(execlists-> active) = __tmp; __tmp; }); |
2466 | } |
2467 | |
2468 | static inline void |
2469 | invalidate_csb_entries(const u64 *first, const u64 *last) |
2470 | { |
2471 | clflush((vaddr_t)first); |
2472 | clflush((vaddr_t)last); |
2473 | } |
2474 | |
2475 | /* |
2476 | * Starting with Gen12, the status has a new format: |
2477 | * |
2478 | * bit 0: switched to new queue |
2479 | * bit 1: reserved |
2480 | * bit 2: semaphore wait mode (poll or signal), only valid when |
2481 | * switch detail is set to "wait on semaphore" |
2482 | * bits 3-5: engine class |
2483 | * bits 6-11: engine instance |
2484 | * bits 12-14: reserved |
2485 | * bits 15-25: sw context id of the lrc the GT switched to |
2486 | * bits 26-31: sw counter of the lrc the GT switched to |
2487 | * bits 32-35: context switch detail |
2488 | * - 0: ctx complete |
2489 | * - 1: wait on sync flip |
2490 | * - 2: wait on vblank |
2491 | * - 3: wait on scanline |
2492 | * - 4: wait on semaphore |
2493 | * - 5: context preempted (not on SEMAPHORE_WAIT or |
2494 | * WAIT_FOR_EVENT) |
2495 | * bit 36: reserved |
2496 | * bits 37-43: wait detail (for switch detail 1 to 4) |
2497 | * bits 44-46: reserved |
2498 | * bits 47-57: sw context id of the lrc the GT switched away from |
2499 | * bits 58-63: sw counter of the lrc the GT switched away from |
2500 | */ |
2501 | static inline bool_Bool gen12_csb_parse(const u64 *csb) |
2502 | { |
2503 | bool_Bool ctx_away_valid; |
2504 | bool_Bool new_queue; |
2505 | u64 entry; |
2506 | |
2507 | /* HSD#22011248461 */ |
2508 | entry = READ_ONCE(*csb)({ typeof(*csb) __tmp = *(volatile typeof(*csb) *)&(*csb) ; membar_datadep_consumer(); __tmp; }); |
2509 | if (unlikely(entry == -1)__builtin_expect(!!(entry == -1), 0)) { |
2510 | preempt_disable(); |
2511 | if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50)({ extern char _ctassert[(!(!__builtin_constant_p(50))) ? 1 : -1 ] __attribute__((__unused__)); extern char _ctassert[(!(( 50) > 50000)) ? 1 : -1 ] __attribute__((__unused__)); ({ int cpu, ret, timeout = ((50)) * 1000; u64 base; do { } while (0 ); if (!(1)) { ; cpu = (({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_cpuid); } base = local_clock (); for (;;) { u64 now = local_clock(); if (!(1)) ; __asm volatile ("" : : : "memory"); if (((entry = ({ typeof(*csb) __tmp = *( volatile typeof(*csb) *)&(*csb); membar_datadep_consumer( ); __tmp; })) != -1)) { ret = 0; break; } if (now - base >= timeout) { ret = -60; break; } cpu_relax(); if (!(1)) { ; if (__builtin_expect(!!(cpu != (({struct cpu_info *__ci; asm volatile ("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_cpuid)), 0)) { timeout -= now - base; cpu = (({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self ))); __ci;})->ci_cpuid); base = local_clock(); } } } ret; } ); })) |
2512 | GEM_WARN_ON("50us CSB timeout")({ __builtin_expect(!!(!!("50us CSB timeout")), 0); }); |
2513 | preempt_enable(); |
2514 | } |
2515 | WRITE_ONCE(*(u64 *)csb, -1)({ typeof(*(u64 *)csb) __tmp = (-1); *(volatile typeof(*(u64 * )csb) *)&(*(u64 *)csb) = __tmp; __tmp; }); |
2516 | |
2517 | ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry))(((typeof((((~0UL) >> (64 - (25) - 1)) & ((~0UL) << (15)))))(((((u32)(((entry) >> 16) >> 16))) & ((((~0UL) >> (64 - (25) - 1)) & ((~0UL) << ( 15))))) >> (__builtin_ffsll((((~0UL) >> (64 - (25 ) - 1)) & ((~0UL) << (15)))) - 1))) != 0x7FF); |
2518 | new_queue = |
2519 | lower_32_bits(entry)((u32)(entry)) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE(0x1); |
2520 | |
2521 | /* |
2522 | * The context switch detail is not guaranteed to be 5 when a preemption |
2523 | * occurs, so we can't just check for that. The check below works for |
2524 | * all the cases we care about, including preemptions of WAIT |
2525 | * instructions and lite-restore. Preempt-to-idle via the CTRL register |
2526 | * would require some extra handling, but we don't support that. |
2527 | */ |
2528 | if (!ctx_away_valid || new_queue) { |
2529 | GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)))((void)0); |
2530 | return true1; |
2531 | } |
2532 | |
2533 | /* |
2534 | * switch detail = 5 is covered by the case above and we do not expect a |
2535 | * context switch on an unsuccessful wait instruction since we always |
2536 | * use polling mode. |
2537 | */ |
2538 | GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)))((void)0); |
2539 | return false0; |
2540 | } |
2541 | |
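/*
 * On gen8-11 a CSB event with the idle->active or preempted bit set is
 * treated as a promotion to the newly submitted (pending) contexts;
 * anything else is handled as completion of the currently active context
 * (see process_csb() below).
 */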
2542 | static inline bool_Bool gen8_csb_parse(const u64 *csb) |
2543 | { |
2544 | return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE(1 << 0) | GEN8_CTX_STATUS_PREEMPTED(1 << 1)); |
2545 | } |
2546 | |
2547 | static void process_csb(struct intel_engine_cs *engine) |
2548 | { |
2549 | struct intel_engine_execlists * const execlists = &engine->execlists; |
2550 | const u64 * const buf = execlists->csb_status; |
2551 | const u8 num_entries = execlists->csb_size; |
2552 | u8 head, tail; |
2553 | |
2554 | /* |
2555 | * As we modify our execlists state tracking we require exclusive |
2556 | * access. Either we are inside the tasklet, or the tasklet is disabled |
2557 |  * and we assume that happens only inside the reset paths and so is serialised.
2558 | */ |
2559 | GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&((void)0) |
2560 | !reset_in_progress(execlists))((void)0); |
2561 | GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine))((void)0); |
2562 | |
2563 | /* |
2564 | * Note that csb_write, csb_status may be either in HWSP or mmio. |
2565 | * When reading from the csb_write mmio register, we have to be |
2566 | * careful to only use the GEN8_CSB_WRITE_PTR portion, which is |
2567 | * the low 4bits. As it happens we know the next 4bits are always |
2568 |  * zero and so we can simply mask off the low u8 of the register
2569 | * and treat it identically to reading from the HWSP (without having |
2570 | * to use explicit shifting and masking, and probably bifurcating |
2571 | * the code to handle the legacy mmio read). |
2572 | */ |
2573 | head = execlists->csb_head; |
2574 | tail = READ_ONCE(*execlists->csb_write)({ typeof(*execlists->csb_write) __tmp = *(volatile typeof (*execlists->csb_write) *)&(*execlists->csb_write); membar_datadep_consumer(); __tmp; }); |
2575 | if (unlikely(head == tail)__builtin_expect(!!(head == tail), 0)) |
2576 | return; |
2577 | |
2578 | /* |
2579 | * We will consume all events from HW, or at least pretend to. |
2580 | * |
2581 | * The sequence of events from the HW is deterministic, and derived |
2582 | * from our writes to the ELSP, with a smidgen of variability for |
2583 |  * the arrival of the asynchronous requests wrt the inflight
2584 | * execution. If the HW sends an event that does not correspond with |
2585 | * the one we are expecting, we have to abandon all hope as we lose |
2586 | * all tracking of what the engine is actually executing. We will |
2587 | * only detect we are out of sequence with the HW when we get an |
2588 | * 'impossible' event because we have already drained our own |
2589 | * preemption/promotion queue. If this occurs, we know that we likely |
2590 |  * lost track of execution earlier and must unwind and restart; the
2591 |  * simplest way is to stop processing the event queue and force the
2592 | * engine to reset. |
2593 | */ |
2594 | execlists->csb_head = tail; |
2595 | ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2596 | |
2597 | /* |
2598 | * Hopefully paired with a wmb() in HW! |
2599 | * |
2600 | * We must complete the read of the write pointer before any reads |
2601 | * from the CSB, so that we do not see stale values. Without an rmb |
2602 | * (lfence) the HW may speculatively perform the CSB[] reads *before* |
2603 | * we perform the READ_ONCE(*csb_write). |
2604 | */ |
2605 | rmb()do { __asm volatile("lfence" ::: "memory"); } while (0); |
2606 | do { |
2607 | bool_Bool promote; |
2608 | |
2609 | if (++head == num_entries) |
2610 | head = 0; |
2611 | |
2612 | /* |
2613 | * We are flying near dragons again. |
2614 | * |
2615 | * We hold a reference to the request in execlist_port[] |
2616 | * but no more than that. We are operating in softirq |
2617 | * context and so cannot hold any mutex or sleep. That |
2618 | * prevents us stopping the requests we are processing |
2619 | * in port[] from being retired simultaneously (the |
2620 | * breadcrumb will be complete before we see the |
2621 | * context-switch). As we only hold the reference to the |
2622 | * request, any pointer chasing underneath the request |
2623 | * is subject to a potential use-after-free. Thus we |
2624 | * store all of the bookkeeping within port[] as |
2625 | * required, and avoid using unguarded pointers beneath |
2626 | * request itself. The same applies to the atomic |
2627 | * status notifier. |
2628 | */ |
2629 | |
2630 | ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2631 | head,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2632 | upper_32_bits(buf[head]),do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2633 | lower_32_bits(buf[head]))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2634 | |
2635 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
2636 | promote = gen12_csb_parse(buf + head); |
2637 | else |
2638 | promote = gen8_csb_parse(buf + head); |
2639 | if (promote) { |
2640 | struct i915_request * const *old = execlists->active; |
2641 | |
2642 | if (GEM_WARN_ON(!*execlists->pending)({ __builtin_expect(!!(!!(!*execlists->pending)), 0); })) { |
2643 | execlists->error_interrupt |= ERROR_CSB(1UL << (31)); |
2644 | break; |
2645 | } |
2646 | |
2647 | ring_set_paused(engine, 0); |
2648 | |
2649 | /* Point active to the new ELSP; prevent overwriting */ |
2650 | WRITE_ONCE(execlists->active, execlists->pending)({ typeof(execlists->active) __tmp = (execlists->pending ); *(volatile typeof(execlists->active) *)&(execlists-> active) = __tmp; __tmp; }); |
2651 | smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* notify execlists_active() */ |
2652 | |
2653 | /* cancel old inflight, prepare for switch */ |
2654 | trace_ports(execlists, "preempted", old); |
2655 | while (*old) |
2656 | execlists_schedule_out(*old++); |
2657 | |
2658 | /* switch pending to inflight */ |
2659 | GEM_BUG_ON(!assert_pending_valid(execlists, "promote"))((void)0); |
2660 | copy_ports(execlists->inflight, |
2661 | execlists->pending, |
2662 | execlists_num_ports(execlists)); |
2663 | smp_wmb()do { __asm volatile("" ::: "memory"); } while (0); /* complete the seqlock */ |
2664 | WRITE_ONCE(execlists->active, execlists->inflight)({ typeof(execlists->active) __tmp = (execlists->inflight ); *(volatile typeof(execlists->active) *)&(execlists-> active) = __tmp; __tmp; }); |
2665 | |
2666 | /* XXX Magic delay for tgl */ |
2667 | ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x3a0) }))); |
2668 | |
2669 | WRITE_ONCE(execlists->pending[0], NULL)({ typeof(execlists->pending[0]) __tmp = (((void *)0)); *( volatile typeof(execlists->pending[0]) *)&(execlists-> pending[0]) = __tmp; __tmp; }); |
2670 | } else { |
2671 | if (GEM_WARN_ON(!*execlists->active)({ __builtin_expect(!!(!!(!*execlists->active)), 0); })) { |
2672 | execlists->error_interrupt |= ERROR_CSB(1UL << (31)); |
2673 | break; |
2674 | } |
2675 | |
2676 | /* port0 completed, advanced to port1 */ |
2677 | trace_ports(execlists, "completed", execlists->active); |
2678 | |
2679 | /* |
2680 | * We rely on the hardware being strongly |
2681 | * ordered, that the breadcrumb write is |
2682 | * coherent (visible from the CPU) before the |
2683 | * user interrupt is processed. One might assume |
2684 |  * that the breadcrumb write, being before both the
2685 |  * user interrupt and the CS event for the context
2686 |  * switch, would therefore be visible before the CS
2687 |  * event itself...
2688 | */ |
2689 | if (GEM_SHOW_DEBUG()(0) && |
2690 | !i915_request_completed(*execlists->active)) { |
2691 | struct i915_request *rq = *execlists->active; |
2692 | const u32 *regs __maybe_unused__attribute__((__unused__)) = |
2693 | rq->context->lrc_reg_state; |
2694 | |
2695 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2696 | "context completed before request!\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2697 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2698 | "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2699 | ENGINE_READ(engine, RING_START),do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2700 | ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2701 | ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2702 | ENGINE_READ(engine, RING_CTL),do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2703 | ENGINE_READ(engine, RING_MI_MODE))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2704 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2705 | "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2706 | i915_ggtt_offset(rq->ring->vma),do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2707 | rq->head, rq->tail,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2708 | rq->fence.context,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2709 | lower_32_bits(rq->fence.seqno),do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2710 | hwsp_seqno(rq))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2711 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2712 | "ctx:{start:%08x, head:%04x, tail:%04x}, ",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2713 | regs[CTX_RING_START],do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2714 | regs[CTX_RING_HEAD],do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
2715 | regs[CTX_RING_TAIL])do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
2716 | } |
2717 | |
2718 | execlists_schedule_out(*execlists->active++); |
2719 | |
2720 | GEM_BUG_ON(execlists->active - execlists->inflight >((void)0) |
2721 | execlists_num_ports(execlists))((void)0); |
2722 | } |
2723 | } while (head != tail); |
2724 | |
2725 | set_timeslice(engine); |
2726 | |
2727 | /* |
2728 |  * Gen11 has proven to fail wrt the global observation point between
2729 |  * the entry and the tail update, failing on the ordering and thus
2730 | * we see an old entry in the context status buffer. |
2731 | * |
2732 |  * Forcibly evict the entries from the CPU cache ahead of the next gpu
2733 |  * csb update, to increase the odds that we read fresh entries on
2734 |  * non-working hardware. The cost of doing so comes out mostly in
2735 |  * the wash as the hardware, working or not, will need to do the
2736 |  * invalidation beforehand.
2737 | */ |
2738 | invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); |
2739 | } |
2740 | |
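/*
 * With the engine->active lock held by the caller, dequeue the next
 * requests onto the HW unless a previous ELSP write is still pending.
 */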
2741 | static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) |
2742 | { |
2743 | lockdep_assert_held(&engine->active.lock)do { (void)(&engine->active.lock); } while(0); |
2744 | if (!READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile typeof(engine->execlists.pending[0]) *)&(engine->execlists .pending[0]); membar_datadep_consumer(); __tmp; })) { |
2745 | rcu_read_lock(); /* protect peeking at execlists->active */ |
2746 | execlists_dequeue(engine); |
2747 | rcu_read_unlock(); |
2748 | } |
2749 | } |
2750 | |
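/*
 * Suspend rq and every request on this engine that depends on it: each
 * one is unsubmitted (if already on HW) and moved onto the engine's hold
 * list. The waiter graph is walked iteratively through a local list so
 * that we do not recurse over arbitrarily long dependency chains.
 */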
2751 | static void __execlists_hold(struct i915_request *rq) |
2752 | { |
2753 | DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) }; |
2754 | |
2755 | do { |
2756 | struct i915_dependency *p; |
2757 | |
2758 | if (i915_request_is_active(rq)) |
2759 | __i915_request_unsubmit(rq); |
2760 | |
2761 | clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
2762 | list_move_tail(&rq->sched.link, &rq->engine->active.hold); |
2763 | i915_request_set_hold(rq); |
2764 | RQ_TRACE(rq, "on hold\n")do { const struct i915_request *rq__ = (rq); do { const struct intel_engine_cs *e__ __attribute__((__unused__)) = (rq__-> engine); do { } while (0); } while (0); } while (0); |
2765 | |
2766 | for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link ) *__mptr = ((&(rq)->sched.waiters_list)->next); ( __typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p), wait_link) );}); &p->wait_link != (&(rq)-> sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *) 0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof (*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link ) );})) { |
2767 | struct i915_request *w = |
2768 | container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p ->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof (typeof(*w), sched) );}); |
2769 | |
2770 | /* Leave semaphores spinning on the other engines */ |
2771 | if (w->engine != rq->engine) |
2772 | continue; |
2773 | |
2774 | if (!i915_request_is_ready(w)) |
2775 | continue; |
2776 | |
2777 | if (i915_request_completed(w)) |
2778 | continue; |
2779 | |
2780 | if (i915_request_on_hold(w)) |
2781 | continue; |
2782 | |
2783 | list_move_tail(&w->sched.link, &list); |
2784 | } |
2785 | |
2786 | rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof (*rq) *)0)->sched.link ) *__mptr = ((&list)->next); (typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof( *rq), sched.link) );})); |
2787 | } while (rq); |
2788 | } |
2789 | |
2790 | static bool_Bool execlists_hold(struct intel_engine_cs *engine, |
2791 | struct i915_request *rq) |
2792 | { |
2793 | if (i915_request_on_hold(rq)) |
2794 | return false0; |
2795 | |
2796 | spin_lock_irq(&engine->active.lock)mtx_enter(&engine->active.lock); |
2797 | |
2798 | if (i915_request_completed(rq)) { /* too late! */ |
2799 | rq = NULL((void *)0); |
2800 | goto unlock; |
2801 | } |
2802 | |
2803 | if (rq->engine != engine) { /* preempted virtual engine */ |
2804 | struct virtual_engine *ve = to_virtual_engine(rq->engine); |
2805 | |
2806 | /* |
2807 | * intel_context_inflight() is only protected by virtue |
2808 | * of process_csb() being called only by the tasklet (or |
2809 | * directly from inside reset while the tasklet is suspended). |
2810 | * Assert that neither of those are allowed to run while we |
2811 | * poke at the request queues. |
2812 | */ |
2813 | GEM_BUG_ON(!reset_in_progress(&engine->execlists))((void)0); |
2814 | |
2815 | /* |
2816 | * An unsubmitted request along a virtual engine will |
2817 | * remain on the active (this) engine until we are able |
2818 | * to process the context switch away (and so mark the |
2819 | * context as no longer in flight). That cannot have happened |
2820 | * yet, otherwise we would not be hanging! |
2821 | */ |
2822 | spin_lock(&ve->base.active.lock)mtx_enter(&ve->base.active.lock); |
2823 | GEM_BUG_ON(intel_context_inflight(rq->context) != engine)((void)0); |
2824 | GEM_BUG_ON(ve->request != rq)((void)0); |
2825 | ve->request = NULL((void *)0); |
2826 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
2827 | i915_request_put(rq); |
2828 | |
2829 | rq->engine = engine; |
2830 | } |
2831 | |
2832 | /* |
2833 | * Transfer this request onto the hold queue to prevent it |
2834 |  * being resubmitted to HW (and potentially completed) before we have
2835 | * released it. Since we may have already submitted following |
2836 | * requests, we need to remove those as well. |
2837 | */ |
2838 | GEM_BUG_ON(i915_request_on_hold(rq))((void)0); |
2839 | GEM_BUG_ON(rq->engine != engine)((void)0); |
2840 | __execlists_hold(rq); |
2841 | GEM_BUG_ON(list_empty(&engine->active.hold))((void)0); |
2842 | |
2843 | unlock: |
2844 | spin_unlock_irq(&engine->active.lock)mtx_leave(&engine->active.lock); |
2845 | return rq; |
2846 | } |
2847 | |
2848 | static bool_Bool hold_request(const struct i915_request *rq) |
2849 | { |
2850 | struct i915_dependency *p; |
2851 | bool_Bool result = false0; |
2852 | |
2853 | /* |
2854 | * If one of our ancestors is on hold, we must also be on hold, |
2855 | * otherwise we will bypass it and execute before it. |
2856 | */ |
2857 | rcu_read_lock(); |
2858 | for_each_signaler(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->signal_link ) *__mptr = ((&(rq)->sched.signalers_list)->next); (__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p), signal_link) );}); &p->signal_link != (&(rq) ->sched.signalers_list); p = ({ const __typeof( ((__typeof (*p) *)0)->signal_link ) *__mptr = (p->signal_link.next ); (__typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p), signal_link) );})) { |
2859 | const struct i915_request *s = |
2860 | container_of(p->signaler, typeof(*s), sched)({ const __typeof( ((typeof(*s) *)0)->sched ) *__mptr = (p ->signaler); (typeof(*s) *)( (char *)__mptr - __builtin_offsetof (typeof(*s), sched) );}); |
2861 | |
2862 | if (s->engine != rq->engine) |
2863 | continue; |
2864 | |
2865 | result = i915_request_on_hold(s); |
2866 | if (result) |
2867 | break; |
2868 | } |
2869 | rcu_read_unlock(); |
2870 | |
2871 | return result; |
2872 | } |
2873 | |
2874 | static void __execlists_unhold(struct i915_request *rq) |
2875 | { |
2876 | DRM_LIST_HEAD(list)struct list_head list = { &(list), &(list) }; |
2877 | |
2878 | do { |
2879 | struct i915_dependency *p; |
2880 | |
2881 | RQ_TRACE(rq, "hold release\n")do { const struct i915_request *rq__ = (rq); do { const struct intel_engine_cs *e__ __attribute__((__unused__)) = (rq__-> engine); do { } while (0); } while (0); } while (0); |
2882 | |
2883 | GEM_BUG_ON(!i915_request_on_hold(rq))((void)0); |
2884 | GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit))((void)0); |
2885 | |
2886 | i915_request_clear_hold(rq); |
2887 | list_move_tail(&rq->sched.link, |
2888 | i915_sched_lookup_priolist(rq->engine, |
2889 | rq_prio(rq))); |
2890 | set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
2891 | |
2892 | /* Also release any children on this engine that are ready */ |
2893 | for_each_waiter(p, rq)for (p = ({ const __typeof( ((__typeof(*p) *)0)->wait_link ) *__mptr = ((&(rq)->sched.waiters_list)->next); ( __typeof(*p) *)( (char *)__mptr - __builtin_offsetof(__typeof (*p), wait_link) );}); &p->wait_link != (&(rq)-> sched.waiters_list); p = ({ const __typeof( ((__typeof(*p) *) 0)->wait_link ) *__mptr = (p->wait_link.next); (__typeof (*p) *)( (char *)__mptr - __builtin_offsetof(__typeof(*p), wait_link ) );})) { |
2894 | struct i915_request *w = |
2895 | container_of(p->waiter, typeof(*w), sched)({ const __typeof( ((typeof(*w) *)0)->sched ) *__mptr = (p ->waiter); (typeof(*w) *)( (char *)__mptr - __builtin_offsetof (typeof(*w), sched) );}); |
2896 | |
2897 | /* Propagate any change in error status */ |
2898 | if (rq->fence.error) |
2899 | i915_request_set_error_once(w, rq->fence.error); |
2900 | |
2901 | if (w->engine != rq->engine) |
2902 | continue; |
2903 | |
2904 | if (!i915_request_on_hold(w)) |
2905 | continue; |
2906 | |
2907 | /* Check that no other parents are also on hold */ |
2908 | if (hold_request(w)) |
2909 | continue; |
2910 | |
2911 | list_move_tail(&w->sched.link, &list); |
2912 | } |
2913 | |
2914 | rq = list_first_entry_or_null(&list, typeof(*rq), sched.link)(list_empty(&list) ? ((void *)0) : ({ const __typeof( ((typeof (*rq) *)0)->sched.link ) *__mptr = ((&list)->next); (typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(typeof( *rq), sched.link) );})); |
2915 | } while (rq); |
2916 | } |
2917 | |
2918 | static void execlists_unhold(struct intel_engine_cs *engine, |
2919 | struct i915_request *rq) |
2920 | { |
2921 | spin_lock_irq(&engine->active.lock)mtx_enter(&engine->active.lock); |
2922 | |
2923 | /* |
2924 | * Move this request back to the priority queue, and all of its |
2925 | * children and grandchildren that were suspended along with it. |
2926 | */ |
2927 | __execlists_unhold(rq); |
2928 | |
2929 | if (rq_prio(rq) > engine->execlists.queue_priority_hint) { |
2930 | engine->execlists.queue_priority_hint = rq_prio(rq); |
2931 | tasklet_hi_schedule(&engine->execlists.tasklet); |
2932 | } |
2933 | |
2934 | spin_unlock_irq(&engine->active.lock)mtx_leave(&engine->active.lock); |
2935 | } |
2936 | |
2937 | struct execlists_capture { |
2938 | struct work_struct work; |
2939 | struct i915_request *rq; |
2940 | struct i915_gpu_coredump *error; |
2941 | }; |
2942 | |
2943 | static void execlists_capture_work(struct work_struct *work) |
2944 | { |
2945 | struct execlists_capture *cap = container_of(work, typeof(*cap), work)({ const __typeof( ((typeof(*cap) *)0)->work ) *__mptr = ( work); (typeof(*cap) *)( (char *)__mptr - __builtin_offsetof( typeof(*cap), work) );}); |
2946 | const gfp_t gfp = GFP_KERNEL(0x0001 | 0x0004) | __GFP_RETRY_MAYFAIL0 | __GFP_NOWARN0; |
2947 | struct intel_engine_cs *engine = cap->rq->engine; |
2948 | struct intel_gt_coredump *gt = cap->error->gt; |
2949 | struct intel_engine_capture_vma *vma; |
2950 | |
2951 | /* Compress all the objects attached to the request, slow! */ |
2952 | vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); |
2953 | if (vma) { |
2954 | struct i915_vma_compress *compress = |
2955 | i915_vma_capture_prepare(gt); |
2956 | |
2957 | intel_engine_coredump_add_vma(gt->engine, vma, compress); |
2958 | i915_vma_capture_finish(gt, compress); |
2959 | } |
2960 | |
2961 | gt->simulated = gt->engine->simulated; |
2962 | cap->error->simulated = gt->simulated; |
2963 | |
2964 | /* Publish the error state, and announce it to the world */ |
2965 | i915_error_state_store(cap->error); |
2966 | i915_gpu_coredump_put(cap->error); |
2967 | |
2968 | /* Return this request and all that depend upon it for signaling */ |
2969 | execlists_unhold(engine, cap->rq); |
2970 | i915_request_put(cap->rq); |
2971 | |
2972 | kfree(cap); |
2973 | } |
2974 | |
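/*
 * Called from the submission tasklet (softirq) while the forced
 * preemption is being held off, so only atomic allocations may be used
 * here; the expensive capture itself is deferred to
 * execlists_capture_work().
 */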
2975 | static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) |
2976 | { |
2977 | const gfp_t gfp = GFP_ATOMIC0x0002 | __GFP_NOWARN0; |
2978 | struct execlists_capture *cap; |
2979 | |
2980 | cap = kmalloc(sizeof(*cap), gfp); |
2981 | if (!cap) |
2982 | return NULL((void *)0); |
2983 | |
2984 | cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); |
2985 | if (!cap->error) |
2986 | goto err_cap; |
2987 | |
2988 | cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); |
2989 | if (!cap->error->gt) |
2990 | goto err_gpu; |
2991 | |
2992 | cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); |
2993 | if (!cap->error->gt->engine) |
2994 | goto err_gt; |
2995 | |
2996 | return cap; |
2997 | |
2998 | err_gt: |
2999 | kfree(cap->error->gt); |
3000 | err_gpu: |
3001 | kfree(cap->error); |
3002 | err_cap: |
3003 | kfree(cap); |
3004 | return NULL((void *)0); |
3005 | } |
3006 | |
3007 | static struct i915_request * |
3008 | active_context(struct intel_engine_cs *engine, u32 ccid) |
3009 | { |
3010 | const struct intel_engine_execlists * const el = &engine->execlists; |
3011 | struct i915_request * const *port, *rq; |
3012 | |
3013 | /* |
3014 | * Use the most recent result from process_csb(), but just in case |
3015 | * we trigger an error (via interrupt) before the first CS event has |
3016 | * been written, peek at the next submission. |
3017 | */ |
3018 | |
3019 | for (port = el->active; (rq = *port); port++) { |
3020 | if (rq->context->lrc.ccid == ccid) { |
3021 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
3022 | "ccid found at active:%zd\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
3023 | port - el->active)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
3024 | return rq; |
3025 | } |
3026 | } |
3027 | |
3028 | for (port = el->pending; (rq = *port); port++) { |
3029 | if (rq->context->lrc.ccid == ccid) { |
3030 | ENGINE_TRACE(engine,do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
3031 | "ccid found at pending:%zd\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
3032 | port - el->pending)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
3033 | return rq; |
3034 | } |
3035 | } |
3036 | |
3037 | ENGINE_TRACE(engine, "ccid:%x not found\n", ccid)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
3038 | return NULL((void *)0); |
3039 | } |
3040 | |
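/*
 * The upper dword of the execlist status register carries the CCID of
 * the context the HW is currently executing; execlists_capture() uses
 * it to pick the matching request out of our ports.
 */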
3041 | static u32 active_ccid(struct intel_engine_cs *engine) |
3042 | { |
3043 | return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x234 + 4) })); |
3044 | } |
3045 | |
3046 | static void execlists_capture(struct intel_engine_cs *engine) |
3047 | { |
3048 | struct execlists_capture *cap; |
3049 | |
3050 | if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)1) |
3051 | return; |
3052 | |
3053 | /* |
3054 | * We need to _quickly_ capture the engine state before we reset. |
3055 | * We are inside an atomic section (softirq) here and we are delaying |
3056 | * the forced preemption event. |
3057 | */ |
3058 | cap = capture_regs(engine); |
3059 | if (!cap) |
3060 | return; |
3061 | |
3062 | spin_lock_irq(&engine->active.lock)mtx_enter(&engine->active.lock); |
3063 | cap->rq = active_context(engine, active_ccid(engine)); |
3064 | if (cap->rq) { |
3065 | cap->rq = active_request(cap->rq->context->timeline, cap->rq); |
3066 | cap->rq = i915_request_get_rcu(cap->rq); |
3067 | } |
3068 | spin_unlock_irq(&engine->active.lock)mtx_leave(&engine->active.lock); |
3069 | if (!cap->rq) |
3070 | goto err_free; |
3071 | |
3072 | /* |
3073 | * Remove the request from the execlists queue, and take ownership |
3074 | * of the request. We pass it to our worker who will _slowly_ compress |
3075 | * all the pages the _user_ requested for debugging their batch, after |
3076 | * which we return it to the queue for signaling. |
3077 | * |
3078 | * By removing them from the execlists queue, we also remove the |
3079 | * requests from being processed by __unwind_incomplete_requests() |
3080 | * during the intel_engine_reset(), and so they will *not* be replayed |
3081 | * afterwards. |
3082 | * |
3083 | * Note that because we have not yet reset the engine at this point, |
3084 |  * it is possible that the request we have identified as being
3085 |  * guilty did in fact complete and we will then hit an arbitration
3086 | * point allowing the outstanding preemption to succeed. The likelihood |
3087 | * of that is very low (as capturing of the engine registers should be |
3088 | * fast enough to run inside an irq-off atomic section!), so we will |
3089 | * simply hold that request accountable for being non-preemptible |
3090 | * long enough to force the reset. |
3091 | */ |
3092 | if (!execlists_hold(engine, cap->rq)) |
3093 | goto err_rq; |
3094 | |
3095 | INIT_WORK(&cap->work, execlists_capture_work); |
3096 | schedule_work(&cap->work); |
3097 | return; |
3098 | |
3099 | err_rq: |
3100 | i915_request_put(cap->rq); |
3101 | err_free: |
3102 | i915_gpu_coredump_put(cap->error); |
3103 | kfree(cap); |
3104 | } |
3105 | |
3106 | static void execlists_reset(struct intel_engine_cs *engine, const char *msg) |
3107 | { |
3108 | const unsigned int bit = I915_RESET_ENGINE2 + engine->id; |
3109 | unsigned long *lock = &engine->gt->reset.flags; |
3110 | |
3111 | if (!intel_has_reset_engine(engine->gt)) |
3112 | return; |
3113 | |
3114 | if (test_and_set_bit(bit, lock)) |
3115 | return; |
3116 | |
3117 | ENGINE_TRACE(engine, "reset for %s\n", msg)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
3118 | |
3119 | /* Mark this tasklet as disabled to avoid waiting for it to complete */ |
3120 | tasklet_disable_nosync(&engine->execlists.tasklet); |
3121 | |
3122 | ring_set_paused(engine, 1); /* Freeze the current request in place */ |
3123 | execlists_capture(engine); |
3124 | intel_engine_reset(engine, msg); |
3125 | |
3126 | tasklet_enable(&engine->execlists.tasklet); |
3127 | clear_and_wake_up_bit(bit, lock); |
3128 | } |
3129 | |
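/*
 * A forced preemption is only considered to have timed out if the
 * preempt timer has expired while our last ELSP write is still pending,
 * i.e. the HW has not yet acknowledged the context switch.
 */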
3130 | static bool_Bool preempt_timeout(const struct intel_engine_cs *const engine) |
3131 | { |
3132 | const struct timeout *t = &engine->execlists.preempt; |
3133 | |
3134 | if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT640) |
3135 | return false0; |
3136 | |
3137 | if (!timer_expired(t)) |
3138 | return false0; |
3139 | |
3140 | return READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile typeof(engine->execlists.pending[0]) *)&(engine->execlists .pending[0]); membar_datadep_consumer(); __tmp; }); |
3141 | } |
3142 | |
3143 | /* |
3144 | * Check the unread Context Status Buffers and manage the submission of new |
3145 | * contexts to the ELSP accordingly. |
3146 | */ |
3147 | static void execlists_submission_tasklet(unsigned long data) |
3148 | { |
3149 | struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; |
3150 | bool_Bool timeout = preempt_timeout(engine); |
3151 | |
3152 | process_csb(engine); |
3153 | |
3154 | if (unlikely(READ_ONCE(engine->execlists.error_interrupt))__builtin_expect(!!(({ typeof(engine->execlists.error_interrupt ) __tmp = *(volatile typeof(engine->execlists.error_interrupt ) *)&(engine->execlists.error_interrupt); membar_datadep_consumer (); __tmp; })), 0)) { |
3155 | const char *msg; |
3156 | |
3157 |  /* Generate the error message in priority wrt the user! */
3158 | if (engine->execlists.error_interrupt & GENMASK(15, 0)(((~0UL) >> (64 - (15) - 1)) & ((~0UL) << (0) ))) |
3159 | msg = "CS error"; /* thrown by a user payload */ |
3160 | else if (engine->execlists.error_interrupt & ERROR_CSB(1UL << (31))) |
3161 | msg = "invalid CSB event"; |
3162 | else |
3163 | msg = "internal error"; |
3164 | |
3165 | engine->execlists.error_interrupt = 0; |
3166 | execlists_reset(engine, msg); |
3167 | } |
3168 | |
3169 | if (!READ_ONCE(engine->execlists.pending[0])({ typeof(engine->execlists.pending[0]) __tmp = *(volatile typeof(engine->execlists.pending[0]) *)&(engine->execlists .pending[0]); membar_datadep_consumer(); __tmp; }) || timeout) { |
3170 | unsigned long flags; |
3171 | |
3172 | spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while (0); |
3173 | __execlists_submission_tasklet(engine); |
3174 | spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); } while (0); |
3175 | |
3176 | /* Recheck after serialising with direct-submission */ |
3177 | if (unlikely(timeout && preempt_timeout(engine))__builtin_expect(!!(timeout && preempt_timeout(engine )), 0)) { |
3178 | cancel_timer(&engine->execlists.preempt); |
3179 | execlists_reset(engine, "preemption time out"); |
3180 | } |
3181 | } |
3182 | } |
3183 | |
3184 | static void __execlists_kick(struct intel_engine_execlists *execlists) |
3185 | { |
3186 | /* Kick the tasklet for some interrupt coalescing and reset handling */ |
3187 | tasklet_hi_schedule(&execlists->tasklet); |
3188 | } |
3189 | |
3190 | #define execlists_kick(t, member)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists *)0)->member ) *__mptr = (t); (struct intel_engine_execlists *)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists , member) );})) \ |
3191 | __execlists_kick(container_of(t, struct intel_engine_execlists, member)({ const __typeof( ((struct intel_engine_execlists *)0)->member ) *__mptr = (t); (struct intel_engine_execlists *)( (char *) __mptr - __builtin_offsetof(struct intel_engine_execlists, member ) );})) |
3192 | |
3193 | #ifdef __linux__ |
3194 | |
3195 | static void execlists_timeslice(struct timer_list *timer) |
3196 | { |
3197 | execlists_kick(timer, timer)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists *)0)->timer ) *__mptr = (timer); (struct intel_engine_execlists *)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists , timer) );})); |
3198 | } |
3199 | |
3200 | static void execlists_preempt(struct timer_list *timer) |
3201 | { |
3202 | execlists_kick(timer, preempt)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists *)0)->preempt ) *__mptr = (timer); (struct intel_engine_execlists *)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists , preempt) );})); |
3203 | } |
3204 | |
3205 | #else |
3206 | |
3207 | static void execlists_timeslice(void *arg) |
3208 | { |
3209 | struct timeout *timer = arg; |
3210 | execlists_kick(timer, timer)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists *)0)->timer ) *__mptr = (timer); (struct intel_engine_execlists *)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists , timer) );})); |
3211 | } |
3212 | |
3213 | static void execlists_preempt(void *arg) |
3214 | { |
3215 | struct timeout *timer = arg; |
3216 | execlists_kick(timer, preempt)__execlists_kick(({ const __typeof( ((struct intel_engine_execlists *)0)->preempt ) *__mptr = (timer); (struct intel_engine_execlists *)( (char *)__mptr - __builtin_offsetof(struct intel_engine_execlists , preempt) );})); |
3217 | } |
3218 | |
3219 | #endif |
3220 | |
3221 | static void queue_request(struct intel_engine_cs *engine, |
3222 | struct i915_request *rq) |
3223 | { |
3224 | GEM_BUG_ON(!list_empty(&rq->sched.link))((void)0); |
3225 | list_add_tail(&rq->sched.link, |
3226 | i915_sched_lookup_priolist(engine, rq_prio(rq))); |
3227 | set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
3228 | } |
3229 | |
3230 | static void __submit_queue_imm(struct intel_engine_cs *engine) |
3231 | { |
3232 | struct intel_engine_execlists * const execlists = &engine->execlists; |
3233 | |
3234 | if (reset_in_progress(execlists)) |
3235 | return; /* defer until we restart the engine following reset */ |
3236 | |
3237 | __execlists_submission_tasklet(engine); |
3238 | } |
3239 | |
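/*
 * Only attempt immediate submission if this request raises the queue
 * priority hint; lower priority requests will be picked up by the
 * tasklet when the current submission completes.
 */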
3240 | static void submit_queue(struct intel_engine_cs *engine, |
3241 | const struct i915_request *rq) |
3242 | { |
3243 | struct intel_engine_execlists *execlists = &engine->execlists; |
3244 | |
3245 | if (rq_prio(rq) <= execlists->queue_priority_hint) |
3246 | return; |
3247 | |
3248 | execlists->queue_priority_hint = rq_prio(rq); |
3249 | __submit_queue_imm(engine); |
3250 | } |
3251 | |
3252 | static bool_Bool ancestor_on_hold(const struct intel_engine_cs *engine, |
3253 | const struct i915_request *rq) |
3254 | { |
3255 | GEM_BUG_ON(i915_request_on_hold(rq))((void)0); |
3256 | return !list_empty(&engine->active.hold) && hold_request(rq); |
3257 | } |
3258 | |
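/*
 * Opportunistically drain any outstanding CSB events (if we can take the
 * tasklet lock and no reset is in flight) so that a stale pending[] does
 * not needlessly defer the submission that follows.
 */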
3259 | static void flush_csb(struct intel_engine_cs *engine) |
3260 | { |
3261 | struct intel_engine_execlists *el = &engine->execlists; |
3262 | |
3263 | if (READ_ONCE(el->pending[0])({ typeof(el->pending[0]) __tmp = *(volatile typeof(el-> pending[0]) *)&(el->pending[0]); membar_datadep_consumer (); __tmp; }) && tasklet_trylock(&el->tasklet)) { |
3264 | if (!reset_in_progress(el)) |
3265 | process_csb(engine); |
3266 | tasklet_unlock(&el->tasklet); |
3267 | } |
3268 | } |
3269 | |
3270 | static void execlists_submit_request(struct i915_request *request) |
3271 | { |
3272 | struct intel_engine_cs *engine = request->engine; |
3273 | unsigned long flags; |
3274 | |
3275 | /* Hopefully we clear execlists->pending[] to let us through */ |
3276 | flush_csb(engine); |
3277 | |
3278 | /* Will be called from irq-context when using foreign fences. */ |
3279 | spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while (0); |
3280 | |
3281 | if (unlikely(ancestor_on_hold(engine, request))__builtin_expect(!!(ancestor_on_hold(engine, request)), 0)) { |
3282 | RQ_TRACE(request, "ancestor on hold\n")do { const struct i915_request *rq__ = (request); do { const struct intel_engine_cs *e__ __attribute__((__unused__)) = (rq__-> engine); do { } while (0); } while (0); } while (0); |
3283 | list_add_tail(&request->sched.link, &engine->active.hold); |
3284 | i915_request_set_hold(request); |
3285 | } else { |
3286 | queue_request(engine, request); |
3287 | |
3288 | GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))((void)0); |
3289 | GEM_BUG_ON(list_empty(&request->sched.link))((void)0); |
3290 | |
3291 | submit_queue(engine, request); |
3292 | } |
3293 | |
3294 | spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); } while (0); |
3295 | } |
3296 | |
3297 | static void __execlists_context_fini(struct intel_context *ce) |
3298 | { |
3299 | intel_ring_put(ce->ring); |
3300 | i915_vma_put(ce->state); |
3301 | } |
3302 | |
3303 | static void execlists_context_destroy(struct kref *kref) |
3304 | { |
3305 | struct intel_context *ce = container_of(kref, typeof(*ce), ref)({ const __typeof( ((typeof(*ce) *)0)->ref ) *__mptr = (kref ); (typeof(*ce) *)( (char *)__mptr - __builtin_offsetof(typeof (*ce), ref) );}); |
3306 | |
3307 | GEM_BUG_ON(!i915_active_is_idle(&ce->active))((void)0); |
3308 | GEM_BUG_ON(intel_context_is_pinned(ce))((void)0); |
3309 | |
3310 | if (ce->state) |
3311 | __execlists_context_fini(ce); |
3312 | |
3313 | intel_context_fini(ce); |
3314 | intel_context_free(ce); |
3315 | } |
3316 | |
3317 | static void |
3318 | set_redzone(void *vaddr, const struct intel_engine_cs *engine) |
3319 | { |
3320 | if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0) |
3321 | return; |
3322 | |
3323 | vaddr += engine->context_size; |
3324 | |
3325 | memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)__builtin_memset((vaddr), (0xdb), ((1ULL << (12)))); |
3326 | } |
3327 | |
3328 | static void |
3329 | check_redzone(const void *vaddr, const struct intel_engine_cs *engine) |
3330 | { |
3331 | if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0) |
3332 | return; |
3333 | |
3334 | vaddr += engine->context_size; |
3335 | |
3336 | if (memchr_inv(vaddr, CONTEXT_REDZONE0xdb, I915_GTT_PAGE_SIZE(1ULL << (12)))) |
3337 | drm_err_once(&engine->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "%s context redzone overwritten!\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , engine ->name) |
3338 | "%s context redzone overwritten!\n",printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "%s context redzone overwritten!\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , engine ->name) |
3339 | engine->name)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "%s context redzone overwritten!\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , engine ->name); |
3340 | } |
3341 | |
3342 | static void execlists_context_unpin(struct intel_context *ce) |
3343 | { |
3344 | check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)), |
3345 | ce->engine); |
3346 | } |
3347 | |
3348 | static void execlists_context_post_unpin(struct intel_context *ce) |
3349 | { |
3350 | i915_gem_object_unpin_map(ce->state->obj); |
3351 | } |
3352 | |
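/*
 * The helpers below build the gen12 per-context indirect (wa_bb) batch:
 * values are staged through CS GPR0 so that the context timestamp,
 * CMD_BUF_CCTL (on rcs only) and GPR0 itself are reloaded from the
 * context image whenever the context is restored.
 */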
3353 | static u32 * |
3354 | gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs) |
3355 | { |
3356 | *cs++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) | |
3357 | MI_SRM_LRM_GLOBAL_GTT(1<<22) | |
3358 | MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
3359 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) })); |
3360 | *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)) + |
3361 | CTX_TIMESTAMP(0x22 + 1) * sizeof(u32); |
3362 | *cs++ = 0; |
3363 | |
3364 | *cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) | (1)) | |
3365 | MI_LRR_SOURCE_CS_MMIO((u32)((1UL << (18)) + 0)) | |
3366 | MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
3367 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) })); |
3368 | *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)((const i915_reg_t){ .reg = ((0) + 0x3a8) })); |
3369 | |
3370 | *cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) | (1)) | |
3371 | MI_LRR_SOURCE_CS_MMIO((u32)((1UL << (18)) + 0)) | |
3372 | MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
3373 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) })); |
3374 | *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0)((const i915_reg_t){ .reg = ((0) + 0x3a8) })); |
3375 | |
3376 | return cs; |
3377 | } |
3378 | |
3379 | static u32 * |
3380 | gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs) |
3381 | { |
3382 | GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1)((void)0); |
3383 | |
3384 | *cs++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) | |
3385 | MI_SRM_LRM_GLOBAL_GTT(1<<22) | |
3386 | MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
3387 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) })); |
3388 | *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)) + |
3389 | (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32); |
3390 | *cs++ = 0; |
3391 | |
3392 | return cs; |
3393 | } |
3394 | |
3395 | static u32 * |
3396 | gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) |
3397 | { |
3398 | GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1)((void)0); |
3399 | |
3400 | *cs++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) | |
3401 | MI_SRM_LRM_GLOBAL_GTT(1<<22) | |
3402 | MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
3403 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) })); |
3404 | *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)) + |
3405 | (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32); |
3406 | *cs++ = 0; |
3407 | |
3408 | *cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) | (1)) | |
3409 | MI_LRR_SOURCE_CS_MMIO((u32)((1UL << (18)) + 0)) | |
3410 | MI_LRI_LRM_CS_MMIO((u32)((1UL << (19)) + 0)); |
3411 | *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0)((const i915_reg_t){ .reg = ((0) + 0x600 + (0) * 8) })); |
3412 | *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0)((const i915_reg_t){ .reg = ((0) + 0x84) })); |
3413 | |
3414 | return cs; |
3415 | } |
3416 | |
3417 | static u32 * |
3418 | gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) |
3419 | { |
3420 | cs = gen12_emit_timestamp_wa(ce, cs); |
3421 | cs = gen12_emit_cmd_buf_wa(ce, cs); |
3422 | cs = gen12_emit_restore_scratch(ce, cs); |
3423 | |
3424 | return cs; |
3425 | } |
3426 | |
3427 | static u32 * |
3428 | gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) |
3429 | { |
3430 | cs = gen12_emit_timestamp_wa(ce, cs); |
3431 | cs = gen12_emit_restore_scratch(ce, cs); |
3432 | |
3433 | return cs; |
3434 | } |
3435 | |
3436 | static inline u32 context_wa_bb_offset(const struct intel_context *ce) |
3437 | { |
3438 | return PAGE_SIZE(1 << 12) * ce->wa_bb_page; |
3439 | } |
3440 | |
3441 | static u32 *context_indirect_bb(const struct intel_context *ce) |
3442 | { |
3443 | void *ptr; |
3444 | |
3445 | GEM_BUG_ON(!ce->wa_bb_page)((void)0); |
3446 | |
3447 | ptr = ce->lrc_reg_state; |
3448 | ptr -= LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)); /* back to start of context image */ |
3449 | ptr += context_wa_bb_offset(ce); |
3450 | |
3451 | return ptr; |
3452 | } |
3453 | |
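/*
 * Emit the indirect context batch into the wa_bb page, pad it to a
 * cacheline with MI_NOOPs and point the context's INDIRECT_CTX registers
 * at the result.
 */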
3454 | static void |
3455 | setup_indirect_ctx_bb(const struct intel_context *ce, |
3456 | const struct intel_engine_cs *engine, |
3457 | u32 *(*emit)(const struct intel_context *, u32 *)) |
3458 | { |
3459 | u32 * const start = context_indirect_bb(ce); |
3460 | u32 *cs; |
3461 | |
3462 | cs = emit(ce, start); |
3463 | GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs))((void)0); |
3464 | while ((unsigned long)cs % CACHELINE_BYTES64) |
3465 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
3466 | |
3467 | lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine, |
3468 | i915_ggtt_offset(ce->state) + |
3469 | context_wa_bb_offset(ce), |
3470 | (cs - start) * sizeof(*cs)); |
3471 | } |
3472 | |
3473 | static void |
3474 | __execlists_update_reg_state(const struct intel_context *ce, |
3475 | const struct intel_engine_cs *engine, |
3476 | u32 head) |
3477 | { |
3478 | struct intel_ring *ring = ce->ring; |
3479 | u32 *regs = ce->lrc_reg_state; |
3480 | |
3481 | GEM_BUG_ON(!intel_ring_offset_valid(ring, head))((void)0); |
3482 | GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail))((void)0); |
3483 | |
3484 | regs[CTX_RING_START(0x08 + 1)] = i915_ggtt_offset(ring->vma); |
3485 | regs[CTX_RING_HEAD(0x04 + 1)] = head; |
3486 | regs[CTX_RING_TAIL(0x06 + 1)] = ring->tail; |
3487 | regs[CTX_RING_CTL(0x0a + 1)] = RING_CTL_SIZE(ring->size)((ring->size) - (1 << 12)) | RING_VALID0x00000001; |
3488 | |
3489 | /* RPCS */ |
3490 | if (engine->class == RENDER_CLASS0) { |
3491 | regs[CTX_R_PWR_CLK_STATE(0x42 + 1)] = |
3492 | intel_sseu_make_rpcs(engine->gt, &ce->sseu); |
3493 | |
3494 | i915_oa_init_reg_state(ce, engine); |
3495 | } |
3496 | |
3497 | if (ce->wa_bb_page) { |
3498 | u32 *(*fn)(const struct intel_context *ce, u32 *cs); |
3499 | |
3500 | fn = gen12_emit_indirect_ctx_xcs; |
3501 | if (ce->engine->class == RENDER_CLASS0) |
3502 | fn = gen12_emit_indirect_ctx_rcs; |
3503 | |
3504 | /* Mutually exclusive wrt to global indirect bb */ |
3505 | GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size)((void)0); |
3506 | setup_indirect_ctx_bb(ce, engine, fn); |
3507 | } |
3508 | } |
3509 | |
3510 | static int |
3511 | execlists_context_pre_pin(struct intel_context *ce, |
3512 | struct i915_gem_ww_ctx *ww, void **vaddr) |
3513 | { |
3514 | GEM_BUG_ON(!ce->state)((void)0); |
3515 | GEM_BUG_ON(!i915_vma_is_pinned(ce->state))((void)0); |
3516 | |
3517 | *vaddr = i915_gem_object_pin_map(ce->state->obj, |
3518 | i915_coherent_map_type(ce->engine->i915) | |
3519 | I915_MAP_OVERRIDE(1UL << (31))); |
3520 | |
3521 | return PTR_ERR_OR_ZERO(*vaddr); |
3522 | } |
3523 | |
3524 | static int |
3525 | __execlists_context_pin(struct intel_context *ce, |
3526 | struct intel_engine_cs *engine, |
3527 | void *vaddr) |
3528 | { |
3529 | ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE(1ULL << (2)); |
3530 | ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)); |
3531 | __execlists_update_reg_state(ce, engine, ce->ring->tail); |
3532 | |
3533 | return 0; |
3534 | } |
3535 | |
3536 | static int execlists_context_pin(struct intel_context *ce, void *vaddr) |
3537 | { |
3538 | return __execlists_context_pin(ce, ce->engine, vaddr); |
3539 | } |
3540 | |
3541 | static int execlists_context_alloc(struct intel_context *ce) |
3542 | { |
3543 | return __execlists_context_alloc(ce, ce->engine); |
3544 | } |
3545 | |
3546 | static void execlists_context_reset(struct intel_context *ce) |
3547 | { |
3548 | CE_TRACE(ce, "reset\n")do { const struct intel_context *ce__ = (ce); do { const struct intel_engine_cs *e__ __attribute__((__unused__)) = (ce__-> engine); do { } while (0); } while (0); } while (0); |
3549 | GEM_BUG_ON(!intel_context_is_pinned(ce))((void)0); |
3550 | |
3551 | intel_ring_reset(ce->ring, ce->ring->emit); |
3552 | |
3553 | /* Scrub away the garbage */ |
3554 | execlists_init_reg_state(ce->lrc_reg_state, |
3555 | ce, ce->engine, ce->ring, true1); |
3556 | __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); |
3557 | |
3558 | ce->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2)); |
3559 | } |
3560 | |
3561 | static const struct intel_context_ops execlists_context_ops = { |
3562 | .alloc = execlists_context_alloc, |
3563 | |
3564 | .pre_pin = execlists_context_pre_pin, |
3565 | .pin = execlists_context_pin, |
3566 | .unpin = execlists_context_unpin, |
3567 | .post_unpin = execlists_context_post_unpin, |
3568 | |
3569 | .enter = intel_context_enter_engine, |
3570 | .exit = intel_context_exit_engine, |
3571 | |
3572 | .reset = execlists_context_reset, |
3573 | .destroy = execlists_context_destroy, |
3574 | }; |
3575 | |
3576 | static u32 hwsp_offset(const struct i915_request *rq) |
3577 | { |
3578 | const struct intel_timeline_cacheline *cl; |
3579 | |
3580 | /* Before the request is executed, the timeline/cachline is fixed */ |
3581 |  /* Before the request is executed, the timeline/cacheline is fixed */
3582 | cl = rcu_dereference_protected(rq->hwsp_cacheline, 1)(rq->hwsp_cacheline); |
3583 | if (cl) |
3584 | return cl->ggtt_offset; |
3585 | |
3586 | return rcu_dereference_protected(rq->timeline, 1)(rq->timeline)->hwsp_offset; |
3587 | } |
3588 | |
3589 | static int gen8_emit_init_breadcrumb(struct i915_request *rq) |
3590 | { |
3591 | u32 *cs; |
3592 | |
3593 | GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq))((void)0); |
3594 | if (!i915_request_timeline(rq)->has_initial_breadcrumb) |
3595 | return 0; |
3596 | |
3597 | cs = intel_ring_begin(rq, 6); |
3598 | if (IS_ERR(cs)) |
3599 | return PTR_ERR(cs); |
3600 | |
3601 | /* |
3602 | * Check if we have been preempted before we even get started. |
3603 | * |
3604 | * After this point i915_request_started() reports true, even if |
3605 | * we get preempted and so are no longer running. |
3606 | */ |
3607 | *cs++ = MI_ARB_CHECK(((0x05) << 23) | (0)); |
3608 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
3609 | |
3610 | *cs++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) | (2)) | MI_USE_GGTT(1 << 22); |
3611 | *cs++ = hwsp_offset(rq); |
3612 | *cs++ = 0; |
3613 | *cs++ = rq->fence.seqno - 1; |
3614 | |
3615 | intel_ring_advance(rq, cs); |
3616 | |
3617 | /* Record the updated position of the request's payload */ |
3618 | rq->infix = intel_ring_offset(rq, cs); |
3619 | |
3620 | __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); |
3621 | |
3622 | return 0; |
3623 | } |
3624 | |
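/*
 * With a 3-level ppgtt the four PDP registers must be reloaded with this
 * context's page directory addresses before the payload runs; a 4-level
 * ppgtt instead takes its single PML4 pointer from the context image and
 * needs no such fixup (see execlists_request_alloc()).
 */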
3625 | static int emit_pdps(struct i915_request *rq) |
3626 | { |
3627 | const struct intel_engine_cs * const engine = rq->engine; |
3628 | struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); |
3629 | int err, i; |
3630 | u32 *cs; |
3631 | |
3632 | GEM_BUG_ON(intel_vgpu_active(rq->engine->i915))((void)0); |
3633 | |
3634 | /* |
3635 | * Beware ye of the dragons, this sequence is magic! |
3636 | * |
3637 | * Small changes to this sequence can cause anything from |
3638 | * GPU hangs to forcewake errors and machine lockups! |
3639 | */ |
3640 | |
3641 | /* Flush any residual operations from the context load */ |
3642 | err = engine->emit_flush(rq, EMIT_FLUSH(1UL << (1))); |
3643 | if (err) |
3644 | return err; |
3645 | |
3646 | /* Magic required to prevent forcewake errors! */ |
3647 | err = engine->emit_flush(rq, EMIT_INVALIDATE(1UL << (0))); |
3648 | if (err) |
3649 | return err; |
3650 | |
3651 | cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES4 + 2); |
3652 | if (IS_ERR(cs)) |
3653 | return PTR_ERR(cs); |
3654 | |
3655 | /* Ensure the LRI have landed before we invalidate & continue */ |
3656 | *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES)(((0x22) << 23) | (2*(2 * 4)-1)) | MI_LRI_FORCE_POSTED(1<<12); |
3657 | for (i = GEN8_3LVL_PDPES4; i--; ) { |
3658 | const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); |
3659 | u32 base = engine->mmio_base; |
3660 | |
3661 | *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)((const i915_reg_t){ .reg = ((base) + 0x270 + (i) * 8 + 4) })); |
3662 | *cs++ = upper_32_bits(pd_daddr)((u32)(((pd_daddr) >> 16) >> 16)); |
3663 | *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)((const i915_reg_t){ .reg = ((base) + 0x270 + (i) * 8) })); |
3664 | *cs++ = lower_32_bits(pd_daddr)((u32)(pd_daddr)); |
3665 | } |
3666 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
3667 | |
3668 | intel_ring_advance(rq, cs); |
3669 | |
3670 | return 0; |
3671 | } |
3672 | |
3673 | static int execlists_request_alloc(struct i915_request *request) |
3674 | { |
3675 | int ret; |
3676 | |
3677 | GEM_BUG_ON(!intel_context_is_pinned(request->context))((void)0); |
3678 | |
3679 | /* |
3680 | * Flush enough space to reduce the likelihood of waiting after |
3681 | * we start building the request - in which case we will just |
3682 | * have to repeat work. |
3683 | */ |
3684 | request->reserved_space += EXECLISTS_REQUEST_SIZE64; |
3685 | |
3686 | /* |
3687 | * Note that after this point, we have committed to using |
3688 | * this request as it is being used to both track the |
3689 | * state of engine initialisation and liveness of the |
3690 | * golden renderstate above. Think twice before you try |
3691 | * to cancel/unwind this request now. |
3692 | */ |
3693 | |
3694 | if (!i915_vm_is_4lvl(request->context->vm)) { |
3695 | ret = emit_pdps(request); |
3696 | if (ret) |
3697 | return ret; |
3698 | } |
3699 | |
3700 | /* Unconditionally invalidate GPU caches and TLBs. */ |
3701 | ret = request->engine->emit_flush(request, EMIT_INVALIDATE(1UL << (0))); |
3702 | if (ret) |
3703 | return ret; |
3704 | |
3705 | request->reserved_space -= EXECLISTS_REQUEST_SIZE64; |
3706 | return 0; |
3707 | } |
3708 | |
3709 | /* |
3710 | * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after |
3711 | * PIPE_CONTROL instruction. This is required for the flush to happen correctly |
3712 |  * but there is a slight complication as this is applied in a WA batch where the
3713 |  * values are only initialized once, so we cannot take the register value at the
3714 | * beginning and reuse it further; hence we save its value to memory, upload a |
3715 | * constant value with bit21 set and then we restore it back with the saved value. |
3716 | * To simplify the WA, a constant value is formed by using the default value |
3717 | * of this register. This shouldn't be a problem because we are only modifying |
3718 | * it for a short period and this batch in non-premptible. We can ofcourse |
3719 | * use additional instructions that read the actual value of the register |
3720 | * at that time and set our bit of interest but it makes the WA complicated. |
3721 | * |
3722 | * This WA is also required for Gen9 so extracting as a function avoids |
3723 | * code duplication. |
3724 | */ |
3725 | static u32 * |
3726 | gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) |
3727 | { |
3728 | /* NB no one else is allowed to scribble over scratch + 256! */ |
3729 | *batch++ = MI_STORE_REGISTER_MEM_GEN8(((0x24) << 23) | (2)) | MI_SRM_LRM_GLOBAL_GTT(1<<22); |
3730 | *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4((const i915_reg_t){ .reg = (0xb118) })); |
3731 | *batch++ = intel_gt_scratch_offset(engine->gt, |
3732 | INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); |
3733 | *batch++ = 0; |
3734 | |
3735 | *batch++ = MI_LOAD_REGISTER_IMM(1)(((0x22) << 23) | (2*(1)-1)); |
3736 | *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4((const i915_reg_t){ .reg = (0xb118) })); |
3737 | *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES(1 << 21); |
3738 | |
3739 | batch = gen8_emit_pipe_control(batch, |
3740 | PIPE_CONTROL_CS_STALL(1<<20) | |
3741 | PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5), |
3742 | 0); |
3743 | |
3744 | *batch++ = MI_LOAD_REGISTER_MEM_GEN8(((0x29) << 23) | (2)) | MI_SRM_LRM_GLOBAL_GTT(1<<22); |
3745 | *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4((const i915_reg_t){ .reg = (0xb118) })); |
3746 | *batch++ = intel_gt_scratch_offset(engine->gt, |
3747 | INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); |
3748 | *batch++ = 0; |
3749 | |
3750 | return batch; |
3751 | } |
3752 | |
3753 | /* |
3754 | * Typically we only have one indirect_ctx and one per_ctx batch buffer, which are |
3755 | * initialized at the beginning and shared across all contexts, but this field |
3756 | * helps us to have multiple batches at different offsets and to select them based |
3757 | * on some criteria. At the moment this batch always starts at the beginning of the |
3758 | * page and at this point we don't have multiple wa_ctx batch buffers. |
3759 | * |
3760 | * The number of WAs applied is not known at the beginning; we use this field |
3761 | * to return the number of DWORDs written. |
3762 | * |
3763 | * Note that this batch does not contain MI_BATCH_BUFFER_END, |
3764 | * so it adds NOOPs as padding to make it cacheline aligned. |
3765 | * MI_BATCH_BUFFER_END will be added to the per-ctx batch and the two together |
3766 | * make a complete batch buffer. |
3767 | */ |
3768 | static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) |
3769 | { |
3770 | /* WaDisableCtxRestoreArbitration:bdw,chv */ |
3771 | *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0); |
3772 | |
3773 | /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ |
3774 | if (IS_BROADWELL(engine->i915)IS_PLATFORM(engine->i915, INTEL_BROADWELL)) |
3775 | batch = gen8_emit_flush_coherentl3_wa(engine, batch); |
3776 | |
3777 | /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ |
3778 | /* Actual scratch location is at a 128-byte offset */ |
3779 | batch = gen8_emit_pipe_control(batch, |
3780 | PIPE_CONTROL_FLUSH_L3(1<<27) | |
3781 | PIPE_CONTROL_STORE_DATA_INDEX(1<<21) | |
3782 | PIPE_CONTROL_CS_STALL(1<<20) | |
3783 | PIPE_CONTROL_QW_WRITE(1<<14), |
3784 | LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
3785 | |
3786 | *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0); |
3787 | |
3788 | /* Pad to end of cacheline */ |
3789 | while ((unsigned long)batch % CACHELINE_BYTES64) |
3790 | *batch++ = MI_NOOP(((0) << 23) | (0)); |
3791 | |
3792 | /* |
3793 | * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because |
3794 | * execution depends on the length specified in terms of cache lines |
3795 | * in the register CTX_RCS_INDIRECT_CTX |
3796 | */ |
3797 | |
3798 | return batch; |
3799 | } |
3800 | |
3801 | struct lri { |
3802 | i915_reg_t reg; |
3803 | u32 value; |
3804 | }; |
3805 | |
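 | /* |
 | * emit_lri() packs all <count> register/value pairs into a single |
 | * MI_LOAD_REGISTER_IMM, followed by an MI_NOOP (presumably to keep the |
 | * emitted length qword aligned). Callers must keep count below the |
 | * command's 63-pair limit, as asserted below. |
 | */ |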
3806 | static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count) |
3807 | { |
3808 | GEM_BUG_ON(!count || count > 63)((void)0); |
3809 | |
3810 | *batch++ = MI_LOAD_REGISTER_IMM(count)(((0x22) << 23) | (2*(count)-1)); |
3811 | do { |
3812 | *batch++ = i915_mmio_reg_offset(lri->reg); |
3813 | *batch++ = lri->value; |
3814 | } while (lri++, --count); |
3815 | *batch++ = MI_NOOP(((0) << 23) | (0)); |
3816 | |
3817 | return batch; |
3818 | } |
3819 | |
3820 | static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) |
3821 | { |
3822 | static const struct lri lri[] = { |
3823 | /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ |
3824 | { |
3825 | COMMON_SLICE_CHICKEN2((const i915_reg_t){ .reg = (0x7014) }), |
3826 | __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,(((1 << 12)) << 16 | (0)) |
3827 | 0)(((1 << 12)) << 16 | (0)), |
3828 | }, |
3829 | |
3830 | /* BSpec: 11391 */ |
3831 | { |
3832 | FF_SLICE_CHICKEN((const i915_reg_t){ .reg = (0x2088) }), |
3833 | __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,(((1 << 1)) << 16 | ((1 << 1))) |
3834 | FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX)(((1 << 1)) << 16 | ((1 << 1))), |
3835 | }, |
3836 | |
3837 | /* BSpec: 11299 */ |
3838 | { |
3839 | _3D_CHICKEN3((const i915_reg_t){ .reg = (0x2090) }), |
3840 | __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,(((1 << 12)) << 16 | ((1 << 12))) |
3841 | _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX)(((1 << 12)) << 16 | ((1 << 12))), |
3842 | } |
3843 | }; |
3844 | |
3845 | *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0); |
3846 | |
3847 | /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ |
3848 | batch = gen8_emit_flush_coherentl3_wa(engine, batch); |
3849 | |
3850 | /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ |
3851 | batch = gen8_emit_pipe_control(batch, |
3852 | PIPE_CONTROL_FLUSH_L3(1<<27) | |
3853 | PIPE_CONTROL_STORE_DATA_INDEX(1<<21) | |
3854 | PIPE_CONTROL_CS_STALL(1<<20) | |
3855 | PIPE_CONTROL_QW_WRITE(1<<14), |
3856 | LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
3857 | |
3858 | batch = emit_lri(batch, lri, ARRAY_SIZE(lri)(sizeof((lri)) / sizeof((lri)[0]))); |
3859 | |
3860 | /* WaMediaPoolStateCmdInWABB:bxt,glk */ |
3861 | if (HAS_POOLED_EU(engine->i915)((&(engine->i915)->__info)->has_pooled_eu)) { |
3862 | /* |
3863 | * EU pool configuration is set up along with the golden context |
3864 | * during context initialization. This value depends on the |
3865 | * device type (2x6 or 3x6) and needs to be updated based |
3866 | * on which subslice is disabled, especially for 2x6 |
3867 | * devices; however, it is safe to load the default |
3868 | * configuration of a 3x6 device instead of masking off the |
3869 | * corresponding bits, because HW ignores bits of a disabled |
3870 | * subslice and drops down to the appropriate config. Please |
3871 | * see render_state_setup() in i915_gem_render_state.c for |
3872 | * the possible configurations; to avoid duplication they are |
3873 | * not shown here again. |
3874 | */ |
3875 | *batch++ = GEN9_MEDIA_POOL_STATE((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4 ); |
3876 | *batch++ = GEN9_MEDIA_POOL_ENABLE(1 << 31); |
3877 | *batch++ = 0x00777000; |
3878 | *batch++ = 0; |
3879 | *batch++ = 0; |
3880 | *batch++ = 0; |
3881 | } |
3882 | |
3883 | *batch++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0); |
3884 | |
3885 | /* Pad to end of cacheline */ |
3886 | while ((unsigned long)batch % CACHELINE_BYTES64) |
3887 | *batch++ = MI_NOOP(((0) << 23) | (0)); |
3888 | |
3889 | return batch; |
3890 | } |
3891 | |
3892 | static u32 * |
3893 | gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) |
3894 | { |
3895 | int i; |
3896 | |
3897 | /* |
3898 | * WaPipeControlBefore3DStateSamplePattern: cnl |
3899 | * |
3900 | * Ensure the engine is idle prior to programming a |
3901 | * 3DSTATE_SAMPLE_PATTERN during a context restore. |
3902 | */ |
3903 | batch = gen8_emit_pipe_control(batch, |
3904 | PIPE_CONTROL_CS_STALL(1<<20), |
3905 | 0); |
3906 | /* |
3907 | * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for |
3908 | * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in |
3909 | * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is |
3910 | * confusing. Since gen8_emit_pipe_control() already advances the |
3911 | * batch by 6 dwords, we advance the other 10 here, completing a |
3912 | * cacheline. It's not clear if the workaround requires this padding |
3913 | * before other commands, or if it's just the regular padding we would |
3914 | * already have for the workaround bb, so leave it here for now. |
3915 | */ |
3916 | for (i = 0; i < 10; i++) |
3917 | *batch++ = MI_NOOP(((0) << 23) | (0)); |
3918 | |
3919 | /* Pad to end of cacheline */ |
3920 | while ((unsigned long)batch % CACHELINE_BYTES64) |
3921 | *batch++ = MI_NOOP(((0) << 23) | (0)); |
3922 | |
3923 | return batch; |
3924 | } |
3925 | |
3926 | #define CTX_WA_BB_OBJ_SIZE((1 << 12)) (PAGE_SIZE(1 << 12)) |
3927 | |
3928 | static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) |
3929 | { |
3930 | struct drm_i915_gem_object *obj; |
3931 | struct i915_vma *vma; |
3932 | int err; |
3933 | |
3934 | obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE((1 << 12))); |
3935 | if (IS_ERR(obj)) |
3936 | return PTR_ERR(obj); |
3937 | |
3938 | vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL((void *)0)); |
3939 | if (IS_ERR(vma)) { |
3940 | err = PTR_ERR(vma); |
3941 | goto err; |
3942 | } |
3943 | |
3944 | err = i915_ggtt_pin(vma, NULL((void *)0), 0, PIN_HIGH(1ULL << (5))); |
3945 | if (err) |
3946 | goto err; |
3947 | |
3948 | engine->wa_ctx.vma = vma; |
3949 | return 0; |
3950 | |
3951 | err: |
3952 | i915_gem_object_put(obj); |
3953 | return err; |
3954 | } |
3955 | |
3956 | static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) |
3957 | { |
3958 | i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); |
3959 | |
3960 | /* Called on error unwind, clear all flags to prevent further use */ |
3961 | memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx))__builtin_memset((&engine->wa_ctx), (0), (sizeof(engine ->wa_ctx))); |
3962 | } |
3963 | |
3964 | typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); |
3965 | |
3966 | static int intel_init_workaround_bb(struct intel_engine_cs *engine) |
3967 | { |
3968 | struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; |
3969 | struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, |
3970 | &wa_ctx->per_ctx }; |
3971 | wa_bb_func_t wa_bb_fn[2]; |
3972 | void *batch, *batch_ptr; |
3973 | unsigned int i; |
3974 | int ret; |
3975 | |
3976 | if (engine->class != RENDER_CLASS0) |
3977 | return 0; |
3978 | |
3979 | switch (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen)) { |
3980 | case 12: |
3981 | case 11: |
3982 | return 0; |
3983 | case 10: |
3984 | wa_bb_fn[0] = gen10_init_indirectctx_bb; |
3985 | wa_bb_fn[1] = NULL((void *)0); |
3986 | break; |
3987 | case 9: |
3988 | wa_bb_fn[0] = gen9_init_indirectctx_bb; |
3989 | wa_bb_fn[1] = NULL((void *)0); |
3990 | break; |
3991 | case 8: |
3992 | wa_bb_fn[0] = gen8_init_indirectctx_bb; |
3993 | wa_bb_fn[1] = NULL((void *)0); |
3994 | break; |
3995 | default: |
3996 | MISSING_CASE(INTEL_GEN(engine->i915))({ int __ret = !!(1); if (__ret) printf("Missing case (%s == %ld)\n" , "((&(engine->i915)->__info)->gen)", (long)(((& (engine->i915)->__info)->gen))); __builtin_expect(!! (__ret), 0); }); |
3997 | return 0; |
3998 | } |
3999 | |
4000 | ret = lrc_setup_wa_ctx(engine); |
4001 | if (ret) { |
4002 | drm_dbg(&engine->i915->drm,drm_dev_dbg((&engine->i915->drm)->dev, DRM_UT_DRIVER , "Failed to setup context WA page: %d\n", ret) |
4003 | "Failed to setup context WA page: %d\n", ret)drm_dev_dbg((&engine->i915->drm)->dev, DRM_UT_DRIVER , "Failed to setup context WA page: %d\n", ret); |
4004 | return ret; |
4005 | } |
4006 | |
4007 | batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); |
4008 | |
4009 | /* |
4010 | * Emit the two workaround batch buffers, recording the offset from the |
4011 | * start of the workaround batch buffer object for each and their |
4012 | * respective sizes. |
4013 | */ |
4014 | batch_ptr = batch; |
4015 | for (i = 0; i < ARRAY_SIZE(wa_bb_fn)(sizeof((wa_bb_fn)) / sizeof((wa_bb_fn)[0])); i++) { |
4016 | wa_bb[i]->offset = batch_ptr - batch; |
4017 | if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,({ ((void)0); 0; }) |
4018 | CACHELINE_BYTES))({ ((void)0); 0; })) { |
4019 | ret = -EINVAL22; |
4020 | break; |
4021 | } |
4022 | if (wa_bb_fn[i]) |
4023 | batch_ptr = wa_bb_fn[i](engine, batch_ptr); |
4024 | wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); |
4025 | } |
4026 | GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE)((void)0); |
4027 | |
4028 | __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch); |
4029 | __i915_gem_object_release_map(wa_ctx->vma->obj); |
4030 | if (ret) |
4031 | lrc_destroy_wa_ctx(engine); |
4032 | |
4033 | return ret; |
4034 | } |
4035 | |
4036 | static void reset_csb_pointers(struct intel_engine_cs *engine) |
4037 | { |
4038 | struct intel_engine_execlists * const execlists = &engine->execlists; |
4039 | const unsigned int reset_value = execlists->csb_size - 1; |
4040 | |
4041 | ring_set_paused(engine, 0); |
4042 | |
4043 | /* |
4044 | * Sometimes Icelake forgets to reset its pointers on a GPU reset. |
4045 | * Bludgeon them with a mmio update to be sure. |
4046 | */ |
4047 | ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff << 16 | reset_value << 8 | reset_value)) |
4048 | 0xffff << 16 | reset_value << 8 | reset_value)intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff << 16 | reset_value << 8 | reset_value)); |
4049 | ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x3a0) }))); |
4050 | |
4051 | /* |
4052 | * After a reset, the HW starts writing into CSB entry [0]. We |
4053 | * therefore have to set our HEAD pointer back one entry so that |
4054 | * the *first* entry we check is entry 0. To complicate this further, |
4055 | * as we don't wait for the first interrupt after reset, we have to |
4056 | * fake the HW write to point back to the last entry so that our |
4057 | * inline comparison of our cached head position against the last HW |
4058 | * write works even before the first interrupt. |
4059 | */ |
4060 | execlists->csb_head = reset_value; |
4061 | WRITE_ONCE(*execlists->csb_write, reset_value)({ typeof(*execlists->csb_write) __tmp = (reset_value); *( volatile typeof(*execlists->csb_write) *)&(*execlists-> csb_write) = __tmp; __tmp; }); |
4062 | wmb()do { __asm volatile("sfence" ::: "memory"); } while (0); /* Make sure this is visible to HW (paranoia?) */ |
4063 | |
4064 | /* Check that the GPU does indeed update the CSB entries! */ |
4065 | memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64))__builtin_memset((execlists->csb_status), (-1), ((reset_value + 1) * sizeof(u64))); |
4066 | invalidate_csb_entries(&execlists->csb_status[0], |
4067 | &execlists->csb_status[reset_value]); |
4068 | |
4069 | /* Once more for luck and our trusty paranoia */ |
4070 | ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff << 16 | reset_value << 8 | reset_value)) |
4071 | 0xffff << 16 | reset_value << 8 | reset_value)intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0x3a0) }), (0xffff << 16 | reset_value << 8 | reset_value)); |
4072 | ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x3a0) }))); |
4073 | |
4074 | GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value)((void)0); |
4075 | } |
4076 | |
4077 | static void execlists_sanitize(struct intel_engine_cs *engine) |
4078 | { |
4079 | /* |
4080 | * Poison residual state on resume, in case the suspend didn't! |
4081 | * |
4082 | * We have to assume that across suspend/resume (or other loss |
4083 | * of control) that the contents of our pinned buffers has been |
4084 | * lost, replaced by garbage. Since this doesn't always happen, |
4085 | * let's poison such state so that we more quickly spot when |
4086 | * we falsely assume it has been preserved. |
4087 | */ |
4088 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0) |
4089 | memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE)__builtin_memset((engine->status_page.addr), (0xdb), ((1 << 12))); |
4090 | |
4091 | reset_csb_pointers(engine); |
4092 | |
4093 | /* |
4094 | * The kernel_context HWSP is stored in the status_page. As above, |
4095 | * that may be lost on resume/initialisation, and so we need to |
4096 | * reset the value in the HWSP. |
4097 | */ |
4098 | intel_timeline_reset_seqno(engine->kernel_context->timeline); |
4099 | |
4100 | /* And scrub the dirty cachelines for the HWSP */ |
4101 | clflush_cache_range(engine->status_page.addr, PAGE_SIZE)pmap_flush_cache((vaddr_t)(engine->status_page.addr), (1 << 12)); |
4102 | } |
4103 | |
4104 | static void enable_error_interrupt(struct intel_engine_cs *engine) |
4105 | { |
4106 | u32 status; |
4107 | |
4108 | engine->execlists.error_interrupt = 0; |
4109 | ENGINE_WRITE(engine, RING_EMR, ~0u)intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0xb4) }), (~0u)); |
4110 | ENGINE_WRITE(engine, RING_EIR, ~0u)intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0xb0) }), (~0u)); /* clear all existing errors */ |
4111 | |
4112 | status = ENGINE_READ(engine, RING_ESR)intel_uncore_read(((engine))->uncore, ((const i915_reg_t){ .reg = (((engine)->mmio_base) + 0xb8) })); |
4113 | if (unlikely(status)__builtin_expect(!!(status), 0)) { |
4114 | drm_err(&engine->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "engine '%s' resumed still in error: %08x\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , engine ->name, status) |
4115 | "engine '%s' resumed still in error: %08x\n",printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "engine '%s' resumed still in error: %08x\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , engine ->name, status) |
4116 | engine->name, status)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "engine '%s' resumed still in error: %08x\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__ , engine ->name, status); |
4117 | __intel_gt_reset(engine->gt, engine->mask); |
4118 | } |
4119 | |
4120 | /* |
4121 | * On current gen8+, we have 2 signals to play with |
4122 | * |
4123 | * - I915_ERROR_INSTRUCTION (bit 0) |
4124 | * |
4125 | * Generate an error if the command parser encounters an invalid |
4126 | * instruction |
4127 | * |
4128 | * This is a fatal error. |
4129 | * |
4130 | * - CP_PRIV (bit 2) |
4131 | * |
4132 | * Generate an error on privilege violation (where the CP replaces |
4133 | * the instruction with a no-op). This also fires for writes into |
4134 | * read-only scratch pages. |
4135 | * |
4136 | * This is a non-fatal error, parsing continues. |
4137 | * |
4138 | * * there are a few others defined for odd HW that we do not use |
4139 | * |
4140 | * Since CP_PRIV fires for cases where we have chosen to ignore the |
4141 | * error (as the HW is validating and suppressing the mistakes), we |
4142 | * only unmask the instruction error bit. |
4143 | */ |
4144 | ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION)intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0xb4) }), (~(1 << 0 ))); |
4145 | } |
4146 | |
4147 | static void enable_execlists(struct intel_engine_cs *engine) |
4148 | { |
4149 | u32 mode; |
4150 | |
4151 | assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); |
4152 | |
4153 | intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ |
4154 | |
4155 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11) |
4156 | mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)({ typeof((1 << 3)) _a = ((1 << 3)); ({ if (__builtin_constant_p (_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a) && __builtin_constant_p (_a)) do { } while (0); ((_a) << 16 | (_a)); }); }); |
4157 | else |
4158 | mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)({ typeof((1 << 15)) _a = ((1 << 15)); ({ if (__builtin_constant_p (_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a) && __builtin_constant_p (_a)) do { } while (0); ((_a) << 16 | (_a)); }); }); |
4159 | ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode)__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x29c) }), (mode)); |
4160 | |
4161 | ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x9c) }), ((({ if (__builtin_constant_p (((1 << 8)))) do { } while (0); if (__builtin_constant_p (0)) do { } while (0); if (__builtin_constant_p(((1 << 8 ))) && __builtin_constant_p(0)) do { } while (0); ((( (1 << 8))) << 16 | (0)); })))); |
4162 | |
4163 | ENGINE_WRITE_FW(engine,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset (engine->status_page.vma))) |
4164 | RING_HWS_PGA,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset (engine->status_page.vma))) |
4165 | i915_ggtt_offset(engine->status_page.vma))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset (engine->status_page.vma))); |
4166 | ENGINE_POSTING_READ(engine, RING_HWS_PGA)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x80) }))); |
4167 | |
4168 | enable_error_interrupt(engine); |
4169 | |
4170 | engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0)(((~0UL) >> (64 - (64 - 2) - 1)) & ((~0UL) << (0))); |
4171 | } |
4172 | |
4173 | static bool_Bool unexpected_starting_state(struct intel_engine_cs *engine) |
4174 | { |
4175 | bool_Bool unexpected = false0; |
4176 | |
4177 | if (ENGINE_READ_FW(engine, RING_MI_MODE)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0x9c) })) & STOP_RING(1 << 8)) { |
4178 | drm_dbg(&engine->i915->drm,drm_dev_dbg((&engine->i915->drm)->dev, DRM_UT_DRIVER , "STOP_RING still set in RING_MI_MODE\n") |
4179 | "STOP_RING still set in RING_MI_MODE\n")drm_dev_dbg((&engine->i915->drm)->dev, DRM_UT_DRIVER , "STOP_RING still set in RING_MI_MODE\n"); |
4180 | unexpected = true1; |
4181 | } |
4182 | |
4183 | return unexpected; |
4184 | } |
4185 | |
4186 | static int execlists_resume(struct intel_engine_cs *engine) |
4187 | { |
4188 | intel_mocs_init_engine(engine); |
4189 | |
4190 | intel_breadcrumbs_reset(engine->breadcrumbs); |
4191 | |
4192 | if (GEM_SHOW_DEBUG()(0) && unexpected_starting_state(engine)) { |
4193 | struct drm_printer p = drm_debug_printer(__func__); |
4194 | |
4195 | intel_engine_dump(engine, &p, NULL((void *)0)); |
4196 | } |
4197 | |
4198 | enable_execlists(engine); |
4199 | |
4200 | return 0; |
4201 | } |
4202 | |
4203 | static void execlists_reset_prepare(struct intel_engine_cs *engine) |
4204 | { |
4205 | struct intel_engine_execlists * const execlists = &engine->execlists; |
4206 | unsigned long flags; |
4207 | |
4208 | ENGINE_TRACE(engine, "depth<-%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
4209 | atomic_read(&execlists->tasklet.count))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
4210 | |
4211 | /* |
4212 | * Prevent request submission to the hardware until we have |
4213 | * completed the reset in i915_gem_reset_finish(). If a request |
4214 | * is completed by one engine, it may then queue a request |
4215 | * to a second via its execlists->tasklet *just* as we are |
4216 | * calling engine->resume() and also writing the ELSP. |
4217 | * Turning off the execlists->tasklet until the reset is over |
4218 | * prevents the race. |
4219 | */ |
4220 | __tasklet_disable_sync_once(&execlists->tasklet); |
4221 | GEM_BUG_ON(!reset_in_progress(execlists))((void)0); |
4222 | |
4223 | /* And flush any current direct submission. */ |
4224 | spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while (0); |
4225 | spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); } while (0); |
4226 | |
4227 | /* |
4228 | * We stop the engines, otherwise we might get a failed reset and a |
4229 | * dead gpu (on elk). Also, a gpu as modern as kbl can suffer a |
4230 | * system hang if a batchbuffer is in progress when |
4231 | * the reset is issued, regardless of the READY_TO_RESET ack. |
4232 | * Thus we assume it is best to stop the engines on all gens |
4233 | * where we have a gpu reset. |
4234 | * |
4235 | * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) |
4236 | * |
4237 | * FIXME: Wa for more modern gens needs to be validated |
4238 | */ |
4239 | ring_set_paused(engine, 1); |
4240 | intel_engine_stop_cs(engine); |
4241 | |
4242 | engine->execlists.reset_ccid = active_ccid(engine); |
4243 | } |
4244 | |
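 | /* |
 | * RING_MI_MODE is a masked register: the upper 16 bits of a write select |
 | * which of the lower bits are updated. Clearing the STOP_RING value bit |
 | * while setting its mask bit in the saved context image makes sure the |
 | * engine does not come out of reset with its ring still stopped. |
 | */ |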
4245 | static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) |
4246 | { |
4247 | int x; |
4248 | |
4249 | x = lrc_ring_mi_mode(engine); |
4250 | if (x != -1) { |
4251 | regs[x + 1] &= ~STOP_RING(1 << 8); |
4252 | regs[x + 1] |= STOP_RING(1 << 8) << 16; |
4253 | } |
4254 | } |
4255 | |
4256 | static void __execlists_reset_reg_state(const struct intel_context *ce, |
4257 | const struct intel_engine_cs *engine) |
4258 | { |
4259 | u32 *regs = ce->lrc_reg_state; |
4260 | |
4261 | __reset_stop_ring(regs, engine); |
4262 | } |
4263 | |
4264 | static void __execlists_reset(struct intel_engine_cs *engine, bool_Bool stalled) |
4265 | { |
4266 | struct intel_engine_execlists * const execlists = &engine->execlists; |
4267 | struct intel_context *ce; |
4268 | struct i915_request *rq; |
4269 | u32 head; |
4270 | |
4271 | mb()do { __asm volatile("mfence" ::: "memory"); } while (0); /* paranoia: read the CSB pointers from after the reset */ |
4272 | clflush((vaddr_t)execlists->csb_write); |
4273 | mb()do { __asm volatile("mfence" ::: "memory"); } while (0); |
4274 | |
4275 | process_csb(engine); /* drain preemption events */ |
4276 | |
4277 | /* Following the reset, we need to reload the CSB read/write pointers */ |
4278 | reset_csb_pointers(engine); |
4279 | |
4280 | /* |
4281 | * Save the currently executing context, even if we completed |
4282 | * its request, it was still running at the time of the |
4283 | * reset and will have been clobbered. |
4284 | */ |
4285 | rq = active_context(engine, engine->execlists.reset_ccid); |
4286 | if (!rq) |
4287 | goto unwind; |
4288 | |
4289 | ce = rq->context; |
4290 | GEM_BUG_ON(!i915_vma_is_pinned(ce->state))((void)0); |
4291 | |
4292 | if (i915_request_completed(rq)) { |
4293 | /* Idle context; tidy up the ring so we can restart afresh */ |
4294 | head = intel_ring_wrap(ce->ring, rq->tail); |
4295 | goto out_replay; |
4296 | } |
4297 | |
4298 | /* We still have requests in-flight; the engine should be active */ |
4299 | GEM_BUG_ON(!intel_engine_pm_is_awake(engine))((void)0); |
4300 | |
4301 | /* Context has requests still in-flight; it should not be idle! */ |
4302 | GEM_BUG_ON(i915_active_is_idle(&ce->active))((void)0); |
4303 | |
4304 | rq = active_request(ce->timeline, rq); |
4305 | head = intel_ring_wrap(ce->ring, rq->head); |
4306 | GEM_BUG_ON(head == ce->ring->tail)((void)0); |
4307 | |
4308 | /* |
4309 | * If this request hasn't started yet, e.g. it is waiting on a |
4310 | * semaphore, we need to avoid skipping the request or else we |
4311 | * break the signaling chain. However, if the context is corrupt |
4312 | * the request will not restart and we will be stuck with a wedged |
4313 | * device. It is quite often the case that if we issue a reset |
4314 | * while the GPU is loading the context image, that the context |
4315 | * image becomes corrupt. |
4316 | * |
4317 | * Otherwise, if we have not started yet, the request should replay |
4318 | * perfectly and we do not need to flag the result as being erroneous. |
4319 | */ |
4320 | if (!i915_request_started(rq)) |
4321 | goto out_replay; |
4322 | |
4323 | /* |
4324 | * If the request was innocent, we leave the request in the ELSP |
4325 | * and will try to replay it on restarting. The context image may |
4326 | * have been corrupted by the reset, in which case we may have |
4327 | * to service a new GPU hang, but more likely we can continue on |
4328 | * without impact. |
4329 | * |
4330 | * If the request was guilty, we presume the context is corrupt |
4331 | * and have to at least restore the RING register in the context |
4332 | * image back to the expected values to skip over the guilty request. |
4333 | */ |
4334 | __i915_request_reset(rq, stalled); |
4335 | |
4336 | /* |
4337 | * We want a simple context + ring to execute the breadcrumb update. |
4338 | * We cannot rely on the context being intact across the GPU hang, |
4339 | * so clear it and rebuild just what we need for the breadcrumb. |
4340 | * All pending requests for this context will be zapped, and any |
4341 | * future request will be after userspace has had the opportunity |
4342 | * to recreate its own state. |
4343 | */ |
4344 | out_replay: |
4345 | ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
4346 | head, ce->ring->tail)do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
4347 | __execlists_reset_reg_state(ce, engine); |
4348 | __execlists_update_reg_state(ce, engine, head); |
4349 | ce->lrc.desc |= CTX_DESC_FORCE_RESTORE(1ULL << (2)); /* paranoid: GPU was reset! */ |
4350 | |
4351 | unwind: |
4352 | /* Push back any incomplete requests for replay after the reset. */ |
4353 | cancel_port_requests(execlists); |
4354 | __unwind_incomplete_requests(engine); |
4355 | } |
4356 | |
4357 | static void execlists_reset_rewind(struct intel_engine_cs *engine, bool_Bool stalled) |
4358 | { |
4359 | unsigned long flags; |
4360 | |
4361 | ENGINE_TRACE(engine, "\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
4362 | |
4363 | spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while (0); |
4364 | |
4365 | __execlists_reset(engine, stalled); |
4366 | |
4367 | spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); } while (0); |
4368 | } |
4369 | |
4370 | static void nop_submission_tasklet(unsigned long data) |
4371 | { |
4372 | struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; |
4373 | |
4374 | /* The driver is wedged; don't process any more events. */ |
4375 | WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN)({ typeof(engine->execlists.queue_priority_hint) __tmp = ( (-0x7fffffff-1)); *(volatile typeof(engine->execlists.queue_priority_hint ) *)&(engine->execlists.queue_priority_hint) = __tmp; __tmp ; }); |
4376 | } |
4377 | |
4378 | static void execlists_reset_cancel(struct intel_engine_cs *engine) |
4379 | { |
4380 | struct intel_engine_execlists * const execlists = &engine->execlists; |
4381 | struct i915_request *rq, *rn; |
4382 | struct rb_node *rb; |
4383 | unsigned long flags; |
4384 | |
4385 | ENGINE_TRACE(engine, "\n")do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
4386 | |
4387 | /* |
4388 | * Before we call engine->cancel_requests(), we should have exclusive |
4389 | * access to the submission state. This is arranged for us by the |
4390 | * caller disabling the interrupt generation, the tasklet and other |
4391 | * threads that may then access the same state, giving us a free hand |
4392 | * to reset state. However, we still need to let lockdep be aware that |
4393 | * we know this state may be accessed in hardirq context, so we |
4394 | * disable the irq around this manipulation and we want to keep |
4395 | * the spinlock focused on its duties and not accidentally conflate |
4396 | * coverage to the submission's irq state. (Similarly, although we |
4397 | * shouldn't need to disable irq around the manipulation of the |
4398 | * submission's irq state, we also wish to remind ourselves that |
4399 | * it is irq state.) |
4400 | */ |
4401 | spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while (0); |
4402 | |
4403 | __execlists_reset(engine, true1); |
4404 | |
4405 | /* Mark all executing requests as skipped. */ |
4406 | list_for_each_entry(rq, &engine->active.requests, sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = ((&engine->active.requests)->next); (__typeof (*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched .link) );}); &rq->sched.link != (&engine->active .requests); rq = ({ const __typeof( ((__typeof(*rq) *)0)-> sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq ) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched .link) );})) |
4407 | mark_eio(rq); |
4408 | |
4409 | /* Flush the queued requests to the timeline list (for retiring). */ |
4410 | while ((rb = rb_first_cached(&execlists->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->queue)->rb_root), -1))) { |
4411 | struct i915_priolist *p = to_priolist(rb); |
4412 | int i; |
4413 | |
4414 | priolist_for_each_request_consume(rq, rn, p, i)for (; (p)->used ? (i = __builtin_ctzl((p)->used)), 1 : 0; (p)->used &= ~(1UL << (i))) for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = (( &(p)->requests[i])->next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );} ), rn = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );} ); &rq->sched.link != (&(p)->requests[i]); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0)->sched.link ) *__mptr = (rn->sched.link.next); (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rn), sched.link) );} )) { |
4415 | mark_eio(rq); |
4416 | __i915_request_submit(rq); |
4417 | } |
4418 | |
4419 | rb_erase_cached(&p->node, &execlists->queue)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists ->queue)->rb_root), (&p->node)); |
4420 | i915_priolist_free(p); |
4421 | } |
4422 | |
4423 | /* On-hold requests will be flushed to timeline upon their release */ |
4424 | list_for_each_entry(rq, &engine->active.hold, sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = ((&engine->active.hold)->next); (__typeof (*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched .link) );}); &rq->sched.link != (&engine->active .hold); rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched .link ) *__mptr = (rq->sched.link.next); (__typeof(*rq) *) ( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched.link ) );})) |
4425 | mark_eio(rq); |
4426 | |
4427 | /* Cancel all attached virtual engines */ |
4428 | while ((rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->virtual)->rb_root), -1))) { |
4429 | struct virtual_engine *ve = |
4430 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id ].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof (typeof(*ve), nodes[engine->id].rb) );}); |
4431 | |
4432 | rb_erase_cached(rb, &execlists->virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&execlists ->virtual)->rb_root), (rb)); |
4433 | RB_CLEAR_NODE(rb)(((rb))->__entry.rbe_parent = (rb)); |
4434 | |
4435 | spin_lock(&ve->base.active.lock)mtx_enter(&ve->base.active.lock); |
4436 | rq = fetch_and_zero(&ve->request)({ typeof(*&ve->request) __T = *(&ve->request); *(&ve->request) = (typeof(*&ve->request))0; __T ; }); |
4437 | if (rq) { |
4438 | mark_eio(rq); |
4439 | |
4440 | rq->engine = engine; |
4441 | __i915_request_submit(rq); |
4442 | i915_request_put(rq); |
4443 | |
4444 | ve->base.execlists.queue_priority_hint = INT_MIN(-0x7fffffff-1); |
4445 | } |
4446 | spin_unlock(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
4447 | } |
4448 | |
4449 | /* Remaining _unready_ requests will be nop'ed when submitted */ |
4450 | |
4451 | execlists->queue_priority_hint = INT_MIN(-0x7fffffff-1); |
4452 | execlists->queue = RB_ROOT_CACHED(struct rb_root_cached) { ((void *)0) }; |
4453 | |
4454 | GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet))((void)0); |
4455 | execlists->tasklet.func = nop_submission_tasklet; |
4456 | |
4457 | spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); } while (0); |
4458 | } |
4459 | |
4460 | static void execlists_reset_finish(struct intel_engine_cs *engine) |
4461 | { |
4462 | struct intel_engine_execlists * const execlists = &engine->execlists; |
4463 | |
4464 | /* |
4465 | * After a GPU reset, we may have requests to replay. Do so now while |
4466 | * we still have the forcewake to be sure that the GPU is not allowed |
4467 | * to sleep before we restart and reload a context. |
4468 | */ |
4469 | GEM_BUG_ON(!reset_in_progress(execlists))((void)0); |
4470 | if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)((&execlists->queue.rb_root)->rb_node == ((void *)0 ))) |
4471 | execlists->tasklet.func(execlists->tasklet.data); |
4472 | |
4473 | if (__tasklet_enable(&execlists->tasklet)) |
4474 | /* And kick in case we missed a new request submission. */ |
4475 | tasklet_hi_schedule(&execlists->tasklet); |
4476 | ENGINE_TRACE(engine, "depth->%d\n",do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0) |
4477 | atomic_read(&execlists->tasklet.count))do { const struct intel_engine_cs *e__ __attribute__((__unused__ )) = (engine); do { } while (0); } while (0); |
4478 | } |
4479 | |
4480 | static int gen8_emit_bb_start_noarb(struct i915_request *rq, |
4481 | u64 offset, u32 len, |
4482 | const unsigned int flags) |
4483 | { |
4484 | u32 *cs; |
4485 | |
4486 | cs = intel_ring_begin(rq, 4); |
4487 | if (IS_ERR(cs)) |
4488 | return PTR_ERR(cs); |
4489 | |
4490 | /* |
4491 | * WaDisableCtxRestoreArbitration:bdw,chv |
4492 | * |
4493 | * We don't need to perform MI_ARB_ENABLE as often as we do (in |
4494 | * particular on all the gens that do not need the w/a at all!); if we |
4495 | * took care to make sure that on every switch into this context |
4496 | * (both ordinary and for preemption) arbitration was enabled, |
4497 | * we would be fine. However, for gen8 there is another w/a that |
4498 | * requires us to not preempt inside GPGPU execution, so we keep |
4499 | * arbitration disabled for gen8 batches. Arbitration will be |
4500 | * re-enabled before we close the request |
4501 | * (engine->emit_fini_breadcrumb). |
4502 | */ |
4503 | *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0); |
4504 | |
4505 | /* FIXME(BDW+): Address space and security selectors. */ |
4506 | *cs++ = MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) | (1)) | |
4507 | (flags & I915_DISPATCH_SECURE(1UL << (0)) ? 0 : BIT(8)(1UL << (8))); |
4508 | *cs++ = lower_32_bits(offset)((u32)(offset)); |
4509 | *cs++ = upper_32_bits(offset)((u32)(((offset) >> 16) >> 16)); |
4510 | |
4511 | intel_ring_advance(rq, cs); |
4512 | |
4513 | return 0; |
4514 | } |
4515 | |
4516 | static int gen8_emit_bb_start(struct i915_request *rq, |
4517 | u64 offset, u32 len, |
4518 | const unsigned int flags) |
4519 | { |
4520 | u32 *cs; |
4521 | |
4522 | cs = intel_ring_begin(rq, 6); |
4523 | if (IS_ERR(cs)) |
4524 | return PTR_ERR(cs); |
4525 | |
4526 | *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0); |
4527 | |
4528 | *cs++ = MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) | (1)) | |
4529 | (flags & I915_DISPATCH_SECURE(1UL << (0)) ? 0 : BIT(8)(1UL << (8))); |
4530 | *cs++ = lower_32_bits(offset)((u32)(offset)); |
4531 | *cs++ = upper_32_bits(offset)((u32)(((offset) >> 16) >> 16)); |
4532 | |
4533 | *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_DISABLE(0<<0); |
4534 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
4535 | |
4536 | intel_ring_advance(rq, cs); |
4537 | |
4538 | return 0; |
4539 | } |
4540 | |
4541 | static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) |
4542 | { |
4543 | ENGINE_WRITE(engine, RING_IMR,intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0xa8) }), (~(engine-> irq_enable_mask | engine->irq_keep_mask))) |
4544 | ~(engine->irq_enable_mask | engine->irq_keep_mask))intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0xa8) }), (~(engine-> irq_enable_mask | engine->irq_keep_mask))); |
4545 | ENGINE_POSTING_READ(engine, RING_IMR)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t ){ .reg = (((engine)->mmio_base) + 0xa8) }))); |
4546 | } |
4547 | |
4548 | static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) |
4549 | { |
4550 | ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask)intel_uncore_write(((engine))->uncore, ((const i915_reg_t) { .reg = (((engine)->mmio_base) + 0xa8) }), (~engine->irq_keep_mask )); |
4551 | } |
4552 | |
4553 | static int gen8_emit_flush(struct i915_request *request, u32 mode) |
4554 | { |
4555 | u32 cmd, *cs; |
4556 | |
4557 | cs = intel_ring_begin(request, 4); |
4558 | if (IS_ERR(cs)) |
4559 | return PTR_ERR(cs); |
4560 | |
4561 | cmd = MI_FLUSH_DW(((0x26) << 23) | (1)) + 1; |
4562 | |
4563 | /* We always require a command barrier so that subsequent |
4564 | * commands, such as breadcrumb interrupts, are strictly ordered |
4565 | * wrt the contents of the write cache being flushed to memory |
4566 | * (and thus being coherent from the CPU). |
4567 | */ |
4568 | cmd |= MI_FLUSH_DW_STORE_INDEX(1<<21) | MI_FLUSH_DW_OP_STOREDW(1<<14); |
4569 | |
4570 | if (mode & EMIT_INVALIDATE(1UL << (0))) { |
4571 | cmd |= MI_INVALIDATE_TLB(1<<18); |
4572 | if (request->engine->class == VIDEO_DECODE_CLASS1) |
4573 | cmd |= MI_INVALIDATE_BSD(1<<7); |
4574 | } |
4575 | |
4576 | *cs++ = cmd; |
4577 | *cs++ = LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)); |
4578 | *cs++ = 0; /* upper addr */ |
4579 | *cs++ = 0; /* value */ |
4580 | intel_ring_advance(request, cs); |
4581 | |
4582 | return 0; |
4583 | } |
4584 | |
4585 | static int gen8_emit_flush_render(struct i915_request *request, |
4586 | u32 mode) |
4587 | { |
4588 | bool_Bool vf_flush_wa = false0, dc_flush_wa = false0; |
4589 | u32 *cs, flags = 0; |
4590 | int len; |
4591 | |
4592 | flags |= PIPE_CONTROL_CS_STALL(1<<20); |
4593 | |
4594 | if (mode & EMIT_FLUSH(1UL << (1))) { |
4595 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12); |
4596 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0); |
4597 | flags |= PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5); |
4598 | flags |= PIPE_CONTROL_FLUSH_ENABLE(1<<7); |
4599 | } |
4600 | |
4601 | if (mode & EMIT_INVALIDATE(1UL << (0))) { |
4602 | flags |= PIPE_CONTROL_TLB_INVALIDATE(1<<18); |
4603 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11); |
4604 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10); |
4605 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4); |
4606 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3); |
4607 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2); |
4608 | flags |= PIPE_CONTROL_QW_WRITE(1<<14); |
4609 | flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21); |
4610 | |
4611 | /* |
4612 | * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL |
4613 | * pipe control. |
4614 | */ |
4615 | if (IS_GEN(request->engine->i915, 9)(0 + (&(request->engine->i915)->__info)->gen == (9))) |
4616 | vf_flush_wa = true1; |
4617 | |
4618 | /* WaForGAMHang:kbl */ |
4619 | if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0)(IS_PLATFORM(request->engine->i915, INTEL_KABYLAKE) && kbl_revids[((request->engine->i915)->drm.pdev->revision )].gt_stepping >= 0 && kbl_revids[((request->engine ->i915)->drm.pdev->revision)].gt_stepping <= KBL_REVID_B0 )) |
4620 | dc_flush_wa = true1; |
4621 | } |
4622 | |
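 | /* |
 | * Space accounting: the main PIPE_CONTROL is 6 dwords, the gen9 VF-cache |
 | * WA prepends a null PIPE_CONTROL (+6), and the kbl WaForGAMHang wraps |
 | * the flush in two further PIPE_CONTROLs (+12). |
 | */ |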
4623 | len = 6; |
4624 | |
4625 | if (vf_flush_wa) |
4626 | len += 6; |
4627 | |
4628 | if (dc_flush_wa) |
4629 | len += 12; |
4630 | |
4631 | cs = intel_ring_begin(request, len); |
4632 | if (IS_ERR(cs)) |
4633 | return PTR_ERR(cs); |
4634 | |
4635 | if (vf_flush_wa) |
4636 | cs = gen8_emit_pipe_control(cs, 0, 0); |
4637 | |
4638 | if (dc_flush_wa) |
4639 | cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5), |
4640 | 0); |
4641 | |
4642 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
4643 | |
4644 | if (dc_flush_wa) |
4645 | cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL(1<<20), 0); |
4646 | |
4647 | intel_ring_advance(request, cs); |
4648 | |
4649 | return 0; |
4650 | } |
4651 | |
4652 | static int gen11_emit_flush_render(struct i915_request *request, |
4653 | u32 mode) |
4654 | { |
4655 | if (mode & EMIT_FLUSH(1UL << (1))) { |
4656 | u32 *cs; |
4657 | u32 flags = 0; |
4658 | |
4659 | flags |= PIPE_CONTROL_CS_STALL(1<<20); |
4660 | |
4661 | flags |= PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28); |
4662 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12); |
4663 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0); |
4664 | flags |= PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5); |
4665 | flags |= PIPE_CONTROL_FLUSH_ENABLE(1<<7); |
4666 | flags |= PIPE_CONTROL_QW_WRITE(1<<14); |
4667 | flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21); |
4668 | |
4669 | cs = intel_ring_begin(request, 6); |
4670 | if (IS_ERR(cs)) |
4671 | return PTR_ERR(cs); |
4672 | |
4673 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
4674 | intel_ring_advance(request, cs); |
4675 | } |
4676 | |
4677 | if (mode & EMIT_INVALIDATE(1UL << (0))) { |
4678 | u32 *cs; |
4679 | u32 flags = 0; |
4680 | |
4681 | flags |= PIPE_CONTROL_CS_STALL(1<<20); |
4682 | |
4683 | flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE(1<<29); |
4684 | flags |= PIPE_CONTROL_TLB_INVALIDATE(1<<18); |
4685 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11); |
4686 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10); |
4687 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4); |
4688 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3); |
4689 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2); |
4690 | flags |= PIPE_CONTROL_QW_WRITE(1<<14); |
4691 | flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21); |
4692 | |
4693 | cs = intel_ring_begin(request, 6); |
4694 | if (IS_ERR(cs)) |
4695 | return PTR_ERR(cs); |
4696 | |
4697 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
4698 | intel_ring_advance(request, cs); |
4699 | } |
4700 | |
4701 | return 0; |
4702 | } |
4703 | |
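 | /* |
 | * On gen12 the MI_ARB_CHECK encoding doubles as the pre-parser control: |
 | * bit 0 carries the disable state and bit 8 appears to act as the |
 | * write-enable for it, hence the "1 << 8 | state" below. |
 | */ |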
4704 | static u32 preparser_disable(bool_Bool state) |
4705 | { |
4706 | return MI_ARB_CHECK(((0x05) << 23) | (0)) | 1 << 8 | state; |
4707 | } |
4708 | |
4709 | static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) |
4710 | { |
4711 | static const i915_reg_t vd[] = { |
4712 | GEN12_VD0_AUX_NV((const i915_reg_t){ .reg = (0x4218) }), |
4713 | GEN12_VD1_AUX_NV((const i915_reg_t){ .reg = (0x4228) }), |
4714 | GEN12_VD2_AUX_NV((const i915_reg_t){ .reg = (0x4298) }), |
4715 | GEN12_VD3_AUX_NV((const i915_reg_t){ .reg = (0x42A8) }), |
4716 | }; |
4717 | |
4718 | static const i915_reg_t ve[] = { |
4719 | GEN12_VE0_AUX_NV((const i915_reg_t){ .reg = (0x4238) }), |
4720 | GEN12_VE1_AUX_NV((const i915_reg_t){ .reg = (0x42B8) }), |
4721 | }; |
4722 | |
4723 | if (engine->class == VIDEO_DECODE_CLASS1) |
4724 | return vd[engine->instance]; |
4725 | |
4726 | if (engine->class == VIDEO_ENHANCEMENT_CLASS2) |
4727 | return ve[engine->instance]; |
4728 | |
4729 | GEM_BUG_ON("unknown aux_inv_reg\n")((void)0); |
4730 | |
4731 | return INVALID_MMIO_REG((const i915_reg_t){ .reg = (0) }); |
4732 | } |
4733 | |
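 | /* |
 | * Invalidate an engine's AUX table by writing AUX_INV to its AUX_NV |
 | * register with an MI_LOAD_REGISTER_IMM (see the "hsdes: 1809175790" |
 | * notes at the call sites). |
 | */ |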
4734 | static u32 * |
4735 | gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) |
4736 | { |
4737 | *cs++ = MI_LOAD_REGISTER_IMM(1)(((0x22) << 23) | (2*(1)-1)); |
4738 | *cs++ = i915_mmio_reg_offset(inv_reg); |
4739 | *cs++ = AUX_INV((u32)((1UL << (0)) + 0)); |
4740 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
4741 | |
4742 | return cs; |
4743 | } |
4744 | |
4745 | static int gen12_emit_flush_render(struct i915_request *request, |
4746 | u32 mode) |
4747 | { |
4748 | if (mode & EMIT_FLUSH(1UL << (1))) { |
4749 | u32 flags = 0; |
4750 | u32 *cs; |
4751 | |
4752 | flags |= PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28); |
4753 | flags |= PIPE_CONTROL_FLUSH_L3(1<<27); |
4754 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12); |
4755 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0); |
4756 | /* Wa_1409600907:tgl */ |
4757 | flags |= PIPE_CONTROL_DEPTH_STALL(1<<13); |
4758 | flags |= PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5); |
4759 | flags |= PIPE_CONTROL_FLUSH_ENABLE(1<<7); |
4760 | |
4761 | flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21); |
4762 | flags |= PIPE_CONTROL_QW_WRITE(1<<14); |
4763 | |
4764 | flags |= PIPE_CONTROL_CS_STALL(1<<20); |
4765 | |
4766 | cs = intel_ring_begin(request, 6); |
4767 | if (IS_ERR(cs)) |
4768 | return PTR_ERR(cs); |
4769 | |
4770 | cs = gen12_emit_pipe_control(cs, |
4771 | PIPE_CONTROL0_HDC_PIPELINE_FLUSH((u32)((1UL << (9)) + 0)), |
4772 | flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
4773 | intel_ring_advance(request, cs); |
4774 | } |
4775 | |
4776 | if (mode & EMIT_INVALIDATE(1UL << (0))) { |
4777 | u32 flags = 0; |
4778 | u32 *cs; |
4779 | |
4780 | flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE(1<<29); |
4781 | flags |= PIPE_CONTROL_TLB_INVALIDATE(1<<18); |
4782 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE(1<<11); |
4783 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE(1<<10); |
4784 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE(1<<4); |
4785 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE(1<<3); |
4786 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE(1<<2); |
4787 | |
4788 | flags |= PIPE_CONTROL_STORE_DATA_INDEX(1<<21); |
4789 | flags |= PIPE_CONTROL_QW_WRITE(1<<14); |
4790 | |
4791 | flags |= PIPE_CONTROL_CS_STALL(1<<20); |
4792 | |
4793 | cs = intel_ring_begin(request, 8 + 4); |
4794 | if (IS_ERR(cs)) |
4795 | return PTR_ERR(cs); |
4796 | |
4797 | /* |
4798 | * Prevent the pre-parser from skipping past the TLB |
4799 | * invalidate and loading a stale page for the batch |
4800 | * buffer / request payload. |
4801 | */ |
4802 | *cs++ = preparser_disable(true1); |
4803 | |
4804 | cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32))); |
4805 | |
4806 | /* hsdes: 1809175790 */ |
4807 | cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV((const i915_reg_t){ .reg = (0x4208) }), cs); |
4808 | |
4809 | *cs++ = preparser_disable(false0); |
4810 | intel_ring_advance(request, cs); |
4811 | } |
4812 | |
4813 | return 0; |
4814 | } |
4815 | |
4816 | static int gen12_emit_flush(struct i915_request *request, u32 mode) |
4817 | { |
4818 | intel_engine_mask_t aux_inv = 0; |
4819 | u32 cmd, *cs; |
4820 | |
4821 | cmd = 4; |
4822 | if (mode & EMIT_INVALIDATE(1UL << (0))) |
4823 | cmd += 2; |
4824 | if (mode & EMIT_INVALIDATE(1UL << (0))) |
4825 | aux_inv = request->engine->mask & ~BIT(BCS0)(1UL << (BCS0)); |
4826 | if (aux_inv) |
4827 | cmd += 2 * hweight8(aux_inv) + 2; |
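 | /* i.e. one LRI header, a reg/value pair per engine, and a closing MI_NOOP */ |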
4828 | |
4829 | cs = intel_ring_begin(request, cmd); |
4830 | if (IS_ERR(cs)) |
4831 | return PTR_ERR(cs); |
4832 | |
4833 | if (mode & EMIT_INVALIDATE(1UL << (0))) |
4834 | *cs++ = preparser_disable(true1); |
4835 | |
4836 | cmd = MI_FLUSH_DW(((0x26) << 23) | (1)) + 1; |
4837 | |
4838 | /* We always require a command barrier so that subsequent |
4839 | * commands, such as breadcrumb interrupts, are strictly ordered |
4840 | * wrt the contents of the write cache being flushed to memory |
4841 | * (and thus being coherent from the CPU). |
4842 | */ |
4843 | cmd |= MI_FLUSH_DW_STORE_INDEX(1<<21) | MI_FLUSH_DW_OP_STOREDW(1<<14); |
4844 | |
4845 | if (mode & EMIT_INVALIDATE(1UL << (0))) { |
4846 | cmd |= MI_INVALIDATE_TLB(1<<18); |
4847 | if (request->engine->class == VIDEO_DECODE_CLASS1) |
4848 | cmd |= MI_INVALIDATE_BSD(1<<7); |
4849 | } |
4850 | |
4851 | *cs++ = cmd; |
4852 | *cs++ = LRC_PPHWSP_SCRATCH_ADDR(0x34 * sizeof(u32)); |
4853 | *cs++ = 0; /* upper addr */ |
4854 | *cs++ = 0; /* value */ |
4855 | |
4856 | if (aux_inv) { /* hsdes: 1809175790 */ |
4857 | struct intel_engine_cs *engine; |
4858 | unsigned int tmp; |
4859 | |
4860 | *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv))(((0x22) << 23) | (2*(hweight8(aux_inv))-1)); |
4861 | for_each_engine_masked(engine, request->engine->gt,for ((tmp) = (aux_inv) & (request->engine->gt)-> info.engine_mask; (tmp) ? ((engine) = (request->engine-> gt)->engine[({ int __idx = ffs(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx; })]), 1 : 0;) |
4862 | aux_inv, tmp)for ((tmp) = (aux_inv) & (request->engine->gt)-> info.engine_mask; (tmp) ? ((engine) = (request->engine-> gt)->engine[({ int __idx = ffs(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx; })]), 1 : 0;) { |
4863 | *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); |
4864 | *cs++ = AUX_INV((u32)((1UL << (0)) + 0)); |
4865 | } |
4866 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
4867 | } |
4868 | |
4869 | if (mode & EMIT_INVALIDATE(1UL << (0))) |
4870 | *cs++ = preparser_disable(false0); |
4871 | |
4872 | intel_ring_advance(request, cs); |
4873 | |
4874 | return 0; |
4875 | } |
4876 | |
4877 | static void assert_request_valid(struct i915_request *rq) |
4878 | { |
4879 | struct intel_ring *ring __maybe_unused__attribute__((__unused__)) = rq->ring; |
4880 | |
4881 | /* Can we unwind this request without appearing to go forwards? */ |
4882 | GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0)((void)0); |
4883 | } |
4884 | |
4885 | /* |
4886 | * Reserve space for 2 NOOPs at the end of each request to be |
4887 | * used as a workaround for not being allowed to do lite |
4888 | * restore with HEAD==TAIL (WaIdleLiteRestore). |
4889 | */ |
4890 | static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) |
4891 | { |
4892 | /* Ensure there's always at least one preemption point per-request. */ |
4893 | *cs++ = MI_ARB_CHECK(((0x05) << 23) | (0)); |
4894 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
4895 | request->wa_tail = intel_ring_offset(request, cs); |
4896 | |
4897 | /* Check that entire request is less than half the ring */ |
4898 | assert_request_valid(request); |
4899 | |
4900 | return cs; |
4901 | } |
4902 | |
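 | /* |
 | * Busy-wait on the per-engine preemption semaphore in the HWSP: the |
 | * MI_SEMAPHORE_WAIT polls intel_hws_preempt_address() until it reads |
 | * zero again, which lets ring_set_paused() hold the CS here across a |
 | * preempt-to-busy cycle. |
 | */ |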
4903 | static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) |
4904 | { |
4905 | *cs++ = MI_SEMAPHORE_WAIT(((0x1c) << 23) | (2)) | |
4906 | MI_SEMAPHORE_GLOBAL_GTT(1<<22) | |
4907 | MI_SEMAPHORE_POLL(1 << 15) | |
4908 | MI_SEMAPHORE_SAD_EQ_SDD(4 << 12); |
4909 | *cs++ = 0; |
4910 | *cs++ = intel_hws_preempt_address(request->engine); |
4911 | *cs++ = 0; |
4912 | |
4913 | return cs; |
4914 | } |
4915 | |
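 | /* |
 | * Common tail of the fini breadcrumb: raise the user interrupt, |
 | * re-enable arbitration (pairing with the MI_ARB_DISABLE emitted at |
 | * batch start), optionally insert the preemption busy-wait, then close |
 | * with the WaIdleLiteRestore tail. |
 | */ |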
4916 | static __always_inline__attribute__((__always_inline__)) u32* |
4917 | gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) |
4918 | { |
4919 | *cs++ = MI_USER_INTERRUPT(((0x02) << 23) | (0)); |
4920 | |
4921 | *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0); |
4922 | if (intel_engine_has_semaphores(request->engine)) |
4923 | cs = emit_preempt_busywait(request, cs); |
4924 | |
4925 | request->tail = intel_ring_offset(request, cs); |
4926 | assert_ring_tail_valid(request->ring, request->tail); |
4927 | |
4928 | return gen8_emit_wa_tail(request, cs); |
4929 | } |
4930 | |
4931 | static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) |
4932 | { |
4933 | return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0); |
4934 | } |
4935 | |
4936 | static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) |
4937 | { |
4938 | return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); |
4939 | } |
4940 | |
4941 | static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) |
4942 | { |
4943 | cs = gen8_emit_pipe_control(cs, |
4944 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12) | |
4945 | PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0) | |
4946 | PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5), |
4947 | 0); |
4948 | |
4949 | /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ |
4950 | cs = gen8_emit_ggtt_write_rcs(cs, |
4951 | request->fence.seqno, |
4952 | hwsp_offset(request), |
4953 | PIPE_CONTROL_FLUSH_ENABLE(1<<7) | |
4954 | PIPE_CONTROL_CS_STALL(1<<20)); |
4955 | |
4956 | return gen8_emit_fini_breadcrumb_tail(request, cs); |
4957 | } |
4958 | |
4959 | static u32 * |
4960 | gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) |
4961 | { |
4962 | cs = gen8_emit_ggtt_write_rcs(cs, |
4963 | request->fence.seqno, |
4964 | hwsp_offset(request), |
4965 | PIPE_CONTROL_CS_STALL(1<<20) | |
4966 | PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28) | |
4967 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12) | |
4968 | PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0) | |
4969 | PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5) | |
4970 | PIPE_CONTROL_FLUSH_ENABLE(1<<7)); |
4971 | |
4972 | return gen8_emit_fini_breadcrumb_tail(request, cs); |
4973 | } |
4974 | |
4975 | /* |
4976 | * Note that the CS instruction pre-parser will not stall on the breadcrumb |
4977 | * flush and will continue pre-fetching the instructions after it before the |
4978 | * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at |
4979 | * BB_START/END instructions, so, even though we might pre-fetch the pre-amble |
4980 | * of the next request before the memory has been flushed, we're guaranteed that |
4981 | * we won't access the batch itself too early. |
4982 | * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, |
4983 | * so, if the current request is modifying an instruction in the next request on |
4984 | * the same intel_context, we might pre-fetch and then execute the pre-update |
4985 | * instruction. To avoid this, the users of self-modifying code should either |
4986 | * disable the parser around the code emitting the memory writes, via a new flag |
4987 | * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For |
4988 | * the in-kernel use-cases we've opted to use a separate context, see |
4989 | * reloc_gpu() as an example. |
4990 | * All the above applies only to the instructions themselves. Non-inline data |
4991 | * used by the instructions is not pre-fetched. |
4992 | */ |
4993 | |
4994 | static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) |
4995 | { |
4996 | *cs++ = MI_SEMAPHORE_WAIT_TOKEN(((0x1c) << 23) | (3)) | |
4997 | MI_SEMAPHORE_GLOBAL_GTT(1<<22) | |
4998 | MI_SEMAPHORE_POLL(1 << 15) | |
4999 | MI_SEMAPHORE_SAD_EQ_SDD(4 << 12); |
5000 | *cs++ = 0; |
5001 | *cs++ = intel_hws_preempt_address(request->engine); |
5002 | *cs++ = 0; |
5003 | *cs++ = 0; |
5004 | *cs++ = MI_NOOP(((0) << 23) | (0)); |
5005 | |
5006 | return cs; |
5007 | } |
5008 | |
5009 | static __always_inline__attribute__((__always_inline__)) u32* |
5010 | gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) |
5011 | { |
5012 | *cs++ = MI_USER_INTERRUPT(((0x02) << 23) | (0)); |
5013 | |
5014 | *cs++ = MI_ARB_ON_OFF(((0x08) << 23) | (0)) | MI_ARB_ENABLE(1<<0); |
5015 | if (intel_engine_has_semaphores(request->engine)) |
5016 | cs = gen12_emit_preempt_busywait(request, cs); |
5017 | |
5018 | request->tail = intel_ring_offset(request, cs); |
5019 | assert_ring_tail_valid(request->ring, request->tail); |
5020 | |
5021 | return gen8_emit_wa_tail(request, cs); |
5022 | } |
5023 | |
5024 | static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) |
5025 | { |
5026 | /* XXX Stalling flush before the seqno write; no post-sync op */
5027 | cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0)); |
5028 | return gen12_emit_fini_breadcrumb_tail(rq, cs); |
5029 | } |
5030 | |
5031 | static u32 * |
5032 | gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) |
5033 | { |
5034 | cs = gen12_emit_ggtt_write_rcs(cs, |
5035 | request->fence.seqno, |
5036 | hwsp_offset(request), |
5037 | PIPE_CONTROL0_HDC_PIPELINE_FLUSH((u32)((1UL << (9)) + 0)), |
5038 | PIPE_CONTROL_CS_STALL(1<<20) | |
5039 | PIPE_CONTROL_TILE_CACHE_FLUSH(1<<28) | |
5040 | PIPE_CONTROL_FLUSH_L3(1<<27) | |
5041 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH(1<<12) | |
5042 | PIPE_CONTROL_DEPTH_CACHE_FLUSH(1<<0) | |
5043 | /* Wa_1409600907:tgl */ |
5044 | PIPE_CONTROL_DEPTH_STALL(1<<13) | |
5045 | PIPE_CONTROL_DC_FLUSH_ENABLE(1<<5) | |
5046 | PIPE_CONTROL_FLUSH_ENABLE(1<<7)); |
5047 | |
5048 | return gen12_emit_fini_breadcrumb_tail(request, cs); |
5049 | } |
5050 | |
5051 | static void execlists_park(struct intel_engine_cs *engine) |
5052 | { |
5053 | cancel_timer(&engine->execlists.timer); |
5054 | cancel_timer(&engine->execlists.preempt); |
5055 | } |
5056 | |
5057 | void intel_execlists_set_default_submission(struct intel_engine_cs *engine) |
5058 | { |
5059 | engine->submit_request = execlists_submit_request; |
5060 | engine->schedule = i915_schedule; |
5061 | engine->execlists.tasklet.func = execlists_submission_tasklet; |
5062 | |
5063 | engine->reset.prepare = execlists_reset_prepare; |
5064 | engine->reset.rewind = execlists_reset_rewind; |
5065 | engine->reset.cancel = execlists_reset_cancel; |
5066 | engine->reset.finish = execlists_reset_finish; |
5067 | |
5068 | engine->park = execlists_park; |
5069 | engine->unpark = NULL((void *)0); |
5070 | |
5071 | engine->flags |= I915_ENGINE_SUPPORTS_STATS(1UL << (1)); |
5072 | if (!intel_vgpu_active(engine->i915)) { |
5073 | engine->flags |= I915_ENGINE_HAS_SEMAPHORES(1UL << (3)); |
5074 | if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)((&(engine->i915)->__info)->has_logical_ring_preemption )) { |
5075 | engine->flags |= I915_ENGINE_HAS_PREEMPTION(1UL << (2)); |
5076 | if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)((1) != 0)) |
5077 | engine->flags |= I915_ENGINE_HAS_TIMESLICES(1UL << (4)); |
5078 | } |
5079 | } |
5080 | |
5081 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) |
5082 | engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO(1UL << (7)); |
5083 | |
5084 | if (intel_engine_has_preemption(engine)) |
5085 | engine->emit_bb_start = gen8_emit_bb_start; |
5086 | else |
5087 | engine->emit_bb_start = gen8_emit_bb_start_noarb; |
5088 | } |
5089 | |
5090 | static void execlists_shutdown(struct intel_engine_cs *engine) |
5091 | { |
5092 | /* Synchronise with residual timers and any softirq they raise */ |
5093 | del_timer_sync(&engine->execlists.timer)timeout_del_barrier((&engine->execlists.timer)); |
5094 | del_timer_sync(&engine->execlists.preempt)timeout_del_barrier((&engine->execlists.preempt)); |
5095 | tasklet_kill(&engine->execlists.tasklet); |
5096 | } |
5097 | |
5098 | static void execlists_release(struct intel_engine_cs *engine) |
5099 | { |
5100 | engine->sanitize = NULL((void *)0); /* no longer in control, nothing to sanitize */ |
5101 | |
5102 | execlists_shutdown(engine); |
5103 | |
5104 | intel_engine_cleanup_common(engine); |
5105 | lrc_destroy_wa_ctx(engine); |
5106 | } |
5107 | |
5108 | static void |
5109 | logical_ring_default_vfuncs(struct intel_engine_cs *engine) |
5110 | { |
5111 | /* Default vfuncs which can be overridden by each engine. */
5112 | |
5113 | engine->resume = execlists_resume; |
5114 | |
5115 | engine->cops = &execlists_context_ops; |
5116 | engine->request_alloc = execlists_request_alloc; |
5117 | |
5118 | engine->emit_flush = gen8_emit_flush; |
5119 | engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; |
5120 | engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; |
5121 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 12) { |
5122 | engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; |
5123 | engine->emit_flush = gen12_emit_flush; |
5124 | } |
5125 | engine->set_default_submission = intel_execlists_set_default_submission; |
5126 | |
5127 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) < 11) { |
5128 | engine->irq_enable = gen8_logical_ring_enable_irq; |
5129 | engine->irq_disable = gen8_logical_ring_disable_irq; |
5130 | } else { |
5131 | /* |
5132 | * TODO: On Gen11 interrupt masks need to be clear |
5133 | * to allow C6 entry. Keep interrupts enabled
5134 | * and take the hit of generating extra interrupts |
5135 | * until a more refined solution exists. |
5136 | */ |
5137 | } |
5138 | } |
5139 | |
5140 | static inline void |
5141 | logical_ring_default_irqs(struct intel_engine_cs *engine) |
5142 | { |
5143 | unsigned int shift = 0; |
5144 | |
5145 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) < 11) { |
5146 | const u8 irq_shifts[] = { |
5147 | [RCS0] = GEN8_RCS_IRQ_SHIFT0, |
5148 | [BCS0] = GEN8_BCS_IRQ_SHIFT16, |
5149 | [VCS0] = GEN8_VCS0_IRQ_SHIFT0, |
5150 | [VCS1] = GEN8_VCS1_IRQ_SHIFT16, |
5151 | [VECS0] = GEN8_VECS_IRQ_SHIFT0, |
5152 | }; |
5153 | |
5154 | shift = irq_shifts[engine->id]; |
5155 | } |
5156 | |
5157 | engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT(1 << 0) << shift; |
5158 | engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT(1 << 8) << shift; |
5159 | engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT((u32)((1UL << (3)) + 0)) << shift; |
5160 | engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT((u32)((1UL << (11)) + 0)) << shift; |
5161 | } |
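/*
 * Illustrative sketch (not part of the driver): on pre-gen11 hardware the
 * per-engine interrupt bits share registers, so each mask above is just
 * the gen8 interrupt bit moved into the engine's field by the shift taken
 * from irq_shifts[]. For example, with the BCS0 shift of 16 listed above
 * (the other bit values are taken from the expansions shown):
 */
#if 0
static void example_bcs0_irq_masks(void)
{
	const unsigned int shift = 16;			/* GEN8_BCS_IRQ_SHIFT */
	u32 enable_mask = (1 << 0) << shift;		/* GT_RENDER_USER_INTERRUPT */
	u32 keep_mask = ((1 << 8) |			/* GT_CONTEXT_SWITCH_INTERRUPT */
			 (1 << 3) |			/* GT_CS_MASTER_ERROR_INTERRUPT */
			 (1 << 11)) << shift;		/* GT_WAIT_SEMAPHORE_INTERRUPT */

	(void)enable_mask;
	(void)keep_mask;
}
#endif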
5162 | |
5163 | static void rcs_submission_override(struct intel_engine_cs *engine) |
5164 | { |
5165 | switch (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen)) { |
5166 | case 12: |
5167 | engine->emit_flush = gen12_emit_flush_render; |
5168 | engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; |
5169 | break; |
5170 | case 11: |
5171 | engine->emit_flush = gen11_emit_flush_render; |
5172 | engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; |
5173 | break; |
5174 | default: |
5175 | engine->emit_flush = gen8_emit_flush_render; |
5176 | engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; |
5177 | break; |
5178 | } |
5179 | } |
5180 | |
5181 | int intel_execlists_submission_setup(struct intel_engine_cs *engine) |
5182 | { |
5183 | struct intel_engine_execlists * const execlists = &engine->execlists; |
5184 | struct drm_i915_privateinteldrm_softc *i915 = engine->i915; |
5185 | struct intel_uncore *uncore = engine->uncore; |
5186 | u32 base = engine->mmio_base; |
5187 | |
5188 | tasklet_init(&engine->execlists.tasklet, |
5189 | execlists_submission_tasklet, (unsigned long)engine); |
5190 | #ifdef __linux__ |
5191 | timer_setup(&engine->execlists.timer, execlists_timeslice, 0); |
5192 | timer_setup(&engine->execlists.preempt, execlists_preempt, 0); |
5193 | #else |
5194 | timeout_set(&engine->execlists.timer, execlists_timeslice, |
5195 | &engine->execlists.timer); |
5196 | timeout_set(&engine->execlists.preempt, execlists_preempt, |
5197 | &engine->execlists.preempt); |
5198 | #endif |
5199 | |
5200 | logical_ring_default_vfuncs(engine); |
5201 | logical_ring_default_irqs(engine); |
5202 | |
5203 | if (engine->class == RENDER_CLASS0) |
5204 | rcs_submission_override(engine); |
5205 | |
5206 | if (intel_init_workaround_bb(engine)) |
5207 | /* |
5208 | * We continue even if we fail to initialize the WA batch
5209 | * because we only expect rare glitches, nothing critical
5210 | * enough to prevent us from using the GPU.
5211 | */ |
5212 | drm_err(&i915->drm, "WA batch buffer initialization failed\n")printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "WA batch buffer initialization failed\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__); |
5213 | |
5214 | if (HAS_LOGICAL_RING_ELSQ(i915)((&(i915)->__info)->has_logical_ring_elsq)) { |
5215 | execlists->submit_reg = uncore->regs + |
5216 | i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)((const i915_reg_t){ .reg = ((base) + 0x510) })); |
5217 | execlists->ctrl_reg = uncore->regs + |
5218 | i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)((const i915_reg_t){ .reg = ((base) + 0x550) })); |
5219 | } else { |
5220 | execlists->submit_reg = uncore->regs + |
5221 | i915_mmio_reg_offset(RING_ELSP(base)((const i915_reg_t){ .reg = ((base) + 0x230) })); |
5222 | } |
5223 | |
5224 | execlists->csb_status = |
5225 | (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX0x10]; |
5226 | |
5227 | execlists->csb_write = |
5228 | &engine->status_page.addr[intel_hws_csb_write_index(i915)]; |
5229 | |
5230 | if (INTEL_GEN(i915)((&(i915)->__info)->gen) < 11) |
5231 | execlists->csb_size = GEN8_CSB_ENTRIES6; |
5232 | else |
5233 | execlists->csb_size = GEN11_CSB_ENTRIES12; |
5234 | |
5235 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) >= 11) { |
5236 | execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT48 - 32); |
5237 | execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT61 - 32); |
5238 | } |
5239 | |
5240 | /* Finally, take ownership and responsibility for cleanup! */ |
5241 | engine->sanitize = execlists_sanitize; |
5242 | engine->release = execlists_release; |
5243 | |
5244 | return 0; |
5245 | } |
5246 | |
5247 | static void init_common_reg_state(u32 * const regs, |
5248 | const struct intel_engine_cs *engine, |
5249 | const struct intel_ring *ring, |
5250 | bool_Bool inhibit) |
5251 | { |
5252 | u32 ctl; |
5253 | |
5254 | ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH)({ typeof((1 << 3)) _a = ((1 << 3)); ({ if (__builtin_constant_p (_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a) && __builtin_constant_p (_a)) do { } while (0); ((_a) << 16 | (_a)); }); }); |
5255 | ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT)(({ if (__builtin_constant_p(((1 << 0)))) do { } while ( 0); if (__builtin_constant_p(0)) do { } while (0); if (__builtin_constant_p (((1 << 0))) && __builtin_constant_p(0)) do { } while (0); ((((1 << 0))) << 16 | (0)); })); |
5256 | if (inhibit) |
5257 | ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT(1 << 0); |
5258 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) < 11) |
5259 | ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
5260 | CTX_CTRL_RS_CTX_ENABLE);
5261 | regs[CTX_CONTEXT_CONTROL(0x02 + 1)] = ctl; |
5262 | |
5263 | regs[CTX_RING_CTL(0x0a + 1)] = RING_CTL_SIZE(ring->size)((ring->size) - (1 << 12)) | RING_VALID0x00000001; |
5264 | regs[CTX_TIMESTAMP(0x22 + 1)] = 0; |
5265 | } |
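/*
 * Illustrative sketch (not part of the driver): CTX_CONTEXT_CONTROL is a
 * masked register, so every write carries a write-enable mask in the
 * upper 16 bits, which is exactly what the _MASKED_BIT_* expansions above
 * evaluate to -- only bits whose mask half is set are updated.
 */
#if 0
static u32 masked_bit_enable(u32 bit)
{
	return (bit << 16) | bit;	/* select the bit and set it */
}

static u32 masked_bit_disable(u32 bit)
{
	return (bit << 16) | 0;		/* select the bit and clear it */
}
#endif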
5266 | |
5267 | static void init_wa_bb_reg_state(u32 * const regs, |
5268 | const struct intel_engine_cs *engine) |
5269 | { |
5270 | const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; |
5271 | |
5272 | if (wa_ctx->per_ctx.size) { |
5273 | const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); |
5274 | |
5275 | GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1)((void)0); |
5276 | regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = |
5277 | (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; |
5278 | } |
5279 | |
5280 | if (wa_ctx->indirect_ctx.size) { |
5281 | lrc_ring_setup_indirect_ctx(regs, engine, |
5282 | i915_ggtt_offset(wa_ctx->vma) + |
5283 | wa_ctx->indirect_ctx.offset, |
5284 | wa_ctx->indirect_ctx.size); |
5285 | } |
5286 | } |
5287 | |
5288 | static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) |
5289 | { |
5290 | if (i915_vm_is_4lvl(&ppgtt->vm)) { |
5291 | /* 64b PPGTT (48bit canonical) |
5292 | * PDP0_DESCRIPTOR contains the base address to PML4 and |
5293 | * other PDP Descriptors are ignored. |
5294 | */ |
5295 | ASSIGN_CTX_PML4(ppgtt, regs)do { u32 *reg_state__ = (regs); const u64 addr__ = (__px_dma( __builtin_choose_expr( __builtin_types_compatible_p(typeof(ppgtt ->pd), struct drm_i915_gem_object *) || __builtin_types_compatible_p (typeof(ppgtt->pd), const struct drm_i915_gem_object *), ( { struct drm_i915_gem_object * __x = (struct drm_i915_gem_object *)(ppgtt->pd); __x; }), __builtin_choose_expr( __builtin_types_compatible_p (typeof(ppgtt->pd), struct i915_page_table *) || __builtin_types_compatible_p (typeof(ppgtt->pd), const struct i915_page_table *), ({ struct i915_page_table * __x = (struct i915_page_table *)(ppgtt-> pd); __x->base; }), __builtin_choose_expr( __builtin_types_compatible_p (typeof(ppgtt->pd), struct i915_page_directory *) || __builtin_types_compatible_p (typeof(ppgtt->pd), const struct i915_page_directory *), ( { struct i915_page_directory * __x = (struct i915_page_directory *)(ppgtt->pd); __x->pt.base; }), (void)0))))); (reg_state__ )[(0x30 + 1)] = ((u32)(((addr__) >> 16) >> 16)); ( reg_state__)[(0x32 + 1)] = ((u32)(addr__)); } while (0); |
5296 | } else { |
5297 | ASSIGN_CTX_PDP(ppgtt, regs, 3)do { u32 *reg_state__ = (regs); const u64 addr__ = i915_page_dir_dma_addr ((ppgtt), (3)); (reg_state__)[(0x24 + 1)] = ((u32)(((addr__) >> 16) >> 16)); (reg_state__)[(0x26 + 1)] = ((u32)(addr__ )); } while (0); |
5298 | ASSIGN_CTX_PDP(ppgtt, regs, 2)do { u32 *reg_state__ = (regs); const u64 addr__ = i915_page_dir_dma_addr ((ppgtt), (2)); (reg_state__)[(0x28 + 1)] = ((u32)(((addr__) >> 16) >> 16)); (reg_state__)[(0x2a + 1)] = ((u32)(addr__ )); } while (0); |
5299 | ASSIGN_CTX_PDP(ppgtt, regs, 1)do { u32 *reg_state__ = (regs); const u64 addr__ = i915_page_dir_dma_addr ((ppgtt), (1)); (reg_state__)[(0x2c + 1)] = ((u32)(((addr__) >> 16) >> 16)); (reg_state__)[(0x2e + 1)] = ((u32)(addr__ )); } while (0); |
5300 | ASSIGN_CTX_PDP(ppgtt, regs, 0)do { u32 *reg_state__ = (regs); const u64 addr__ = i915_page_dir_dma_addr ((ppgtt), (0)); (reg_state__)[(0x30 + 1)] = ((u32)(((addr__) >> 16) >> 16)); (reg_state__)[(0x32 + 1)] = ((u32)(addr__ )); } while (0); |
5301 | } |
5302 | } |
5303 | |
5304 | static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) |
5305 | { |
5306 | if (i915_is_ggtt(vm)((vm)->is_ggtt)) |
5307 | return i915_vm_to_ggtt(vm)->alias; |
5308 | else |
5309 | return i915_vm_to_ppgtt(vm); |
5310 | } |
5311 | |
5312 | static void execlists_init_reg_state(u32 *regs, |
5313 | const struct intel_context *ce, |
5314 | const struct intel_engine_cs *engine, |
5315 | const struct intel_ring *ring, |
5316 | bool_Bool inhibit) |
5317 | { |
5318 | /* |
5319 | * A context is actually a big batch buffer with several |
5320 | * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The |
5321 | * values we are setting here are only for the first context restore: |
5322 | * on a subsequent save, the GPU will recreate this batchbuffer with new |
5323 | * values (including all the missing MI_LOAD_REGISTER_IMM commands that |
5324 | * we are not initializing here). |
5325 | * |
5326 | * Must keep consistent with virtual_update_register_offsets(). |
5327 | */ |
5328 | set_offsets(regs, reg_offsets(engine), engine, inhibit); |
5329 | |
5330 | init_common_reg_state(regs, engine, ring, inhibit); |
5331 | init_ppgtt_reg_state(regs, vm_alias(ce->vm)); |
5332 | |
5333 | init_wa_bb_reg_state(regs, engine); |
5334 | |
5335 | __reset_stop_ring(regs, engine); |
5336 | } |
5337 | |
5338 | static int |
5339 | populate_lr_context(struct intel_context *ce, |
5340 | struct drm_i915_gem_object *ctx_obj, |
5341 | struct intel_engine_cs *engine, |
5342 | struct intel_ring *ring) |
5343 | { |
5344 | bool_Bool inhibit = true1; |
5345 | void *vaddr; |
5346 | |
5347 | vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); |
5348 | if (IS_ERR(vaddr)) { |
5349 | drm_dbg(&engine->i915->drm, "Could not map object pages!\n")drm_dev_dbg((&engine->i915->drm)->dev, DRM_UT_DRIVER , "Could not map object pages!\n"); |
5350 | return PTR_ERR(vaddr); |
5351 | } |
5352 | |
5353 | set_redzone(vaddr, engine); |
5354 | |
5355 | if (engine->default_state) { |
5356 | #ifdef __linux__ |
5357 | shmem_read(engine->default_state, 0, |
5358 | vaddr, engine->context_size); |
5359 | #else |
5360 | uao_read(engine->default_state, 0, |
5361 | vaddr, engine->context_size); |
5362 | #endif |
5363 | __set_bit(CONTEXT_VALID_BIT2, &ce->flags); |
5364 | inhibit = false0; |
5365 | } |
5366 | |
5367 | /* Clear the ppHWSP (inc. per-context counters) */ |
5368 | memset(vaddr, 0, PAGE_SIZE)__builtin_memset((vaddr), (0), ((1 << 12))); |
5369 | |
5370 | /* |
5371 | * The second page of the context object contains some registers which |
5372 | * must be set up prior to the first execution. |
5373 | */ |
5374 | execlists_init_reg_state(vaddr + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12)), |
5375 | ce, engine, ring, inhibit); |
5376 | |
5377 | __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); |
5378 | i915_gem_object_unpin_map(ctx_obj); |
5379 | return 0; |
5380 | } |
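/*
 * Illustrative sketch (not part of the driver): the LRC_STATE_OFFSET
 * expansion above is 1 * PAGE_SIZE, i.e. page 0 of the context object is
 * the per-process HWSP cleared here, and the register state initialized
 * by execlists_init_reg_state() starts on the next page.
 */
#if 0
static void *lrc_state_ptr(void *vaddr)
{
	return (char *)vaddr + (1 << 12);	/* vaddr + LRC_STATE_OFFSET */
}
#endif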
5381 | |
5382 | static struct intel_timeline *pinned_timeline(struct intel_context *ce) |
5383 | { |
5384 | struct intel_timeline *tl = fetch_and_zero(&ce->timeline)({ typeof(*&ce->timeline) __T = *(&ce->timeline ); *(&ce->timeline) = (typeof(*&ce->timeline))0 ; __T; }); |
5385 | |
5386 | return intel_timeline_create_from_engine(ce->engine, |
5387 | page_unmask_bits(tl)((unsigned long)(tl) & ((1UL << (12)) - 1))); |
5388 | } |
5389 | |
5390 | static int __execlists_context_alloc(struct intel_context *ce, |
5391 | struct intel_engine_cs *engine) |
5392 | { |
5393 | struct drm_i915_gem_object *ctx_obj; |
5394 | struct intel_ring *ring; |
5395 | struct i915_vma *vma; |
5396 | u32 context_size; |
5397 | int ret; |
5398 | |
5399 | GEM_BUG_ON(ce->state)((void)0); |
5400 | context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE)((((engine->context_size) + (((1ULL << (12))) - 1)) / ((1ULL << (12)))) * ((1ULL << (12)))); |
5401 | |
5402 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0) |
5403 | context_size += I915_GTT_PAGE_SIZE(1ULL << (12)); /* for redzone */ |
5404 | |
5405 | if (INTEL_GEN(engine->i915)((&(engine->i915)->__info)->gen) == 12) { |
5406 | ce->wa_bb_page = context_size / PAGE_SIZE(1 << 12); |
5407 | context_size += PAGE_SIZE(1 << 12); |
5408 | } |
5409 | |
5410 | ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); |
5411 | if (IS_ERR(ctx_obj)) |
5412 | return PTR_ERR(ctx_obj); |
5413 | |
5414 | vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL((void *)0)); |
5415 | if (IS_ERR(vma)) { |
5416 | ret = PTR_ERR(vma); |
5417 | goto error_deref_obj; |
5418 | } |
5419 | |
5420 | if (!page_mask_bits(ce->timeline)({ unsigned long __v = (unsigned long)(ce->timeline); (typeof (ce->timeline))(__v & -(1UL << (12))); })) { |
5421 | struct intel_timeline *tl; |
5422 | |
5423 | /* |
5424 | * Use the static global HWSP for the kernel context, and |
5425 | * a dynamically allocated cacheline for everyone else. |
5426 | */ |
5427 | if (unlikely(ce->timeline)__builtin_expect(!!(ce->timeline), 0)) |
5428 | tl = pinned_timeline(ce); |
5429 | else |
5430 | tl = intel_timeline_create(engine->gt); |
5431 | if (IS_ERR(tl)) { |
5432 | ret = PTR_ERR(tl); |
5433 | goto error_deref_obj; |
5434 | } |
5435 | |
5436 | ce->timeline = tl; |
5437 | } |
5438 | |
5439 | ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); |
5440 | if (IS_ERR(ring)) { |
5441 | ret = PTR_ERR(ring); |
5442 | goto error_deref_obj; |
5443 | } |
5444 | |
5445 | ret = populate_lr_context(ce, ctx_obj, engine, ring); |
5446 | if (ret) { |
5447 | drm_dbg(&engine->i915->drm,
5448 | "Failed to populate LRC: %d\n", ret);
5449 | goto error_ring_free; |
5450 | } |
5451 | |
5452 | ce->ring = ring; |
5453 | ce->state = vma; |
5454 | |
5455 | return 0; |
5456 | |
5457 | error_ring_free: |
5458 | intel_ring_put(ring); |
5459 | error_deref_obj: |
5460 | i915_gem_object_put(ctx_obj); |
5461 | return ret; |
5462 | } |
5463 | |
5464 | static struct list_head *virtual_queue(struct virtual_engine *ve) |
5465 | { |
5466 | return &ve->base.execlists.default_priolist.requests[0]; |
5467 | } |
5468 | |
5469 | static void rcu_virtual_context_destroy(struct work_struct *wrk) |
5470 | { |
5471 | struct virtual_engine *ve = |
5472 | container_of(wrk, typeof(*ve), rcu.work)({ const __typeof( ((typeof(*ve) *)0)->rcu.work ) *__mptr = (wrk); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof( typeof(*ve), rcu.work) );}); |
5473 | unsigned int n; |
5474 | |
5475 | GEM_BUG_ON(ve->context.inflight)((void)0); |
5476 | |
5477 | /* Preempt-to-busy may leave a stale request behind. */ |
5478 | if (unlikely(ve->request)__builtin_expect(!!(ve->request), 0)) { |
5479 | struct i915_request *old; |
5480 | |
5481 | spin_lock_irq(&ve->base.active.lock)mtx_enter(&ve->base.active.lock); |
5482 | |
5483 | old = fetch_and_zero(&ve->request)({ typeof(*&ve->request) __T = *(&ve->request); *(&ve->request) = (typeof(*&ve->request))0; __T ; }); |
5484 | if (old) { |
5485 | GEM_BUG_ON(!i915_request_completed(old))((void)0); |
5486 | __i915_request_submit(old); |
5487 | i915_request_put(old); |
5488 | } |
5489 | |
5490 | spin_unlock_irq(&ve->base.active.lock)mtx_leave(&ve->base.active.lock); |
5491 | } |
5492 | |
5493 | /* |
5494 | * Flush the tasklet in case it is still running on another core. |
5495 | * |
5496 | * This needs to be done before we remove ourselves from the siblings' |
5497 | * rbtrees as in the case it is running in parallel, it may reinsert |
5498 | * the rb_node into a sibling. |
5499 | */ |
5500 | tasklet_kill(&ve->base.execlists.tasklet); |
5501 | |
5502 | /* Decouple ourselves from the siblings, no more access allowed. */ |
5503 | for (n = 0; n < ve->num_siblings; n++) { |
5504 | struct intel_engine_cs *sibling = ve->siblings[n]; |
5505 | struct rb_node *node = &ve->nodes[sibling->id].rb; |
5506 | |
5507 | if (RB_EMPTY_NODE(node)((node)->__entry.rbe_parent == node)) |
5508 | continue; |
5509 | |
5510 | spin_lock_irq(&sibling->active.lock)mtx_enter(&sibling->active.lock); |
5511 | |
5512 | /* Detachment is lazily performed in the execlists tasklet */ |
5513 | if (!RB_EMPTY_NODE(node)((node)->__entry.rbe_parent == node)) |
5514 | rb_erase_cached(node, &sibling->execlists.virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&sibling ->execlists.virtual)->rb_root), (node)); |
5515 | |
5516 | spin_unlock_irq(&sibling->active.lock)mtx_leave(&sibling->active.lock); |
5517 | } |
5518 | GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet))((void)0); |
5519 | GEM_BUG_ON(!list_empty(virtual_queue(ve)))((void)0); |
5520 | |
5521 | if (ve->context.state) |
5522 | __execlists_context_fini(&ve->context); |
5523 | intel_context_fini(&ve->context); |
5524 | |
5525 | intel_breadcrumbs_free(ve->base.breadcrumbs); |
5526 | intel_engine_free_request_pool(&ve->base); |
5527 | |
5528 | kfree(ve->bonds); |
5529 | kfree(ve); |
5530 | } |
5531 | |
5532 | static void virtual_context_destroy(struct kref *kref) |
5533 | { |
5534 | struct virtual_engine *ve = |
5535 | container_of(kref, typeof(*ve), context.ref)({ const __typeof( ((typeof(*ve) *)0)->context.ref ) *__mptr = (kref); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof (typeof(*ve), context.ref) );}); |
5536 | |
5537 | GEM_BUG_ON(!list_empty(&ve->context.signals))((void)0); |
5538 | |
5539 | /* |
5540 | * When destroying the virtual engine, we have to be aware that |
5541 | * it may still be in use from a hardirq/softirq context, causing
5542 | * the resubmission of a completed request (background completion |
5543 | * due to preempt-to-busy). Before we can free the engine, we need |
5544 | * to flush the submission code and tasklets that are still potentially |
5545 | * accessing the engine. Flushing the tasklets requires process context, |
5546 | * and since we can guard the resubmit onto the engine with an RCU read |
5547 | * lock, we can delegate the free of the engine to an RCU worker. |
5548 | */ |
5549 | INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); |
5550 | queue_rcu_work(system_wq, &ve->rcu); |
5551 | } |
5552 | |
5553 | static void virtual_engine_initial_hint(struct virtual_engine *ve) |
5554 | { |
5555 | int swp; |
5556 | |
5557 | /* |
5558 | * Pick a random sibling when starting to help spread the load around.
5559 | * |
5560 | * New contexts are typically created with exactly the same order |
5561 | * of siblings, and often started in batches. Due to the way we iterate |
5562 | * the array of siblings when submitting requests, sibling[0] is
5563 | * prioritised for dequeuing. If we make sure that sibling[0] is fairly |
5564 | * randomised across the system, we also help spread the load by the |
5565 | * first engine we inspect being different each time. |
5566 | * |
5567 | * NB This does not force us to execute on this engine, it will just |
5568 | * typically be the first we inspect for submission. |
5569 | */ |
5570 | swp = prandom_u32_max(ve->num_siblings); |
5571 | if (swp) |
5572 | swap(ve->siblings[swp], ve->siblings[0])do { __typeof(ve->siblings[swp]) __tmp = (ve->siblings[ swp]); (ve->siblings[swp]) = (ve->siblings[0]); (ve-> siblings[0]) = __tmp; } while(0); |
5573 | } |
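/*
 * Illustrative sketch (not part of the driver): the initial hint is
 * nothing more than swapping siblings[0] with a uniformly chosen entry,
 * so different virtual engines begin their submission scan on different
 * physical engines.
 */
#if 0
static void example_initial_hint(struct intel_engine_cs **siblings,
				 unsigned int count)
{
	unsigned int swp = prandom_u32_max(count);	/* 0 <= swp < count */

	if (swp)
		swap(siblings[swp], siblings[0]);
}
#endif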
5574 | |
5575 | static int virtual_context_alloc(struct intel_context *ce) |
5576 | { |
5577 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context)({ const __typeof( ((typeof(*ve) *)0)->context ) *__mptr = (ce); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof(typeof (*ve), context) );}); |
5578 | |
5579 | return __execlists_context_alloc(ce, ve->siblings[0]); |
5580 | } |
5581 | |
5582 | static int virtual_context_pin(struct intel_context *ce, void *vaddr) |
5583 | { |
5584 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context)({ const __typeof( ((typeof(*ve) *)0)->context ) *__mptr = (ce); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof(typeof (*ve), context) );}); |
5585 | |
5586 | /* Note: we must use a real engine class for setting up reg state */ |
5587 | return __execlists_context_pin(ce, ve->siblings[0], vaddr); |
5588 | } |
5589 | |
5590 | static void virtual_context_enter(struct intel_context *ce) |
5591 | { |
5592 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context)({ const __typeof( ((typeof(*ve) *)0)->context ) *__mptr = (ce); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof(typeof (*ve), context) );}); |
5593 | unsigned int n; |
5594 | |
5595 | for (n = 0; n < ve->num_siblings; n++) |
5596 | intel_engine_pm_get(ve->siblings[n]); |
5597 | |
5598 | intel_timeline_enter(ce->timeline); |
5599 | } |
5600 | |
5601 | static void virtual_context_exit(struct intel_context *ce) |
5602 | { |
5603 | struct virtual_engine *ve = container_of(ce, typeof(*ve), context)({ const __typeof( ((typeof(*ve) *)0)->context ) *__mptr = (ce); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof(typeof (*ve), context) );}); |
5604 | unsigned int n; |
5605 | |
5606 | intel_timeline_exit(ce->timeline); |
5607 | |
5608 | for (n = 0; n < ve->num_siblings; n++) |
5609 | intel_engine_pm_put(ve->siblings[n]); |
5610 | } |
5611 | |
5612 | static const struct intel_context_ops virtual_context_ops = { |
5613 | .alloc = virtual_context_alloc, |
5614 | |
5615 | .pre_pin = execlists_context_pre_pin, |
5616 | .pin = virtual_context_pin, |
5617 | .unpin = execlists_context_unpin, |
5618 | .post_unpin = execlists_context_post_unpin, |
5619 | |
5620 | .enter = virtual_context_enter, |
5621 | .exit = virtual_context_exit, |
5622 | |
5623 | .destroy = virtual_context_destroy, |
5624 | }; |
5625 | |
5626 | static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) |
5627 | { |
5628 | struct i915_request *rq; |
5629 | intel_engine_mask_t mask; |
5630 | |
5631 | rq = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request ) *)&(ve->request); membar_datadep_consumer(); __tmp; } ); |
5632 | if (!rq) |
5633 | return 0; |
5634 | |
5635 | /* The rq is ready for submission; rq->execution_mask is now stable. */ |
5636 | mask = rq->execution_mask; |
5637 | if (unlikely(!mask)__builtin_expect(!!(!mask), 0)) { |
5638 | /* Invalid selection, submit to a random engine in error */ |
5639 | i915_request_set_error_once(rq, -ENODEV19); |
5640 | mask = ve->siblings[0]->mask; |
5641 | } |
5642 | |
5643 | ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
5644 | rq->fence.context, rq->fence.seqno,
5645 | mask, ve->base.execlists.queue_priority_hint);
5646 | |
5647 | return mask; |
5648 | } |
5649 | |
5650 | static void virtual_submission_tasklet(unsigned long data) |
5651 | { |
5652 | struct virtual_engine * const ve = (struct virtual_engine *)data; |
5653 | const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint)({ typeof(ve->base.execlists.queue_priority_hint) __tmp = * (volatile typeof(ve->base.execlists.queue_priority_hint) * )&(ve->base.execlists.queue_priority_hint); membar_datadep_consumer (); __tmp; }); |
5654 | intel_engine_mask_t mask; |
5655 | unsigned int n; |
5656 | |
5657 | rcu_read_lock(); |
5658 | mask = virtual_submission_mask(ve); |
5659 | rcu_read_unlock(); |
5660 | if (unlikely(!mask)__builtin_expect(!!(!mask), 0)) |
5661 | return; |
5662 | |
5663 | local_irq_disable()intr_disable(); |
5664 | for (n = 0; n < ve->num_siblings; n++) { |
5665 | struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n])({ typeof(ve->siblings[n]) __tmp = *(volatile typeof(ve-> siblings[n]) *)&(ve->siblings[n]); membar_datadep_consumer (); __tmp; }); |
5666 | struct ve_node * const node = &ve->nodes[sibling->id]; |
5667 | struct rb_node **parent, *rb; |
5668 | bool_Bool first; |
5669 | |
5670 | if (!READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request ) *)&(ve->request); membar_datadep_consumer(); __tmp; } )) |
5671 | break; /* already handled by a sibling's tasklet */ |
5672 | |
5673 | if (unlikely(!(mask & sibling->mask))__builtin_expect(!!(!(mask & sibling->mask)), 0)) { |
5674 | if (!RB_EMPTY_NODE(&node->rb)((&node->rb)->__entry.rbe_parent == &node->rb )) { |
5675 | spin_lock(&sibling->active.lock)mtx_enter(&sibling->active.lock); |
5676 | rb_erase_cached(&node->rb,
5677 | &sibling->execlists.virtual);
5678 | RB_CLEAR_NODE(&node->rb)(((&node->rb))->__entry.rbe_parent = (&node-> rb)); |
5679 | spin_unlock(&sibling->active.lock)mtx_leave(&sibling->active.lock); |
5680 | } |
5681 | continue; |
5682 | } |
5683 | |
5684 | spin_lock(&sibling->active.lock)mtx_enter(&sibling->active.lock); |
5685 | |
5686 | if (!RB_EMPTY_NODE(&node->rb)((&node->rb)->__entry.rbe_parent == &node->rb )) { |
5687 | /* |
5688 | * Cheat and avoid rebalancing the tree if we can |
5689 | * reuse this node in situ. |
5690 | */ |
5691 | first = rb_first_cached(&sibling->execlists.virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&sibling ->execlists.virtual)->rb_root), -1) == |
5692 | &node->rb; |
5693 | if (prio == node->prio || (prio > node->prio && first)) |
5694 | goto submit_engine; |
5695 | |
5696 | rb_erase_cached(&node->rb, &sibling->execlists.virtual)linux_root_RB_REMOVE((struct linux_root *)(&(&sibling ->execlists.virtual)->rb_root), (&node->rb)); |
5697 | } |
5698 | |
5699 | rb = NULL((void *)0); |
5700 | first = true1; |
5701 | parent = &sibling->execlists.virtual.rb_root.rb_node; |
5702 | while (*parent) { |
5703 | struct ve_node *other; |
5704 | |
5705 | rb = *parent; |
5706 | other = rb_entry(rb, typeof(*other), rb)({ const __typeof( ((typeof(*other) *)0)->rb ) *__mptr = ( rb); (typeof(*other) *)( (char *)__mptr - __builtin_offsetof( typeof(*other), rb) );}); |
5707 | if (prio > other->prio) { |
5708 | parent = &rb->rb_left__entry.rbe_left; |
5709 | } else { |
5710 | parent = &rb->rb_right__entry.rbe_right; |
5711 | first = false0; |
5712 | } |
5713 | } |
5714 | |
5715 | rb_link_node(&node->rb, rb, parent); |
5716 | rb_insert_color_cached(&node->rb,
5717 | &sibling->execlists.virtual,
5718 | first);
5719 | |
5720 | submit_engine: |
5721 | GEM_BUG_ON(RB_EMPTY_NODE(&node->rb))((void)0); |
5722 | node->prio = prio; |
5723 | if (first && prio > sibling->execlists.queue_priority_hint) |
5724 | tasklet_hi_schedule(&sibling->execlists.tasklet); |
5725 | |
5726 | spin_unlock(&sibling->active.lock)mtx_leave(&sibling->active.lock); |
5727 | } |
5728 | local_irq_enable()intr_enable(); |
5729 | } |
5730 | |
5731 | static void virtual_submit_request(struct i915_request *rq) |
5732 | { |
5733 | struct virtual_engine *ve = to_virtual_engine(rq->engine); |
5734 | struct i915_request *old; |
5735 | unsigned long flags; |
5736 | |
5737 | ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
5738 | rq->fence.context,
5739 | rq->fence.seqno);
5740 | |
5741 | GEM_BUG_ON(ve->base.submit_request != virtual_submit_request)((void)0); |
5742 | |
5743 | spin_lock_irqsave(&ve->base.active.lock, flags)do { flags = 0; mtx_enter(&ve->base.active.lock); } while (0); |
5744 | |
5745 | old = ve->request; |
5746 | if (old) { /* background completion event from preempt-to-busy */ |
5747 | GEM_BUG_ON(!i915_request_completed(old))((void)0); |
5748 | __i915_request_submit(old); |
5749 | i915_request_put(old); |
5750 | } |
5751 | |
5752 | if (i915_request_completed(rq)) { |
5753 | __i915_request_submit(rq); |
5754 | |
5755 | ve->base.execlists.queue_priority_hint = INT_MIN(-0x7fffffff-1); |
5756 | ve->request = NULL((void *)0); |
5757 | } else { |
5758 | ve->base.execlists.queue_priority_hint = rq_prio(rq); |
5759 | ve->request = i915_request_get(rq); |
5760 | |
5761 | GEM_BUG_ON(!list_empty(virtual_queue(ve)))((void)0); |
5762 | list_move_tail(&rq->sched.link, virtual_queue(ve)); |
5763 | |
5764 | tasklet_hi_schedule(&ve->base.execlists.tasklet); |
5765 | } |
5766 | |
5767 | spin_unlock_irqrestore(&ve->base.active.lock, flags)do { (void)(flags); mtx_leave(&ve->base.active.lock); } while (0); |
5768 | } |
5769 | |
5770 | static struct ve_bond * |
5771 | virtual_find_bond(struct virtual_engine *ve, |
5772 | const struct intel_engine_cs *master) |
5773 | { |
5774 | int i; |
5775 | |
5776 | for (i = 0; i < ve->num_bonds; i++) { |
5777 | if (ve->bonds[i].master == master) |
5778 | return &ve->bonds[i]; |
5779 | } |
5780 | |
5781 | return NULL((void *)0); |
5782 | } |
5783 | |
5784 | static void |
5785 | virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) |
5786 | { |
5787 | struct virtual_engine *ve = to_virtual_engine(rq->engine); |
5788 | intel_engine_mask_t allowed, exec; |
5789 | struct ve_bond *bond; |
5790 | |
5791 | allowed = ~to_request(signal)->engine->mask; |
5792 | |
5793 | bond = virtual_find_bond(ve, to_request(signal)->engine); |
5794 | if (bond) |
5795 | allowed &= bond->sibling_mask; |
5796 | |
5797 | /* Restrict the bonded request to run on only the available engines */ |
5798 | exec = READ_ONCE(rq->execution_mask)({ typeof(rq->execution_mask) __tmp = *(volatile typeof(rq ->execution_mask) *)&(rq->execution_mask); membar_datadep_consumer (); __tmp; }); |
5799 | while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)({ __typeof(&rq->execution_mask) __op = (__typeof((& rq->execution_mask)))(&exec); __typeof(*(&rq->execution_mask )) __o = *__op; __typeof(*(&rq->execution_mask)) __p = __sync_val_compare_and_swap((&rq->execution_mask), (__o ), (exec & allowed)); if (__p != __o) *__op = __p; (__p == __o); })) |
5800 | ; |
5801 | |
5802 | /* Prevent the master from being re-run on the bonded engines */ |
5803 | to_request(signal)->execution_mask &= ~allowed; |
5804 | } |
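/*
 * Illustrative sketch (not part of the driver): the bonding rule above is
 * plain mask arithmetic -- start from every engine except the master's,
 * narrow it to the bond's siblings, restrict the bonded request to that
 * set, and remove the same set from the master. The masks below are
 * hypothetical values chosen only for the example.
 */
#if 0
static void example_bond_masks(void)
{
	u32 master_engine = 1u << 1;			/* engine 'signal' ran on */
	u32 bond_siblings = (1u << 2) | (1u << 3);	/* bond->sibling_mask */
	u32 rq_exec = 0xff;				/* bonded rq, unrestricted */
	u32 master_exec = 0xff;				/* master rq, unrestricted */
	u32 allowed = ~master_engine & bond_siblings;

	rq_exec &= allowed;		/* bonded request runs only on the bond */
	master_exec &= ~allowed;	/* master is never re-run on those engines */

	(void)rq_exec;
	(void)master_exec;
}
#endif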
5805 | |
5806 | struct intel_context * |
5807 | intel_execlists_create_virtual(struct intel_engine_cs **siblings, |
5808 | unsigned int count) |
5809 | { |
5810 | struct virtual_engine *ve; |
5811 | unsigned int n; |
5812 | int err; |
5813 | |
5814 | if (count == 0) |
5815 | return ERR_PTR(-EINVAL22); |
5816 | |
5817 | if (count == 1) |
5818 | return intel_context_create(siblings[0]); |
5819 | |
5820 | ve = kzalloc(struct_size(ve, siblings, count)(sizeof(*(ve)) + ((count) * (sizeof(*(ve)->siblings)))), GFP_KERNEL(0x0001 | 0x0004)); |
5821 | if (!ve) |
5822 | return ERR_PTR(-ENOMEM12); |
5823 | |
5824 | ve->base.i915 = siblings[0]->i915; |
5825 | ve->base.gt = siblings[0]->gt; |
5826 | ve->base.uncore = siblings[0]->uncore; |
5827 | ve->base.id = -1; |
5828 | |
5829 | ve->base.class = OTHER_CLASS4; |
5830 | ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; |
5831 | ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL-2; |
5832 | ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL-2; |
5833 | |
5834 | /* |
5835 | * The decision on whether to submit a request using semaphores |
5836 | * depends on the saturated state of the engine. We only compute |
5837 | * this during HW submission of the request, and we need for this |
5838 | * state to be globally applied to all requests being submitted |
5839 | * to this engine. Virtual engines encompass more than one physical |
5840 | * engine and so we cannot accurately tell in advance if one of those |
5841 | * engines is already saturated and so cannot afford to use a semaphore |
5842 | * and be pessimized in priority for doing so -- if we are the only |
5843 | * context using semaphores after all other clients have stopped, we |
5844 | * will be starved on the saturated system. Such a global switch for |
5845 | * semaphores is less than ideal, but alas is the current compromise. |
5846 | */ |
5847 | ve->base.saturated = ALL_ENGINES((intel_engine_mask_t)~0ul); |
5848 | |
5849 | snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); |
5850 | |
5851 | intel_engine_init_active(&ve->base, ENGINE_VIRTUAL2); |
5852 | intel_engine_init_execlists(&ve->base); |
5853 | |
5854 | ve->base.cops = &virtual_context_ops; |
5855 | ve->base.request_alloc = execlists_request_alloc; |
5856 | |
5857 | ve->base.schedule = i915_schedule; |
5858 | ve->base.submit_request = virtual_submit_request; |
5859 | ve->base.bond_execute = virtual_bond_execute; |
5860 | |
5861 | INIT_LIST_HEAD(virtual_queue(ve)); |
5862 | ve->base.execlists.queue_priority_hint = INT_MIN(-0x7fffffff-1); |
5863 | tasklet_init(&ve->base.execlists.tasklet, |
5864 | virtual_submission_tasklet, |
5865 | (unsigned long)ve); |
5866 | |
5867 | intel_context_init(&ve->context, &ve->base); |
5868 | |
5869 | ve->base.breadcrumbs = intel_breadcrumbs_create(NULL((void *)0)); |
5870 | if (!ve->base.breadcrumbs) { |
5871 | err = -ENOMEM12; |
5872 | goto err_put; |
5873 | } |
5874 | |
5875 | for (n = 0; n < count; n++) { |
5876 | struct intel_engine_cs *sibling = siblings[n]; |
5877 | |
5878 | GEM_BUG_ON(!is_power_of_2(sibling->mask))((void)0); |
5879 | if (sibling->mask & ve->base.mask) { |
5880 | DRM_DEBUG("duplicate %s entry in load balancer\n",
5881 | sibling->name);
5882 | err = -EINVAL22; |
5883 | goto err_put; |
5884 | } |
5885 | |
5886 | /* |
5887 | * The virtual engine implementation is tightly coupled to |
5888 | * the execlists backend -- we push requests directly
5889 | * into a tree inside each physical engine. We could support |
5890 | * layering if we handle cloning of the requests and |
5891 | * submitting a copy into each backend. |
5892 | */ |
5893 | if (sibling->execlists.tasklet.func != |
5894 | execlists_submission_tasklet) { |
5895 | err = -ENODEV19; |
5896 | goto err_put; |
5897 | } |
5898 | |
5899 | GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb))((void)0); |
5900 | RB_CLEAR_NODE(&ve->nodes[sibling->id].rb)(((&ve->nodes[sibling->id].rb))->__entry.rbe_parent = (&ve->nodes[sibling->id].rb)); |
5901 | |
5902 | ve->siblings[ve->num_siblings++] = sibling; |
5903 | ve->base.mask |= sibling->mask; |
5904 | |
5905 | /* |
5906 | * All physical engines must be compatible for their emission |
5907 | * functions (as we build the instructions during request |
5908 | * construction and do not alter them before submission |
5909 | * on the physical engine). We use the engine class as a guide |
5910 | * here, although that could be refined. |
5911 | */ |
5912 | if (ve->base.class != OTHER_CLASS4) { |
5913 | if (ve->base.class != sibling->class) { |
5914 | DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
5915 | sibling->class, ve->base.class);
5916 | err = -EINVAL22; |
5917 | goto err_put; |
5918 | } |
5919 | continue; |
5920 | } |
5921 | |
5922 | ve->base.class = sibling->class; |
5923 | ve->base.uabi_class = sibling->uabi_class; |
5924 | snprintf(ve->base.name, sizeof(ve->base.name), |
5925 | "v%dx%d", ve->base.class, count); |
5926 | ve->base.context_size = sibling->context_size; |
5927 | |
5928 | ve->base.emit_bb_start = sibling->emit_bb_start; |
5929 | ve->base.emit_flush = sibling->emit_flush; |
5930 | ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; |
5931 | ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; |
5932 | ve->base.emit_fini_breadcrumb_dw = |
5933 | sibling->emit_fini_breadcrumb_dw; |
5934 | |
5935 | ve->base.flags = sibling->flags; |
5936 | } |
5937 | |
5938 | ve->base.flags |= I915_ENGINE_IS_VIRTUAL(1UL << (6)); |
5939 | |
5940 | virtual_engine_initial_hint(ve); |
5941 | return &ve->context; |
5942 | |
5943 | err_put: |
5944 | intel_context_put(&ve->context); |
5945 | return ERR_PTR(err); |
5946 | } |
5947 | |
5948 | struct intel_context * |
5949 | intel_execlists_clone_virtual(struct intel_engine_cs *src) |
5950 | { |
5951 | struct virtual_engine *se = to_virtual_engine(src); |
5952 | struct intel_context *dst; |
5953 | |
5954 | dst = intel_execlists_create_virtual(se->siblings, |
5955 | se->num_siblings); |
5956 | if (IS_ERR(dst)) |
5957 | return dst; |
5958 | |
5959 | if (se->num_bonds) { |
5960 | struct virtual_engine *de = to_virtual_engine(dst->engine); |
5961 | |
5962 | de->bonds = kmemdup(se->bonds, |
5963 | sizeof(*se->bonds) * se->num_bonds, |
5964 | GFP_KERNEL(0x0001 | 0x0004)); |
5965 | if (!de->bonds) { |
5966 | intel_context_put(dst); |
5967 | return ERR_PTR(-ENOMEM12); |
5968 | } |
5969 | |
5970 | de->num_bonds = se->num_bonds; |
5971 | } |
5972 | |
5973 | return dst; |
5974 | } |
5975 | |
5976 | int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, |
5977 | const struct intel_engine_cs *master, |
5978 | const struct intel_engine_cs *sibling) |
5979 | { |
5980 | struct virtual_engine *ve = to_virtual_engine(engine); |
5981 | struct ve_bond *bond; |
5982 | int n; |
5983 | |
5984 | /* Sanity check the sibling is part of the virtual engine */ |
5985 | for (n = 0; n < ve->num_siblings; n++) |
5986 | if (sibling == ve->siblings[n]) |
5987 | break; |
5988 | if (n == ve->num_siblings) |
5989 | return -EINVAL22; |
5990 | |
5991 | bond = virtual_find_bond(ve, master); |
5992 | if (bond) { |
5993 | bond->sibling_mask |= sibling->mask; |
5994 | return 0; |
5995 | } |
5996 | |
5997 | #ifdef __linux__ |
5998 | bond = krealloc(ve->bonds, |
5999 | sizeof(*bond) * (ve->num_bonds + 1), |
6000 | GFP_KERNEL(0x0001 | 0x0004)); |
6001 | if (!bond) |
6002 | return -ENOMEM12; |
6003 | #else |
6004 | bond = kmalloc(sizeof(*bond) * (ve->num_bonds + 1), |
6005 | GFP_KERNEL(0x0001 | 0x0004)); |
6006 | if (!bond) |
6007 | return -ENOMEM12; |
6008 | |
6009 | memcpy(bond, ve->bonds, sizeof(*bond) * ve->num_bonds)__builtin_memcpy((bond), (ve->bonds), (sizeof(*bond) * ve-> num_bonds)); |
6010 | kfree(ve->bonds); |
6011 | #endif |
6012 | |
6013 | bond[ve->num_bonds].master = master; |
6014 | bond[ve->num_bonds].sibling_mask = sibling->mask; |
6015 | |
6016 | ve->bonds = bond; |
6017 | ve->num_bonds++; |
6018 | |
6019 | return 0; |
6020 | } |
6021 | |
6022 | struct intel_engine_cs * |
6023 | intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, |
6024 | unsigned int sibling) |
6025 | { |
6026 | struct virtual_engine *ve = to_virtual_engine(engine); |
6027 | |
6028 | if (sibling >= ve->num_siblings) |
6029 | return NULL((void *)0); |
6030 | |
6031 | return ve->siblings[sibling]; |
6032 | } |
6033 | |
6034 | void intel_execlists_show_requests(struct intel_engine_cs *engine, |
6035 | struct drm_printer *m, |
6036 | void (*show_request)(struct drm_printer *m, |
6037 | struct i915_request *rq, |
6038 | const char *prefix), |
6039 | unsigned int max) |
6040 | { |
6041 | const struct intel_engine_execlists *execlists = &engine->execlists; |
6042 | struct i915_request *rq, *last; |
6043 | unsigned long flags; |
6044 | unsigned int count; |
6045 | struct rb_node *rb; |
6046 | |
6047 | spin_lock_irqsave(&engine->active.lock, flags)do { flags = 0; mtx_enter(&engine->active.lock); } while (0); |
6048 | |
6049 | last = NULL((void *)0); |
6050 | count = 0; |
6051 | list_for_each_entry(rq, &engine->active.requests, sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link ) *__mptr = ((&engine->active.requests)->next); (__typeof (*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched .link) );}); &rq->sched.link != (&engine->active .requests); rq = ({ const __typeof( ((__typeof(*rq) *)0)-> sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq ) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched .link) );})) { |
6052 | if (count++ < max - 1) |
6053 | show_request(m, rq, "\t\tE "); |
6054 | else |
6055 | last = rq; |
6056 | } |
6057 | if (last) { |
6058 | if (count > max) { |
6059 | drm_printf(m, |
6060 | "\t\t...skipping %d executing requests...\n", |
6061 | count - max); |
6062 | } |
6063 | show_request(m, last, "\t\tE "); |
6064 | } |
6065 | |
6066 | if (execlists->switch_priority_hint != INT_MIN(-0x7fffffff-1)) |
6067 | drm_printf(m, "\t\tSwitch priority hint: %d\n", |
6068 | READ_ONCE(execlists->switch_priority_hint)({ typeof(execlists->switch_priority_hint) __tmp = *(volatile typeof(execlists->switch_priority_hint) *)&(execlists ->switch_priority_hint); membar_datadep_consumer(); __tmp; })); |
6069 | if (execlists->queue_priority_hint != INT_MIN(-0x7fffffff-1)) |
6070 | drm_printf(m, "\t\tQueue priority hint: %d\n", |
6071 | READ_ONCE(execlists->queue_priority_hint)({ typeof(execlists->queue_priority_hint) __tmp = *(volatile typeof(execlists->queue_priority_hint) *)&(execlists-> queue_priority_hint); membar_datadep_consumer(); __tmp; })); |
6072 | |
6073 | last = NULL((void *)0); |
6074 | count = 0; |
6075 | for (rb = rb_first_cached(&execlists->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->queue)->rb_root), -1); rb; rb = rb_next(rb)linux_root_RB_NEXT((rb))) { |
6076 | struct i915_priolist *p = rb_entry(rb, typeof(*p), node)({ const __typeof( ((typeof(*p) *)0)->node ) *__mptr = (rb ); (typeof(*p) *)( (char *)__mptr - __builtin_offsetof(typeof (*p), node) );}); |
6077 | int i; |
6078 | |
6079 | priolist_for_each_request(rq, p, i)for (i = 0; i < (sizeof(((p)->requests)) / sizeof(((p)-> requests)[0])); i++) for (rq = ({ const __typeof( ((__typeof( *rq) *)0)->sched.link ) *__mptr = ((&(p)->requests[ i])->next); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof (__typeof(*rq), sched.link) );}); &rq->sched.link != ( &(p)->requests[i]); rq = ({ const __typeof( ((__typeof (*rq) *)0)->sched.link ) *__mptr = (rq->sched.link.next ); (__typeof(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof (*rq), sched.link) );})) { |
6080 | if (count++ < max - 1) |
6081 | show_request(m, rq, "\t\tQ "); |
6082 | else |
6083 | last = rq; |
6084 | } |
6085 | } |
6086 | if (last) { |
6087 | if (count > max) { |
6088 | drm_printf(m, |
6089 | "\t\t...skipping %d queued requests...\n", |
6090 | count - max); |
6091 | } |
6092 | show_request(m, last, "\t\tQ "); |
6093 | } |
6094 | |
6095 | last = NULL((void *)0); |
6096 | count = 0; |
6097 | for (rb = rb_first_cached(&execlists->virtual)linux_root_RB_MINMAX((struct linux_root *)(&(&execlists ->virtual)->rb_root), -1); rb; rb = rb_next(rb)linux_root_RB_NEXT((rb))) { |
6098 | struct virtual_engine *ve = |
6099 | rb_entry(rb, typeof(*ve), nodes[engine->id].rb)({ const __typeof( ((typeof(*ve) *)0)->nodes[engine->id ].rb ) *__mptr = (rb); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof (typeof(*ve), nodes[engine->id].rb) );}); |
6100 | struct i915_request *rq = READ_ONCE(ve->request)({ typeof(ve->request) __tmp = *(volatile typeof(ve->request ) *)&(ve->request); membar_datadep_consumer(); __tmp; } ); |
6101 | |
6102 | if (rq) { |
6103 | if (count++ < max - 1) |
6104 | show_request(m, rq, "\t\tV "); |
6105 | else |
6106 | last = rq; |
6107 | } |
6108 | } |
6109 | if (last) { |
6110 | if (count > max) { |
6111 | drm_printf(m, |
6112 | "\t\t...skipping %d virtual requests...\n", |
6113 | count - max); |
6114 | } |
6115 | show_request(m, last, "\t\tV "); |
6116 | } |
6117 | |
6118 | spin_unlock_irqrestore(&engine->active.lock, flags)do { (void)(flags); mtx_leave(&engine->active.lock); } while (0); |
6119 | } |
6120 | |
6121 | void intel_lr_context_reset(struct intel_engine_cs *engine, |
6122 | struct intel_context *ce, |
6123 | u32 head, |
6124 | bool_Bool scrub) |
6125 | { |
6126 | GEM_BUG_ON(!intel_context_is_pinned(ce))((void)0); |
6127 | |
6128 | /* |
6129 | * We want a simple context + ring to execute the breadcrumb update. |
6130 | * We cannot rely on the context being intact across the GPU hang, |
6131 | * so clear it and rebuild just what we need for the breadcrumb. |
6132 | * All pending requests for this context will be zapped, and any |
6133 | * future request will be after userspace has had the opportunity |
6134 | * to recreate its own state. |
6135 | */ |
6136 | if (scrub) |
6137 | restore_default_state(ce, engine); |
6138 | |
6139 | /* Rerun the request; its payload has been neutered (if guilty). */ |
6140 | __execlists_update_reg_state(ce, engine, head); |
6141 | } |
6142 | |
6143 | bool_Bool |
6144 | intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) |
6145 | { |
6146 | return engine->set_default_submission == |
6147 | intel_execlists_set_default_submission; |
6148 | } |
6149 | |
6150 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0 |
6151 | #include "selftest_lrc.c" |
6152 | #endif |