File: dev/pci/drm/i915/i915_request.c
Warning: line 809, column 2: Value stored to 'rq' is never read
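Note: "Value stored to ... is never read" comes from the clang static
analyzer's dead-store checker (deadcode.DeadStores). It fires when an
assignment's result cannot be observed on any subsequent path. A minimal,
hypothetical illustration of the pattern (not driver code):

	int dead_store_example(int seed)
	{
		int x = seed * 2;	/* flagged: value stored to 'x' is never read */
		x = 0;			/* overwritten before any use of the old value */
		return x;
	}

The flagged assignment in this file is annotated below at line 809; see the
note after request_alloc_slow() for why the store is likely dead only on
this port.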
1 | /* |
2 | * Copyright © 2008-2015 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/dma-fence-array.h> |
26 | #include <linux/dma-fence-chain.h> |
27 | #include <linux/irq_work.h> |
28 | #include <linux/prefetch.h> |
29 | #include <linux/sched.h> |
30 | #include <linux/sched/clock.h> |
31 | #include <linux/sched/signal.h> |
32 | |
33 | #include "gem/i915_gem_context.h" |
34 | #include "gt/intel_breadcrumbs.h" |
35 | #include "gt/intel_context.h" |
36 | #include "gt/intel_ring.h" |
37 | #include "gt/intel_rps.h" |
38 | |
39 | #include "i915_active.h" |
40 | #include "i915_drv.h" |
41 | #include "i915_globals.h" |
42 | #include "i915_trace.h" |
43 | #include "intel_pm.h" |
44 | |
45 | struct execute_cb { |
46 | struct irq_work work; |
47 | struct i915_sw_fence *fence; |
48 | void (*hook)(struct i915_request *rq, struct dma_fence *signal); |
49 | struct i915_request *signal; |
50 | }; |
51 | |
52 | static struct i915_global_request { |
53 | struct i915_global base; |
54 | #ifdef __linux__ |
55 | struct kmem_cache *slab_requests; |
56 | struct kmem_cache *slab_execute_cbs; |
57 | #else |
58 | struct pool slab_requests; |
59 | struct pool slab_execute_cbs; |
60 | #endif |
61 | } global; |
62 | |
63 | static const char *i915_fence_get_driver_name(struct dma_fence *fence) |
64 | { |
65 | return dev_name(to_request(fence)->engine->i915->drm.dev); |
66 | } |
67 | |
68 | static const char *i915_fence_get_timeline_name(struct dma_fence *fence) |
69 | { |
70 | const struct i915_gem_context *ctx; |
71 | |
72 | /* |
73 | * The timeline struct (as part of the ppgtt underneath a context) |
74 | * may be freed when the request is no longer in use by the GPU. |
75 | * We could extend the life of a context to beyond that of all |
76 | * fences, possibly keeping the hw resource around indefinitely, |
77 | * or we just give them a false name. Since |
78 | * dma_fence_ops.get_timeline_name is a debug feature, the occasional |
79 | * lie seems justifiable. |
80 | */ |
81 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
82 | return "signaled"; |
83 | |
84 | ctx = i915_request_gem_context(to_request(fence)); |
85 | if (!ctx) |
86 | return "[" DRIVER_NAME"i915" "]"; |
87 | |
88 | return ctx->name; |
89 | } |
90 | |
91 | static bool i915_fence_signaled(struct dma_fence *fence) |
92 | { |
93 | return i915_request_completed(to_request(fence)); |
94 | } |
95 | |
96 | static bool i915_fence_enable_signaling(struct dma_fence *fence) |
97 | { |
98 | return i915_request_enable_breadcrumb(to_request(fence)); |
99 | } |
100 | |
101 | static signed long i915_fence_wait(struct dma_fence *fence, |
102 | bool interruptible, |
103 | signed long timeout) |
104 | { |
105 | return i915_request_wait(to_request(fence), |
106 | interruptible | I915_WAIT_PRIORITY, |
107 | timeout); |
108 | } |
109 | |
110 | #ifdef __linux__ |
111 | struct kmem_cache *i915_request_slab_cache(void) |
112 | { |
113 | return global.slab_requests; |
114 | } |
115 | #else |
116 | struct pool *i915_request_slab_cache(void) |
117 | { |
118 | return &global.slab_requests; |
119 | } |
120 | #endif |
121 | |
122 | static void i915_fence_release(struct dma_fence *fence) |
123 | { |
124 | struct i915_request *rq = to_request(fence); |
125 | |
126 | /* |
127 | * The request is put onto a RCU freelist (i.e. the address |
128 | * is immediately reused), mark the fences as being freed now. |
129 | * Otherwise the debugobjects for the fences are only marked as |
130 | * freed when the slab cache itself is freed, and so we would get |
131 | * caught trying to reuse dead objects. |
132 | */ |
133 | i915_sw_fence_fini(&rq->submit); |
134 | i915_sw_fence_fini(&rq->semaphore); |
135 | |
136 | /* |
137 | * Keep one request on each engine for reserved use under mempressure |
138 | * |
139 | * We do not hold a reference to the engine here and so have to be |
140 | * very careful in what rq->engine we poke. The virtual engine is |
141 | * referenced via the rq->context and we released that ref during |
142 | * i915_request_retire(), ergo we must not dereference a virtual |
143 | * engine here. Not that we would want to, as the only consumer of |
144 | * the reserved engine->request_pool is the power management parking, |
145 | * which must-not-fail, and that is only run on the physical engines. |
146 | * |
147 | * Since the request must have been executed to have completed, |
148 | * we know that it will have been processed by the HW and will |
149 | * not be unsubmitted again, so rq->engine and rq->execution_mask |
150 | * at this point are stable. rq->execution_mask will be a single |
151 | * bit if the last and _only_ engine it could execute on was a |
152 | * physical engine, if it's multiple bits then it started on and |
153 | * could still be on a virtual engine. Thus if the mask is not a |
154 | * power-of-two we assume that rq->engine may still be a virtual |
155 | * engine and so a dangling invalid pointer that we cannot dereference |
156 | * |
157 | * For example, consider the flow of a bonded request through a virtual |
158 | * engine. The request is created with a wide engine mask (all engines |
159 | * that we might execute on). On processing the bond, the request mask |
160 | * is reduced to one or more engines. If the request is subsequently |
161 | * bound to a single engine, it will then be constrained to only |
162 | * execute on that engine and never returned to the virtual engine |
163 | * after timeslicing away, see __unwind_incomplete_requests(). Thus we |
164 | * know that if the rq->execution_mask is a single bit, rq->engine |
165 | * can be a physical engine with the exact corresponding mask. |
166 | */ |
167 | if (is_power_of_2(rq->execution_mask) && |
168 | !cmpxchg(&rq->engine->request_pool, NULL, rq)) |
169 | return; |
170 | |
171 | #ifdef __linux__ |
172 | kmem_cache_free(global.slab_requests, rq); |
173 | #else |
174 | pool_put(&global.slab_requests, rq); |
175 | #endif |
176 | } |
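Note: the is_power_of_2() expansion visible at line 167 reduces to the
classic bit trick: clearing the lowest set bit (mask & (mask - 1)) leaves
zero iff exactly one bit was set, i.e. iff the request could only ever run
on one physical engine. A standalone sketch of that test (hypothetical
helper name, mirroring the expansion above):

	#include <stdbool.h>

	/* true iff mask is nonzero and has exactly one bit set */
	static inline bool mask_is_single_engine(unsigned long mask)
	{
		return mask != 0 && (mask & (mask - 1)) == 0;
	}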
177 | |
178 | const struct dma_fence_ops i915_fence_ops = { |
179 | .get_driver_name = i915_fence_get_driver_name, |
180 | .get_timeline_name = i915_fence_get_timeline_name, |
181 | .enable_signaling = i915_fence_enable_signaling, |
182 | .signaled = i915_fence_signaled, |
183 | .wait = i915_fence_wait, |
184 | .release = i915_fence_release, |
185 | }; |
186 | |
187 | static void irq_execute_cb(struct irq_work *wrk) |
188 | { |
189 | struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
190 | |
191 | i915_sw_fence_complete(cb->fence); |
192 | #ifdef __linux__ |
193 | kmem_cache_free(global.slab_execute_cbs, cb); |
194 | #else |
195 | pool_put(&global.slab_execute_cbs, cb); |
196 | #endif |
197 | } |
198 | |
199 | static void irq_execute_cb_hook(struct irq_work *wrk) |
200 | { |
201 | struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
202 | |
203 | cb->hook(container_of(cb->fence, struct i915_request, submit), |
204 | &cb->signal->fence); |
205 | i915_request_put(cb->signal); |
206 | |
207 | irq_execute_cb(wrk); |
208 | } |
209 | |
210 | static __always_inline void |
211 | __notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) |
212 | { |
213 | struct execute_cb *cb, *cn; |
214 | |
215 | if (llist_empty(&rq->execute_cb)) |
216 | return; |
217 | |
218 | STUB(); |
219 | #ifdef notyet |
220 | llist_for_each_entry_safe(cb, cn, |
221 | llist_del_all(&rq->execute_cb), |
222 | work.llnode) |
223 | fn(&cb->work); |
224 | #endif |
225 | } |
226 | |
227 | static void __notify_execute_cb_irq(struct i915_request *rq) |
228 | { |
229 | __notify_execute_cb(rq, irq_work_queue); |
230 | } |
231 | |
232 | static bool irq_work_imm(struct irq_work *wrk) |
233 | { |
234 | #ifdef notyet |
235 | wrk->func(wrk); |
236 | #else |
237 | STUB(); |
238 | #endif |
239 | return false; |
240 | } |
241 | |
242 | static void __notify_execute_cb_imm(struct i915_request *rq) |
243 | { |
244 | __notify_execute_cb(rq, irq_work_imm); |
245 | } |
246 | |
247 | static void free_capture_list(struct i915_request *request) |
248 | { |
249 | struct i915_capture_list *capture; |
250 | |
251 | capture = fetch_and_zero(&request->capture_list); |
252 | while (capture) { |
253 | struct i915_capture_list *next = capture->next; |
254 | |
255 | kfree(capture); |
256 | capture = next; |
257 | } |
258 | } |
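Note: fetch_and_zero() (expansion at line 251) detaches the capture list by
swapping the head pointer for zero and returning the old value, so the list
can then be walked and freed without touching request state again. As the
expansion shows, it is a GNU C statement expression along these lines (a
sketch; callers must still provide their own serialization, since this is a
plain read-modify-write):

	#define fetch_and_zero(ptr) ({				\
		typeof(*(ptr)) __T = *(ptr);			\
		*(ptr) = (typeof(*(ptr)))0;			\
		__T;						\
	})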
259 | |
260 | static void __i915_request_fill(struct i915_request *rq, u8 val) |
261 | { |
262 | void *vaddr = rq->ring->vaddr; |
263 | u32 head; |
264 | |
265 | head = rq->infix; |
266 | if (rq->postfix < head) { |
267 | memset(vaddr + head, val, rq->ring->size - head); |
268 | head = 0; |
269 | } |
270 | memset(vaddr + head, val, rq->postfix - head); |
271 | } |
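Note: __i915_request_fill() poisons a payload that may wrap around the
circular ring: when rq->postfix < rq->infix the payload straddles the end
of the buffer, so the code fills from infix to the end, resets head to 0,
and then fills the remainder up to postfix. For example, with ring->size =
4096, infix = 3968 and postfix = 128, it writes 128 bytes at offset 3968
and then 128 bytes at offset 0.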
272 | |
273 | static void remove_from_engine(struct i915_request *rq) |
274 | { |
275 | struct intel_engine_cs *engine, *locked; |
276 | |
277 | /* |
278 | * Virtual engines complicate acquiring the engine timeline lock, |
279 | * as their rq->engine pointer is not stable until under that |
280 | * engine lock. The simple ploy we use is to take the lock then |
281 | * check that the rq still belongs to the newly locked engine. |
282 | */ |
283 | locked = READ_ONCE(rq->engine); |
284 | spin_lock_irq(&locked->active.lock); |
285 | while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { |
286 | spin_unlock(&locked->active.lock); |
287 | spin_lock(&engine->active.lock); |
288 | locked = engine; |
289 | } |
290 | list_del_init(&rq->sched.link); |
291 | |
292 | clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); |
293 | clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); |
294 | |
295 | /* Prevent further __await_execution() registering a cb, then flush */ |
296 | set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); |
297 | |
298 | spin_unlock_irq(&locked->active.lock); |
299 | |
300 | __notify_execute_cb_imm(rq); |
301 | } |
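Note: lines 283-289 are a "lock, then revalidate" loop: rq->engine may be
rewritten by the virtual-engine scheduler right up until the engine's
active.lock is held, so the code locks its current guess and loops until
the pointer re-read under the lock matches the lock actually taken. The
idiom in isolation (a sketch reusing the file's types; the irq handling of
the original is elided):

	static struct intel_engine_cs *lock_request_engine(struct i915_request *rq)
	{
		struct intel_engine_cs *locked = READ_ONCE(rq->engine);

		spin_lock(&locked->active.lock);
		while (locked != READ_ONCE(rq->engine)) {
			spin_unlock(&locked->active.lock);
			locked = READ_ONCE(rq->engine);
			spin_lock(&locked->active.lock);
		}
		return locked;	/* caller unlocks locked->active.lock */
	}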
302 | |
303 | bool i915_request_retire(struct i915_request *rq) |
304 | { |
305 | if (!i915_request_completed(rq)) |
306 | return false; |
307 | |
308 | RQ_TRACE(rq, "\n"); |
309 | |
310 | GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); |
311 | trace_i915_request_retire(rq); |
312 | i915_request_mark_complete(rq); |
313 | |
314 | /* |
315 | * We know the GPU must have read the request to have |
316 | * sent us the seqno + interrupt, so use the position |
317 | * of tail of the request to update the last known position |
318 | * of the GPU head. |
319 | * |
320 | * Note this requires that we are always called in request |
321 | * completion order. |
322 | */ |
323 | GEM_BUG_ON(!list_is_first(&rq->link, |
324 | &i915_request_timeline(rq)->requests)); |
325 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
326 | /* Poison before we release our space in the ring */ |
327 | __i915_request_fill(rq, POISON_FREE); |
328 | rq->ring->head = rq->postfix; |
329 | |
330 | if (!i915_request_signaled(rq)) { |
331 | spin_lock_irq(&rq->lock); |
332 | dma_fence_signal_locked(&rq->fence); |
333 | spin_unlock_irq(&rq->lock); |
334 | } |
335 | |
336 | if (i915_request_has_waitboost(rq)) { |
337 | GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); |
338 | atomic_dec(&rq->engine->gt->rps.num_waiters); |
339 | } |
340 | |
341 | /* |
342 | * We only loosely track inflight requests across preemption, |
343 | * and so we may find ourselves attempting to retire a _completed_ |
344 | * request that we have removed from the HW and put back on a run |
345 | * queue. |
346 | * |
347 | * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be |
348 | * after removing the breadcrumb and signaling it, so that we do not |
349 | * inadvertently attach the breadcrumb to a completed request. |
350 | */ |
351 | remove_from_engine(rq); |
352 | GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
353 | |
354 | __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ |
355 | |
356 | intel_context_exit(rq->context); |
357 | intel_context_unpin(rq->context); |
358 | |
359 | free_capture_list(rq); |
360 | i915_sched_node_fini(&rq->sched); |
361 | i915_request_put(rq); |
362 | |
363 | return true; |
364 | } |
365 | |
366 | void i915_request_retire_upto(struct i915_request *rq) |
367 | { |
368 | struct intel_timeline * const tl = i915_request_timeline(rq); |
369 | struct i915_request *tmp; |
370 | |
371 | RQ_TRACE(rq, "\n"); |
372 | |
373 | GEM_BUG_ON(!i915_request_completed(rq)); |
374 | |
375 | do { |
376 | tmp = list_first_entry(&tl->requests, typeof(*tmp), link); |
377 | } while (i915_request_retire(tmp) && tmp != rq); |
378 | } |
379 | |
380 | static struct i915_request * const * |
381 | __engine_active(struct intel_engine_cs *engine) |
382 | { |
383 | return READ_ONCE(engine->execlists.active); |
384 | } |
385 | |
386 | static bool __request_in_flight(const struct i915_request *signal) |
387 | { |
388 | struct i915_request * const *port, *rq; |
389 | bool inflight = false; |
390 | |
391 | if (!i915_request_is_ready(signal)) |
392 | return false; |
393 | |
394 | /* |
395 | * Even if we have unwound the request, it may still be on |
396 | * the GPU (preempt-to-busy). If that request is inside an |
397 | * unpreemptible critical section, it will not be removed. Some |
398 | * GPU functions may even be stuck waiting for the paired request |
399 | * (__await_execution) to be submitted and cannot be preempted |
400 | * until the bond is executing. |
401 | * |
402 | * As we know that there are always preemption points between |
403 | * requests, we know that only the currently executing request |
404 | * may be still active even though we have cleared the flag. |
405 | * However, we can't rely on our tracking of ELSP[0] to know |
406 | * which request is currently active and so may be stuck, as |
407 | * the tracking may be an event behind. Instead assume that |
408 | * if the context is still inflight, then it is still active |
409 | * even if the active flag has been cleared. |
410 | * |
411 | * To further complicate matters, if there is a pending promotion, the HW |
412 | * may either perform a context switch to the second inflight execlists, |
413 | * or it may switch to the pending set of execlists. In the case of the |
414 | * latter, it may send the ACK and we process the event copying the |
415 | * pending[] over top of inflight[], _overwriting_ our *active. Since |
416 | * this implies the HW is arbitrating and not stuck in *active, we do |
417 | * not worry about complete accuracy, but we do require no read/write |
418 | * tearing of the pointer [the read of the pointer must be valid, even |
419 | * as the array is being overwritten, for which we require the writes |
420 | * to avoid tearing.] |
421 | * |
422 | * Note that the read of *execlists->active may race with the promotion |
423 | * of execlists->pending[] to execlists->inflight[], overwriting |
424 | * the value at *execlists->active. This is fine. The promotion implies |
425 | * that we received an ACK from the HW, and so the context is not |
426 | * stuck -- if we do not see ourselves in *active, the inflight status |
427 | * is valid. If instead we see ourselves being copied into *active, |
428 | * we are inflight and may signal the callback. |
429 | */ |
430 | if (!intel_context_inflight(signal->context)) |
431 | return false; |
432 | |
433 | rcu_read_lock(); |
434 | for (port = __engine_active(signal->engine); |
435 | (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */ |
436 | port++) { |
437 | if (rq->context == signal->context) { |
438 | inflight = i915_seqno_passed(rq->fence.seqno, |
439 | signal->fence.seqno); |
440 | break; |
441 | } |
442 | } |
443 | rcu_read_unlock(); |
444 | |
445 | return inflight; |
446 | } |
447 | |
448 | static int |
449 | __await_execution(struct i915_request *rq, |
450 | struct i915_request *signal, |
451 | void (*hook)(struct i915_request *rq, |
452 | struct dma_fence *signal), |
453 | gfp_t gfp) |
454 | { |
455 | STUB(); |
456 | return -ENOSYS; |
457 | #ifdef notyet |
458 | struct execute_cb *cb; |
459 | |
460 | if (i915_request_is_active(signal)) { |
461 | if (hook) |
462 | hook(rq, &signal->fence); |
463 | return 0; |
464 | } |
465 | |
466 | #ifdef __linux__ |
467 | cb = kmem_cache_alloc(global.slab_execute_cbs, gfp); |
468 | #else |
469 | cb = pool_get(&global.slab_execute_cbs, |
470 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
471 | #endif |
472 | if (!cb) |
473 | return -ENOMEM; |
474 | |
475 | cb->fence = &rq->submit; |
476 | i915_sw_fence_await(cb->fence); |
477 | init_irq_work(&cb->work, irq_execute_cb); |
478 | |
479 | if (hook) { |
480 | cb->hook = hook; |
481 | cb->signal = i915_request_get(signal); |
482 | #ifdef __linux__ |
483 | cb->work.func = irq_execute_cb_hook; |
484 | #else |
485 | init_irq_work(&cb->work, irq_execute_cb_hook); |
486 | #endif |
487 | } |
488 | |
489 | /* |
490 | * Register the callback first, then see if the signaler is already |
491 | * active. This ensures that if we race with the |
492 | * __notify_execute_cb from i915_request_submit() and we are not |
493 | * included in that list, we get a second bite of the cherry and |
494 | * execute it ourselves. After this point, a future |
495 | * i915_request_submit() will notify us. |
496 | * |
497 | * In i915_request_retire() we set the ACTIVE bit on a completed |
498 | * request (then flush the execute_cb). So by registering the |
499 | * callback first, then checking the ACTIVE bit, we serialise with |
500 | * the completed/retired request. |
501 | */ |
502 | if (llist_add(&cb->work.llnode, &signal->execute_cb)) { |
503 | if (i915_request_is_active(signal) || |
504 | __request_in_flight(signal)) |
505 | __notify_execute_cb_imm(signal); |
506 | } |
507 | |
508 | return 0; |
509 | #endif |
510 | } |
511 | |
512 | static bool fatal_error(int error) |
513 | { |
514 | switch (error) { |
515 | case 0: /* not an error! */ |
516 | case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */ |
517 | case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */ |
518 | return false; |
519 | default: |
520 | return true; |
521 | } |
522 | } |
523 | |
524 | void __i915_request_skip(struct i915_request *rq) |
525 | { |
526 | GEM_BUG_ON(!fatal_error(rq->fence.error)); |
527 | |
528 | if (rq->infix == rq->postfix) |
529 | return; |
530 | |
531 | /* |
532 | * As this request likely depends on state from the lost |
533 | * context, clear out all the user operations leaving the |
534 | * breadcrumb at the end (so we get the fence notifications). |
535 | */ |
536 | __i915_request_fill(rq, 0); |
537 | rq->infix = rq->postfix; |
538 | } |
539 | |
540 | void i915_request_set_error_once(struct i915_request *rq, int error) |
541 | { |
542 | int old; |
543 | |
544 | GEM_BUG_ON(!IS_ERR_VALUE((long)error)); |
545 | |
546 | if (i915_request_signaled(rq)) |
547 | return; |
548 | |
549 | old = READ_ONCE(rq->fence.error); |
550 | do { |
551 | if (fatal_error(old)) |
552 | return; |
553 | } while (!try_cmpxchg(&rq->fence.error, &old, error)); |
554 | } |
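Note: the try_cmpxchg() loop (expansion at line 553) implements a lock-free
"set once, unless already fatal" policy: reload the observed value after
each failed compare-and-swap, and give up as soon as a fatal code is seen.
A self-contained sketch using the same GCC builtin the expansion relies on
(hypothetical helper; the fatal check mirrors fatal_error() above):

	#include <errno.h>

	static void set_error_once(int *slot, int error)
	{
		int old = *slot;

		for (;;) {
			int seen;

			if (old != 0 && old != -EAGAIN && old != -ETIMEDOUT)
				return;		/* a fatal error is already recorded */
			seen = __sync_val_compare_and_swap(slot, old, error);
			if (seen == old)
				return;		/* CAS succeeded, error installed */
			old = seen;		/* raced: retry against the new value */
		}
	}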
555 | |
556 | bool __i915_request_submit(struct i915_request *request) |
557 | { |
558 | struct intel_engine_cs *engine = request->engine; |
559 | bool result = false; |
560 | |
561 | RQ_TRACE(request, "\n"); |
562 | |
563 | GEM_BUG_ON(!irqs_disabled()); |
564 | lockdep_assert_held(&engine->active.lock); |
565 | |
566 | /* |
567 | * With the advent of preempt-to-busy, we frequently encounter |
568 | * requests that we have unsubmitted from HW, but left running |
569 | * until the next ack and so have completed in the meantime. On |
570 | * resubmission of that completed request, we can skip |
571 | * updating the payload, and execlists can even skip submitting |
572 | * the request. |
573 | * |
574 | * We must remove the request from the caller's priority queue, |
575 | * and the caller must only call us when the request is in their |
576 | * priority queue, under the active.lock. This ensures that the |
577 | * request has *not* yet been retired and we can safely move |
578 | * the request into the engine->active.list where it will be |
579 | * dropped upon retiring. (Otherwise if we resubmit a *retired* |
580 | * request, this would be a horrible use-after-free.) |
581 | */ |
582 | if (i915_request_completed(request)) |
583 | goto xfer; |
584 | |
585 | if (unlikely(intel_context_is_closed(request->context) && |
586 | !intel_engine_has_heartbeat(engine))) |
587 | intel_context_set_banned(request->context); |
588 | |
589 | if (unlikely(intel_context_is_banned(request->context))) |
590 | i915_request_set_error_once(request, -EIO); |
591 | |
592 | if (unlikely(fatal_error(request->fence.error))) |
593 | __i915_request_skip(request); |
594 | |
595 | /* |
596 | * Are we using semaphores when the gpu is already saturated? |
597 | * |
598 | * Using semaphores incurs a cost in having the GPU poll a |
599 | * memory location, busywaiting for it to change. The continual |
600 | * memory reads can have a noticeable impact on the rest of the |
601 | * system with the extra bus traffic, stalling the cpu as it too |
602 | * tries to access memory across the bus (perf stat -e bus-cycles). |
603 | * |
604 | * If we installed a semaphore on this request and we only submit |
605 | * the request after the signaler completed, that indicates the |
606 | * system is overloaded and using semaphores at this time only |
607 | * increases the amount of work we are doing. If so, we disable |
608 | * further use of semaphores until we are idle again, whence we |
609 | * optimistically try again. |
610 | */ |
611 | if (request->sched.semaphores && |
612 | i915_sw_fence_signaled(&request->semaphore)) |
613 | engine->saturated |= request->sched.semaphores; |
614 | |
615 | engine->emit_fini_breadcrumb(request, |
616 | request->ring->vaddr + request->postfix); |
617 | |
618 | trace_i915_request_execute(request); |
619 | engine->serial++; |
620 | result = true; |
621 | |
622 | xfer: |
623 | if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)) { |
624 | list_move_tail(&request->sched.link, &engine->active.requests); |
625 | clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); |
626 | } |
627 | |
628 | /* |
629 | * XXX Rollback bonded-execution on __i915_request_unsubmit()? |
630 | * |
631 | * In the future, perhaps when we have an active time-slicing scheduler, |
632 | * it will be interesting to unsubmit parallel execution and remove |
633 | * busywaits from the GPU until their master is restarted. This is |
634 | * quite hairy, we have to carefully rollback the fence and do a |
635 | * preempt-to-idle cycle on the target engine, all the while the |
636 | * master execute_cb may refire. |
637 | */ |
638 | __notify_execute_cb_irq(request); |
639 | |
640 | /* We may be recursing from the signal callback of another i915 fence */ |
641 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
642 | i915_request_enable_breadcrumb(request); |
643 | |
644 | return result; |
645 | } |
646 | |
647 | void i915_request_submit(struct i915_request *request) |
648 | { |
649 | struct intel_engine_cs *engine = request->engine; |
650 | unsigned long flags; |
651 | |
652 | /* Will be called from irq-context when using foreign fences. */ |
653 | spin_lock_irqsave(&engine->active.lock, flags); |
654 | |
655 | __i915_request_submit(request); |
656 | |
657 | spin_unlock_irqrestore(&engine->active.lock, flags); |
658 | } |
659 | |
660 | void __i915_request_unsubmit(struct i915_request *request) |
661 | { |
662 | struct intel_engine_cs *engine = request->engine; |
663 | |
664 | /* |
665 | * Only unwind in reverse order, required so that the per-context list |
666 | * is kept in seqno/ring order. |
667 | */ |
668 | RQ_TRACE(request, "\n"); |
669 | |
670 | GEM_BUG_ON(!irqs_disabled()); |
671 | lockdep_assert_held(&engine->active.lock); |
672 | |
673 | /* |
674 | * Before we remove this breadcrumb from the signal list, we have |
675 | * to ensure that a concurrent dma_fence_enable_signaling() does not |
676 | * attach itself. We first mark the request as no longer active and |
677 | * make sure that is visible to other cores, and then remove the |
678 | * breadcrumb if attached. |
679 | */ |
680 | GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); |
681 | clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); |
682 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
683 | i915_request_cancel_breadcrumb(request); |
684 | |
685 | /* We've already spun, don't charge on resubmitting. */ |
686 | if (request->sched.semaphores && i915_request_started(request)) |
687 | request->sched.semaphores = 0; |
688 | |
689 | /* |
690 | * We don't need to wake_up any waiters on request->execute, they |
691 | * will get woken by any other event or us re-adding this request |
692 | * to the engine timeline (__i915_request_submit()). The waiters |
693 | * should be quite adept at finding that the request now has a new |
694 | * global_seqno compared to the one they went to sleep on. |
695 | */ |
696 | } |
697 | |
698 | void i915_request_unsubmit(struct i915_request *request) |
699 | { |
700 | struct intel_engine_cs *engine = request->engine; |
701 | unsigned long flags; |
702 | |
703 | /* Will be called from irq-context when using foreign fences. */ |
704 | spin_lock_irqsave(&engine->active.lock, flags); |
705 | |
706 | __i915_request_unsubmit(request); |
707 | |
708 | spin_unlock_irqrestore(&engine->active.lock, flags); |
709 | } |
710 | |
711 | static int __i915_sw_fence_call |
712 | submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) |
713 | { |
714 | struct i915_request *request = |
715 | container_of(fence, typeof(*request), submit); |
716 | |
717 | switch (state) { |
718 | case FENCE_COMPLETE: |
719 | trace_i915_request_submit(request); |
720 | |
721 | if (unlikely(fence->error)) |
722 | i915_request_set_error_once(request, fence->error); |
723 | |
724 | /* |
725 | * We need to serialize use of the submit_request() callback |
726 | * with its hotplugging performed during an emergency |
727 | * i915_gem_set_wedged(). We use the RCU mechanism to mark the |
728 | * critical section in order to force i915_gem_set_wedged() to |
729 | * wait until the submit_request() is completed before |
730 | * proceeding. |
731 | */ |
732 | rcu_read_lock(); |
733 | request->engine->submit_request(request); |
734 | rcu_read_unlock(); |
735 | break; |
736 | |
737 | case FENCE_FREE: |
738 | i915_request_put(request); |
739 | break; |
740 | } |
741 | |
742 | return NOTIFY_DONE; |
743 | } |
744 | |
745 | static int __i915_sw_fence_call |
746 | semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) |
747 | { |
748 | struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); |
749 | |
750 | switch (state) { |
751 | case FENCE_COMPLETE: |
752 | break; |
753 | |
754 | case FENCE_FREE: |
755 | i915_request_put(rq); |
756 | break; |
757 | } |
758 | |
759 | return NOTIFY_DONE; |
760 | } |
761 | |
762 | static void retire_requests(struct intel_timeline *tl) |
763 | { |
764 | struct i915_request *rq, *rn; |
765 | |
766 | list_for_each_entry_safe(rq, rn, &tl->requests, link) |
767 | if (!i915_request_retire(rq)) |
768 | break; |
769 | } |
770 | |
771 | static void __i915_request_ctor(void *); |
772 | |
773 | static noinline struct i915_request * |
774 | request_alloc_slow(struct intel_timeline *tl, |
775 | struct i915_request **rsvd, |
776 | gfp_t gfp) |
777 | { |
778 | struct i915_request *rq; |
779 | |
780 | /* If we cannot wait, dip into our reserves */ |
781 | if (!gfpflags_allow_blocking(gfp)) { |
782 | rq = xchg(rsvd, NULL); |
783 | if (!rq) /* Use the normal failure path for one final WARN */ |
784 | goto out; |
785 | |
786 | return rq; |
787 | } |
788 | |
789 | if (list_empty(&tl->requests)) |
790 | goto out; |
791 | |
792 | /* Move our oldest request to the slab-cache (if not in use!) */ |
793 | rq = list_first_entry(&tl->requests, typeof(*rq), link); |
794 | i915_request_retire(rq); |
795 | |
796 | #ifdef __linux__ |
797 | rq = kmem_cache_alloc(global.slab_requests, |
798 | gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
799 | #else |
800 | rq = pool_get(&global.slab_requests, |
801 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
802 | if (rq) |
803 | __i915_request_ctor(rq); |
804 | #endif |
805 | if (rq) |
806 | return rq; |
807 | |
808 | /* Ratelimit ourselves to prevent oom from malicious clients */ |
809 | rq = list_last_entry(&tl->requests, typeof(*rq), link); |
    Value stored to 'rq' is never read
810 | cond_synchronize_rcu(rq->rcustate); |
811 | |
812 | /* Retire our old requests in the hope that we free some */ |
813 | retire_requests(tl); |
814 | |
815 | out: |
816 | #ifdef __linux__ |
817 | return kmem_cache_alloc(global.slab_requests, gfp); |
818 | #else |
819 | rq = pool_get(&global.slab_requests, |
820 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
821 | if (rq) |
822 | __i915_request_ctor(rq); |
823 | return rq; |
824 | #endif |
825 | } |
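Note: on the warning itself -- in the Linux source the store at line 809 is
consumed by the very next statement, cond_synchronize_rcu(rq->rcustate). On
this OpenBSD port, however, the RCU grace-period API appears to be stubbed
out (compare line 921, where get_state_synchronize_rcu() expands to a bare
0), so if cond_synchronize_rcu() likewise expands to nothing, the
assignment's value is indeed never read after preprocessing and the
analyzer's report is an artifact of the port rather than a logic error. The
retire_requests(tl) rate-limiting that follows is unaffected.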
826 | |
827 | static void __i915_request_ctor(void *arg) |
828 | { |
829 | struct i915_request *rq = arg; |
830 | |
831 | /* |
832 | * witness does not understand spin_lock_nested() |
833 | * order reversal in i915 with this lock |
834 | */ |
835 | mtx_init_flags(&rq->lock, IPL_TTY, NULL, MTX_NOWITNESS); |
836 | i915_sched_node_init(&rq->sched); |
837 | i915_sw_fence_init(&rq->submit, submit_notify); |
838 | i915_sw_fence_init(&rq->semaphore, semaphore_notify); |
839 | |
840 | rq->capture_list = NULL; |
841 | |
842 | init_llist_head(&rq->execute_cb); |
843 | } |
844 | |
845 | struct i915_request * |
846 | __i915_request_create(struct intel_context *ce, gfp_t gfp) |
847 | { |
848 | struct intel_timeline *tl = ce->timeline; |
849 | struct i915_request *rq; |
850 | u32 seqno; |
851 | int ret; |
852 | |
853 | might_sleep_if(gfpflags_allow_blocking(gfp)); |
854 | |
855 | /* Check that the caller provided an already pinned context */ |
856 | __intel_context_pin(ce); |
857 | |
858 | /* |
859 | * Beware: Dragons be flying overhead. |
860 | * |
861 | * We use RCU to look up requests in flight. The lookups may |
862 | * race with the request being allocated from the slab freelist. |
863 | * That is the request we are writing to here, may be in the process |
864 | * of being read by __i915_active_request_get_rcu(). As such, |
865 | * we have to be very careful when overwriting the contents. During |
866 | * the RCU lookup, we chase the request->engine pointer, |
867 | * read the request->global_seqno and increment the reference count. |
868 | * |
869 | * The reference count is incremented atomically. If it is zero, |
870 | * the lookup knows the request is unallocated and complete. Otherwise, |
871 | * it is either still in use, or has been reallocated and reset |
872 | * with dma_fence_init(). This increment is safe for release as we |
873 | * check that the request we have a reference to and matches the active |
874 | * request. |
875 | * |
876 | * Before we increment the refcount, we chase the request->engine |
877 | * pointer. We must not call kmem_cache_zalloc() or else we set |
878 | * that pointer to NULL and cause a crash during the lookup. If |
879 | * we see the request is completed (based on the value of the |
880 | * old engine and seqno), the lookup is complete and reports NULL. |
881 | * If we decide the request is not completed (new engine or seqno), |
882 | * then we grab a reference and double check that it is still the |
883 | * active request - which it won't be and restart the lookup. |
884 | * |
885 | * Do not use kmem_cache_zalloc() here! |
886 | */ |
887 | #ifdef __linux__ |
888 | rq = kmem_cache_alloc(global.slab_requests, |
889 | gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
890 | #else |
891 | rq = pool_get(&global.slab_requests, |
892 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
893 | if (rq) |
894 | __i915_request_ctor(rq); |
895 | #endif |
896 | if (unlikely(!rq)) { |
897 | rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); |
898 | if (!rq) { |
899 | ret = -ENOMEM; |
900 | goto err_unreserve; |
901 | } |
902 | } |
903 | |
904 | rq->context = ce; |
905 | rq->engine = ce->engine; |
906 | rq->ring = ce->ring; |
907 | rq->execution_mask = ce->engine->mask; |
908 | |
909 | ret = intel_timeline_get_seqno(tl, rq, &seqno); |
910 | if (ret) |
911 | goto err_free; |
912 | |
913 | dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, |
914 | tl->fence_context, seqno); |
915 | |
916 | RCU_INIT_POINTER(rq->timeline, tl); |
917 | RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); |
918 | rq->hwsp_seqno = tl->hwsp_seqno; |
919 | GEM_BUG_ON(i915_request_completed(rq)); |
920 | |
921 | rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ |
922 | |
923 | /* We bump the ref for the fence chain */ |
924 | i915_sw_fence_reinit(&i915_request_get(rq)->submit); |
925 | i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); |
926 | |
927 | i915_sched_node_reinit(&rq->sched); |
928 | |
929 | /* No zalloc, everything must be cleared after use */ |
930 | rq->batch = NULL; |
931 | GEM_BUG_ON(rq->capture_list); |
932 | GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
933 | |
934 | /* |
935 | * Reserve space in the ring buffer for all the commands required to |
936 | * eventually emit this request. This is to guarantee that the |
937 | * i915_request_add() call can't fail. Note that the reserve may need |
938 | * to be redone if the request is not actually submitted straight |
939 | * away, e.g. because a GPU scheduler has deferred it. |
940 | * |
941 | * Note that due to how we add reserved_space to intel_ring_begin() |
942 | * we need to double our request to ensure that if we need to wrap |
943 | * around inside i915_request_add() there is sufficient space at |
944 | * the beginning of the ring as well. |
945 | */ |
946 | rq->reserved_space = |
947 | 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); |
948 | |
949 | /* |
950 | * Record the position of the start of the request so that |
951 | * should we detect the updated seqno part-way through the |
952 | * GPU processing the request, we never over-estimate the |
953 | * position of the head. |
954 | */ |
955 | rq->head = rq->ring->emit; |
956 | |
957 | ret = rq->engine->request_alloc(rq); |
958 | if (ret) |
959 | goto err_unwind; |
960 | |
961 | rq->infix = rq->ring->emit; /* end of header; start of user payload */ |
962 | |
963 | intel_context_mark_active(ce); |
964 | list_add_tail_rcu(&rq->link, &tl->requests); |
965 | |
966 | return rq; |
967 | |
968 | err_unwind: |
969 | ce->ring->emit = rq->head; |
970 | |
971 | /* Make sure we didn't add ourselves to external state before freeing */ |
972 | GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); |
973 | GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); |
974 | |
975 | err_free: |
976 | #ifdef __linux__ |
977 | kmem_cache_free(global.slab_requests, rq); |
978 | #else |
979 | pool_put(&global.slab_requests, rq); |
980 | #endif |
981 | err_unreserve: |
982 | intel_context_unpin(ce); |
983 | return ERR_PTR(ret); |
984 | } |
985 | |
986 | struct i915_request * |
987 | i915_request_create(struct intel_context *ce) |
988 | { |
989 | struct i915_request *rq; |
990 | struct intel_timeline *tl; |
991 | |
992 | tl = intel_context_timeline_lock(ce); |
993 | if (IS_ERR(tl)) |
994 | return ERR_CAST(tl); |
995 | |
996 | /* Move our oldest request to the slab-cache (if not in use!) */ |
997 | rq = list_first_entry(&tl->requests, typeof(*rq), link); |
998 | if (!list_is_last(&rq->link, &tl->requests)) |
999 | i915_request_retire(rq); |
1000 | |
1001 | intel_context_enter(ce); |
1002 | rq = __i915_request_create(ce, GFP_KERNEL); |
1003 | intel_context_exit(ce); /* active reference transferred to request */ |
1004 | if (IS_ERR(rq)) |
1005 | goto err_unlock; |
1006 | |
1007 | /* Check that we do not interrupt ourselves with a new request */ |
1008 | rq->cookie = lockdep_pin_lock(&tl->mutex); |
1009 | |
1010 | return rq; |
1011 | |
1012 | err_unlock: |
1013 | intel_context_timeline_unlock(tl); |
1014 | return rq; |
1015 | } |
1016 | |
1017 | static int |
1018 | i915_request_await_start(struct i915_request *rq, struct i915_request *signal) |
1019 | { |
1020 | struct dma_fence *fence; |
1021 | int err; |
1022 | |
1023 | if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) |
1024 | return 0; |
1025 | |
1026 | if (i915_request_started(signal)) |
1027 | return 0; |
1028 | |
1029 | fence = NULL; |
1030 | rcu_read_lock(); |
1031 | spin_lock_irq(&signal->lock); |
1032 | do { |
1033 | struct list_head *pos = READ_ONCE(signal->link.prev); |
1034 | struct i915_request *prev; |
1035 | |
1036 | /* Confirm signal has not been retired, the link is valid */ |
1037 | if (unlikely(i915_request_started(signal))) |
1038 | break; |
1039 | |
1040 | /* Is signal the earliest request on its timeline? */ |
1041 | if (pos == &rcu_dereference(signal->timeline)->requests) |
1042 | break; |
1043 | |
1044 | /* |
1045 | * Peek at the request before us in the timeline. That |
1046 | * request will only be valid before it is retired, so |
1047 | * after acquiring a reference to it, confirm that it is |
1048 | * still part of the signaler's timeline. |
1049 | */ |
1050 | prev = list_entry(pos, typeof(*prev), link); |
1051 | if (!i915_request_get_rcu(prev)) |
1052 | break; |
1053 | |
1054 | /* After the strong barrier, confirm prev is still attached */ |
1055 | if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) { |
1056 | i915_request_put(prev); |
1057 | break; |
1058 | } |
1059 | |
1060 | fence = &prev->fence; |
1061 | } while (0); |
1062 | spin_unlock_irq(&signal->lock); |
1063 | rcu_read_unlock(); |
1064 | if (!fence) |
1065 | return 0; |
1066 | |
1067 | err = 0; |
1068 | if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) |
1069 | err = i915_sw_fence_await_dma_fence(&rq->submit, |
1070 | fence, 0, |
1071 | I915_FENCE_GFP); |
1072 | dma_fence_put(fence); |
1073 | |
1074 | return err; |
1075 | } |
1076 | |
1077 | static intel_engine_mask_t |
1078 | already_busywaiting(struct i915_request *rq) |
1079 | { |
1080 | /* |
1081 | * Polling a semaphore causes bus traffic, delaying other users of |
1082 | * both the GPU and CPU. We want to limit the impact on others, |
1083 | * while taking advantage of early submission to reduce GPU |
1084 | * latency. Therefore we restrict ourselves to not using more |
1085 | * than one semaphore from each source, and not using a semaphore |
1086 | * if we have detected the engine is saturated (i.e. would not be |
1087 | * submitted early and cause bus traffic reading an already passed |
1088 | * semaphore). |
1089 | * |
1090 | * See the are-we-too-late? check in __i915_request_submit(). |
1091 | */ |
1092 | return rq->sched.semaphores | READ_ONCE(rq->engine->saturated); |
1093 | } |
1094 | |
1095 | static int |
1096 | __emit_semaphore_wait(struct i915_request *to, |
1097 | struct i915_request *from, |
1098 | u32 seqno) |
1099 | { |
1100 | const int has_token = INTEL_GEN(to->engine->i915) >= 12; |
1101 | u32 hwsp_offset; |
1102 | int len, err; |
1103 | u32 *cs; |
1104 | |
1105 | GEM_BUG_ON(INTEL_GEN(to->engine->i915) < 8); |
1106 | GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); |
1107 | |
1108 | /* We need to pin the signaler's HWSP until we are finished reading. */ |
1109 | err = intel_timeline_read_hwsp(from, to, &hwsp_offset); |
1110 | if (err) |
1111 | return err; |
1112 | |
1113 | len = 4; |
1114 | if (has_token) |
1115 | len += 2; |
1116 | |
1117 | cs = intel_ring_begin(to, len); |
1118 | if (IS_ERR(cs)) |
1119 | return PTR_ERR(cs); |
1120 | |
1121 | /* |
1122 | * Using greater-than-or-equal here means we have to worry |
1123 | * about seqno wraparound. To side step that issue, we swap |
1124 | * the timeline HWSP upon wrapping, so that everyone listening |
1125 | * for the old (pre-wrap) values does not see the much smaller |
1126 | * (post-wrap) values than they were expecting (and so wait |
1127 | * forever). |
1128 | */ |
1129 | *cs++ = (MI_SEMAPHORE_WAIT | |
1130 | MI_SEMAPHORE_GLOBAL_GTT | |
1131 | MI_SEMAPHORE_POLL | |
1132 | MI_SEMAPHORE_SAD_GTE_SDD) + |
1133 | has_token; |
1134 | *cs++ = seqno; |
1135 | *cs++ = hwsp_offset; |
1136 | *cs++ = 0; |
1137 | if (has_token) { |
1138 | *cs++ = 0; |
1139 | *cs++ = MI_NOOP; |
1140 | } |
1141 | |
1142 | intel_ring_advance(to, cs); |
1143 | return 0; |
1144 | } |
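Note: the wraparound concern in the comment at lines 1121-1128 exists
because the GPU-side MI_SEMAPHORE_SAD_GTE_SDD comparison is a raw unsigned
">=": once a u32 seqno wraps, a new (numerically small) value would appear
"earlier" than an old (large) one. CPU-side i915 code avoids this with a
signed-difference compare; a sketch of that idiom, stated here as an
assumption about the usual kernel definition:

	/* true if seq1 is at or after seq2, tolerating u32 wraparound */
	static inline bool seqno_passed(u32 seq1, u32 seq2)
	{
		return (s32)(seq1 - seq2) >= 0;
	}

Since the hardware comparison has no such trick, the timeline instead swaps
to a fresh HWSP cacheline on wrap, as the comment describes.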
1145 | |
1146 | static int |
1147 | emit_semaphore_wait(struct i915_request *to, |
1148 | struct i915_request *from, |
1149 | gfp_t gfp) |
1150 | { |
1151 | const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; |
1152 | struct i915_sw_fence *wait = &to->submit; |
1153 | |
1154 | if (!intel_context_use_semaphores(to->context)) |
1155 | goto await_fence; |
1156 | |
1157 | if (i915_request_has_initial_breadcrumb(to)) |
1158 | goto await_fence; |
1159 | |
1160 | if (!rcu_access_pointer(from->hwsp_cacheline)) |
1161 | goto await_fence; |
1162 | |
1163 | /* |
1164 | * If this or its dependents are waiting on an external fence |
1165 | * that may fail catastrophically, then we want to avoid using |
1166 | * semaphores as they bypass the fence signaling metadata, and we |
1167 | * lose the fence->error propagation. |
1168 | */ |
1169 | if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) |
1170 | goto await_fence; |
1171 | |
1172 | /* Just emit the first semaphore we see as request space is limited. */ |
1173 | if (already_busywaiting(to) & mask) |
1174 | goto await_fence; |
1175 | |
1176 | if (i915_request_await_start(to, from) < 0) |
1177 | goto await_fence; |
1178 | |
1179 | /* Only submit our spinner after the signaler is running! */ |
1180 | if (__await_execution(to, from, NULL, gfp)) |
1181 | goto await_fence; |
1182 | |
1183 | if (__emit_semaphore_wait(to, from, from->fence.seqno)) |
1184 | goto await_fence; |
1185 | |
1186 | to->sched.semaphores |= mask; |
1187 | wait = &to->semaphore; |
1188 | |
1189 | await_fence: |
1190 | return i915_sw_fence_await_dma_fence(wait, |
1191 | &from->fence, 0, |
1192 | I915_FENCE_GFP); |
1193 | } |
1194 | |
1195 | static bool intel_timeline_sync_has_start(struct intel_timeline *tl, |
1196 | struct dma_fence *fence) |
1197 | { |
1198 | return __intel_timeline_sync_is_later(tl, |
1199 | fence->context, |
1200 | fence->seqno - 1); |
1201 | } |
1202 | |
1203 | static int intel_timeline_sync_set_start(struct intel_timeline *tl, |
1204 | const struct dma_fence *fence) |
1205 | { |
1206 | return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); |
1207 | } |
1208 | |
1209 | static int |
1210 | __i915_request_await_execution(struct i915_request *to, |
1211 | struct i915_request *from, |
1212 | void (*hook)(struct i915_request *rq, |
1213 | struct dma_fence *signal)) |
1214 | { |
1215 | int err; |
1216 | |
1217 | GEM_BUG_ON(intel_context_is_barrier(from->context)); |
1218 | |
1219 | /* Submit both requests at the same time */ |
1220 | err = __await_execution(to, from, hook, I915_FENCE_GFP); |
1221 | if (err) |
1222 | return err; |
1223 | |
1224 | /* Squash repeated dependencies to the same timelines */ |
1225 | if (intel_timeline_sync_has_start(i915_request_timeline(to), |
1226 | &from->fence)) |
1227 | return 0; |
1228 | |
1229 | /* |
1230 | * Wait until the start of this request. |
1231 | * |
1232 | * The execution cb fires when we submit the request to HW. But in |
1233 | * many cases this may be long before the request itself is ready to |
1234 | * run (consider that we submit 2 requests for the same context, where |
1235 | * the request of interest is behind an indefinite spinner). So we hook |
1236 | * up to both to reduce our queues and keep the execution lag minimised |
1237 | * in the worst case, though we hope that the await_start is elided. |
1238 | */ |
1239 | err = i915_request_await_start(to, from); |
1240 | if (err < 0) |
1241 | return err; |
1242 | |
1243 | /* |
1244 | * Ensure both start together [after all semaphores in signal] |
1245 | * |
1246 | * Now that we are queued to the HW at roughly the same time (thanks |
1247 | * to the execute cb) and are ready to run at roughly the same time |
1248 | * (thanks to the await start), our signaler may still be indefinitely |
1249 | * delayed by waiting on a semaphore from a remote engine. If our |
1250 | * signaler depends on a semaphore, so indirectly do we, and we do not |
1251 | * want to start our payload until our signaler also starts theirs. |
1252 | * So we wait. |
1253 | * |
1254 | * However, there is also a second condition for which we need to wait |
1255 | * for the precise start of the signaler. Consider that the signaler |
1256 | * was submitted in a chain of requests following another context |
1257 | * (with just an ordinary intra-engine fence dependency between the |
1258 | * two). In this case the signaler is queued to HW, but not for |
1259 | * immediate execution, and so we must wait until it reaches the |
1260 | * active slot. |
1261 | */ |
1262 | if (intel_engine_has_semaphores(to->engine) && |
1263 | !i915_request_has_initial_breadcrumb(to)) { |
1264 | err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); |
1265 | if (err < 0) |
1266 | return err; |
1267 | } |
1268 | |
1269 | /* Couple the dependency tree for PI on this exposed to->fence */ |
1270 | if (to->engine->schedule) { |
1271 | err = i915_sched_node_add_dependency(&to->sched, |
1272 | &from->sched, |
1273 | I915_DEPENDENCY_WEAK); |
1274 | if (err < 0) |
1275 | return err; |
1276 | } |
1277 | |
1278 | return intel_timeline_sync_set_start(i915_request_timeline(to), |
1279 | &from->fence); |
1280 | } |
1281 | |
1282 | static void mark_external(struct i915_request *rq) |
1283 | { |
1284 | /* |
1285 | * The downside of using semaphores is that we lose metadata passing |
1286 | * along the signaling chain. This is particularly nasty when we |
1287 | * need to pass along a fatal error such as EFAULT or EDEADLK. For |
1288 | * fatal errors we want to scrub the request before it is executed, |
1289 | * which means that we cannot preload the request onto HW and have |
1290 | * it wait upon a semaphore. |
1291 | */ |
1292 | rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; |
1293 | } |
1294 | |
1295 | static int |
1296 | __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
1297 | { |
1298 | mark_external(rq); |
1299 | return i915_sw_fence_await_dma_fence(&rq->submit, fence, |
1300 | i915_fence_context_timeout(rq->engine->i915, |
1301 | fence->context), |
1302 | I915_FENCE_GFP); |
1303 | } |
1304 | |
1305 | static int |
1306 | i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
1307 | { |
1308 | struct dma_fence *iter; |
1309 | int err = 0; |
1310 | |
1311 | if (!to_dma_fence_chain(fence)) |
1312 | return __i915_request_await_external(rq, fence); |
1313 | |
1314 | dma_fence_chain_for_each(iter, fence) { |
1315 | struct dma_fence_chain *chain = to_dma_fence_chain(iter); |
1316 | |
1317 | if (!dma_fence_is_i915(chain->fence)) { |
1318 | err = __i915_request_await_external(rq, iter); |
1319 | break; |
1320 | } |
1321 | |
1322 | err = i915_request_await_dma_fence(rq, chain->fence); |
1323 | if (err < 0) |
1324 | break; |
1325 | } |
1326 | |
1327 | dma_fence_put(iter); |
1328 | return err; |
1329 | } |
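
The walk above peels one dma_fence_chain link at a time and classifies the fence carried by each link. As a hedged illustration (not part of this file), a chain link of the shape it consumes could be composed with dma_fence_chain_init(); the helper name, the kzalloc-based allocation and the reference handling are assumptions for the sketch:

        /*
         * Illustrative sketch only: build one dma_fence_chain link.
         * 'prev' and 'fence' are fences the caller already holds
         * references to; dma_fence_chain_init() is assumed to consume
         * the references passed in.
         */
        static struct dma_fence *sketch_chain_link(struct dma_fence *prev,
                                                   struct dma_fence *fence,
                                                   u64 seqno)
        {
                struct dma_fence_chain *link;

                link = kzalloc(sizeof(*link), GFP_KERNEL);
                if (!link)
                        return NULL;

                dma_fence_chain_init(link, dma_fence_get(prev),
                                     dma_fence_get(fence), seqno);
                return &link->base;
        }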
1330 | |
1331 | int |
1332 | i915_request_await_execution(struct i915_request *rq, |
1333 | struct dma_fence *fence, |
1334 | void (*hook)(struct i915_request *rq, |
1335 | struct dma_fence *signal)) |
1336 | { |
1337 | struct dma_fence **child = &fence; |
1338 | unsigned int nchild = 1; |
1339 | int ret; |
1340 | |
1341 | if (dma_fence_is_array(fence)) { |
1342 | struct dma_fence_array *array = to_dma_fence_array(fence); |
1343 | |
1344 | /* XXX Error for signal-on-any fence arrays */ |
1345 | |
1346 | child = array->fences; |
1347 | nchild = array->num_fences; |
1348 | GEM_BUG_ON(!nchild); |
1349 | } |
1350 | |
1351 | do { |
1352 | fence = *child++; |
1353 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
1354 | continue; |
1355 | |
1356 | if (fence->context == rq->fence.context) |
1357 | continue; |
1358 | |
1359 | /* |
1360 | * We don't squash repeated fence dependencies here as we |
1361 | * want to run our callback in all cases. |
1362 | */ |
1363 | |
1364 | if (dma_fence_is_i915(fence)) |
1365 | ret = __i915_request_await_execution(rq, |
1366 | to_request(fence), |
1367 | hook); |
1368 | else |
1369 | ret = i915_request_await_external(rq, fence); |
1370 | if (ret < 0) |
1371 | return ret; |
1372 | } while (--nchild); |
1373 | |
1374 | return 0; |
1375 | } |
1376 | |
1377 | static int |
1378 | await_request_submit(struct i915_request *to, struct i915_request *from) |
1379 | { |
1380 | /* |
1381 | * If we are waiting on a virtual engine, then it may be |
1382 | * constrained to execute on a single engine *prior* to submission. |
1383 | * When it is submitted, it will be first submitted to the virtual |
1384 | * engine and then passed to the physical engine. We cannot allow |
1385 | * the waiter to be submitted immediately to the physical engine |
1386 | * as it may then bypass the virtual request. |
1387 | */ |
1388 | if (to->engine == READ_ONCE(from->engine)) |
1389 | return i915_sw_fence_await_sw_fence_gfp(&to->submit, |
1390 | &from->submit, |
1391 | I915_FENCE_GFP); |
1392 | else |
1393 | return __i915_request_await_execution(to, from, NULL); |
1394 | } |
1395 | |
1396 | static int |
1397 | i915_request_await_request(struct i915_request *to, struct i915_request *from) |
1398 | { |
1399 | int ret; |
1400 | |
1401 | GEM_BUG_ON(to == from); |
1402 | GEM_BUG_ON(to->timeline == from->timeline); |
1403 | |
1404 | if (i915_request_completed(from)) { |
1405 | i915_sw_fence_set_error_once(&to->submit, from->fence.error); |
1406 | return 0; |
1407 | } |
1408 | |
1409 | if (to->engine->schedule) { |
1410 | ret = i915_sched_node_add_dependency(&to->sched, |
1411 | &from->sched, |
1412 | I915_DEPENDENCY_EXTERNAL); |
1413 | if (ret < 0) |
1414 | return ret; |
1415 | } |
1416 | |
1417 | if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) |
1418 | ret = await_request_submit(to, from); |
1419 | else |
1420 | ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); |
1421 | if (ret < 0) |
1422 | return ret; |
1423 | |
1424 | return 0; |
1425 | } |
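
The is_power_of_2() test above is a compact way of asking "does the union of both execution masks select exactly one physical engine?" (if so, ordering can use the cheap submit fence; otherwise a semaphore wait is emitted). A hedged standalone illustration, with invented names, of the same bit trick visible in the expansion:

        #include <stdbool.h>
        #include <stdint.h>

        /* True iff the union of both execution masks selects a single engine. */
        static bool same_single_engine(uint32_t mask_a, uint32_t mask_b)
        {
                uint32_t u = mask_a | mask_b;

                return u != 0 && (u & (u - 1)) == 0; /* power-of-two test */
        }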
1426 | |
1427 | int |
1428 | i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) |
1429 | { |
1430 | struct dma_fence **child = &fence; |
1431 | unsigned int nchild = 1; |
1432 | int ret; |
1433 | |
1434 | /* |
1435 | * Note that if the fence-array was created in signal-on-any mode, |
1436 | * we should *not* decompose it into its individual fences. However, |
1437 | * we don't currently store which mode the fence-array is operating |
1438 | * in. Fortunately, the only user of signal-on-any is private to |
1439 | * amdgpu and we should not see any incoming fence-array from |
1440 | * sync-file being in signal-on-any mode. |
1441 | */ |
1442 | if (dma_fence_is_array(fence)) { |
1443 | struct dma_fence_array *array = to_dma_fence_array(fence); |
1444 | |
1445 | child = array->fences; |
1446 | nchild = array->num_fences; |
1447 | GEM_BUG_ON(!nchild); |
1448 | } |
1449 | |
1450 | do { |
1451 | fence = *child++; |
1452 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
1453 | continue; |
1454 | |
1455 | /* |
1456 | * Requests on the same timeline are explicitly ordered, along |
1457 | * with their dependencies, by i915_request_add() which ensures |
1458 | * that requests are submitted in-order through each ring. |
1459 | */ |
1460 | if (fence->context == rq->fence.context) |
1461 | continue; |
1462 | |
1463 | /* Squash repeated waits to the same timelines */ |
1464 | if (fence->context && |
1465 | intel_timeline_sync_is_later(i915_request_timeline(rq), |
1466 | fence)) |
1467 | continue; |
1468 | |
1469 | if (dma_fence_is_i915(fence)) |
1470 | ret = i915_request_await_request(rq, to_request(fence)); |
1471 | else |
1472 | ret = i915_request_await_external(rq, fence); |
1473 | if (ret < 0) |
1474 | return ret; |
1475 | |
1476 | /* Record the latest fence used against each timeline */ |
1477 | if (fence->context) |
1478 | intel_timeline_sync_set(i915_request_timeline(rq), |
1479 | fence); |
1480 | } while (--nchild); |
1481 | |
1482 | return 0; |
1483 | } |
1484 | |
1485 | /** |
1486 | * i915_request_await_object - set this request to (async) wait upon a bo |
1487 | * @to: request we are wishing to use |
1488 | * @obj: object which may be in use on another ring. |
1489 | * @write: whether the wait is on behalf of a writer |
1490 | * |
1491 | * This code is meant to abstract object synchronization with the GPU. |
1492 | * Conceptually we serialise writes between engines inside the GPU. |
1493 | * We only allow one engine to write into a buffer at any time, but |
1494 | * multiple readers. To ensure each has a coherent view of memory, we must: |
1495 | * |
1496 | * - If there is an outstanding write request to the object, the new |
1497 | * request must wait for it to complete (either CPU or in hw, requests |
1498 | * on the same ring will be naturally ordered). |
1499 | * |
1500 | * - If we are a write request (pending_write_domain is set), the new |
1501 | * request must wait for outstanding read requests to complete. |
1502 | * |
1503 | * Returns 0 if successful, else propagates up the lower layer error. |
1504 | */ |
1505 | int |
1506 | i915_request_await_object(struct i915_request *to, |
1507 | struct drm_i915_gem_object *obj, |
1508 | bool write) |
1509 | { |
1510 | struct dma_fence *excl; |
1511 | int ret = 0; |
1512 | |
1513 | if (write) { |
1514 | struct dma_fence **shared; |
1515 | unsigned int count, i; |
1516 | |
1517 | ret = dma_resv_get_fences_rcu(obj->base.resv, |
1518 | &excl, &count, &shared); |
1519 | if (ret) |
1520 | return ret; |
1521 | |
1522 | for (i = 0; i < count; i++) { |
1523 | ret = i915_request_await_dma_fence(to, shared[i]); |
1524 | if (ret) |
1525 | break; |
1526 | |
1527 | dma_fence_put(shared[i]); |
1528 | } |
1529 | |
1530 | for (; i < count; i++) |
1531 | dma_fence_put(shared[i]); |
1532 | kfree(shared); |
1533 | } else { |
1534 | excl = dma_resv_get_excl_rcu(obj->base.resv); |
1535 | } |
1536 | |
1537 | if (excl) { |
1538 | if (ret == 0) |
1539 | ret = i915_request_await_dma_fence(to, excl); |
1540 | |
1541 | dma_fence_put(excl); |
1542 | } |
1543 | |
1544 | return ret; |
1545 | } |
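
As a hedged sketch of the usual caller pattern for i915_request_await_object(): serialise the request against the object's reservation fences, then publish the request back onto the object via the active tracker. The 'vma' variable, the write flag and the error handling are illustrative, and appropriate object locking is assumed to be held by the caller:

        /* Sketch, assuming obj is locked and rq/vma were set up elsewhere. */
        err = i915_request_await_object(rq, obj, true /* write */);
        if (err == 0)
                err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
        if (err)
                i915_request_set_error_once(rq, err);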
1546 | |
1547 | static struct i915_request * |
1548 | __i915_request_add_to_timeline(struct i915_request *rq) |
1549 | { |
1550 | struct intel_timeline *timeline = i915_request_timeline(rq); |
1551 | struct i915_request *prev; |
1552 | |
1553 | /* |
1554 | * Dependency tracking and request ordering along the timeline |
1555 | * is special cased so that we can eliminate redundant ordering |
1556 | * operations while building the request (we know that the timeline |
1557 | * itself is ordered, and here we guarantee it). |
1558 | * |
1559 | * As we know we will need to emit tracking along the timeline, |
1560 | * we embed the hooks into our request struct -- at the cost of |
1561 | * having to have specialised no-allocation interfaces (which will |
1562 | * be beneficial elsewhere). |
1563 | * |
1564 | * A second benefit to open-coding i915_request_await_request is |
1565 | * that we can apply a slight variant of the rules specialised |
1566 | * for timelines that jump between engines (such as virtual engines). |
1567 | * If we consider the case of virtual engine, we must emit a dma-fence |
1568 | * to prevent scheduling of the second request until the first is |
1569 | * complete (to maximise our greedy late load balancing) and this |
1570 | * precludes optimising to use semaphore serialisation of a single |
1571 | * timeline across engines. |
1572 | */ |
1573 | prev = to_request(__i915_active_fence_set(&timeline->last_request, |
1574 | &rq->fence)); |
1575 | if (prev && !i915_request_completed(prev)) { |
1576 | /* |
1577 | * The requests are supposed to be kept in order. However, |
1578 | * we need to be wary in case the timeline->last_request |
1579 | * is used as a barrier for external modification to this |
1580 | * context. |
1581 | */ |
1582 | GEM_BUG_ON(prev->context == rq->context && |
1583 | i915_seqno_passed(prev->fence.seqno, |
1584 | rq->fence.seqno)); |
1585 | |
1586 | if (is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) |
1587 | i915_sw_fence_await_sw_fence(&rq->submit, |
1588 | &prev->submit, |
1589 | &rq->submitq); |
1590 | else |
1591 | __i915_sw_fence_await_dma_fence(&rq->submit, |
1592 | &prev->fence, |
1593 | &rq->dmaq); |
1594 | if (rq->engine->schedule) |
1595 | __i915_sched_node_add_dependency(&rq->sched, |
1596 | &prev->sched, |
1597 | &rq->dep, |
1598 | 0); |
1599 | } |
1600 | |
1601 | /* |
1602 | * Make sure that no request gazumped us - if it was allocated after |
1603 | * our i915_request_alloc() and called __i915_request_add() before |
1604 | * us, the timeline will hold its seqno which is later than ours. |
1605 | */ |
1606 | GEM_BUG_ON(timeline->seqno != rq->fence.seqno); |
1607 | |
1608 | return prev; |
1609 | } |
1610 | |
1611 | /* |
1612 | * NB: This function is not allowed to fail. Doing so would mean the |
1613 | * request is not being tracked for completion but the work itself is |
1614 | * going to happen on the hardware. This would be a Bad Thing(tm). |
1615 | */ |
1616 | struct i915_request *__i915_request_commit(struct i915_request *rq) |
1617 | { |
1618 | struct intel_engine_cs *engine = rq->engine; |
1619 | struct intel_ring *ring = rq->ring; |
1620 | u32 *cs; |
1621 | |
1622 | RQ_TRACE(rq, "\n"); |
1623 | |
1624 | /* |
1625 | * To ensure that this call will not fail, space for its emissions |
1626 | * should already have been reserved in the ring buffer. Let the ring |
1627 | * know that it is time to use that space up. |
1628 | */ |
1629 | GEM_BUG_ON(rq->reserved_space > ring->space); |
1630 | rq->reserved_space = 0; |
1631 | rq->emitted_jiffies = jiffies; |
1632 | |
1633 | /* |
1634 | * Record the position of the start of the breadcrumb so that |
1635 | * should we detect the updated seqno part-way through the |
1636 | * GPU processing the request, we never over-estimate the |
1637 | * position of the ring's HEAD. |
1638 | */ |
1639 | cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); |
1640 | GEM_BUG_ON(IS_ERR(cs)); |
1641 | rq->postfix = intel_ring_offset(rq, cs); |
1642 | |
1643 | return __i915_request_add_to_timeline(rq); |
1644 | } |
1645 | |
1646 | void __i915_request_queue(struct i915_request *rq, |
1647 | const struct i915_sched_attr *attr) |
1648 | { |
1649 | /* |
1650 | * Let the backend know a new request has arrived that may need |
1651 | * to adjust the existing execution schedule due to a high priority |
1652 | * request - i.e. we may want to preempt the current request in order |
1653 | * to run a high priority dependency chain *before* we can execute this |
1654 | * request. |
1655 | * |
1656 | * This is called before the request is ready to run so that we can |
1657 | * decide whether to preempt the entire chain so that it is ready to |
1658 | * run at the earliest possible convenience. |
1659 | */ |
1660 | if (attr && rq->engine->schedule) |
1661 | rq->engine->schedule(rq, attr); |
1662 | i915_sw_fence_commit(&rq->semaphore); |
1663 | i915_sw_fence_commit(&rq->submit); |
1664 | } |
1665 | |
1666 | void i915_request_add(struct i915_request *rq) |
1667 | { |
1668 | struct intel_timeline * const tl = i915_request_timeline(rq); |
1669 | struct i915_sched_attr attr = {}; |
1670 | struct i915_gem_context *ctx; |
1671 | |
1672 | lockdep_assert_held(&tl->mutex); |
1673 | lockdep_unpin_lock(&tl->mutex, rq->cookie); |
1674 | |
1675 | trace_i915_request_add(rq); |
1676 | __i915_request_commit(rq); |
1677 | |
1678 | /* XXX placeholder for selftests */ |
1679 | rcu_read_lock(); |
1680 | ctx = rcu_dereference(rq->context->gem_context); |
1681 | if (ctx) |
1682 | attr = ctx->sched; |
1683 | rcu_read_unlock(); |
1684 | |
1685 | __i915_request_queue(rq, &attr); |
1686 | |
1687 | mutex_unlock(&tl->mutex); |
1688 | } |
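
Putting the pieces together, a hedged sketch of the request lifecycle that ends in i915_request_add(); 'ce' (an intel_context) and 'in_fence' are assumed to come from the caller, and the error handling is one plausible shape, not the canonical one:

        struct i915_request *rq;
        int err;

        rq = i915_request_create(ce);   /* acquires the timeline mutex */
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        err = i915_request_await_dma_fence(rq, in_fence);
        if (err)
                i915_request_set_error_once(rq, err);

        /* ... emit the payload into the ring here ... */

        i915_request_add(rq);           /* commits and drops the timeline mutex */
        return err;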
1689 | |
1690 | static unsigned long local_clock_ns(unsigned int *cpu) |
1691 | { |
1692 | unsigned long t; |
1693 | |
1694 | /* |
1695 | * Cheaply and approximately convert from nanoseconds to microseconds. |
1696 | * The result and subsequent calculations are also defined in the same |
1697 | * approximate microseconds units. The principal source of timing |
1698 | * error here is from the simple truncation. |
1699 | * |
1700 | * Note that local_clock() is only defined wrt the current CPU; |
1701 | * the comparisons are no longer valid if we switch CPUs. Instead of |
1702 | * blocking preemption for the entire busywait, we can detect the CPU |
1703 | * switch and use that as indicator of system load and a reason to |
1704 | * stop busywaiting, see busywait_stop(). |
1705 | */ |
1706 | *cpu = get_cpu(); |
1707 | t = local_clock(); |
1708 | put_cpu(); |
1709 | |
1710 | return t; |
1711 | } |
1712 | |
1713 | static bool busywait_stop(unsigned long timeout, unsigned int cpu) |
1714 | { |
1715 | unsigned int this_cpu; |
1716 | |
1717 | if (time_after(local_clock_ns(&this_cpu), timeout)) |
1718 | return true; |
1719 | |
1720 | return this_cpu != cpu; |
1721 | } |
1722 | |
1723 | static bool __i915_spin_request(struct i915_request * const rq, int state) |
1724 | { |
1725 | unsigned long timeout_ns; |
1726 | unsigned int cpu; |
1727 | |
1728 | /* |
1729 | * Only wait for the request if we know it is likely to complete. |
1730 | * |
1731 | * We don't track the timestamps around requests, nor the average |
1732 | * request length, so we do not have a good indicator that this |
1733 | * request will complete within the timeout. What we do know is the |
1734 | * order in which requests are executed by the context and so we can |
1735 | * tell if the request has been started. If the request is not even |
1736 | * running yet, it is a fair assumption that it will not complete |
1737 | * within our relatively short timeout. |
1738 | */ |
1739 | if (!i915_request_is_running(rq)) |
1740 | return false; |
1741 | |
1742 | /* |
1743 | * When waiting for high frequency requests, e.g. during synchronous |
1744 | * rendering split between the CPU and GPU, the finite amount of time |
1745 | * required to set up the irq and wait upon it limits the response |
1746 | * rate. By busywaiting on the request completion for a short while we |
1747 | * can service the high frequency waits as quickly as possible. However, |
1748 | * if it is a slow request, we want to sleep as quickly as possible. |
1749 | * The tradeoff between waiting and sleeping is roughly the time it |
1750 | * takes to sleep on a request, on the order of a microsecond. |
1751 | */ |
1752 | |
1753 | timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); |
1754 | timeout_ns += local_clock_ns(&cpu); |
1755 | do { |
1756 | if (dma_fence_is_signaled(&rq->fence)) |
1757 | return true; |
1758 | |
1759 | if (signal_pending_state(state, current)) |
1760 | break; |
1761 | |
1762 | if (busywait_stop(timeout_ns, cpu)) |
1763 | break; |
1764 | |
1765 | cpu_relax(); |
1766 | } while (!drm_need_resched()); |
1767 | |
1768 | return false; |
1769 | } |
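
The function above is an instance of the classic spin-then-sleep (hybrid polling) pattern: poll for a bounded slice before paying for a blocking wait. A hedged, self-contained userspace analogue of the same idea, with all names invented for illustration:

        #include <stdatomic.h>
        #include <stdbool.h>
        #include <time.h>

        /* Spin on 'done' for at most 'budget_ns'; return true if it completed. */
        static bool spin_before_sleep(atomic_bool *done, long budget_ns)
        {
                struct timespec ts;
                long now, deadline;

                clock_gettime(CLOCK_MONOTONIC, &ts);
                deadline = ts.tv_sec * 1000000000L + ts.tv_nsec + budget_ns;

                do {
                        if (atomic_load_explicit(done, memory_order_acquire))
                                return true;    /* fast path: no sleep needed */
                        clock_gettime(CLOCK_MONOTONIC, &ts);
                        now = ts.tv_sec * 1000000000L + ts.tv_nsec;
                } while (now < deadline);

                return false;   /* slow path: caller falls back to a blocking wait */
        }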
1770 | |
1771 | struct request_wait { |
1772 | struct dma_fence_cb cb; |
1773 | #ifdef __linux__ |
1774 | struct task_struct *tsk; |
1775 | #else |
1776 | struct proc *tsk; |
1777 | #endif |
1778 | }; |
1779 | |
1780 | static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) |
1781 | { |
1782 | struct request_wait *wait = container_of(cb, typeof(*wait), cb); |
1783 | |
1784 | wake_up_process(fetch_and_zero(&wait->tsk)); |
1785 | } |
1786 | |
1787 | /** |
1788 | * i915_request_wait - wait until execution of request has finished |
1789 | * @rq: the request to wait upon |
1790 | * @flags: how to wait |
1791 | * @timeout: how long to wait in jiffies |
1792 | * |
1793 | * i915_request_wait() waits for the request to be completed, for a |
1794 | * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an |
1795 | * unbounded wait). |
1796 | * |
1797 | * Returns the remaining time (in jiffies) if the request completed, which may |
1798 | * be zero or -ETIME if the request is unfinished after the timeout expires. |
1799 | * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is |
1800 | * pending before the request completes. |
1801 | */ |
1802 | long i915_request_wait(struct i915_request *rq, |
1803 | unsigned int flags, |
1804 | long timeout) |
1805 | { |
1806 | const int state = flags & I915_WAIT_INTERRUPTIBLE ? |
1807 | TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; |
1808 | struct request_wait wait; |
1809 | |
1810 | might_sleep(); |
1811 | GEM_BUG_ON(timeout < 0); |
1812 | |
1813 | if (dma_fence_is_signaled(&rq->fence)) |
1814 | return timeout; |
1815 | |
1816 | if (!timeout) |
1817 | return -ETIME; |
1818 | |
1819 | trace_i915_request_wait_begin(rq, flags); |
1820 | |
1821 | /* |
1822 | * We must never wait on the GPU while holding a lock as we |
1823 | * may need to perform a GPU reset. So while we don't need to |
1824 | * serialise wait/reset with an explicit lock, we do want |
1825 | * lockdep to detect potential dependency cycles. |
1826 | */ |
1827 | mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); |
1828 | |
1829 | /* |
1830 | * Optimistic spin before touching IRQs. |
1831 | * |
1832 | * We may use a rather large value here to offset the penalty of |
1833 | * switching away from the active task. Frequently, the client will |
1834 | * wait upon an old swapbuffer to throttle itself to remain within a |
1835 | * frame of the gpu. If the client is running in lockstep with the gpu, |
1836 | * then it should not be waiting long at all, and a sleep now will incur |
1837 | * extra scheduler latency in producing the next frame. To try to |
1838 | * avoid adding the cost of enabling/disabling the interrupt to the |
1839 | * short wait, we first spin to see if the request would have completed |
1840 | * in the time taken to set up the interrupt. |
1841 | * |
1842 | * We need up to 5us to enable the irq, and up to 20us to hide the |
1843 | * scheduler latency of a context switch, ignoring the secondary |
1844 | * impacts from a context switch such as cache eviction. |
1845 | * |
1846 | * The scheme used for low-latency IO is called "hybrid interrupt |
1847 | * polling". The suggestion there is to sleep until just before you |
1848 | * expect to be woken by the device interrupt and then poll for its |
1849 | * completion. That requires having a good predictor for the request |
1850 | * duration, which we currently lack. |
1851 | */ |
1852 | if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) && |
1853 | __i915_spin_request(rq, state)) |
1854 | goto out; |
1855 | |
1856 | /* |
1857 | * This client is about to stall waiting for the GPU. In many cases |
1858 | * this is undesirable and limits the throughput of the system, as |
1859 | * many clients cannot continue processing user input/output whilst |
1860 | * blocked. RPS autotuning may take tens of milliseconds to respond |
1861 | * to the GPU load and thus incurs additional latency for the client. |
1862 | * We can circumvent that by promoting the GPU frequency to maximum |
1863 | * before we sleep. This makes the GPU throttle up much more quickly |
1864 | * (good for benchmarks and user experience, e.g. window animations), |
1865 | * but at a cost of spending more power processing the workload |
1866 | * (bad for battery). |
1867 | */ |
1868 | if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) |
1869 | intel_rps_boost(rq); |
1870 | |
1871 | #ifdef __linux__ |
1872 | wait.tsk = current; |
1873 | #else |
1874 | wait.tsk = curproc; |
1875 | #endif |
1876 | if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) |
1877 | goto out; |
1878 | |
1879 | /* |
1880 | * Flush the submission tasklet, but only if it may help this request. |
1881 | * |
1882 | * We sometimes experience some latency between the HW interrupts and |
1883 | * tasklet execution (mostly due to ksoftirqd latency, but it can also |
1884 | * be due to lazy CS events), so lets run the tasklet manually if there |
1885 | * is a chance it may submit this request. If the request is not ready |
1886 | * to run, as it is waiting for other fences to be signaled, flushing |
1887 | * the tasklet is busy work without any advantage for this client. |
1888 | * |
1889 | * If the HW is being lazy, this is the last chance before we go to |
1890 | * sleep to catch any pending events. We will check periodically in |
1891 | * the heartbeat to flush the submission tasklets as a last resort |
1892 | * for unhappy HW. |
1893 | */ |
1894 | if (i915_request_is_ready(rq)) |
1895 | intel_engine_flush_submission(rq->engine); |
1896 | |
1897 | for (;;) { |
1898 | set_current_state(state); |
1899 | |
1900 | if (dma_fence_is_signaled(&rq->fence)) |
1901 | break; |
1902 | |
1903 | if (signal_pending_state(state, current)) { |
1904 | timeout = -ERESTARTSYS; |
1905 | break; |
1906 | } |
1907 | |
1908 | if (!timeout) { |
1909 | timeout = -ETIME; |
1910 | break; |
1911 | } |
1912 | |
1913 | timeout = io_schedule_timeout(timeout); |
1914 | } |
1915 | __set_current_state(TASK_RUNNING); |
1916 | |
1917 | if (READ_ONCE(wait.tsk)) |
1918 | dma_fence_remove_callback(&rq->fence, &wait.cb); |
1919 | GEM_BUG_ON(!list_empty(&wait.cb.node)); |
1920 | |
1921 | out: |
1922 | mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); |
1923 | trace_i915_request_wait_end(rq); |
1924 | return timeout; |
1925 | } |
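
A hedged sketch of a typical i915_request_wait() caller, following the kerneldoc above: an interruptible wait with a bounded budget. The 100ms figure is arbitrary, chosen only for illustration:

        long remaining;

        remaining = i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
                                      msecs_to_jiffies(100));
        if (remaining < 0)
                return remaining;       /* -ETIME on timeout, or signal-interrupted */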
1926 | |
1927 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
1928 | #include "selftests/mock_request.c" |
1929 | #include "selftests/i915_request.c" |
1930 | #endif |
1931 | |
1932 | static void i915_global_request_shrink(void) |
1933 | { |
1934 | #ifdef notyet |
1935 | kmem_cache_shrink(global.slab_execute_cbs); |
1936 | kmem_cache_shrink(global.slab_requests); |
1937 | #endif |
1938 | } |
1939 | |
1940 | static void i915_global_request_exit(void) |
1941 | { |
1942 | #ifdef __linux__ |
1943 | kmem_cache_destroy(global.slab_execute_cbs); |
1944 | kmem_cache_destroy(global.slab_requests); |
1945 | #else |
1946 | pool_destroy(&global.slab_execute_cbs); |
1947 | pool_destroy(&global.slab_requests); |
1948 | #endif |
1949 | } |
1950 | |
1951 | static struct i915_global_request global = { { |
1952 | .shrink = i915_global_request_shrink, |
1953 | .exit = i915_global_request_exit, |
1954 | } }; |
1955 | |
1956 | int __init i915_global_request_init(void) |
1957 | { |
1958 | #ifdef __linux__ |
1959 | global.slab_requests = |
1960 | kmem_cache_create("i915_request", |
1961 | sizeof(struct i915_request), |
1962 | __alignof__(struct i915_request), |
1963 | SLAB_HWCACHE_ALIGN | |
1964 | SLAB_RECLAIM_ACCOUNT | |
1965 | SLAB_TYPESAFE_BY_RCU, |
1966 | __i915_request_ctor); |
1967 | if (!global.slab_requests) |
1968 | return -ENOMEM; |
1969 | |
1970 | global.slab_execute_cbs = KMEM_CACHE(execute_cb, |
1971 | SLAB_HWCACHE_ALIGN | |
1972 | SLAB_RECLAIM_ACCOUNT | |
1973 | SLAB_TYPESAFE_BY_RCU); |
1974 | if (!global.slab_execute_cbs) |
1975 | goto err_requests; |
1976 | #else |
1977 | pool_init(&global.slab_requests, sizeof(struct i915_request), |
1978 | CACHELINESIZE64, IPL_TTY0x9, 0, "i915_request", NULL((void *)0)); |
1979 | pool_init(&global.slab_execute_cbs, sizeof(struct execute_cb), |
1980 | CACHELINESIZE64, IPL_TTY0x9, 0, "i915_exec", NULL((void *)0)); |
1981 | #endif |
1982 | |
1983 | i915_global_register(&global.base); |
1984 | return 0; |
1985 | |
1986 | #ifdef __linux__ |
1987 | err_requests: |
1988 | kmem_cache_destroy(global.slab_requests); |
1989 | return -ENOMEM; |
1990 | #endif |
1991 | } |