File: dev/pci/drm/i915/i915_request.c
Warning: line 1847, column 21: Value stored to 'ring' during its initialization is never read
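The flagged line (1847) lies outside the excerpt below, so the offending statement itself is not shown here. For context only: this warning class means a local variable is given an initializer whose value is overwritten (or simply never used) before any read. A minimal hypothetical sketch of the pattern, borrowing names from this file but not reproducing the actual code at line 1847:

    /* Hypothetical sketch of a "dead initialization" the analyzer flags. */
    static struct intel_ring *pick_ring(struct i915_request *rq)
    {
            struct intel_ring *ring = rq->ring;   /* value stored here is never read ... */

            ring = rq->context->ring;             /* ... because it is overwritten before any use */
            return ring;
    }

The usual fix is to drop the redundant initializer, declaring the variable uninitialized and assigning it once.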
1 | /* |
2 | * Copyright © 2008-2015 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include <linux/dma-fence-array.h> |
26 | #include <linux/dma-fence-chain.h> |
27 | #include <linux/irq_work.h> |
28 | #include <linux/prefetch.h> |
29 | #include <linux/sched.h> |
30 | #include <linux/sched/clock.h> |
31 | #include <linux/sched/signal.h> |
32 | #include <linux/sched/mm.h> |
33 | |
34 | #include "gem/i915_gem_context.h" |
35 | #include "gt/intel_breadcrumbs.h" |
36 | #include "gt/intel_context.h" |
37 | #include "gt/intel_engine.h" |
38 | #include "gt/intel_engine_heartbeat.h" |
39 | #include "gt/intel_engine_regs.h" |
40 | #include "gt/intel_gpu_commands.h" |
41 | #include "gt/intel_reset.h" |
42 | #include "gt/intel_ring.h" |
43 | #include "gt/intel_rps.h" |
44 | |
45 | #include "i915_active.h" |
46 | #include "i915_deps.h" |
47 | #include "i915_driver.h" |
48 | #include "i915_drv.h" |
49 | #include "i915_trace.h" |
50 | #include "intel_pm.h" |
51 | |
52 | struct execute_cb { |
53 | struct irq_work work; |
54 | struct i915_sw_fence *fence; |
55 | struct i915_request *signal; |
56 | }; |
57 | |
58 | static struct pool slab_requests; |
59 | static struct pool slab_execute_cbs; |
60 | |
61 | static const char *i915_fence_get_driver_name(struct dma_fence *fence) |
62 | { |
63 | return dev_name(to_request(fence)->i915->drm.dev); |
64 | } |
65 | |
66 | static const char *i915_fence_get_timeline_name(struct dma_fence *fence) |
67 | { |
68 | const struct i915_gem_context *ctx; |
69 | |
70 | /* |
71 | * The timeline struct (as part of the ppgtt underneath a context) |
72 | * may be freed when the request is no longer in use by the GPU. |
73 | * We could extend the life of a context to beyond that of all |
74 | * fences, possibly keeping the hw resource around indefinitely, |
75 | * or we just give them a false name. Since |
76 | * dma_fence_ops.get_timeline_name is a debug feature, the occasional |
77 | * lie seems justifiable. |
78 | */ |
79 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
80 | return "signaled"; |
81 | |
82 | ctx = i915_request_gem_context(to_request(fence)); |
83 | if (!ctx) |
84 | return "[" DRIVER_NAME"i915" "]"; |
85 | |
86 | return ctx->name; |
87 | } |
88 | |
89 | static bool i915_fence_signaled(struct dma_fence *fence) |
90 | { |
91 | return i915_request_completed(to_request(fence)); |
92 | } |
93 | |
94 | static bool i915_fence_enable_signaling(struct dma_fence *fence) |
95 | { |
96 | return i915_request_enable_breadcrumb(to_request(fence)); |
97 | } |
98 | |
99 | static signed long i915_fence_wait(struct dma_fence *fence, |
100 | bool interruptible, |
101 | signed long timeout) |
102 | { |
103 | return i915_request_wait_timeout(to_request(fence), |
104 | interruptible | I915_WAIT_PRIORITY, |
105 | timeout); |
106 | } |
107 | |
108 | #ifdef __linux__ |
109 | struct kmem_cache *i915_request_slab_cache(void) |
110 | { |
111 | return slab_requests; |
112 | } |
113 | #else |
114 | struct pool *i915_request_slab_cache(void) |
115 | { |
116 | return &slab_requests; |
117 | } |
118 | #endif |
119 | |
120 | static void i915_fence_release(struct dma_fence *fence) |
121 | { |
122 | struct i915_request *rq = to_request(fence); |
123 | |
124 | GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT && |
125 | rq->guc_prio != GUC_PRIO_FINI); |
126 | |
127 | i915_request_free_capture_list(fetch_and_zero(&rq->capture_list)); |
128 | if (rq->batch_res) { |
129 | i915_vma_resource_put(rq->batch_res); |
130 | rq->batch_res = NULL; |
131 | } |
132 | |
133 | /* |
134 | * The request is put onto a RCU freelist (i.e. the address |
135 | * is immediately reused), mark the fences as being freed now. |
136 | * Otherwise the debugobjects for the fences are only marked as |
137 | * freed when the slab cache itself is freed, and so we would get |
138 | * caught trying to reuse dead objects. |
139 | */ |
140 | i915_sw_fence_fini(&rq->submit); |
141 | i915_sw_fence_fini(&rq->semaphore); |
142 | |
143 | /* |
144 | * Keep one request on each engine for reserved use under mempressure. |
145 | * |
146 | * We do not hold a reference to the engine here and so have to be |
147 | * very careful in what rq->engine we poke. The virtual engine is |
148 | * referenced via the rq->context and we released that ref during |
149 | * i915_request_retire(), ergo we must not dereference a virtual |
150 | * engine here. Not that we would want to, as the only consumer of |
151 | * the reserved engine->request_pool is the power management parking, |
152 | * which must-not-fail, and that is only run on the physical engines. |
153 | * |
154 | * Since the request must have been executed to have completed, |
155 | * we know that it will have been processed by the HW and will |
156 | * not be unsubmitted again, so rq->engine and rq->execution_mask |
157 | * at this point is stable. rq->execution_mask will be a single |
158 | * bit if the last and _only_ engine it could execute on was a |
159 | * physical engine, if it's multiple bits then it started on and |
160 | * could still be on a virtual engine. Thus if the mask is not a |
161 | * power-of-two we assume that rq->engine may still be a virtual |
162 | * engine and so a dangling invalid pointer that we cannot dereference |
163 | * |
164 | * For example, consider the flow of a bonded request through a virtual |
165 | * engine. The request is created with a wide engine mask (all engines |
166 | * that we might execute on). On processing the bond, the request mask |
167 | * is reduced to one or more engines. If the request is subsequently |
168 | * bound to a single engine, it will then be constrained to only |
169 | * execute on that engine and never returned to the virtual engine |
170 | * after timeslicing away, see __unwind_incomplete_requests(). Thus we |
171 | * know that if the rq->execution_mask is a single bit, rq->engine |
172 | * can be a physical engine with the exact corresponding mask. |
173 | */ |
174 | if (is_power_of_2(rq->execution_mask) && |
175 | !cmpxchg(&rq->engine->request_pool, NULL, rq)) |
176 | return; |
177 | |
178 | #ifdef __linux__ |
179 | kmem_cache_free(slab_requests, rq); |
180 | #else |
181 | pool_put(&slab_requests, rq); |
182 | #endif |
183 | } |
184 | |
185 | const struct dma_fence_ops i915_fence_ops = { |
186 | .get_driver_name = i915_fence_get_driver_name, |
187 | .get_timeline_name = i915_fence_get_timeline_name, |
188 | .enable_signaling = i915_fence_enable_signaling, |
189 | .signaled = i915_fence_signaled, |
190 | .wait = i915_fence_wait, |
191 | .release = i915_fence_release, |
192 | }; |
193 | |
194 | static void irq_execute_cb(struct irq_work *wrk) |
195 | { |
196 | struct execute_cb *cb = container_of(wrk, typeof(*cb), work); |
197 | |
198 | i915_sw_fence_complete(cb->fence); |
199 | #ifdef __linux__ |
200 | kmem_cache_free(slab_execute_cbs, cb); |
201 | #else |
202 | pool_put(&slab_execute_cbs, cb); |
203 | #endif |
204 | } |
205 | |
206 | static __always_inline void |
207 | __notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) |
208 | { |
209 | struct execute_cb *cb, *cn; |
210 | |
211 | if (llist_empty(&rq->execute_cb)) |
212 | return; |
213 | |
214 | llist_for_each_entry_safe(cb, cn, |
215 | llist_del_all(&rq->execute_cb), |
216 | work.node.llist) |
217 | fn(&cb->work); |
218 | } |
219 | |
220 | static void __notify_execute_cb_irq(struct i915_request *rq) |
221 | { |
222 | __notify_execute_cb(rq, irq_work_queue); |
223 | } |
224 | |
225 | static bool irq_work_imm(struct irq_work *wrk) |
226 | { |
227 | #ifdef __linux__ |
228 | wrk->func(wrk); |
229 | #else |
230 | wrk->task.t_func(wrk); |
231 | #endif |
232 | return false; |
233 | } |
234 | |
235 | void i915_request_notify_execute_cb_imm(struct i915_request *rq) |
236 | { |
237 | __notify_execute_cb(rq, irq_work_imm); |
238 | } |
239 | |
240 | static void __i915_request_fill(struct i915_request *rq, u8 val) |
241 | { |
242 | void *vaddr = rq->ring->vaddr; |
243 | u32 head; |
244 | |
245 | head = rq->infix; |
246 | if (rq->postfix < head) { |
247 | memset(vaddr + head, val, rq->ring->size - head); |
248 | head = 0; |
249 | } |
250 | memset(vaddr + head, val, rq->postfix - head); |
251 | } |
252 | |
253 | /** |
254 | * i915_request_active_engine |
255 | * @rq: request to inspect |
256 | * @active: pointer in which to return the active engine |
257 | * |
258 | * Fills the currently active engine to the @active pointer if the request |
259 | * is active and still not completed. |
260 | * |
261 | * Returns true if request was active or false otherwise. |
262 | */ |
263 | bool |
264 | i915_request_active_engine(struct i915_request *rq, |
265 | struct intel_engine_cs **active) |
266 | { |
267 | struct intel_engine_cs *engine, *locked; |
268 | bool ret = false; |
269 | |
270 | /* |
271 | * Serialise with __i915_request_submit() so that it sees |
272 | * is-banned?, or we know the request is already inflight. |
273 | * |
274 | * Note that rq->engine is unstable, and so we double |
275 | * check that we have acquired the lock on the final engine. |
276 | */ |
277 | locked = READ_ONCE(rq->engine); |
278 | spin_lock_irq(&locked->sched_engine->lock); |
279 | while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { |
280 | spin_unlock(&locked->sched_engine->lock); |
281 | locked = engine; |
282 | spin_lock(&locked->sched_engine->lock); |
283 | } |
284 | |
285 | if (i915_request_is_active(rq)) { |
286 | if (!__i915_request_is_complete(rq)) |
287 | *active = locked; |
288 | ret = true; |
289 | } |
290 | |
291 | spin_unlock_irq(&locked->sched_engine->lock); |
292 | |
293 | return ret; |
294 | } |
295 | |
296 | static void __rq_init_watchdog(struct i915_request *rq) |
297 | { |
298 | rq->watchdog.timer.to_func = NULL; |
299 | } |
300 | |
301 | #ifdef __linux__ |
302 | |
303 | static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer) |
304 | { |
305 | struct i915_request *rq = |
306 | container_of(hrtimer, struct i915_request, watchdog.timer); |
307 | struct intel_gt *gt = rq->engine->gt; |
308 | |
309 | if (!i915_request_completed(rq)) { |
310 | if (llist_add(&rq->watchdog.link, &gt->watchdog.list)) |
311 | schedule_work(&gt->watchdog.work); |
312 | } else { |
313 | i915_request_put(rq); |
314 | } |
315 | |
316 | return HRTIMER_NORESTART; |
317 | } |
318 | |
319 | #else |
320 | |
321 | static void |
322 | __rq_watchdog_expired(void *arg) |
323 | { |
324 | struct i915_request *rq = (struct i915_request *)arg; |
325 | struct intel_gt *gt = rq->engine->gt; |
326 | |
327 | if (!i915_request_completed(rq)) { |
328 | if (llist_add(&rq->watchdog.link, &gt->watchdog.list)) |
329 | schedule_work(&gt->watchdog.work); |
330 | } else { |
331 | i915_request_put(rq); |
332 | } |
333 | } |
334 | |
335 | #endif |
336 | |
337 | static void __rq_arm_watchdog(struct i915_request *rq) |
338 | { |
339 | struct i915_request_watchdog *wdg = &rq->watchdog; |
340 | struct intel_context *ce = rq->context; |
341 | |
342 | if (!ce->watchdog.timeout_us) |
343 | return; |
344 | |
345 | i915_request_get(rq); |
346 | |
347 | #ifdef __linux__ |
348 | hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
349 | wdg->timer.function = __rq_watchdog_expired; |
350 | hrtimer_start_range_ns(&wdg->timer, |
351 | ns_to_ktime(ce->watchdog.timeout_us * |
352 | NSEC_PER_USEC), |
353 | NSEC_PER_MSEC, |
354 | HRTIMER_MODE_REL); |
355 | #else |
356 | timeout_set(&wdg->timer, __rq_watchdog_expired, rq); |
357 | timeout_add_msec(&wdg->timer, 1); |
358 | #endif |
359 | } |
360 | |
361 | static void __rq_cancel_watchdog(struct i915_request *rq) |
362 | { |
363 | struct i915_request_watchdog *wdg = &rq->watchdog; |
364 | |
365 | if (wdg->timer.to_func && hrtimer_try_to_cancel(&wdg->timer) > 0) |
366 | i915_request_put(rq); |
367 | } |
368 | |
369 | #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) |
370 | |
371 | /** |
372 | * i915_request_free_capture_list - Free a capture list |
373 | * @capture: Pointer to the first list item or NULL |
374 | * |
375 | */ |
376 | void i915_request_free_capture_list(struct i915_capture_list *capture) |
377 | { |
378 | while (capture) { |
379 | struct i915_capture_list *next = capture->next; |
380 | |
381 | i915_vma_resource_put(capture->vma_res); |
382 | kfree(capture); |
383 | capture = next; |
384 | } |
385 | } |
386 | |
387 | #define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list) |
388 | |
389 | #define clear_capture_list(_rq) ((_rq)->capture_list = NULL) |
390 | |
391 | #else |
392 | |
393 | #define i915_request_free_capture_list(_a) do {} while (0) |
394 | |
395 | #define assert_capture_list_is_null(_a) do {} while (0) |
396 | |
397 | #define clear_capture_list(_rq) do {} while (0) |
398 | |
399 | #endif |
400 | |
401 | bool i915_request_retire(struct i915_request *rq) |
402 | { |
403 | if (!__i915_request_is_complete(rq)) |
404 | return false; |
405 | |
406 | RQ_TRACE(rq, "\n"); |
407 | |
408 | GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); |
409 | trace_i915_request_retire(rq); |
410 | i915_request_mark_complete(rq); |
411 | |
412 | __rq_cancel_watchdog(rq); |
413 | |
414 | /* |
415 | * We know the GPU must have read the request to have |
416 | * sent us the seqno + interrupt, so use the position |
417 | * of tail of the request to update the last known position |
418 | * of the GPU head. |
419 | * |
420 | * Note this requires that we are always called in request |
421 | * completion order. |
422 | */ |
423 | GEM_BUG_ON(!list_is_first(&rq->link, |
424 | &i915_request_timeline(rq)->requests)); |
425 | if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) |
426 | /* Poison before we release our space in the ring */ |
427 | __i915_request_fill(rq, POISON_FREE); |
428 | rq->ring->head = rq->postfix; |
429 | |
430 | if (!i915_request_signaled(rq)) { |
431 | spin_lock_irq(&rq->lock); |
432 | dma_fence_signal_locked(&rq->fence); |
433 | spin_unlock_irq(&rq->lock); |
434 | } |
435 | |
436 | if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) |
437 | intel_rps_dec_waiters(&rq->engine->gt->rps); |
438 | |
439 | /* |
440 | * We only loosely track inflight requests across preemption, |
441 | * and so we may find ourselves attempting to retire a _completed_ |
442 | * request that we have removed from the HW and put back on a run |
443 | * queue. |
444 | * |
445 | * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be |
446 | * after removing the breadcrumb and signaling it, so that we do not |
447 | * inadvertently attach the breadcrumb to a completed request. |
448 | */ |
449 | rq->engine->remove_active_request(rq); |
450 | GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
451 | |
452 | __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ |
453 | |
454 | intel_context_exit(rq->context); |
455 | intel_context_unpin(rq->context); |
456 | |
457 | i915_sched_node_fini(&rq->sched); |
458 | i915_request_put(rq); |
459 | |
460 | return true; |
461 | } |
462 | |
463 | void i915_request_retire_upto(struct i915_request *rq) |
464 | { |
465 | struct intel_timeline * const tl = i915_request_timeline(rq); |
466 | struct i915_request *tmp; |
467 | |
468 | RQ_TRACE(rq, "\n"); |
469 | GEM_BUG_ON(!__i915_request_is_complete(rq)); |
470 | |
471 | do { |
472 | tmp = list_first_entry(&tl->requests, typeof(*tmp), link); |
473 | GEM_BUG_ON(!i915_request_completed(tmp)); |
474 | } while (i915_request_retire(tmp) && tmp != rq); |
475 | } |
476 | |
477 | static struct i915_request * const * |
478 | __engine_active(struct intel_engine_cs *engine) |
479 | { |
480 | return READ_ONCE(engine->execlists.active); |
481 | } |
482 | |
483 | static bool __request_in_flight(const struct i915_request *signal) |
484 | { |
485 | struct i915_request * const *port, *rq; |
486 | bool inflight = false; |
487 | |
488 | if (!i915_request_is_ready(signal)) |
489 | return false; |
490 | |
491 | /* |
492 | * Even if we have unwound the request, it may still be on |
493 | * the GPU (preempt-to-busy). If that request is inside an |
494 | * unpreemptible critical section, it will not be removed. Some |
495 | * GPU functions may even be stuck waiting for the paired request |
496 | * (__await_execution) to be submitted and cannot be preempted |
497 | * until the bond is executing. |
498 | * |
499 | * As we know that there are always preemption points between |
500 | * requests, we know that only the currently executing request |
501 | * may be still active even though we have cleared the flag. |
502 | * However, we can't rely on our tracking of ELSP[0] to know |
503 | * which request is currently active and so may be stuck, as |
504 | * the tracking may be an event behind. Instead assume that |
505 | * if the context is still inflight, then it is still active |
506 | * even if the active flag has been cleared. |
507 | * |
508 | * To further complicate matters, if there is a pending promotion, the HW |
509 | * may either perform a context switch to the second inflight execlists, |
510 | * or it may switch to the pending set of execlists. In the case of the |
511 | * latter, it may send the ACK and we process the event copying the |
512 | * pending[] over top of inflight[], _overwriting_ our *active. Since |
513 | * this implies the HW is arbitrating and not stuck in *active, we do |
514 | * not worry about complete accuracy, but we do require no read/write |
515 | * tearing of the pointer [the read of the pointer must be valid, even |
516 | * as the array is being overwritten, for which we require the writes |
517 | * to avoid tearing.] |
518 | * |
519 | * Note that the read of *execlists->active may race with the promotion |
520 | * of execlists->pending[] to execlists->inflight[], overwriting |
521 | * the value at *execlists->active. This is fine. The promotion implies |
522 | * that we received an ACK from the HW, and so the context is not |
523 | * stuck -- if we do not see ourselves in *active, the inflight status |
524 | * is valid. If instead we see ourselves being copied into *active, |
525 | * we are inflight and may signal the callback. |
526 | */ |
527 | if (!intel_context_inflight(signal->context)) |
528 | return false; |
529 | |
530 | rcu_read_lock(); |
531 | for (port = __engine_active(signal->engine); |
532 | (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */ |
533 | port++) { |
534 | if (rq->context == signal->context) { |
535 | inflight = i915_seqno_passed(rq->fence.seqno, |
536 | signal->fence.seqno); |
537 | break; |
538 | } |
539 | } |
540 | rcu_read_unlock(); |
541 | |
542 | return inflight; |
543 | } |
544 | |
545 | static int |
546 | __await_execution(struct i915_request *rq, |
547 | struct i915_request *signal, |
548 | gfp_t gfp) |
549 | { |
550 | struct execute_cb *cb; |
551 | |
552 | if (i915_request_is_active(signal)) |
553 | return 0; |
554 | |
555 | #ifdef __linux__ |
556 | cb = kmem_cache_alloc(slab_execute_cbs, gfp); |
557 | #else |
558 | cb = pool_get(&slab_execute_cbs, |
559 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
560 | #endif |
561 | if (!cb) |
562 | return -ENOMEM; |
563 | |
564 | cb->fence = &rq->submit; |
565 | i915_sw_fence_await(cb->fence); |
566 | init_irq_work(&cb->work, irq_execute_cb); |
567 | |
568 | /* |
569 | * Register the callback first, then see if the signaler is already |
570 | * active. This ensures that if we race with the |
571 | * __notify_execute_cb from i915_request_submit() and we are not |
572 | * included in that list, we get a second bite of the cherry and |
573 | * execute it ourselves. After this point, a future |
574 | * i915_request_submit() will notify us. |
575 | * |
576 | * In i915_request_retire() we set the ACTIVE bit on a completed |
577 | * request (then flush the execute_cb). So by registering the |
578 | * callback first, then checking the ACTIVE bit, we serialise with |
579 | * the completed/retired request. |
580 | */ |
581 | if (llist_add(&cb->work.node.llist, &signal->execute_cb)) { |
582 | if (i915_request_is_active(signal) || |
583 | __request_in_flight(signal)) |
584 | i915_request_notify_execute_cb_imm(signal); |
585 | } |
586 | |
587 | return 0; |
588 | } |
589 | |
590 | static bool fatal_error(int error) |
591 | { |
592 | switch (error) { |
593 | case 0: /* not an error! */ |
594 | case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */ |
595 | case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */ |
596 | return false; |
597 | default: |
598 | return true; |
599 | } |
600 | } |
601 | |
602 | void __i915_request_skip(struct i915_request *rq) |
603 | { |
604 | GEM_BUG_ON(!fatal_error(rq->fence.error)); |
605 | |
606 | if (rq->infix == rq->postfix) |
607 | return; |
608 | |
609 | RQ_TRACE(rq, "error: %d\n", rq->fence.error)do { const struct i915_request *rq__ = (rq); do { const struct intel_engine_cs *e__ __attribute__((__unused__)) = (rq__-> engine); do { } while (0); } while (0); } while (0); |
610 | |
611 | /* |
612 | * As this request likely depends on state from the lost |
613 | * context, clear out all the user operations leaving the |
614 | * breadcrumb at the end (so we get the fence notifications). |
615 | */ |
616 | __i915_request_fill(rq, 0); |
617 | rq->infix = rq->postfix; |
618 | } |
619 | |
620 | bool i915_request_set_error_once(struct i915_request *rq, int error) |
621 | { |
622 | int old; |
623 | |
624 | GEM_BUG_ON(!IS_ERR_VALUE((long)error)); |
625 | |
626 | if (i915_request_signaled(rq)) |
627 | return false; |
628 | |
629 | old = READ_ONCE(rq->fence.error); |
630 | do { |
631 | if (fatal_error(old)) |
632 | return false; |
633 | } while (!try_cmpxchg(&rq->fence.error, &old, error)); |
634 | |
635 | return true; |
636 | } |
637 | |
638 | struct i915_request *i915_request_mark_eio(struct i915_request *rq) |
639 | { |
640 | if (__i915_request_is_complete(rq)) |
641 | return NULL; |
642 | |
643 | GEM_BUG_ON(i915_request_signaled(rq)); |
644 | |
645 | /* As soon as the request is completed, it may be retired */ |
646 | rq = i915_request_get(rq); |
647 | |
648 | i915_request_set_error_once(rq, -EIO); |
649 | i915_request_mark_complete(rq); |
650 | |
651 | return rq; |
652 | } |
653 | |
654 | bool __i915_request_submit(struct i915_request *request) |
655 | { |
656 | struct intel_engine_cs *engine = request->engine; |
657 | bool result = false; |
658 | |
659 | RQ_TRACE(request, "\n"); |
660 | |
661 | GEM_BUG_ON(!irqs_disabled()); |
662 | lockdep_assert_held(&engine->sched_engine->lock); |
663 | |
664 | /* |
665 | * With the advent of preempt-to-busy, we frequently encounter |
666 | * requests that we have unsubmitted from HW, but left running |
667 | * until the next ack and so have completed in the meantime. On |
668 | * resubmission of that completed request, we can skip |
669 | * updating the payload, and execlists can even skip submitting |
670 | * the request. |
671 | * |
672 | * We must remove the request from the caller's priority queue, |
673 | * and the caller must only call us when the request is in their |
674 | * priority queue, under the sched_engine->lock. This ensures that the |
675 | * request has *not* yet been retired and we can safely move |
676 | * the request into the engine->active.list where it will be |
677 | * dropped upon retiring. (Otherwise if we resubmit a *retired* |
678 | * request, this would be a horrible use-after-free.) |
679 | */ |
680 | if (__i915_request_is_complete(request)) { |
681 | list_del_init(&request->sched.link); |
682 | goto active; |
683 | } |
684 | |
685 | if (unlikely(!intel_context_is_schedulable(request->context))) |
686 | i915_request_set_error_once(request, -EIO); |
687 | |
688 | if (unlikely(fatal_error(request->fence.error))) |
689 | __i915_request_skip(request); |
690 | |
691 | /* |
692 | * Are we using semaphores when the gpu is already saturated? |
693 | * |
694 | * Using semaphores incurs a cost in having the GPU poll a |
695 | * memory location, busywaiting for it to change. The continual |
696 | * memory reads can have a noticeable impact on the rest of the |
697 | * system with the extra bus traffic, stalling the cpu as it too |
698 | * tries to access memory across the bus (perf stat -e bus-cycles). |
699 | * |
700 | * If we installed a semaphore on this request and we only submit |
701 | * the request after the signaler completed, that indicates the |
702 | * system is overloaded and using semaphores at this time only |
703 | * increases the amount of work we are doing. If so, we disable |
704 | * further use of semaphores until we are idle again, whence we |
705 | * optimistically try again. |
706 | */ |
707 | if (request->sched.semaphores && |
708 | i915_sw_fence_signaled(&request->semaphore)) |
709 | engine->saturated |= request->sched.semaphores; |
710 | |
711 | engine->emit_fini_breadcrumb(request, |
712 | request->ring->vaddr + request->postfix); |
713 | |
714 | trace_i915_request_execute(request); |
715 | if (engine->bump_serial) |
716 | engine->bump_serial(engine); |
717 | else |
718 | engine->serial++; |
719 | |
720 | result = true; |
721 | |
722 | GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); |
723 | engine->add_active_request(request); |
724 | active: |
725 | clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags); |
726 | set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); |
727 | |
728 | /* |
729 | * XXX Rollback bonded-execution on __i915_request_unsubmit()? |
730 | * |
731 | * In the future, perhaps when we have an active time-slicing scheduler, |
732 | * it will be interesting to unsubmit parallel execution and remove |
733 | * busywaits from the GPU until their master is restarted. This is |
734 | * quite hairy, we have to carefully rollback the fence and do a |
735 | * preempt-to-idle cycle on the target engine, all the while the |
736 | * master execute_cb may refire. |
737 | */ |
738 | __notify_execute_cb_irq(request); |
739 | |
740 | /* We may be recursing from the signal callback of another i915 fence */ |
741 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
742 | i915_request_enable_breadcrumb(request); |
743 | |
744 | return result; |
745 | } |
746 | |
747 | void i915_request_submit(struct i915_request *request) |
748 | { |
749 | struct intel_engine_cs *engine = request->engine; |
750 | unsigned long flags; |
751 | |
752 | /* Will be called from irq-context when using foreign fences. */ |
753 | spin_lock_irqsave(&engine->sched_engine->lock, flags); |
754 | |
755 | __i915_request_submit(request); |
756 | |
757 | spin_unlock_irqrestore(&engine->sched_engine->lock, flags); |
758 | } |
759 | |
760 | void __i915_request_unsubmit(struct i915_request *request) |
761 | { |
762 | struct intel_engine_cs *engine = request->engine; |
763 | |
764 | /* |
765 | * Only unwind in reverse order, required so that the per-context list |
766 | * is kept in seqno/ring order. |
767 | */ |
768 | RQ_TRACE(request, "\n"); |
769 | |
770 | GEM_BUG_ON(!irqs_disabled()); |
771 | lockdep_assert_held(&engine->sched_engine->lock); |
772 | |
773 | /* |
774 | * Before we remove this breadcrumb from the signal list, we have |
775 | * to ensure that a concurrent dma_fence_enable_signaling() does not |
776 | * attach itself. We first mark the request as no longer active and |
777 | * make sure that is visible to other cores, and then remove the |
778 | * breadcrumb if attached. |
779 | */ |
780 | GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags)); |
781 | clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); |
782 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) |
783 | i915_request_cancel_breadcrumb(request); |
784 | |
785 | /* We've already spun, don't charge on resubmitting. */ |
786 | if (request->sched.semaphores && __i915_request_has_started(request)) |
787 | request->sched.semaphores = 0; |
788 | |
789 | /* |
790 | * We don't need to wake_up any waiters on request->execute, they |
791 | * will get woken by any other event or us re-adding this request |
792 | * to the engine timeline (__i915_request_submit()). The waiters |
793 | * should be quite adept at finding that the request now has a new |
794 | * global_seqno to the one they went to sleep on. |
795 | */ |
796 | } |
797 | |
798 | void i915_request_unsubmit(struct i915_request *request) |
799 | { |
800 | struct intel_engine_cs *engine = request->engine; |
801 | unsigned long flags; |
802 | |
803 | /* Will be called from irq-context when using foreign fences. */ |
804 | spin_lock_irqsave(&engine->sched_engine->lock, flags); |
805 | |
806 | __i915_request_unsubmit(request); |
807 | |
808 | spin_unlock_irqrestore(&engine->sched_engine->lock, flags); |
809 | } |
810 | |
811 | void i915_request_cancel(struct i915_request *rq, int error) |
812 | { |
813 | if (!i915_request_set_error_once(rq, error)) |
814 | return; |
815 | |
816 | set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); |
817 | |
818 | intel_context_cancel_request(rq->context, rq); |
819 | } |
820 | |
821 | static int |
822 | submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) |
823 | { |
824 | struct i915_request *request = |
825 | container_of(fence, typeof(*request), submit); |
826 | |
827 | switch (state) { |
828 | case FENCE_COMPLETE: |
829 | trace_i915_request_submit(request); |
830 | |
831 | if (unlikely(fence->error)) |
832 | i915_request_set_error_once(request, fence->error); |
833 | else |
834 | __rq_arm_watchdog(request); |
835 | |
836 | /* |
837 | * We need to serialize use of the submit_request() callback |
838 | * with its hotplugging performed during an emergency |
839 | * i915_gem_set_wedged(). We use the RCU mechanism to mark the |
840 | * critical section in order to force i915_gem_set_wedged() to |
841 | * wait until the submit_request() is completed before |
842 | * proceeding. |
843 | */ |
844 | rcu_read_lock(); |
845 | request->engine->submit_request(request); |
846 | rcu_read_unlock(); |
847 | break; |
848 | |
849 | case FENCE_FREE: |
850 | i915_request_put(request); |
851 | break; |
852 | } |
853 | |
854 | return NOTIFY_DONE; |
855 | } |
856 | |
857 | static int |
858 | semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) |
859 | { |
860 | struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); |
861 | |
862 | switch (state) { |
863 | case FENCE_COMPLETE: |
864 | break; |
865 | |
866 | case FENCE_FREE: |
867 | i915_request_put(rq); |
868 | break; |
869 | } |
870 | |
871 | return NOTIFY_DONE; |
872 | } |
873 | |
874 | static void retire_requests(struct intel_timeline *tl) |
875 | { |
876 | struct i915_request *rq, *rn; |
877 | |
878 | list_for_each_entry_safe(rq, rn, &tl->requests, link) |
879 | if (!i915_request_retire(rq)) |
880 | break; |
881 | } |
882 | |
883 | static void __i915_request_ctor(void *); |
884 | |
885 | static noinline struct i915_request * |
886 | request_alloc_slow(struct intel_timeline *tl, |
887 | struct i915_request **rsvd, |
888 | gfp_t gfp) |
889 | { |
890 | struct i915_request *rq; |
891 | |
892 | /* If we cannot wait, dip into our reserves */ |
893 | if (!gfpflags_allow_blocking(gfp)) { |
894 | rq = xchg(rsvd, NULL); |
895 | if (!rq) /* Use the normal failure path for one final WARN */ |
896 | goto out; |
897 | |
898 | return rq; |
899 | } |
900 | |
901 | if (list_empty(&tl->requests)) |
902 | goto out; |
903 | |
904 | /* Move our oldest request to the slab-cache (if not in use!) */ |
905 | rq = list_first_entry(&tl->requests, typeof(*rq), link); |
906 | i915_request_retire(rq); |
907 | |
908 | #ifdef __linux__ |
909 | rq = kmem_cache_alloc(slab_requests, |
910 | gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
911 | #else |
912 | rq = pool_get(&slab_requests, |
913 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
914 | if (rq) |
915 | __i915_request_ctor(rq); |
916 | #endif |
917 | if (rq) |
918 | return rq; |
919 | |
920 | /* Ratelimit ourselves to prevent oom from malicious clients */ |
921 | rq = list_last_entry(&tl->requests, typeof(*rq), link); |
922 | cond_synchronize_rcu(rq->rcustate); |
923 | |
924 | /* Retire our old requests in the hope that we free some */ |
925 | retire_requests(tl); |
926 | |
927 | out: |
928 | #ifdef __linux__ |
929 | return kmem_cache_alloc(slab_requests, gfp); |
930 | #else |
931 | rq = pool_get(&slab_requests, |
932 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
933 | if (rq) |
934 | __i915_request_ctor(rq); |
935 | return rq; |
936 | #endif |
937 | } |
938 | |
939 | static void __i915_request_ctor(void *arg) |
940 | { |
941 | struct i915_request *rq = arg; |
942 | |
943 | /* |
944 | * witness does not understand spin_lock_nested() |
945 | * order reversal in i915 with this lock |
946 | */ |
947 | mtx_init_flags(&rq->lock, IPL_TTY, NULL, MTX_NOWITNESS); |
948 | i915_sched_node_init(&rq->sched); |
949 | i915_sw_fence_init(&rq->submit, submit_notify); |
950 | i915_sw_fence_init(&rq->semaphore, semaphore_notify); |
951 | |
952 | clear_capture_list(rq); |
953 | rq->batch_res = NULL; |
954 | |
955 | init_llist_head(&rq->execute_cb); |
956 | } |
957 | |
958 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
959 | #define clear_batch_ptr(_rq) ((_rq)->batch = NULL) |
960 | #else |
961 | #define clear_batch_ptr(_a) do {} while (0) |
962 | #endif |
963 | |
964 | struct i915_request * |
965 | __i915_request_create(struct intel_context *ce, gfp_t gfp) |
966 | { |
967 | struct intel_timeline *tl = ce->timeline; |
968 | struct i915_request *rq; |
969 | u32 seqno; |
970 | int ret; |
971 | |
972 | might_alloc(gfp); |
973 | |
974 | /* Check that the caller provided an already pinned context */ |
975 | __intel_context_pin(ce); |
976 | |
977 | /* |
978 | * Beware: Dragons be flying overhead. |
979 | * |
980 | * We use RCU to look up requests in flight. The lookups may |
981 | * race with the request being allocated from the slab freelist. |
982 | * That is the request we are writing to here, may be in the process |
983 | * of being read by __i915_active_request_get_rcu(). As such, |
984 | * we have to be very careful when overwriting the contents. During |
985 | * the RCU lookup, we chase the request->engine pointer, |
986 | * read the request->global_seqno and increment the reference count. |
987 | * |
988 | * The reference count is incremented atomically. If it is zero, |
989 | * the lookup knows the request is unallocated and complete. Otherwise, |
990 | * it is either still in use, or has been reallocated and reset |
991 | * with dma_fence_init(). This increment is safe for release as we |
992 | * check that the request we have a reference to and matches the active |
993 | * request. |
994 | * |
995 | * Before we increment the refcount, we chase the request->engine |
996 | * pointer. We must not call kmem_cache_zalloc() or else we set |
997 | * that pointer to NULL and cause a crash during the lookup. If |
998 | * we see the request is completed (based on the value of the |
999 | * old engine and seqno), the lookup is complete and reports NULL. |
1000 | * If we decide the request is not completed (new engine or seqno), |
1001 | * then we grab a reference and double check that it is still the |
1002 | * active request - which it won't be and restart the lookup. |
1003 | * |
1004 | * Do not use kmem_cache_zalloc() here! |
1005 | */ |
1006 | #ifdef __linux__ |
1007 | rq = kmem_cache_alloc(slab_requests, |
1008 | gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
1009 | #else |
1010 | rq = pool_get(&slab_requests, |
1011 | (gfp & GFP_NOWAIT) ? PR_NOWAIT : PR_WAITOK); |
1012 | if (rq) |
1013 | __i915_request_ctor(rq); |
1014 | #endif |
1015 | if (unlikely(!rq)) { |
1016 | rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp); |
1017 | if (!rq) { |
1018 | ret = -ENOMEM; |
1019 | goto err_unreserve; |
1020 | } |
1021 | } |
1022 | |
1023 | rq->context = ce; |
1024 | rq->engine = ce->engine; |
1025 | rq->ring = ce->ring; |
1026 | rq->execution_mask = ce->engine->mask; |
1027 | rq->i915 = ce->engine->i915; |
1028 | |
1029 | ret = intel_timeline_get_seqno(tl, rq, &seqno); |
1030 | if (ret) |
1031 | goto err_free; |
1032 | |
1033 | dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, |
1034 | tl->fence_context, seqno); |
1035 | |
1036 | RCU_INIT_POINTER(rq->timeline, tl); |
1037 | rq->hwsp_seqno = tl->hwsp_seqno; |
1038 | GEM_BUG_ON(__i915_request_is_complete(rq)); |
1039 | |
1040 | rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ |
1041 | |
1042 | rq->guc_prio = GUC_PRIO_INIT; |
1043 | |
1044 | /* We bump the ref for the fence chain */ |
1045 | i915_sw_fence_reinit(&i915_request_get(rq)->submit); |
1046 | i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); |
1047 | |
1048 | i915_sched_node_reinit(&rq->sched); |
1049 | |
1050 | /* No zalloc, everything must be cleared after use */ |
1051 | clear_batch_ptr(rq); |
1052 | __rq_init_watchdog(rq); |
1053 | assert_capture_list_is_null(rq); |
1054 | GEM_BUG_ON(!llist_empty(&rq->execute_cb)); |
1055 | GEM_BUG_ON(rq->batch_res); |
1056 | |
1057 | /* |
1058 | * Reserve space in the ring buffer for all the commands required to |
1059 | * eventually emit this request. This is to guarantee that the |
1060 | * i915_request_add() call can't fail. Note that the reserve may need |
1061 | * to be redone if the request is not actually submitted straight |
1062 | * away, e.g. because a GPU scheduler has deferred it. |
1063 | * |
1064 | * Note that due to how we add reserved_space to intel_ring_begin() |
1065 | * we need to double our request to ensure that if we need to wrap |
1066 | * around inside i915_request_add() there is sufficient space at |
1067 | * the beginning of the ring as well. |
1068 | */ |
1069 | rq->reserved_space = |
1070 | 2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32); |
1071 | |
1072 | /* |
1073 | * Record the position of the start of the request so that |
1074 | * should we detect the updated seqno part-way through the |
1075 | * GPU processing the request, we never over-estimate the |
1076 | * position of the head. |
1077 | */ |
1078 | rq->head = rq->ring->emit; |
1079 | |
1080 | ret = rq->engine->request_alloc(rq); |
1081 | if (ret) |
1082 | goto err_unwind; |
1083 | |
1084 | rq->infix = rq->ring->emit; /* end of header; start of user payload */ |
1085 | |
1086 | intel_context_mark_active(ce); |
1087 | list_add_tail_rcu(&rq->link, &tl->requests); |
1088 | |
1089 | return rq; |
1090 | |
1091 | err_unwind: |
1092 | ce->ring->emit = rq->head; |
1093 | |
1094 | /* Make sure we didn't add ourselves to external state before freeing */ |
1095 | GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); |
1096 | GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); |
1097 | |
1098 | err_free: |
1099 | #ifdef __linux__ |
1100 | kmem_cache_free(slab_requests, rq); |
1101 | #else |
1102 | pool_put(&slab_requests, rq); |
1103 | #endif |
1104 | err_unreserve: |
1105 | intel_context_unpin(ce); |
1106 | return ERR_PTR(ret); |
1107 | } |
1108 | |
1109 | struct i915_request * |
1110 | i915_request_create(struct intel_context *ce) |
1111 | { |
1112 | struct i915_request *rq; |
1113 | struct intel_timeline *tl; |
1114 | |
1115 | tl = intel_context_timeline_lock(ce); |
1116 | if (IS_ERR(tl)) |
1117 | return ERR_CAST(tl); |
1118 | |
1119 | /* Move our oldest request to the slab-cache (if not in use!) */ |
1120 | rq = list_first_entry(&tl->requests, typeof(*rq), link); |
1121 | if (!list_is_last(&rq->link, &tl->requests)) |
1122 | i915_request_retire(rq); |
1123 | |
1124 | intel_context_enter(ce); |
1125 | rq = __i915_request_create(ce, GFP_KERNEL); |
1126 | intel_context_exit(ce); /* active reference transferred to request */ |
1127 | if (IS_ERR(rq)) |
1128 | goto err_unlock; |
1129 | |
1130 | /* Check that we do not interrupt ourselves with a new request */ |
1131 | rq->cookie = lockdep_pin_lock(&tl->mutex); |
1132 | |
1133 | return rq; |
1134 | |
1135 | err_unlock: |
1136 | intel_context_timeline_unlock(tl); |
1137 | return rq; |
1138 | } |
1139 | |
1140 | static int |
1141 | i915_request_await_start(struct i915_request *rq, struct i915_request *signal) |
1142 | { |
1143 | struct dma_fence *fence; |
1144 | int err; |
1145 | |
1146 | if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) |
1147 | return 0; |
1148 | |
1149 | if (i915_request_started(signal)) |
1150 | return 0; |
1151 | |
1152 | /* |
1153 | * The caller holds a reference on @signal, but we do not serialise |
1154 | * against it being retired and removed from the lists. |
1155 | * |
1156 | * We do not hold a reference to the request before @signal, and |
1157 | * so must be very careful to ensure that it is not _recycled_ as |
1158 | * we follow the link backwards. |
1159 | */ |
1160 | fence = NULL; |
1161 | rcu_read_lock(); |
1162 | do { |
1163 | struct list_head *pos = READ_ONCE(signal->link.prev); |
1164 | struct i915_request *prev; |
1165 | |
1166 | /* Confirm signal has not been retired, the link is valid */ |
1167 | if (unlikely(__i915_request_has_started(signal))) |
1168 | break; |
1169 | |
1170 | /* Is signal the earliest request on its timeline? */ |
1171 | if (pos == &rcu_dereference(signal->timeline)->requests) |
1172 | break; |
1173 | |
1174 | /* |
1175 | * Peek at the request before us in the timeline. That |
1176 | * request will only be valid before it is retired, so |
1177 | * after acquiring a reference to it, confirm that it is |
1178 | * still part of the signaler's timeline. |
1179 | */ |
1180 | prev = list_entry(pos, typeof(*prev), link); |
1181 | if (!i915_request_get_rcu(prev)) |
1182 | break; |
1183 | |
1184 | /* After the strong barrier, confirm prev is still attached */ |
1185 | if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) { |
1186 | i915_request_put(prev); |
1187 | break; |
1188 | } |
1189 | |
1190 | fence = &prev->fence; |
1191 | } while (0); |
1192 | rcu_read_unlock(); |
1193 | if (!fence) |
1194 | return 0; |
1195 | |
1196 | err = 0; |
1197 | if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) |
1198 | err = i915_sw_fence_await_dma_fence(&rq->submit, |
1199 | fence, 0, |
1200 | I915_FENCE_GFP); |
1201 | dma_fence_put(fence); |
1202 | |
1203 | return err; |
1204 | } |
1205 | |
1206 | static intel_engine_mask_t |
1207 | already_busywaiting(struct i915_request *rq) |
1208 | { |
1209 | /* |
1210 | * Polling a semaphore causes bus traffic, delaying other users of |
1211 | * both the GPU and CPU. We want to limit the impact on others, |
1212 | * while taking advantage of early submission to reduce GPU |
1213 | * latency. Therefore we restrict ourselves to not using more |
1214 | * than one semaphore from each source, and not using a semaphore |
1215 | * if we have detected the engine is saturated (i.e. would not be |
1216 | * submitted early and cause bus traffic reading an already passed |
1217 | * semaphore). |
1218 | * |
1219 | * See the are-we-too-late? check in __i915_request_submit(). |
1220 | */ |
1221 | return rq->sched.semaphores | READ_ONCE(rq->engine->saturated); |
1222 | } |
1223 | |
1224 | static int |
1225 | __emit_semaphore_wait(struct i915_request *to, |
1226 | struct i915_request *from, |
1227 | u32 seqno) |
1228 | { |
1229 | const int has_token = GRAPHICS_VER(to->engine->i915) >= 12; |
1230 | u32 hwsp_offset; |
1231 | int len, err; |
1232 | u32 *cs; |
1233 | |
1234 | GEM_BUG_ON(GRAPHICS_VER(to->engine->i915) < 8); |
1235 | GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); |
1236 | |
1237 | /* We need to pin the signaler's HWSP until we are finished reading. */ |
1238 | err = intel_timeline_read_hwsp(from, to, &hwsp_offset); |
1239 | if (err) |
1240 | return err; |
1241 | |
1242 | len = 4; |
1243 | if (has_token) |
1244 | len += 2; |
1245 | |
1246 | cs = intel_ring_begin(to, len); |
1247 | if (IS_ERR(cs)) |
1248 | return PTR_ERR(cs); |
1249 | |
1250 | /* |
1251 | * Using greater-than-or-equal here means we have to worry |
1252 | * about seqno wraparound. To side step that issue, we swap |
1253 | * the timeline HWSP upon wrapping, so that everyone listening |
1254 | * for the old (pre-wrap) values do not see the much smaller |
1255 | * (post-wrap) values than they were expecting (and so wait |
1256 | * forever). |
1257 | */ |
1258 | *cs++ = (MI_SEMAPHORE_WAIT | |
1259 | MI_SEMAPHORE_GLOBAL_GTT | |
1260 | MI_SEMAPHORE_POLL | |
1261 | MI_SEMAPHORE_SAD_GTE_SDD) + |
1262 | has_token; |
1263 | *cs++ = seqno; |
1264 | *cs++ = hwsp_offset; |
1265 | *cs++ = 0; |
1266 | if (has_token) { |
1267 | *cs++ = 0; |
1268 | *cs++ = MI_NOOP; |
1269 | } |
1270 | |
1271 | intel_ring_advance(to, cs); |
1272 | return 0; |
1273 | } |
1274 | |
1275 | static bool |
1276 | can_use_semaphore_wait(struct i915_request *to, struct i915_request *from) |
1277 | { |
1278 | return to->engine->gt->ggtt == from->engine->gt->ggtt; |
1279 | } |
1280 | |
1281 | static int |
1282 | emit_semaphore_wait(struct i915_request *to, |
1283 | struct i915_request *from, |
1284 | gfp_t gfp) |
1285 | { |
1286 | const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; |
1287 | struct i915_sw_fence *wait = &to->submit; |
1288 | |
1289 | if (!can_use_semaphore_wait(to, from)) |
1290 | goto await_fence; |
1291 | |
1292 | if (!intel_context_use_semaphores(to->context)) |
1293 | goto await_fence; |
1294 | |
1295 | if (i915_request_has_initial_breadcrumb(to)) |
1296 | goto await_fence; |
1297 | |
1298 | /* |
1299 | * If this or its dependents are waiting on an external fence |
1300 | * that may fail catastrophically, then we want to avoid using |
1301 | * semaphores as they bypass the fence signaling metadata, and we |
1302 | * lose the fence->error propagation. |
1303 | */ |
1304 | if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) |
1305 | goto await_fence; |
1306 | |
1307 | /* Just emit the first semaphore we see as request space is limited. */ |
1308 | if (already_busywaiting(to) & mask) |
1309 | goto await_fence; |
1310 | |
1311 | if (i915_request_await_start(to, from) < 0) |
1312 | goto await_fence; |
1313 | |
1314 | /* Only submit our spinner after the signaler is running! */ |
1315 | if (__await_execution(to, from, gfp)) |
1316 | goto await_fence; |
1317 | |
1318 | if (__emit_semaphore_wait(to, from, from->fence.seqno)) |
1319 | goto await_fence; |
1320 | |
1321 | to->sched.semaphores |= mask; |
1322 | wait = &to->semaphore; |
1323 | |
1324 | await_fence: |
1325 | return i915_sw_fence_await_dma_fence(wait, |
1326 | &from->fence, 0, |
1327 | I915_FENCE_GFP); |
1328 | } |
1329 | |
1330 | static bool intel_timeline_sync_has_start(struct intel_timeline *tl, |
1331 | struct dma_fence *fence) |
1332 | { |
1333 | return __intel_timeline_sync_is_later(tl, |
1334 | fence->context, |
1335 | fence->seqno - 1); |
1336 | } |
1337 | |
1338 | static int intel_timeline_sync_set_start(struct intel_timeline *tl, |
1339 | const struct dma_fence *fence) |
1340 | { |
1341 | return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); |
1342 | } |
1343 | |
1344 | static int |
1345 | __i915_request_await_execution(struct i915_request *to, |
1346 | struct i915_request *from) |
1347 | { |
1348 | int err; |
1349 | |
1350 | GEM_BUG_ON(intel_context_is_barrier(from->context)); |
1351 | |
1352 | /* Submit both requests at the same time */ |
1353 | err = __await_execution(to, from, I915_FENCE_GFP); |
1354 | if (err) |
1355 | return err; |
1356 | |
1357 | /* Squash repeated dependencies to the same timelines */ |
1358 | if (intel_timeline_sync_has_start(i915_request_timeline(to), |
1359 | &from->fence)) |
1360 | return 0; |
1361 | |
1362 | /* |
1363 | * Wait until the start of this request. |
1364 | * |
1365 | * The execution cb fires when we submit the request to HW. But in |
1366 | * many cases this may be long before the request itself is ready to |
1367 | * run (consider that we submit 2 requests for the same context, where |
1368 | * the request of interest is behind an indefinite spinner). So we hook |
1369 | * up to both to reduce our queues and keep the execution lag minimised |
1370 | * in the worst case, though we hope that the await_start is elided. |
1371 | */ |
1372 | err = i915_request_await_start(to, from); |
1373 | if (err < 0) |
1374 | return err; |
1375 | |
1376 | /* |
1377 | * Ensure both start together [after all semaphores in signal] |
1378 | * |
1379 | * Now that we are queued to the HW at roughly the same time (thanks |
1380 | * to the execute cb) and are ready to run at roughly the same time |
1381 | * (thanks to the await start), our signaler may still be indefinitely |
1382 | * delayed by waiting on a semaphore from a remote engine. If our |
1383 | * signaler depends on a semaphore, so indirectly do we, and we do not |
1384 | * want to start our payload until our signaler also starts theirs. |
1385 | * So we wait. |
1386 | * |
1387 | * However, there is also a second condition for which we need to wait |
1388 | * for the precise start of the signaler. Consider that the signaler |
1389 | * was submitted in a chain of requests following another context |
1390 | * (with just an ordinary intra-engine fence dependency between the |
1391 | * two). In this case the signaler is queued to HW, but not for |
1392 | * immediate execution, and so we must wait until it reaches the |
1393 | * active slot. |
1394 | */ |
1395 | if (can_use_semaphore_wait(to, from) && |
1396 | intel_engine_has_semaphores(to->engine) && |
1397 | !i915_request_has_initial_breadcrumb(to)) { |
1398 | err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); |
1399 | if (err < 0) |
1400 | return err; |
1401 | } |
1402 | |
1403 | /* Couple the dependency tree for PI on this exposed to->fence */ |
1404 | if (to->engine->sched_engine->schedule) { |
1405 | err = i915_sched_node_add_dependency(&to->sched, |
1406 | &from->sched, |
1407 | I915_DEPENDENCY_WEAK); |
1408 | if (err < 0) |
1409 | return err; |
1410 | } |
1411 | |
1412 | return intel_timeline_sync_set_start(i915_request_timeline(to), |
1413 | &from->fence); |
1414 | } |
1415 | |
1416 | static void mark_external(struct i915_request *rq) |
1417 | { |
1418 | /* |
1419 | * The downside of using semaphores is that we lose metadata passing |
1420 | * along the signaling chain. This is particularly nasty when we |
1421 | * need to pass along a fatal error such as EFAULT or EDEADLK. For |
1422 | * fatal errors we want to scrub the request before it is executed, |
1423 | * which means that we cannot preload the request onto HW and have |
1424 | * it wait upon a semaphore. |
1425 | */ |
1426 | rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; |
1427 | } |
1428 | |
1429 | static int |
1430 | __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
1431 | { |
1432 | mark_external(rq); |
1433 | return i915_sw_fence_await_dma_fence(&rq->submit, fence, |
1434 | i915_fence_context_timeout(rq->engine->i915, |
1435 | fence->context), |
1436 | I915_FENCE_GFP); |
1437 | } |
1438 | |
1439 | static int |
1440 | i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) |
1441 | { |
1442 | struct dma_fence *iter; |
1443 | int err = 0; |
1444 | |
1445 | if (!to_dma_fence_chain(fence)) |
1446 | return __i915_request_await_external(rq, fence); |
1447 | |
1448 | dma_fence_chain_for_each(iter, fence) { |
1449 | struct dma_fence_chain *chain = to_dma_fence_chain(iter); |
1450 | |
1451 | if (!dma_fence_is_i915(chain->fence)) { |
1452 | err = __i915_request_await_external(rq, iter); |
1453 | break; |
1454 | } |
1455 | |
1456 | err = i915_request_await_dma_fence(rq, chain->fence); |
1457 | if (err < 0) |
1458 | break; |
1459 | } |
1460 | |
1461 | dma_fence_put(iter); |
1462 | return err; |
1463 | } |
1464 | |
1465 | static inline bool is_parallel_rq(struct i915_request *rq) |
1466 | { |
1467 | return intel_context_is_parallel(rq->context); |
1468 | } |
1469 | |
1470 | static inline struct intel_context *request_to_parent(struct i915_request *rq) |
1471 | { |
1472 | return intel_context_to_parent(rq->context); |
1473 | } |
1474 | |
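/* |
 * The await paths below skip fences coming from requests with the same |
 * parallel-submit parent as the waiter; ordering within a parallel |
 * submission is instead handled when the requests are added to the |
 * timeline (see __i915_request_ensure_parallel_ordering()). |
 */ |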
1475 | static bool is_same_parallel_context(struct i915_request *to, |
1476 | struct i915_request *from) |
1477 | { |
1478 | if (is_parallel_rq(to)) |
1479 | return request_to_parent(to) == request_to_parent(from); |
1480 | |
1481 | return false; |
1482 | } |
1483 | |
1484 | int |
1485 | i915_request_await_execution(struct i915_request *rq, |
1486 | struct dma_fence *fence) |
1487 | { |
1488 | struct dma_fence **child = &fence; |
1489 | unsigned int nchild = 1; |
1490 | int ret; |
1491 | |
1492 | if (dma_fence_is_array(fence)) { |
1493 | struct dma_fence_array *array = to_dma_fence_array(fence); |
1494 | |
1495 | /* XXX Error for signal-on-any fence arrays */ |
1496 | |
1497 | child = array->fences; |
1498 | nchild = array->num_fences; |
1499 | GEM_BUG_ON(!nchild); |
1500 | } |
1501 | |
1502 | do { |
1503 | fence = *child++; |
1504 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
1505 | continue; |
1506 | |
1507 | if (fence->context == rq->fence.context) |
1508 | continue; |
1509 | |
1510 | /* |
1511 | * We don't squash repeated fence dependencies here as we |
1512 | * want to run our callback in all cases. |
1513 | */ |
1514 | |
1515 | if (dma_fence_is_i915(fence)) { |
1516 | if (is_same_parallel_context(rq, to_request(fence))) |
1517 | continue; |
1518 | ret = __i915_request_await_execution(rq, |
1519 | to_request(fence)); |
1520 | } else { |
1521 | ret = i915_request_await_external(rq, fence); |
1522 | } |
1523 | if (ret < 0) |
1524 | return ret; |
1525 | } while (--nchild); |
1526 | |
1527 | return 0; |
1528 | } |
1529 | |
1530 | static int |
1531 | await_request_submit(struct i915_request *to, struct i915_request *from) |
1532 | { |
1533 | /* |
1534 | * If we are waiting on a virtual engine, then it may be |
1535 | * constrained to execute on a single engine *prior* to submission. |
1536 | * When it is submitted, it will be first submitted to the virtual |
1537 | * engine and then passed to the physical engine. We cannot allow |
1538 | * the waiter to be submitted immediately to the physical engine |
1539 | * as it may then bypass the virtual request. |
1540 | */ |
1541 | if (to->engine == READ_ONCE(from->engine)) |
1542 | return i915_sw_fence_await_sw_fence_gfp(&to->submit, |
1543 | &from->submit, |
1544 | I915_FENCE_GFP); |
1545 | else |
1546 | return __i915_request_await_execution(to, from); |
1547 | } |
1548 | |
1549 | static int |
1550 | i915_request_await_request(struct i915_request *to, struct i915_request *from) |
1551 | { |
1552 | int ret; |
1553 | |
1554 | GEM_BUG_ON(to == from); |
1555 | GEM_BUG_ON(to->timeline == from->timeline); |
1556 | |
1557 | if (i915_request_completed(from)) { |
1558 | i915_sw_fence_set_error_once(&to->submit, from->fence.error); |
1559 | return 0; |
1560 | } |
1561 | |
1562 | if (to->engine->sched_engine->schedule) { |
1563 | ret = i915_sched_node_add_dependency(&to->sched, |
1564 | &from->sched, |
1565 | I915_DEPENDENCY_EXTERNAL); |
1566 | if (ret < 0) |
1567 | return ret; |
1568 | } |
1569 | |
1570 | if (!intel_engine_uses_guc(to->engine) && |
1571 | is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) |
1572 | ret = await_request_submit(to, from); |
1573 | else |
1574 | ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); |
1575 | if (ret < 0) |
1576 | return ret; |
1577 | |
1578 | return 0; |
1579 | } |
1580 | |
1581 | int |
1582 | i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) |
1583 | { |
1584 | struct dma_fence **child = &fence; |
1585 | unsigned int nchild = 1; |
1586 | int ret; |
1587 | |
1588 | /* |
1589 | * Note that if the fence-array was created in signal-on-any mode, |
1590 | * we should *not* decompose it into its individual fences. However, |
1591 | * we don't currently store which mode the fence-array is operating |
1592 | * in. Fortunately, the only user of signal-on-any is private to |
1593 | * amdgpu and we should not see any incoming fence-array from |
1594 | * sync-file being in signal-on-any mode. |
1595 | */ |
1596 | if (dma_fence_is_array(fence)) { |
1597 | struct dma_fence_array *array = to_dma_fence_array(fence); |
1598 | |
1599 | child = array->fences; |
1600 | nchild = array->num_fences; |
1601 | GEM_BUG_ON(!nchild); |
1602 | } |
1603 | |
1604 | do { |
1605 | fence = *child++; |
1606 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) |
1607 | continue; |
1608 | |
1609 | /* |
1610 | * Requests on the same timeline are explicitly ordered, along |
1611 | * with their dependencies, by i915_request_add() which ensures |
1612 | * that requests are submitted in-order through each ring. |
1613 | */ |
1614 | if (fence->context == rq->fence.context) |
1615 | continue; |
1616 | |
1617 | /* Squash repeated waits to the same timelines */ |
1618 | if (fence->context && |
1619 | intel_timeline_sync_is_later(i915_request_timeline(rq), |
1620 | fence)) |
1621 | continue; |
1622 | |
1623 | if (dma_fence_is_i915(fence)) { |
1624 | if (is_same_parallel_context(rq, to_request(fence))) |
1625 | continue; |
1626 | ret = i915_request_await_request(rq, to_request(fence)); |
1627 | } else { |
1628 | ret = i915_request_await_external(rq, fence); |
1629 | } |
1630 | if (ret < 0) |
1631 | return ret; |
1632 | |
1633 | /* Record the latest fence used against each timeline */ |
1634 | if (fence->context) |
1635 | intel_timeline_sync_set(i915_request_timeline(rq), |
1636 | fence); |
1637 | } while (--nchild); |
1638 | |
1639 | return 0; |
1640 | } |
1641 | |
1642 | /** |
1643 | * i915_request_await_deps - set this request to (async) wait upon a struct |
1644 | * i915_deps dma_fence collection |
1645 | * @rq: request we are wishing to use |
1646 | * @deps: The struct i915_deps containing the dependencies. |
1647 | * |
1648 | * Returns 0 if successful, negative error code on error. |
1649 | */ |
1650 | int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps) |
1651 | { |
1652 | int i, err; |
1653 | |
1654 | for (i = 0; i < deps->num_deps; ++i) { |
1655 | err = i915_request_await_dma_fence(rq, deps->fences[i]); |
1656 | if (err) |
1657 | return err; |
1658 | } |
1659 | |
1660 | return 0; |
1661 | } |
1662 | |
1663 | /** |
1664 | * i915_request_await_object - set this request to (async) wait upon a bo |
1665 | * @to: request we are wishing to use |
1666 | * @obj: object which may be in use on another ring. |
1667 | * @write: whether the wait is on behalf of a writer |
1668 | * |
1669 | * This code is meant to abstract object synchronization with the GPU. |
1670 | * Conceptually we serialise writes between engines inside the GPU. |
1671 | * We only allow one engine to write into a buffer at any time, but |
1672 | * multiple readers. To ensure each has a coherent view of memory, we must: |
1673 | * |
1674 | * - If there is an outstanding write request to the object, the new |
1675 | * request must wait for it to complete (either CPU or in hw, requests |
1676 | * on the same ring will be naturally ordered). |
1677 | * |
1678 | * - If we are a write request (pending_write_domain is set), the new |
1679 | * request must wait for outstanding read requests to complete. |
1680 | * |
1681 | * Returns 0 if successful, else propagates up the lower layer error. |
1682 | */ |
1683 | int |
1684 | i915_request_await_object(struct i915_request *to, |
1685 | struct drm_i915_gem_object *obj, |
1686 | bool write) |
1687 | { |
1688 | struct dma_resv_iter cursor; |
1689 | struct dma_fence *fence; |
1690 | int ret = 0; |
1691 | |
1692 | dma_resv_for_each_fence(&cursor, obj->base.resv, |
1693 | dma_resv_usage_rw(write), fence) { |
1694 | ret = i915_request_await_dma_fence(to, fence); |
1695 | if (ret) |
1696 | break; |
1697 | } |
1698 | |
1699 | return ret; |
1700 | } |
1701 | |
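/* |
 * For parallel submission, order this request after the last request |
 * submitted to the same parent (via a submit fence) rather than after |
 * the timeline's last request, and record it as the new |
 * parallel.last_rq. |
 */ |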
1702 | static struct i915_request * |
1703 | __i915_request_ensure_parallel_ordering(struct i915_request *rq, |
1704 | struct intel_timeline *timeline) |
1705 | { |
1706 | struct i915_request *prev; |
1707 | |
1708 | GEM_BUG_ON(!is_parallel_rq(rq)); |
1709 | |
1710 | prev = request_to_parent(rq)->parallel.last_rq; |
1711 | if (prev) { |
1712 | if (!__i915_request_is_complete(prev)) { |
1713 | i915_sw_fence_await_sw_fence(&rq->submit, |
1714 | &prev->submit, |
1715 | &rq->submitq); |
1716 | |
1717 | if (rq->engine->sched_engine->schedule) |
1718 | __i915_sched_node_add_dependency(&rq->sched, |
1719 | &prev->sched, |
1720 | &rq->dep, |
1721 | 0); |
1722 | } |
1723 | i915_request_put(prev); |
1724 | } |
1725 | |
1726 | request_to_parent(rq)->parallel.last_rq = i915_request_get(rq); |
1727 | |
1728 | /* |
1729 | * Users have to put a reference potentially got by |
1730 | * __i915_active_fence_set() to the returned request |
1731 | * when no longer needed |
1732 | */ |
1733 | return to_request(__i915_active_fence_set(&timeline->last_request, |
1734 | &rq->fence)); |
1735 | } |
1736 | |
1737 | static struct i915_request * |
1738 | __i915_request_ensure_ordering(struct i915_request *rq, |
1739 | struct intel_timeline *timeline) |
1740 | { |
1741 | struct i915_request *prev; |
1742 | |
1743 | GEM_BUG_ON(is_parallel_rq(rq)); |
1744 | |
1745 | prev = to_request(__i915_active_fence_set(&timeline->last_request, |
1746 | &rq->fence)); |
1747 | |
1748 | if (prev && !__i915_request_is_complete(prev)) { |
1749 | bool uses_guc = intel_engine_uses_guc(rq->engine); |
1750 | bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask | |
1751 | rq->engine->mask); |
1752 | bool same_context = prev->context == rq->context; |
1753 | |
1754 | /* |
1755 | * The requests are supposed to be kept in order. However, |
1756 | * we need to be wary in case the timeline->last_request |
1757 | * is used as a barrier for external modification to this |
1758 | * context. |
1759 | */ |
1760 | GEM_BUG_ON(same_context && |
1761 | i915_seqno_passed(prev->fence.seqno, |
1762 | rq->fence.seqno)); |
1763 | |
1764 | if ((same_context && uses_guc) || (!uses_guc && pow2)) |
1765 | i915_sw_fence_await_sw_fence(&rq->submit, |
1766 | &prev->submit, |
1767 | &rq->submitq); |
1768 | else |
1769 | __i915_sw_fence_await_dma_fence(&rq->submit, |
1770 | &prev->fence, |
1771 | &rq->dmaq); |
1772 | if (rq->engine->sched_engine->schedule) |
1773 | __i915_sched_node_add_dependency(&rq->sched, |
1774 | &prev->sched, |
1775 | &rq->dep, |
1776 | 0); |
1777 | } |
1778 | |
1779 | /* |
1780 | * Users have to put the reference to prev potentially got |
1781 | * by __i915_active_fence_set() when no longer needed |
1782 | */ |
1783 | return prev; |
1784 | } |
1785 | |
1786 | static struct i915_request * |
1787 | __i915_request_add_to_timeline(struct i915_request *rq) |
1788 | { |
1789 | struct intel_timeline *timeline = i915_request_timeline(rq); |
1790 | struct i915_request *prev; |
1791 | |
1792 | /* |
1793 | * Dependency tracking and request ordering along the timeline |
1794 | * is special cased so that we can eliminate redundant ordering |
1795 | * operations while building the request (we know that the timeline |
1796 | * itself is ordered, and here we guarantee it). |
1797 | * |
1798 | * As we know we will need to emit tracking along the timeline, |
1799 | * we embed the hooks into our request struct -- at the cost of |
1800 | * having to have specialised no-allocation interfaces (which will |
1801 | * be beneficial elsewhere). |
1802 | * |
1803 | * A second benefit to open-coding i915_request_await_request is |
1804 | * that we can apply a slight variant of the rules specialised |
1805 | * for timelines that jump between engines (such as virtual engines). |
1806 | * If we consider the case of virtual engine, we must emit a dma-fence |
1807 | * to prevent scheduling of the second request until the first is |
1808 | * complete (to maximise our greedy late load balancing) and this |
1809 | * precludes optimising to use semaphores serialisation of a single |
1810 | * timeline across engines. |
1811 | * |
1812 | * We do not order parallel submission requests on the timeline as each |
1813 | * parallel submission context has its own timeline and the ordering |
1814 | * rules for parallel requests are that they must be submitted in the |
1815 | * order received from the execbuf IOCTL. So rather than using the |
1816 | * timeline we store a pointer to last request submitted in the |
1817 | * relationship in the gem context and insert a submission fence |
1818 | * between that request and request passed into this function or |
1819 | * alternatively we use completion fence if gem context has a single |
1820 | * timeline and this is the first submission of an execbuf IOCTL. |
1821 | */ |
1822 | if (likely(!is_parallel_rq(rq))) |
1823 | prev = __i915_request_ensure_ordering(rq, timeline); |
1824 | else |
1825 | prev = __i915_request_ensure_parallel_ordering(rq, timeline); |
1826 | if (prev) |
1827 | i915_request_put(prev); |
1828 | |
1829 | /* |
1830 | * Make sure that no request gazumped us - if it was allocated after |
1831 | * our i915_request_alloc() and called __i915_request_add() before |
1832 | * us, the timeline will hold its seqno which is later than ours. |
1833 | */ |
1834 | GEM_BUG_ON(timeline->seqno != rq->fence.seqno); |
1835 | |
1836 | return prev; |
1837 | } |
1838 | |
1839 | /* |
1840 | * NB: This function is not allowed to fail. Doing so would mean that the |
1841 | * request is not being tracked for completion but the work itself is |
1842 | * going to happen on the hardware. This would be a Bad Thing(tm). |
1843 | */ |
1844 | struct i915_request *__i915_request_commit(struct i915_request *rq) |
1845 | { |
1846 | struct intel_engine_cs *engine = rq->engine; |
1847 | struct intel_ring *ring = rq->ring; |
Value stored to 'ring' during its initialization is never read | |
1848 | u32 *cs; |
1849 | |
1850 | RQ_TRACE(rq, "\n"); |
1851 | |
1852 | /* |
1853 | * To ensure that this call will not fail, space for its emissions |
1854 | * should already have been reserved in the ring buffer. Let the ring |
1855 | * know that it is time to use that space up. |
1856 | */ |
1857 | GEM_BUG_ON(rq->reserved_space > ring->space); |
1858 | rq->reserved_space = 0; |
1859 | rq->emitted_jiffies = jiffies; |
1860 | |
1861 | /* |
1862 | * Record the position of the start of the breadcrumb so that |
1863 | * should we detect the updated seqno part-way through the |
1864 | * GPU processing the request, we never over-estimate the |
1865 | * position of the ring's HEAD. |
1866 | */ |
1867 | cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw); |
1868 | GEM_BUG_ON(IS_ERR(cs)); |
1869 | rq->postfix = intel_ring_offset(rq, cs); |
1870 | |
1871 | return __i915_request_add_to_timeline(rq); |
1872 | } |
1873 | |
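/* |
 * Commit the semaphore and submit fences; the _bh suffix indicates this |
 * is intended to run with bottom halves disabled, as done by |
 * __i915_request_queue() below. |
 */ |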
1874 | void __i915_request_queue_bh(struct i915_request *rq) |
1875 | { |
1876 | i915_sw_fence_commit(&rq->semaphore); |
1877 | i915_sw_fence_commit(&rq->submit); |
1878 | } |
1879 | |
1880 | void __i915_request_queue(struct i915_request *rq, |
1881 | const struct i915_sched_attr *attr) |
1882 | { |
1883 | /* |
1884 | * Let the backend know a new request has arrived that may need |
1885 | * to adjust the existing execution schedule due to a high priority |
1886 | * request - i.e. we may want to preempt the current request in order |
1887 | * to run a high priority dependency chain *before* we can execute this |
1888 | * request. |
1889 | * |
1890 | * This is called before the request is ready to run so that we can |
1891 | * decide whether to preempt the entire chain so that it is ready to |
1892 | * run at the earliest possible convenience. |
1893 | */ |
1894 | if (attr && rq->engine->sched_engine->schedule) |
1895 | rq->engine->sched_engine->schedule(rq, attr); |
1896 | |
1897 | local_bh_disable(); |
1898 | __i915_request_queue_bh(rq); |
1899 | local_bh_enable(); /* kick tasklets */ |
1900 | } |
1901 | |
1902 | void i915_request_add(struct i915_request *rq) |
1903 | { |
1904 | struct intel_timeline * const tl = i915_request_timeline(rq); |
1905 | struct i915_sched_attr attr = {}; |
1906 | struct i915_gem_context *ctx; |
1907 | |
1908 | lockdep_assert_held(&tl->mutex); |
1909 | lockdep_unpin_lock(&tl->mutex, rq->cookie); |
1910 | |
1911 | trace_i915_request_add(rq); |
1912 | __i915_request_commit(rq); |
1913 | |
1914 | /* XXX placeholder for selftests */ |
1915 | rcu_read_lock(); |
1916 | ctx = rcu_dereference(rq->context->gem_context); |
1917 | if (ctx) |
1918 | attr = ctx->sched; |
1919 | rcu_read_unlock(); |
1920 | |
1921 | __i915_request_queue(rq, &attr); |
1922 | |
1923 | mutex_unlock(&tl->mutex); |
1924 | } |
1925 | |
1926 | static unsigned long local_clock_ns(unsigned int *cpu) |
1927 | { |
1928 | unsigned long t; |
1929 | |
1930 | /* |
1931 | * Cheaply and approximately convert from nanoseconds to microseconds. |
1932 | * The result and subsequent calculations are also defined in the same |
1933 | * approximate microseconds units. The principal source of timing |
1934 | * error here is from the simple truncation. |
1935 | * |
1936 | * Note that local_clock() is only defined wrt to the current CPU; |
1937 | * the comparisons are no longer valid if we switch CPUs. Instead of |
1938 | * blocking preemption for the entire busywait, we can detect the CPU |
1939 | * switch and use that as indicator of system load and a reason to |
1940 | * stop busywaiting, see busywait_stop(). |
1941 | */ |
1942 | *cpu = get_cpu(); |
1943 | t = local_clock(); |
1944 | put_cpu(); |
1945 | |
1946 | return t; |
1947 | } |
1948 | |
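/* |
 * Stop busywaiting once the timeout expires or we have been migrated to |
 * another CPU, since local_clock() comparisons are only meaningful on |
 * the CPU they were taken on (see local_clock_ns() above). |
 */ |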
1949 | static bool busywait_stop(unsigned long timeout, unsigned int cpu) |
1950 | { |
1951 | unsigned int this_cpu; |
1952 | |
1953 | if (time_after(local_clock_ns(&this_cpu), timeout)) |
1954 | return true; |
1955 | |
1956 | return this_cpu != cpu; |
1957 | } |
1958 | |
1959 | static bool __i915_spin_request(struct i915_request * const rq, int state) |
1960 | { |
1961 | unsigned long timeout_ns; |
1962 | unsigned int cpu; |
1963 | |
1964 | /* |
1965 | * Only wait for the request if we know it is likely to complete. |
1966 | * |
1967 | * We don't track the timestamps around requests, nor the average |
1968 | * request length, so we do not have a good indicator that this |
1969 | * request will complete within the timeout. What we do know is the |
1970 | * order in which requests are executed by the context and so we can |
1971 | * tell if the request has been started. If the request is not even |
1972 | * running yet, it is a fair assumption that it will not complete |
1973 | * within our relatively short timeout. |
1974 | */ |
1975 | if (!i915_request_is_running(rq)) |
1976 | return false; |
1977 | |
1978 | /* |
1979 | * When waiting for high frequency requests, e.g. during synchronous |
1980 | * rendering split between the CPU and GPU, the finite amount of time |
1981 | * required to set up the irq and wait upon it limits the response |
1982 | * rate. By busywaiting on the request completion for a short while we |
1983 | * can service the high frequency waits as quick as possible. However, |
1984 | * if it is a slow request, we want to sleep as quickly as possible. |
1985 | * The tradeoff between waiting and sleeping is roughly the time it |
1986 | * takes to sleep on a request, on the order of a microsecond. |
1987 | */ |
1988 | |
1989 | timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns); |
1990 | timeout_ns += local_clock_ns(&cpu); |
1991 | do { |
1992 | if (dma_fence_is_signaled(&rq->fence)) |
1993 | return true; |
1994 | |
1995 | if (signal_pending_state(state, current)) |
1996 | break; |
1997 | |
1998 | if (busywait_stop(timeout_ns, cpu)) |
1999 | break; |
2000 | |
2001 | cpu_relax(); |
2002 | } while (!drm_need_resched()); |
2003 | |
2004 | return false; |
2005 | } |
2006 | |
2007 | struct request_wait { |
2008 | struct dma_fence_cb cb; |
2009 | #ifdef __linux__ |
2010 | struct task_struct *tsk; |
2011 | #else |
2012 | struct proc *tsk; |
2013 | #endif |
2014 | }; |
2015 | |
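/* |
 * dma-fence callback: wake the sleeping waiter and clear wait->tsk so |
 * that the waiter can tell the callback has already fired (see the |
 * READ_ONCE(wait.tsk) check in i915_request_wait_timeout()). |
 */ |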
2016 | static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb) |
2017 | { |
2018 | struct request_wait *wait = container_of(cb, typeof(*wait), cb); |
2019 | |
2020 | wake_up_process(fetch_and_zero(&wait->tsk)); |
2021 | } |
2022 | |
2023 | /** |
2024 | * i915_request_wait_timeout - wait until execution of request has finished |
2025 | * @rq: the request to wait upon |
2026 | * @flags: how to wait |
2027 | * @timeout: how long to wait in jiffies |
2028 | * |
2029 | * i915_request_wait_timeout() waits for the request to be completed, for a |
2030 | * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an |
2031 | * unbounded wait). |
2032 | * |
2033 | * Returns the remaining time (in jiffies) if the request completed, which may |
2034 | * be zero if the request is unfinished after the timeout expires. |
2035 | * If the timeout is 0, it will return 1 if the fence is signaled. |
2036 | * |
2037 | * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is |
2038 | * pending before the request completes. |
2039 | * |
2040 | * NOTE: This function has the same wait semantics as dma-fence. |
2041 | */ |
2042 | long i915_request_wait_timeout(struct i915_request *rq, |
2043 | unsigned int flags, |
2044 | long timeout) |
2045 | { |
2046 | const int state = flags & I915_WAIT_INTERRUPTIBLE ? |
2047 | TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; |
2048 | struct request_wait wait; |
2049 | |
2050 | might_sleep(); |
2051 | GEM_BUG_ON(timeout < 0); |
2052 | |
2053 | if (dma_fence_is_signaled(&rq->fence)) |
2054 | return timeout ?: 1; |
2055 | |
2056 | if (!timeout) |
2057 | return -ETIME; |
2058 | |
2059 | trace_i915_request_wait_begin(rq, flags); |
2060 | |
2061 | /* |
2062 | * We must never wait on the GPU while holding a lock as we |
2063 | * may need to perform a GPU reset. So while we don't need to |
2064 | * serialise wait/reset with an explicit lock, we do want |
2065 | * lockdep to detect potential dependency cycles. |
2066 | */ |
2067 | mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_); |
2068 | |
2069 | /* |
2070 | * Optimistic spin before touching IRQs. |
2071 | * |
2072 | * We may use a rather large value here to offset the penalty of |
2073 | * switching away from the active task. Frequently, the client will |
2074 | * wait upon an old swapbuffer to throttle itself to remain within a |
2075 | * frame of the gpu. If the client is running in lockstep with the gpu, |
2076 | * then it should not be waiting long at all, and a sleep now will incur |
2077 | * extra scheduler latency in producing the next frame. To try to |
2078 | * avoid adding the cost of enabling/disabling the interrupt to the |
2079 | * short wait, we first spin to see if the request would have completed |
2080 | * in the time taken to setup the interrupt. |
2081 | * |
2082 | * We need up to 5us to enable the irq, and up to 20us to hide the |
2083 | * scheduler latency of a context switch, ignoring the secondary |
2084 | * impacts from a context switch such as cache eviction. |
2085 | * |
2086 | * The scheme used for low-latency IO is called "hybrid interrupt |
2087 | * polling". The suggestion there is to sleep until just before you |
2088 | * expect to be woken by the device interrupt and then poll for its |
2089 | * completion. That requires having a good predictor for the request |
2090 | * duration, which we currently lack. |
2091 | */ |
2092 | if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT && |
2093 | __i915_spin_request(rq, state)) |
2094 | goto out; |
2095 | |
2096 | /* |
2097 | * This client is about to stall waiting for the GPU. In many cases |
2098 | * this is undesirable and limits the throughput of the system, as |
2099 | * many clients cannot continue processing user input/output whilst |
2100 | * blocked. RPS autotuning may take tens of milliseconds to respond |
2101 | * to the GPU load and thus incurs additional latency for the client. |
2102 | * We can circumvent that by promoting the GPU frequency to maximum |
2103 | * before we sleep. This makes the GPU throttle up much more quickly |
2104 | * (good for benchmarks and user experience, e.g. window animations), |
2105 | * but at a cost of spending more power processing the workload |
2106 | * (bad for battery). |
2107 | */ |
2108 | if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq)) |
2109 | intel_rps_boost(rq); |
2110 | |
2111 | #ifdef __linux__ |
2112 | wait.tsk = current; |
2113 | #else |
2114 | wait.tsk = curproc; |
2115 | #endif |
2116 | if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake)) |
2117 | goto out; |
2118 | |
2119 | /* |
2120 | * Flush the submission tasklet, but only if it may help this request. |
2121 | * |
2122 | * We sometimes experience some latency between the HW interrupts and |
2123 | * tasklet execution (mostly due to ksoftirqd latency, but it can also |
2124 | * be due to lazy CS events), so let's run the tasklet manually if there |
2125 | * is a chance it may submit this request. If the request is not ready |
2126 | * to run, as it is waiting for other fences to be signaled, flushing |
2127 | * the tasklet is busy work without any advantage for this client. |
2128 | * |
2129 | * If the HW is being lazy, this is the last chance before we go to |
2130 | * sleep to catch any pending events. We will check periodically in |
2131 | * the heartbeat to flush the submission tasklets as a last resort |
2132 | * for unhappy HW. |
2133 | */ |
2134 | if (i915_request_is_ready(rq)) |
2135 | __intel_engine_flush_submission(rq->engine, false); |
2136 | |
2137 | for (;;) { |
2138 | set_current_state(state); |
2139 | |
2140 | if (dma_fence_is_signaled(&rq->fence)) |
2141 | break; |
2142 | |
2143 | if (signal_pending_state(state, current)) { |
2144 | timeout = -ERESTARTSYS; |
2145 | break; |
2146 | } |
2147 | |
2148 | if (!timeout) { |
2149 | timeout = -ETIME; |
2150 | break; |
2151 | } |
2152 | |
2153 | timeout = io_schedule_timeout(timeout); |
2154 | } |
2155 | __set_current_state(TASK_RUNNING); |
2156 | |
2157 | if (READ_ONCE(wait.tsk)) |
2158 | dma_fence_remove_callback(&rq->fence, &wait.cb); |
2159 | GEM_BUG_ON(!list_empty(&wait.cb.node)); |
2160 | |
2161 | out: |
2162 | mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_); |
2163 | trace_i915_request_wait_end(rq); |
2164 | return timeout; |
2165 | } |
2166 | |
2167 | /** |
2168 | * i915_request_wait - wait until execution of request has finished |
2169 | * @rq: the request to wait upon |
2170 | * @flags: how to wait |
2171 | * @timeout: how long to wait in jiffies |
2172 | * |
2173 | * i915_request_wait() waits for the request to be completed, for a |
2174 | * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an |
2175 | * unbounded wait). |
2176 | * |
2177 | * Returns the remaining time (in jiffies) if the request completed, which may |
2178 | * be zero or -ETIME if the request is unfinished after the timeout expires. |
2179 | * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is |
2180 | * pending before the request completes. |
2181 | * |
2182 | * NOTE: This function behaves differently from dma-fence wait semantics for |
2183 | * timeout = 0. It returns 0 on success, and -ETIME if not signaled. |
2184 | */ |
2185 | long i915_request_wait(struct i915_request *rq, |
2186 | unsigned int flags, |
2187 | long timeout) |
2188 | { |
2189 | long ret = i915_request_wait_timeout(rq, flags, timeout); |
2190 | |
2191 | if (!ret) |
2192 | return -ETIME; |
2193 | |
2194 | if (ret > 0 && !timeout) |
2195 | return 0; |
2196 | |
2197 | return ret; |
2198 | } |
2199 | |
2200 | static int print_sched_attr(const struct i915_sched_attr *attr, |
2201 | char *buf, int x, int len) |
2202 | { |
2203 | if (attr->priority == I915_PRIORITY_INVALID) |
2204 | return x; |
2205 | |
2206 | x += snprintf(buf + x, len - x, |
2207 | " prio=%d", attr->priority); |
2208 | |
2209 | return x; |
2210 | } |
2211 | |
2212 | static char queue_status(const struct i915_request *rq) |
2213 | { |
2214 | if (i915_request_is_active(rq)) |
2215 | return 'E'; |
2216 | |
2217 | if (i915_request_is_ready(rq)) |
2218 | return intel_engine_is_virtual(rq->engine) ? 'V' : 'R'; |
2219 | |
2220 | return 'U'; |
2221 | } |
2222 | |
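/* |
 * Run status suffix: "!" completed, "*" started on HW, "&" still |
 * waiting on semaphores, otherwise blank. |
 */ |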
2223 | static const char *run_status(const struct i915_request *rq) |
2224 | { |
2225 | if (__i915_request_is_complete(rq)) |
2226 | return "!"; |
2227 | |
2228 | if (__i915_request_has_started(rq)) |
2229 | return "*"; |
2230 | |
2231 | if (!i915_sw_fence_signaled(&rq->semaphore)) |
2232 | return "&"; |
2233 | |
2234 | return ""; |
2235 | } |
2236 | |
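/* |
 * Fence status suffix: "+" signaled, "-" signaling enabled but not yet |
 * fired, otherwise blank. |
 */ |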
2237 | static const char *fence_status(const struct i915_request *rq) |
2238 | { |
2239 | if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) |
2240 | return "+"; |
2241 | |
2242 | if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) |
2243 | return "-"; |
2244 | |
2245 | return ""; |
2246 | } |
2247 | |
2248 | void i915_request_show(struct drm_printer *m, |
2249 | const struct i915_request *rq, |
2250 | const char *prefix, |
2251 | int indent) |
2252 | { |
2253 | const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence); |
2254 | char buf[80] = ""; |
2255 | int x = 0; |
2256 | |
2257 | /* |
2258 | * The prefix is used to show the queue status, for which we use |
2259 | * the following flags: |
2260 | * |
2261 | * U [Unready] |
2262 | * - initial status upon being submitted by the user |
2263 | * |
2264 | * - the request is not ready for execution as it is waiting |
2265 | * for external fences |
2266 | * |
2267 | * R [Ready] |
2268 | * - all fences the request was waiting on have been signaled, |
2269 | * and the request is now ready for execution and will be |
2270 | * in a backend queue |
2271 | * |
2272 | * - a ready request may still need to wait on semaphores |
2273 | * [internal fences] |
2274 | * |
2275 | * V [Ready/virtual] |
2276 | * - same as ready, but queued over multiple backends |
2277 | * |
2278 | * E [Executing] |
2279 | * - the request has been transferred from the backend queue and |
2280 | * submitted for execution on HW |
2281 | * |
2282 | * - a completed request may still be regarded as executing, its |
2283 | * status may not be updated until it is retired and removed |
2284 | * from the lists |
2285 | */ |
2286 | |
2287 | x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); |
2288 | |
2289 | drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n", |
2290 | prefix, indent, " ", |
2291 | queue_status(rq), |
2292 | rq->fence.context, rq->fence.seqno, |
2293 | run_status(rq), |
2294 | fence_status(rq), |
2295 | buf, |
2296 | jiffies_to_msecs(jiffies - rq->emitted_jiffies), |
2297 | name); |
2298 | } |
2299 | |
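/* |
 * Check whether the engine's RING_START register currently points at |
 * this request's ring, i.e. whether the request's context is the one |
 * loaded on that engine. |
 */ |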
2300 | static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq) |
2301 | { |
2302 | u32 ring = ENGINE_READ(engine, RING_START); |
2303 | |
2304 | return ring == i915_ggtt_offset(rq->ring->vma); |
2305 | } |
2306 | |
2307 | static bool match_ring(struct i915_request *rq) |
2308 | { |
2309 | struct intel_engine_cs *engine; |
2310 | bool found; |
2311 | int i; |
2312 | |
2313 | if (!intel_engine_is_virtual(rq->engine)) |
2314 | return engine_match_ring(rq->engine, rq); |
2315 | |
2316 | found = false; |
2317 | i = 0; |
2318 | while ((engine = intel_engine_get_sibling(rq->engine, i++))) { |
2319 | found = engine_match_ring(engine, rq); |
2320 | if (found) |
2321 | break; |
2322 | } |
2323 | |
2324 | return found; |
2325 | } |
2326 | |
2327 | enum i915_request_state i915_test_request_state(struct i915_request *rq) |
2328 | { |
2329 | if (i915_request_completed(rq)) |
2330 | return I915_REQUEST_COMPLETE; |
2331 | |
2332 | if (!i915_request_started(rq)) |
2333 | return I915_REQUEST_PENDING; |
2334 | |
2335 | if (match_ring(rq)) |
2336 | return I915_REQUEST_ACTIVE; |
2337 | |
2338 | return I915_REQUEST_QUEUED; |
2339 | } |
2340 | |
2341 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
2342 | #include "selftests/mock_request.c" |
2343 | #include "selftests/i915_request.c" |
2344 | #endif |
2345 | |
2346 | void i915_request_module_exit(void) |
2347 | { |
2348 | #ifdef __linux__ |
2349 | kmem_cache_destroy(slab_execute_cbs); |
2350 | kmem_cache_destroy(slab_requests); |
2351 | #else |
2352 | pool_destroy(&slab_execute_cbs); |
2353 | pool_destroy(&slab_requests); |
2354 | #endif |
2355 | } |
2356 | |
2357 | int __init i915_request_module_init(void) |
2358 | { |
2359 | #ifdef __linux__ |
2360 | slab_requests = |
2361 | kmem_cache_create("i915_request", |
2362 | sizeof(struct i915_request), |
2363 | __alignof__(struct i915_request), |
2364 | SLAB_HWCACHE_ALIGN | |
2365 | SLAB_RECLAIM_ACCOUNT | |
2366 | SLAB_TYPESAFE_BY_RCU, |
2367 | __i915_request_ctor); |
2368 | if (!slab_requests) |
2369 | return -ENOMEM; |
2370 | |
2371 | slab_execute_cbs = KMEM_CACHE(execute_cb, |
2372 | SLAB_HWCACHE_ALIGN | |
2373 | SLAB_RECLAIM_ACCOUNT | |
2374 | SLAB_TYPESAFE_BY_RCU); |
2375 | if (!slab_execute_cbs) |
2376 | goto err_requests; |
2377 | #else |
2378 | pool_init(&slab_requests, sizeof(struct i915_request), |
2379 | CACHELINESIZE, IPL_TTY, 0, "i915_request", NULL); |
2380 | pool_init(&slab_execute_cbs, sizeof(struct execute_cb), |
2381 | CACHELINESIZE, IPL_TTY, 0, "i915_exec", NULL); |
2382 | #endif |
2383 | |
2384 | return 0; |
2385 | |
2386 | #ifdef __linux__ |
2387 | err_requests: |
2388 | kmem_cache_destroy(slab_requests); |
2389 | return -ENOMEM; |
2390 | #endif |
2391 | } |