Bug Summary

File: dev/pci/drm/i915/gt/uc/intel_guc_submission.c
Warning: line 2579, column 20
Value stored to 'guc' during its initialization is never read
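
The statement flagged at line 2579 lies beyond the excerpt reproduced below, so only the general pattern can be shown here. The deadcode checkers enabled in the invocation below report this diagnostic when a local variable's initializer is computed but that stored value is never read before it is overwritten or the variable goes out of scope. A minimal, hypothetical sketch of the pattern and the usual fix; the names 'example' and 'consume' are illustrative, not the actual code at line 2579:

	/* Flagged pattern: the value stored to 'guc' at initialization is dead. */
	static void example(struct intel_context *ce, struct intel_guc *other)
	{
		struct intel_guc *guc = ce_to_guc(ce);	/* dead store */

		guc = other;		/* the first use of 'guc' overwrites the initializer */
		consume(guc);		/* 'consume' is a hypothetical user of the value */
	}

	/*
	 * Usual fix: drop the dead initializer (or the whole variable), provided
	 * the initializing expression has no side effects that are still needed.
	 */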

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name intel_guc_submission.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c
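
For reference, the block above is the exact cc1 invocation recorded by the OpenBSD kernel build. A much shorter, hedged sketch of running the same checker family against this translation unit (include paths copied from the flags above, everything else omitted and assumed) might look like:

	clang --analyze -Xclang -analyzer-checker=deadcode.DeadStores \
	    -I /usr/src/sys -I /usr/src/sys/dev/pci/drm/include \
	    -I /usr/src/sys/dev/pci/drm/i915 \
	    /usr/src/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c

In practice the kernel-specific -D defines and freestanding flags shown above are also required for the file to parse, so treat this as a sketch of the checker selection rather than a drop-in command.
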
1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2014 Intel Corporation
4 */
5
6#include <linux/circ_buf.h>
7
8#include "gem/i915_gem_context.h"
9#include "gt/gen8_engine_cs.h"
10#include "gt/intel_breadcrumbs.h"
11#include "gt/intel_context.h"
12#include "gt/intel_engine_heartbeat.h"
13#include "gt/intel_engine_pm.h"
14#include "gt/intel_engine_regs.h"
15#include "gt/intel_gpu_commands.h"
16#include "gt/intel_gt.h"
17#include "gt/intel_gt_clock_utils.h"
18#include "gt/intel_gt_irq.h"
19#include "gt/intel_gt_pm.h"
20#include "gt/intel_gt_regs.h"
21#include "gt/intel_gt_requests.h"
22#include "gt/intel_lrc.h"
23#include "gt/intel_lrc_reg.h"
24#include "gt/intel_mocs.h"
25#include "gt/intel_ring.h"
26
27#include "intel_guc_ads.h"
28#include "intel_guc_capture.h"
29#include "intel_guc_submission.h"
30
31#include "i915_drv.h"
32#include "i915_trace.h"
33
34/**
35 * DOC: GuC-based command submission
36 *
37 * The Scratch registers:
38 * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
39 * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
40 * triggers an interrupt on the GuC via another register write (0xC4C8).
41 * Firmware writes a success/fail code back to the action register after
42 * processing the request. The kernel driver polls waiting for this update and
43 * then proceeds.
44 *
45 * Command Transport buffers (CTBs):
46 * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host
47 * - G2H) are a message interface between the i915 and GuC.
48 *
49 * Context registration:
50 * Before a context can be submitted it must be registered with the GuC via a
51 * H2G. A unique guc_id is associated with each context. The context is either
52 * registered at request creation time (normal operation) or at submission time
53 * (abnormal operation, e.g. after a reset).
54 *
55 * Context submission:
56 * The i915 updates the LRC tail value in memory. The i915 must enable the
57 * scheduling of the context within the GuC for the GuC to actually consider it.
58 * Therefore, the first time a disabled context is submitted we use a schedule
59 * enable H2G, while follow up submissions are done via the context submit H2G,
60 * which informs the GuC that a previously enabled context has new work
61 * available.
62 *
63 * Context unpin:
64 * To unpin a context a H2G is used to disable scheduling. When the
65 * corresponding G2H returns indicating the scheduling disable operation has
66 * completed it is safe to unpin the context. While a disable is in flight it
67 * isn't safe to resubmit the context so a fence is used to stall all future
68 * requests of that context until the G2H is returned.
69 *
70 * Context deregistration:
71 * Before a context can be destroyed or if we steal its guc_id we must
72 * deregister the context with the GuC via H2G. If stealing the guc_id it isn't
73 * safe to submit anything to this guc_id until the deregister completes so a
74 * fence is used to stall all requests associated with this guc_id until the
75 * corresponding G2H returns indicating the guc_id has been deregistered.
76 *
77 * submission_state.guc_ids:
78 * Unique number associated with private GuC context data passed in during
79 * context registration / submission / deregistration. 64k available. Simple ida
80 * is used for allocation.
81 *
82 * Stealing guc_ids:
83 * If no guc_ids are available they can be stolen from another context at
84 * request creation time if that context is unpinned. If a guc_id can't be found
85 * we punt this problem to the user as we believe this is near impossible to hit
86 * during normal use cases.
87 *
88 * Locking:
89 * In the GuC submission code we have 3 basic spin locks which protect
90 * everything. Details about each below.
91 *
92 * sched_engine->lock
93 * This is the submission lock for all contexts that share an i915 schedule
94 * engine (sched_engine), thus only one of the contexts which share a
95 * sched_engine can be submitting at a time. Currently only one sched_engine is
96 * used for all of GuC submission but that could change in the future.
97 *
98 * guc->submission_state.lock
99 * Global lock for GuC submission state. Protects guc_ids and destroyed contexts
100 * list.
101 *
102 * ce->guc_state.lock
103 * Protects everything under ce->guc_state. Ensures that a context is in the
104 * correct state before issuing a H2G. e.g. We don't issue a schedule disable
105 * on a disabled context (bad idea), we don't issue a schedule enable when a
106 * schedule disable is in flight, etc... Also protects list of inflight requests
107 * on the context and the priority management state. Lock is individual to each
108 * context.
109 *
110 * Lock ordering rules:
111 * sched_engine->lock -> ce->guc_state.lock
112 * guc->submission_state.lock -> ce->guc_state.lock
113 *
114 * Reset races:
115 * When a full GT reset is triggered it is assumed that some G2H responses to
116 * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be
117 * fatal as we do certain operations upon receiving a G2H (e.g. destroy
118 * contexts, release guc_ids, etc...). When this occurs we can scrub the
119 * context state and clean up appropriately; however, this is quite racy.
120 * To avoid races, the reset code must disable submission before scrubbing for
121 * the missing G2H, while the submission code must check for submission being
122 * disabled and skip sending H2Gs and updating context states when it is. Both
123 * sides must also make sure to hold the relevant locks.
124 */
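/*
 * Illustrative aside added to this report (not part of the original file):
 * under the lock ordering rules above, ce->guc_state.lock may be taken while
 * sched_engine->lock or guc->submission_state.lock is held, never the other
 * way around. A minimal sketch of a compliant path; the function name is
 * hypothetical:
 */
static void example_guc_lock_order(struct i915_sched_engine *sched_engine,
				   struct intel_context *ce)
{
	unsigned long flags;

	spin_lock_irqsave(&sched_engine->lock, flags);	/* outer lock */
	spin_lock(&ce->guc_state.lock);			/* inner, per-context lock */
	/* ... inspect or update ce->guc_state.sched_state here ... */
	spin_unlock(&ce->guc_state.lock);
	spin_unlock_irqrestore(&sched_engine->lock, flags);
}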
125
126/* GuC Virtual Engine */
127struct guc_virtual_engine {
128 struct intel_engine_cs base;
129 struct intel_context context;
130};
131
132static struct intel_context *
133guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
134 unsigned long flags);
135
136static struct intel_context *
137guc_create_parallel(struct intel_engine_cs **engines,
138 unsigned int num_siblings,
139 unsigned int width);
140
141#define GUC_REQUEST_SIZE 64 /* bytes */
142
143/*
144 * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
145 * per the GuC submission interface. A different allocation algorithm is used
146 * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
147 * partition the guc_id space. We believe the number of multi-lrc contexts in
148 * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
149 * multi-lrc.
150 */
151#define NUMBER_MULTI_LRC_GUC_ID(guc) \
152 ((guc)->submission_state.num_guc_ids / 16)
153
154/*
155 * Below is a set of functions which control the GuC scheduling state which
156 * require a lock.
157 */
158#define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0)
159#define SCHED_STATE_DESTROYED BIT(1)
160#define SCHED_STATE_PENDING_DISABLE BIT(2)
161#define SCHED_STATE_BANNED BIT(3)
162#define SCHED_STATE_ENABLED BIT(4)
163#define SCHED_STATE_PENDING_ENABLE BIT(5)
164#define SCHED_STATE_REGISTERED BIT(6)
165#define SCHED_STATE_POLICY_REQUIRED BIT(7)
166#define SCHED_STATE_BLOCKED_SHIFT 8
167#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
168#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
169
170static inline void init_sched_state(struct intel_context *ce)
171{
172 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
173 ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK(0xfff << 8);
174}
175
176__maybe_unused__attribute__((__unused__))
177static bool_Bool sched_state_is_init(struct intel_context *ce)
178{
179 /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
180 return !(ce->guc_state.sched_state &
181 ~(SCHED_STATE_BLOCKED_MASK(0xfff << 8) | SCHED_STATE_REGISTERED(1UL << (6))));
182}
183
184static inline bool_Bool
185context_wait_for_deregister_to_register(struct intel_context *ce)
186{
187 return ce->guc_state.sched_state &
188 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER(1UL << (0));
189}
190
191static inline void
192set_context_wait_for_deregister_to_register(struct intel_context *ce)
193{
194 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
195 ce->guc_state.sched_state |=
196 SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER(1UL << (0));
197}
198
199static inline void
200clr_context_wait_for_deregister_to_register(struct intel_context *ce)
201{
202 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
203 ce->guc_state.sched_state &=
204 ~SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER(1UL << (0));
205}
206
207static inline bool_Bool
208context_destroyed(struct intel_context *ce)
209{
210 return ce->guc_state.sched_state & SCHED_STATE_DESTROYED(1UL << (1));
211}
212
213static inline void
214set_context_destroyed(struct intel_context *ce)
215{
216 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
217 ce->guc_state.sched_state |= SCHED_STATE_DESTROYED(1UL << (1));
218}
219
220static inline bool_Bool context_pending_disable(struct intel_context *ce)
221{
222 return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE(1UL << (2));
223}
224
225static inline void set_context_pending_disable(struct intel_context *ce)
226{
227 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
228 ce->guc_state.sched_state |= SCHED_STATE_PENDING_DISABLE(1UL << (2));
229}
230
231static inline void clr_context_pending_disable(struct intel_context *ce)
232{
233 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
234 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_DISABLE(1UL << (2));
235}
236
237static inline bool_Bool context_banned(struct intel_context *ce)
238{
239 return ce->guc_state.sched_state & SCHED_STATE_BANNED(1UL << (3));
240}
241
242static inline void set_context_banned(struct intel_context *ce)
243{
244 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
245 ce->guc_state.sched_state |= SCHED_STATE_BANNED(1UL << (3));
246}
247
248static inline void clr_context_banned(struct intel_context *ce)
249{
250 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
251 ce->guc_state.sched_state &= ~SCHED_STATE_BANNED(1UL << (3));
252}
253
254static inline bool_Bool context_enabled(struct intel_context *ce)
255{
256 return ce->guc_state.sched_state & SCHED_STATE_ENABLED(1UL << (4));
257}
258
259static inline void set_context_enabled(struct intel_context *ce)
260{
261 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
262 ce->guc_state.sched_state |= SCHED_STATE_ENABLED(1UL << (4));
263}
264
265static inline void clr_context_enabled(struct intel_context *ce)
266{
267 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
268 ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED(1UL << (4));
269}
270
271static inline bool_Bool context_pending_enable(struct intel_context *ce)
272{
273 return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE(1UL << (5));
274}
275
276static inline void set_context_pending_enable(struct intel_context *ce)
277{
278 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
279 ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE(1UL << (5));
280}
281
282static inline void clr_context_pending_enable(struct intel_context *ce)
283{
284 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
285 ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE(1UL << (5));
286}
287
288static inline bool_Bool context_registered(struct intel_context *ce)
289{
290 return ce->guc_state.sched_state & SCHED_STATE_REGISTERED(1UL << (6));
291}
292
293static inline void set_context_registered(struct intel_context *ce)
294{
295 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
296 ce->guc_state.sched_state |= SCHED_STATE_REGISTERED(1UL << (6));
297}
298
299static inline void clr_context_registered(struct intel_context *ce)
300{
301 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
302 ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED(1UL << (6));
303}
304
305static inline bool_Bool context_policy_required(struct intel_context *ce)
306{
307 return ce->guc_state.sched_state & SCHED_STATE_POLICY_REQUIRED(1UL << (7));
308}
309
310static inline void set_context_policy_required(struct intel_context *ce)
311{
312 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
313 ce->guc_state.sched_state |= SCHED_STATE_POLICY_REQUIRED(1UL << (7));
314}
315
316static inline void clr_context_policy_required(struct intel_context *ce)
317{
318 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
319 ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED(1UL << (7));
320}
321
322static inline u32 context_blocked(struct intel_context *ce)
323{
324 return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK(0xfff << 8)) >>
325 SCHED_STATE_BLOCKED_SHIFT8;
326}
327
328static inline void incr_context_blocked(struct intel_context *ce)
329{
330 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
331
332 ce->guc_state.sched_state += SCHED_STATE_BLOCKED(1UL << (8));
333
334 GEM_BUG_ON(!context_blocked(ce))((void)0); /* Overflow check */
335}
336
337static inline void decr_context_blocked(struct intel_context *ce)
338{
339 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
340
341 GEM_BUG_ON(!context_blocked(ce))((void)0); /* Underflow check */
342
343 ce->guc_state.sched_state -= SCHED_STATE_BLOCKED(1UL << (8));
344}
345
346static inline bool_Bool context_has_committed_requests(struct intel_context *ce)
347{
348 return !!ce->guc_state.number_committed_requests;
349}
350
351static inline void incr_context_committed_requests(struct intel_context *ce)
352{
353 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
354 ++ce->guc_state.number_committed_requests;
355 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0)((void)0);
356}
357
358static inline void decr_context_committed_requests(struct intel_context *ce)
359{
360 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
361 --ce->guc_state.number_committed_requests;
362 GEM_BUG_ON(ce->guc_state.number_committed_requests < 0)((void)0);
363}
364
365static struct intel_context *
366request_to_scheduling_context(struct i915_request *rq)
367{
368 return intel_context_to_parent(rq->context);
369}
370
371static inline bool_Bool context_guc_id_invalid(struct intel_context *ce)
372{
373 return ce->guc_id.id == GUC_INVALID_CONTEXT_ID65535;
374}
375
376static inline void set_context_guc_id_invalid(struct intel_context *ce)
377{
378 ce->guc_id.id = GUC_INVALID_CONTEXT_ID65535;
379}
380
381static inline struct intel_guc *ce_to_guc(struct intel_context *ce)
382{
383 return &ce->engine->gt->uc.guc;
384}
385
386static inline struct i915_priolist *to_priolist(struct rb_node *rb)
387{
388 return rb_entry(rb, struct i915_priolist, node);
389}
390
391/*
392 * When using multi-lrc submission a scratch memory area is reserved in the
393 * parent's context state for the process descriptor, work queue, and handshake
394 * between the parent + children contexts to insert safe preemption points
395 * between each of the BBs. Currently the scratch area is sized to a page.
396 *
397 * The layout of this scratch area is below:
398 * 0 guc_process_desc
399 * + sizeof(struct guc_process_desc) child go
400 * + CACHELINE_BYTES child join[0]
401 * ...
402 * + CACHELINE_BYTES child join[n - 1]
403 * ... unused
404 * PARENT_SCRATCH_SIZE / 2 work queue start
405 * ... work queue
406 * PARENT_SCRATCH_SIZE - 1 work queue end
407 */
408#define WQ_SIZE (PARENT_SCRATCH_SIZE / 2)
409#define WQ_OFFSET (PARENT_SCRATCH_SIZE - WQ_SIZE)
410
411struct sync_semaphore {
412 u32 semaphore;
413 u8 unused[CACHELINE_BYTES64 - sizeof(u32)];
414};
415
416struct parent_scratch {
417 union guc_descs {
418 struct guc_sched_wq_desc wq_desc;
419 struct guc_process_desc_v69 pdesc;
420 } descs;
421
422 struct sync_semaphore go;
423 struct sync_semaphore join[MAX_ENGINE_INSTANCE8 + 1];
424
425 u8 unused[WQ_OFFSET((1 << 12) - ((1 << 12) / 2)) - sizeof(union guc_descs) -
426 sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE8 + 2)];
427
428 u32 wq[WQ_SIZE((1 << 12) / 2) / sizeof(u32)];
429};
430
431static u32 __get_parent_scratch_offset(struct intel_context *ce)
432{
433 GEM_BUG_ON(!ce->parallel.guc.parent_page)((void)0);
434
435 return ce->parallel.guc.parent_page * PAGE_SIZE(1 << 12);
436}
437
438static u32 __get_wq_offset(struct intel_context *ce)
439{
440 BUILD_BUG_ON(offsetof(struct parent_scratch, wq) != WQ_OFFSET);
441
442 return __get_parent_scratch_offset(ce) + WQ_OFFSET;
443}
444
445static struct parent_scratch *
446__get_parent_scratch(struct intel_context *ce)
447{
448 BUILD_BUG_ON(sizeof(struct parent_scratch) != PARENT_SCRATCH_SIZE);
449 BUILD_BUG_ON(sizeof(struct sync_semaphore) != CACHELINE_BYTES);
450
451 /*
452 * Need to subtract LRC_STATE_OFFSET here as the
453 * parallel.guc.parent_page is the offset into ce->state while
454 * ce->lrc_reg_reg is ce->state + LRC_STATE_OFFSET.
455 */
456 return (struct parent_scratch *)
457 (ce->lrc_reg_state +
458 ((__get_parent_scratch_offset(ce) -
459 LRC_STATE_OFFSET(((0) + (1)) * (1 << 12))) / sizeof(u32)));
460}
461
462static struct guc_process_desc_v69 *
463__get_process_desc_v69(struct intel_context *ce)
464{
465 struct parent_scratch *ps = __get_parent_scratch(ce);
466
467 return &ps->descs.pdesc;
468}
469
470static struct guc_sched_wq_desc *
471__get_wq_desc_v70(struct intel_context *ce)
472{
473 struct parent_scratch *ps = __get_parent_scratch(ce);
474
475 return &ps->descs.wq_desc;
476}
477
478static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
479{
480 /*
481 * Check for space in work queue. Caching a value of head pointer in
482 * intel_context structure in order to reduce the number of accesses to shared
483 * GPU memory which may be across a PCIe bus.
484 */
485#define AVAILABLE_SPACE \
486 CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
487 if (wqi_size > AVAILABLE_SPACE) {
488 ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
489
490 if (wqi_size > AVAILABLE_SPACE)
491 return NULL((void *)0);
492 }
493#undef AVAILABLE_SPACE
494
495 return &__get_parent_scratch(ce)->wq[ce->parallel.guc.wqi_tail / sizeof(u32)];
496}
497
498static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
499{
500 struct intel_context *ce = xa_load(&guc->context_lookup, id);
501
502 GEM_BUG_ON(id >= GUC_MAX_CONTEXT_ID)((void)0);
503
504 return ce;
505}
506
507static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
508{
509 struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
510
511 if (!base)
512 return NULL((void *)0);
513
514 GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID)((void)0);
515
516 return &base[index];
517}
518
519static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
520{
521 u32 size;
522 int ret;
523
524 size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
525 GUC_MAX_CONTEXT_ID);
526 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
527 (void **)&guc->lrc_desc_pool_vaddr_v69);
528 if (ret)
529 return ret;
530
531 return 0;
532}
533
534static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
535{
536 if (!guc->lrc_desc_pool_vaddr_v69)
537 return;
538
539 guc->lrc_desc_pool_vaddr_v69 = NULL((void *)0);
540 i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP(1UL << (0)));
541}
542
543static inline bool_Bool guc_submission_initialized(struct intel_guc *guc)
544{
545 return guc->submission_initialized;
546}
547
548static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
549{
550 struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
551
552 if (desc)
553 memset(desc, 0, sizeof(*desc))__builtin_memset((desc), (0), (sizeof(*desc)));
554}
555
556static inline bool_Bool ctx_id_mapped(struct intel_guc *guc, u32 id)
557{
558 return __get_context(guc, id);
559}
560
561static inline void set_ctx_id_mapping(struct intel_guc *guc, u32 id,
562 struct intel_context *ce)
563{
564 unsigned long flags;
565
566 /*
567 * The xarray API doesn't have an xa_store_irqsave wrapper, so call the
568 * lower level functions directly.
569 */
570 xa_lock_irqsave(&guc->context_lookup, flags);
571 __xa_store(&guc->context_lookup, id, ce, GFP_ATOMIC);
572 xa_unlock_irqrestore(&guc->context_lookup, flags);
573}
574
575static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
576{
577 unsigned long flags;
578
579 if (unlikely(!guc_submission_initialized(guc))__builtin_expect(!!(!guc_submission_initialized(guc)), 0))
580 return;
581
582 _reset_lrc_desc_v69(guc, id);
583
584 /*
585 * The xarray API doesn't have an xa_erase_irqsave wrapper, so call
586 * the lower level functions directly.
587 */
588 xa_lock_irqsave(&guc->context_lookup, flags);
589 __xa_erase(&guc->context_lookup, id);
590 xa_unlock_irqrestore(&guc->context_lookup, flags);
591}
592
593static void decr_outstanding_submission_g2h(struct intel_guc *guc)
594{
595 if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
596 wake_up_all(&guc->ct.wq);
597}
598
599static int guc_submission_send_busy_loop(struct intel_guc *guc,
600 const u32 *action,
601 u32 len,
602 u32 g2h_len_dw,
603 bool_Bool loop)
604{
605 /*
606 * We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
607 * so we don't handle the case where we don't get a reply because we
608 * aborted the send due to the channel being busy.
609 */
610 GEM_BUG_ON(g2h_len_dw && !loop)((void)0);
611
612 if (g2h_len_dw)
613 atomic_inc(&guc->outstanding_submission_g2h);
614
615 return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
616}
617
618int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
619 atomic_t *wait_var,
620 bool_Bool interruptible,
621 long timeout)
622{
623 const int state = interruptible ?
624 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
625 DEFINE_WAIT(wait);
626
627 might_sleep()assertwaitok();
628 GEM_BUG_ON(timeout < 0)((void)0);
629
630 if (!atomic_read(wait_var))
631 return 0;
632
633 if (!timeout)
634 return -ETIME60;
635
636 for (;;) {
637 prepare_to_wait(&guc->ct.wq, &wait, state);
638
639 if (!atomic_read(wait_var))
640 break;
641
642 if (signal_pending_state(state, current)) {
643 timeout = -EINTR4;
644 break;
645 }
646
647 if (!timeout) {
648 timeout = -ETIME60;
649 break;
650 }
651
652 timeout = io_schedule_timeout(timeout)schedule_timeout(timeout);
653 }
654 finish_wait(&guc->ct.wq, &wait);
655
656 return (timeout < 0) ? timeout : 0;
657}
658
659int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
660{
661 if (!intel_uc_uses_guc_submission(&guc_to_gt(guc)->uc))
662 return 0;
663
664 return intel_guc_wait_for_pending_msg(guc,
665 &guc->outstanding_submission_g2h,
666 true1, timeout);
667}
668
669static int guc_context_policy_init_v70(struct intel_context *ce, bool_Bool loop);
670static int try_context_registration(struct intel_context *ce, bool_Bool loop);
671
672static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
673{
674 int err = 0;
675 struct intel_context *ce = request_to_scheduling_context(rq);
676 u32 action[3];
677 int len = 0;
678 u32 g2h_len_dw = 0;
679 bool_Bool enabled;
680
681 lockdep_assert_held(&rq->engine->sched_engine->lock);
682
683 /*
684 * Corner case where requests were sitting in the priority list or a
685 * request resubmitted after the context was banned.
686 */
687 if (unlikely(!intel_context_is_schedulable(ce))__builtin_expect(!!(!intel_context_is_schedulable(ce)), 0)) {
688 i915_request_put(i915_request_mark_eio(rq));
689 intel_engine_signal_breadcrumbs(ce->engine);
690 return 0;
691 }
692
693 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref))((void)0);
694 GEM_BUG_ON(context_guc_id_invalid(ce))((void)0);
695
696 if (context_policy_required(ce)) {
697 err = guc_context_policy_init_v70(ce, false0);
698 if (err)
699 return err;
700 }
701
702 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
703
704 /*
705 * The request / context will be run on the hardware when scheduling
706 * gets enabled in the unblock. For multi-lrc we still submit the
707 * context to move the LRC tails.
708 */
709 if (unlikely(context_blocked(ce) && !intel_context_is_parent(ce)))
710 goto out;
711
712 enabled = context_enabled(ce) || context_blocked(ce);
713
714 if (!enabled) {
715 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
716 action[len++] = ce->guc_id.id;
717 action[len++] = GUC_CONTEXT_ENABLE1;
718 set_context_pending_enable(ce);
719 intel_context_get(ce);
720 g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET2;
721 } else {
722 action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
723 action[len++] = ce->guc_id.id;
724 }
725
726 err = intel_guc_send_nb(guc, action, len, g2h_len_dw);
727 if (!enabled && !err) {
728 trace_intel_context_sched_enable(ce);
729 atomic_inc(&guc->outstanding_submission_g2h);
730 set_context_enabled(ce);
731
732 /*
733 * Without multi-lrc KMD does the submission step (moving the
734 * lrc tail) so enabling scheduling is sufficient to submit the
735 * context. This isn't the case in multi-lrc submission as the
736 * GuC needs to move the tails, hence the need for another H2G
737 * to submit a multi-lrc context after enabling scheduling.
738 */
739 if (intel_context_is_parent(ce)) {
740 action[0] = INTEL_GUC_ACTION_SCHED_CONTEXT;
741 err = intel_guc_send_nb(guc, action, len - 1, 0);
742 }
743 } else if (!enabled) {
744 clr_context_pending_enable(ce);
745 intel_context_put(ce);
746 }
747 if (likely(!err)__builtin_expect(!!(!err), 1))
748 trace_i915_request_guc_submit(rq);
749
750out:
751 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
752 return err;
753}
754
755static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
756{
757 int ret = __guc_add_request(guc, rq);
758
759 if (unlikely(ret == -EBUSY)__builtin_expect(!!(ret == -16), 0)) {
760 guc->stalled_request = rq;
761 guc->submission_stall_reason = STALL_ADD_REQUEST;
762 }
763
764 return ret;
765}
766
767static inline void guc_set_lrc_tail(struct i915_request *rq)
768{
769 rq->context->lrc_reg_state[CTX_RING_TAIL(0x06 + 1)] =
770 intel_ring_set_tail(rq->ring, rq->tail);
771}
772
773static inline int rq_prio(const struct i915_request *rq)
774{
775 return rq->sched.attr.priority;
776}
777
778static bool_Bool is_multi_lrc_rq(struct i915_request *rq)
779{
780 return intel_context_is_parallel(rq->context);
781}
782
783static bool_Bool can_merge_rq(struct i915_request *rq,
784 struct i915_request *last)
785{
786 return request_to_scheduling_context(rq) ==
787 request_to_scheduling_context(last);
788}
789
790static u32 wq_space_until_wrap(struct intel_context *ce)
791{
792 return (WQ_SIZE((1 << 12) / 2) - ce->parallel.guc.wqi_tail);
793}
794
795static void write_wqi(struct intel_context *ce, u32 wqi_size)
796{
797 BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
798
799 /*
800 * Ensure WQI are visible before updating tail
801 */
802 intel_guc_write_barrier(ce_to_guc(ce));
803
804 ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
805 (WQ_SIZE((1 << 12) / 2) - 1);
806 WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
807}
808
809static int guc_wq_noop_append(struct intel_context *ce)
810{
811 u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
812 u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
813
814 if (!wqi)
815 return -EBUSY16;
816
817 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw))((void)0);
818
819 *wqi = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
820 FIELD_PREP(WQ_LEN_MASK, len_dw);
821 ce->parallel.guc.wqi_tail = 0;
822
823 return 0;
824}
825
826static int __guc_wq_item_append(struct i915_request *rq)
827{
828 struct intel_context *ce = request_to_scheduling_context(rq);
829 struct intel_context *child;
830 unsigned int wqi_size = (ce->parallel.number_children + 4) *
831 sizeof(u32);
832 u32 *wqi;
833 u32 len_dw = (wqi_size / sizeof(u32)) - 1;
834 int ret;
835
836 /* Ensure context is in correct state updating work queue */
837 GEM_BUG_ON(!atomic_read(&ce->guc_id.ref))((void)0);
838 GEM_BUG_ON(context_guc_id_invalid(ce))((void)0);
839 GEM_BUG_ON(context_wait_for_deregister_to_register(ce))((void)0);
840 GEM_BUG_ON(!ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id))((void)0);
841
842 /* Insert NOOP if this work queue item will wrap the tail pointer. */
843 if (wqi_size > wq_space_until_wrap(ce)) {
844 ret = guc_wq_noop_append(ce);
845 if (ret)
846 return ret;
847 }
848
849 wqi = get_wq_pointer(ce, wqi_size);
850 if (!wqi)
851 return -EBUSY16;
852
853 GEM_BUG_ON(!FIELD_FIT(WQ_LEN_MASK, len_dw))((void)0);
854
855 *wqi++ = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
856 FIELD_PREP(WQ_LEN_MASK, len_dw);
857 *wqi++ = ce->lrc.lrca;
858 *wqi++ = FIELD_PREP(WQ_GUC_ID_MASK, ce->guc_id.id) |
859 FIELD_PREP(WQ_RING_TAIL_MASK, ce->ring->tail / sizeof(u64));
860 *wqi++ = 0; /* fence_id */
861 for_each_child(ce, child)
862 *wqi++ = child->ring->tail / sizeof(u64);
863
864 write_wqi(ce, wqi_size);
865
866 return 0;
867}
868
869static int guc_wq_item_append(struct intel_guc *guc,
870 struct i915_request *rq)
871{
872 struct intel_context *ce = request_to_scheduling_context(rq);
873 int ret;
874
875 if (unlikely(!intel_context_is_schedulable(ce))__builtin_expect(!!(!intel_context_is_schedulable(ce)), 0))
876 return 0;
877
878 ret = __guc_wq_item_append(rq);
879 if (unlikely(ret == -EBUSY)__builtin_expect(!!(ret == -16), 0)) {
880 guc->stalled_request = rq;
881 guc->submission_stall_reason = STALL_MOVE_LRC_TAIL;
882 }
883
884 return ret;
885}
886
887static bool_Bool multi_lrc_submit(struct i915_request *rq)
888{
889 struct intel_context *ce = request_to_scheduling_context(rq);
890
891 intel_ring_set_tail(rq->ring, rq->tail);
892
893 /*
894 * We expect the front end (execbuf IOCTL) to set this flag on the last
895 * request generated from a multi-BB submission. This indicates to the
896 * backend (GuC interface) that we should submit this context thus
897 * submitting all the requests generated in parallel.
898 */
899 return test_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL, &rq->fence.flags) ||
900 !intel_context_is_schedulable(ce);
901}
902
903static int guc_dequeue_one_context(struct intel_guc *guc)
904{
905 struct i915_sched_engine * const sched_engine = guc->sched_engine;
906 struct i915_request *last = NULL((void *)0);
907 bool_Bool submit = false0;
908 struct rb_node *rb;
909 int ret;
910
911 lockdep_assert_held(&sched_engine->lock)do { (void)(&sched_engine->lock); } while(0);
912
913 if (guc->stalled_request) {
914 submit = true1;
915 last = guc->stalled_request;
916
917 switch (guc->submission_stall_reason) {
918 case STALL_REGISTER_CONTEXT:
919 goto register_context;
920 case STALL_MOVE_LRC_TAIL:
921 goto move_lrc_tail;
922 case STALL_ADD_REQUEST:
923 goto add_request;
924 default:
925 MISSING_CASE(guc->submission_stall_reason);
926 }
927 }
928
929 while ((rb = rb_first_cached(&sched_engine->queue))) {
930 struct i915_priolist *p = to_priolist(rb);
931 struct i915_request *rq, *rn;
932
933 priolist_for_each_request_consume(rq, rn, p) {
934 if (last && !can_merge_rq(rq, last))
935 goto register_context;
936
937 list_del_init(&rq->sched.link);
938
939 __i915_request_submit(rq);
940
941 trace_i915_request_in(rq, 0);
942 last = rq;
943
944 if (is_multi_lrc_rq(rq)) {
945 /*
946 * We need to coalesce all multi-lrc requests in
947 * a relationship into a single H2G. We are
948 * guaranteed that all of these requests will be
949 * submitted sequentially.
950 */
951 if (multi_lrc_submit(rq)) {
952 submit = true1;
953 goto register_context;
954 }
955 } else {
956 submit = true1;
957 }
958 }
959
960 rb_erase_cached(&p->node, &sched_engine->queue);
961 i915_priolist_free(p);
962 }
963
964register_context:
965 if (submit) {
966 struct intel_context *ce = request_to_scheduling_context(last);
967
968 if (unlikely(!ctx_id_mapped(guc, ce->guc_id.id) &&
969 intel_context_is_schedulable(ce))) {
970 ret = try_context_registration(ce, false0);
971 if (unlikely(ret == -EPIPE)__builtin_expect(!!(ret == -32), 0)) {
972 goto deadlk;
973 } else if (ret == -EBUSY16) {
974 guc->stalled_request = last;
975 guc->submission_stall_reason =
976 STALL_REGISTER_CONTEXT;
977 goto schedule_tasklet;
978 } else if (ret != 0) {
979 GEM_WARN_ON(ret)({ __builtin_expect(!!(!!(ret)), 0); }); /* Unexpected */
980 goto deadlk;
981 }
982 }
983
984move_lrc_tail:
985 if (is_multi_lrc_rq(last)) {
986 ret = guc_wq_item_append(guc, last);
987 if (ret == -EBUSY16) {
988 goto schedule_tasklet;
989 } else if (ret != 0) {
990 GEM_WARN_ON(ret)({ __builtin_expect(!!(!!(ret)), 0); }); /* Unexpected */
991 goto deadlk;
992 }
993 } else {
994 guc_set_lrc_tail(last);
995 }
996
997add_request:
998 ret = guc_add_request(guc, last);
999 if (unlikely(ret == -EPIPE)__builtin_expect(!!(ret == -32), 0)) {
1000 goto deadlk;
1001 } else if (ret == -EBUSY16) {
1002 goto schedule_tasklet;
1003 } else if (ret != 0) {
1004 GEM_WARN_ON(ret)({ __builtin_expect(!!(!!(ret)), 0); }); /* Unexpected */
1005 goto deadlk;
1006 }
1007 }
1008
1009 guc->stalled_request = NULL((void *)0);
1010 guc->submission_stall_reason = STALL_NONE;
1011 return submit;
1012
1013deadlk:
1014 sched_engine->tasklet.callback = NULL((void *)0);
1015 tasklet_disable_nosync(&sched_engine->tasklet);
1016 return false0;
1017
1018schedule_tasklet:
1019 tasklet_schedule(&sched_engine->tasklet);
1020 return false0;
1021}
1022
1023static void guc_submission_tasklet(struct tasklet_struct *t)
1024{
1025 struct i915_sched_engine *sched_engine =
1026 from_tasklet(sched_engine, t, tasklet);
1027 unsigned long flags;
1028 bool_Bool loop;
1029
1030 spin_lock_irqsave(&sched_engine->lock, flags);
1031
1032 do {
1033 loop = guc_dequeue_one_context(sched_engine->private_data);
1034 } while (loop);
1035
1036 i915_sched_engine_reset_on_empty(sched_engine);
1037
1038 spin_unlock_irqrestore(&sched_engine->lock, flags);
1039}
1040
1041static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
1042{
1043 if (iir & GT_RENDER_USER_INTERRUPT(1 << 0))
1044 intel_engine_signal_breadcrumbs(engine);
1045}
1046
1047static void __guc_context_destroy(struct intel_context *ce);
1048static void release_guc_id(struct intel_guc *guc, struct intel_context *ce);
1049static void guc_signal_context_fence(struct intel_context *ce);
1050static void guc_cancel_context_requests(struct intel_context *ce);
1051static void guc_blocked_fence_complete(struct intel_context *ce);
1052
1053static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
1054{
1055 struct intel_context *ce;
1056 unsigned long index, flags;
1057 bool_Bool pending_disable, pending_enable, deregister, destroyed, banned;
1058
1059 xa_lock_irqsave(&guc->context_lookup, flags);
1060 xa_for_each(&guc->context_lookup, index, ce) {
1061 /*
1062 * Corner case where the ref count on the object is zero but the
1063 * deregister G2H was lost. In this case we don't touch the ref
1064 * count and finish the destroy of the context.
1065 */
1066 bool_Bool do_put = kref_get_unless_zero(&ce->ref);
1067
1068 xa_unlock(&guc->context_lookup);
1069
1070 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
1071
1072 /*
1073 * Once we are at this point submission_disabled() is guaranteed
1074 * to be visible to all callers who set the below flags (see above
1075 * flush and flushes in reset_prepare). If submission_disabled()
1076 * is set, the caller shouldn't set these flags.
1077 */
1078
1079 destroyed = context_destroyed(ce);
1080 pending_enable = context_pending_enable(ce);
1081 pending_disable = context_pending_disable(ce);
1082 deregister = context_wait_for_deregister_to_register(ce);
1083 banned = context_banned(ce);
1084 init_sched_state(ce);
1085
1086 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
1087
1088 if (pending_enable || destroyed || deregister) {
1089 decr_outstanding_submission_g2h(guc);
1090 if (deregister)
1091 guc_signal_context_fence(ce);
1092 if (destroyed) {
1093 intel_gt_pm_put_async(guc_to_gt(guc));
1094 release_guc_id(guc, ce);
1095 __guc_context_destroy(ce);
1096 }
1097 if (pending_enable || deregister)
1098 intel_context_put(ce);
1099 }
1100
1101 /* Not mutually exclusive with the above if statement. */
1102 if (pending_disable) {
1103 guc_signal_context_fence(ce);
1104 if (banned) {
1105 guc_cancel_context_requests(ce);
1106 intel_engine_signal_breadcrumbs(ce->engine);
1107 }
1108 intel_context_sched_disable_unpin(ce);
1109 decr_outstanding_submission_g2h(guc);
1110
1111 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
1112 guc_blocked_fence_complete(ce);
1113 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
1114
1115 intel_context_put(ce);
1116 }
1117
1118 if (do_put)
1119 intel_context_put(ce);
1120 xa_lock(&guc->context_lookup);
1121 }
1122 xa_unlock_irqrestore(&guc->context_lookup, flags);
1123}
1124
1125/*
1126 * GuC stores busyness stats for each engine at context in/out boundaries. A
1127 * context 'in' logs execution start time, 'out' adds in -> out delta to total.
1128 * i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
1129 * GuC.
1130 *
1131 * __i915_pmu_event_read samples engine busyness. When sampling, if context id
1132 * is valid (!= ~0) and start is non-zero, the engine is considered to be
1133 * active. For an active engine total busyness = total + (now - start), where
1134 * 'now' is the time at which the busyness is sampled. For inactive engine,
1135 * total busyness = total.
1136 *
1137 * All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
1138 *
1139 * The start and total values provided by GuC are 32 bits and wrap around in a
1140 * few minutes. Since perf pmu provides busyness as 64 bit monotonically
1141 * increasing ns values, there is a need for this implementation to account for
1142 * overflows and extend the GuC provided values to 64 bits before returning
1143 * busyness to the user. In order to do that, a worker runs periodically at
1144 * frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
1145 * 27 seconds for a gt clock frequency of 19.2 MHz).
1146 */
1147
1148#define WRAP_TIME_CLKS U32_MAX
1149#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)
1150
1151static void
1152__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
1153{
1154 u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1155 u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);
1156
1157 if (new_start == lower_32_bits(*prev_start)((u32)(*prev_start)))
1158 return;
1159
1160 /*
1161 * When gt is unparked, we update the gt timestamp and start the ping
1162 * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt
1163 * is unparked, all switched in contexts will have a start time that is
1164 * within +/- POLL_TIME_CLKS of the most recent gt_stamp.
1165 *
1166 * If neither gt_stamp nor new_start has rolled over, then the
1167 * gt_stamp_hi does not need to be adjusted, however if one of them has
1168 * rolled over, we need to adjust gt_stamp_hi accordingly.
1169 *
1170 * The below conditions address the cases of new_start rollover and
1171 * gt_stamp_last rollover respectively.
1172 */
1173 if (new_start < gt_stamp_last &&
1174 (new_start - gt_stamp_last) <= POLL_TIME_CLKS(0xffffffffU >> 3))
1175 gt_stamp_hi++;
1176
1177 if (new_start > gt_stamp_last &&
1178 (gt_stamp_last - new_start) <= POLL_TIME_CLKS(0xffffffffU >> 3) && gt_stamp_hi)
1179 gt_stamp_hi--;
1180
1181 *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
1182}
1183
1184#define record_read(map_, field_) \
1185 iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
1186
1187/*
1188 * GuC updates shared memory and KMD reads it. Since this is not synchronized,
1189 * we run into a race where the value read is inconsistent. Sometimes the
1190 * inconsistency is in reading the upper MSB bytes of the last_in value when
1191 * this race occurs. 2 types of cases are seen - upper 8 bits are zero and upper
1192 * 24 bits are zero. Since these are non-zero values, it is non-trivial to
1193 * determine validity of these values. Instead we read the values multiple times
1194 * until they are consistent. In test runs, 3 attempts result in consistent
1195 * values. The upper bound is set to 6 attempts and may need to be tuned as per
1196 * any new occurrences.
1197 */
1198static void __get_engine_usage_record(struct intel_engine_cs *engine,
1199 u32 *last_in, u32 *id, u32 *total)
1200{
1201 STUB()do { printf("%s: stub\n", __func__); } while(0);
1202#ifdef notyet
1203 struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
1204 int i = 0;
1205
1206 do {
1207 *last_in = record_read(&rec_map, last_switch_in_stamp);
1208 *id = record_read(&rec_map, current_context_index);
1209 *total = record_read(&rec_map, total_runtime);
1210
1211 if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
1212 record_read(&rec_map, current_context_index) == *id &&
1213 record_read(&rec_map, total_runtime) == *total)
1214 break;
1215 } while (++i < 6);
1216#endif
1217}
1218
1219static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
1220{
1221 struct intel_engine_guc_stats *stats = &engine->stats.guc;
1222 struct intel_guc *guc = &engine->gt->uc.guc;
1223 u32 last_switch, ctx_id, total;
1224
1225 lockdep_assert_held(&guc->timestamp.lock)do { (void)(&guc->timestamp.lock); } while(0);
1226
1227 __get_engine_usage_record(engine, &last_switch, &ctx_id, &total);
1228
1229 stats->running = ctx_id != ~0U && last_switch;
1230 if (stats->running)
1231 __extend_last_switch(guc, &stats->start_gt_clk, last_switch);
1232
1233 /*
1234 * Instead of adjusting the total for overflow, just add the
1235 * difference from previous sample stats->total_gt_clks
1236 */
1237 if (total && total != ~0U) {
1238 stats->total_gt_clks += (u32)(total - stats->prev_total);
1239 stats->prev_total = total;
1240 }
1241}
1242
1243static u32 gpm_timestamp_shift(struct intel_gt *gt)
1244{
1245 intel_wakeref_t wakeref;
1246 u32 reg, shift;
1247
1248 with_intel_runtime_pm(gt->uncore->rpm, wakeref)
1249 reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
1250
1251 shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK(0x3 << 1)) >>
1252 GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT1;
1253
1254 return 3 - shift;
1255}
1256
1257static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now)
1258{
1259 struct intel_gt *gt = guc_to_gt(guc);
1260 u32 gt_stamp_lo, gt_stamp_hi;
1261 u64 gpm_ts;
1262
1263 lockdep_assert_held(&guc->timestamp.lock)do { (void)(&guc->timestamp.lock); } while(0);
1264
1265 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
1266 gpm_ts = intel_uncore_read64_2x32(gt->uncore, MISC_STATUS0,
1267 MISC_STATUS1) >> guc->timestamp.shift;
1268 gt_stamp_lo = lower_32_bits(gpm_ts)((u32)(gpm_ts));
1269 *now = ktime_get();
1270
1271 if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)((u32)(guc->timestamp.gt_stamp)))
1272 gt_stamp_hi++;
1273
1274 guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo;
1275}
1276
1277/*
1278 * Unlike the execlist mode of submission total and active times are in terms of
1279 * gt clocks. The *now parameter is retained to return the cpu time at which the
1280 * busyness was sampled.
1281 */
1282static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
1283{
1284 struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
1285 struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
1286 struct intel_gt *gt = engine->gt;
1287 struct intel_guc *guc = &gt->uc.guc;
1288 u64 total, gt_stamp_saved;
1289 unsigned long flags;
1290 u32 reset_count;
1291 bool_Bool in_reset;
1292
1293 spin_lock_irqsave(&guc->timestamp.lock, flags);
1294
1295 /*
1296 * If a reset happened, we risk reading partially updated engine
1297 * busyness from GuC, so we just use the driver stored copy of busyness.
1298 * Synchronize with gt reset using reset_count and the
1299 * I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
1300 * after I915_RESET_BACKOFF flag, so ensure that the reset_count is
1301 * usable by checking the flag afterwards.
1302 */
1303 reset_count = i915_reset_count(gpu_error);
1304 in_reset = test_bit(I915_RESET_BACKOFF0, &gt->reset.flags);
1305
1306 *now = ktime_get();
1307
1308 /*
1309 * The active busyness depends on start_gt_clk and gt_stamp.
1310 * gt_stamp is updated by i915 only when gt is awake and the
1311 * start_gt_clk is derived from GuC state. To get a consistent
1312 * view of activity, we query the GuC state only if gt is awake.
1313 */
1314 if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
1315 stats_saved = *stats;
1316 gt_stamp_saved = guc->timestamp.gt_stamp;
1317 /*
1318 * Update gt_clks, then gt timestamp to simplify the 'gt_stamp -
1319 * start_gt_clk' calculation below for active engines.
1320 */
1321 guc_update_engine_gt_clks(engine);
1322 guc_update_pm_timestamp(guc, now);
1323 intel_gt_pm_put_async(gt);
1324 if (i915_reset_count(gpu_error) != reset_count) {
1325 *stats = stats_saved;
1326 guc->timestamp.gt_stamp = gt_stamp_saved;
1327 }
1328 }
1329
1330 total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
1331 if (stats->running) {
1332 u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;
1333
1334 total += intel_gt_clock_interval_to_ns(gt, clk);
1335 }
1336
1337 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);	/* mtx_leave() on OpenBSD */
1338
1339 return ns_to_ktime(total);
1340}
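The reset_count snapshot above is a take-sample-then-recheck pattern: if a GT reset raced with the GuC read, the partially updated sample is thrown away and the saved driver copy is kept. A toy illustration of the shape of that check (the names and the simulated reset are made up for the example):

#include <assert.h>
#include <stdint.h>

static uint32_t resets;			/* stands in for the GT reset count */
static uint64_t busy_ns;		/* driver-side copy of the busyness */

static void sample_hw(uint64_t *out)	/* pretend GuC read; may race with a reset */
{
	*out = 12345;
	resets++;			/* simulate a reset landing mid-sample */
}

int main(void)
{
	uint64_t saved = busy_ns;
	uint32_t count = resets;

	sample_hw(&busy_ns);
	if (resets != count)
		busy_ns = saved;	/* discard the possibly torn sample */

	assert(busy_ns == saved);
	return 0;
}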
1341
1342static void __reset_guc_busyness_stats(struct intel_guc *guc)
1343{
1344 struct intel_gt *gt = guc_to_gt(guc);
1345 struct intel_engine_cs *engine;
1346 enum intel_engine_id id;
1347 unsigned long flags;
1348 ktime_t unused;
1349
1350 cancel_delayed_work_sync(&guc->timestamp.work);
1351
1352 	spin_lock_irqsave(&guc->timestamp.lock, flags);
1353
1354 guc_update_pm_timestamp(guc, &unused);
1355 	for_each_engine(engine, gt, id) {
1356 guc_update_engine_gt_clks(engine);
1357 engine->stats.guc.prev_total = 0;
1358 }
1359
1360 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1361}
1362
1363static void __update_guc_busyness_stats(struct intel_guc *guc)
1364{
1365 struct intel_gt *gt = guc_to_gt(guc);
1366 struct intel_engine_cs *engine;
1367 enum intel_engine_id id;
1368 unsigned long flags;
1369 ktime_t unused;
1370
1371 guc->timestamp.last_stat_jiffies = jiffies;
1372
1373 	spin_lock_irqsave(&guc->timestamp.lock, flags);
1374
1375 guc_update_pm_timestamp(guc, &unused);
1376 	for_each_engine(engine, gt, id)
1377 guc_update_engine_gt_clks(engine);
1378
1379 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1380}
1381
1382static void guc_timestamp_ping(struct work_struct *wrk)
1383{
1384 	struct intel_guc *guc = container_of(wrk, typeof(*guc),
1385 					     timestamp.work.work);
1386 	struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
1387 struct intel_gt *gt = guc_to_gt(guc);
1388 intel_wakeref_t wakeref;
1389 int srcu, ret;
1390
1391 /*
1392 * Synchronize with gt reset to make sure the worker does not
1393 * corrupt the engine/guc stats.
1394 */
1395 ret = intel_gt_reset_trylock(gt, &srcu);
1396 if (ret)
1397 return;
1398
1399 	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
1400 __update_guc_busyness_stats(guc);
1401
1402 intel_gt_reset_unlock(gt, srcu);
1403
1404 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1405 guc->timestamp.ping_delay);
1406}
1407
1408static int guc_action_enable_usage_stats(struct intel_guc *guc)
1409{
1410 u32 offset = intel_guc_engine_usage_offset(guc);
1411 u32 action[] = {
1412 INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
1413 offset,
1414 0,
1415 };
1416
1417 return intel_guc_send(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])));
1418}
1419
1420static void guc_init_engine_stats(struct intel_guc *guc)
1421{
1422 struct intel_gt *gt = guc_to_gt(guc);
1423 intel_wakeref_t wakeref;
1424
1425 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1426 guc->timestamp.ping_delay);
1427
1428 	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
1429 		int ret = guc_action_enable_usage_stats(guc);
1430 
1431 		if (ret)
1432 			drm_err(&gt->i915->drm,
1433 				"Failed to enable usage stats: %d!\n", ret);
1434 }
1435}
1436
1437void intel_guc_busyness_park(struct intel_gt *gt)
1438{
1439 struct intel_guc *guc = &gt->uc.guc;
1440
1441 if (!guc_submission_initialized(guc))
1442 return;
1443
1444 /*
1445 * There is a race with suspend flow where the worker runs after suspend
1446 * and causes an unclaimed register access warning. Cancel the worker
1447 * synchronously here.
1448 */
1449 cancel_delayed_work_sync(&guc->timestamp.work);
1450
1451 /*
1452 * Before parking, we should sample engine busyness stats if we need to.
1453 * We can skip it if we are less than half a ping from the last time we
1454 * sampled the busyness stats.
1455 */
1456 if (guc->timestamp.last_stat_jiffies &&
1457 !time_after(jiffies, guc->timestamp.last_stat_jiffies +
1458 (guc->timestamp.ping_delay / 2)))
1459 return;
1460
1461 __update_guc_busyness_stats(guc);
1462}
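The half-ping test above is ordinary jiffies arithmetic. A standalone sketch, assuming the usual Linux definition of time_after() as a signed wraparound-safe comparison (the skip_resample helper name is made up):

#include <assert.h>
#include <stdbool.h>

/* time_after(a, b) in the Linux headers is ((long)((b) - (a)) < 0) */
static bool time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* Same test as above: skip resampling if under half a ping has elapsed. */
static bool skip_resample(unsigned long now, unsigned long last, unsigned long ping_delay)
{
	return last && !time_after(now, last + ping_delay / 2);
}

int main(void)
{
	assert(skip_resample(100, 90, 60));	/* 10 ticks elapsed, threshold is 30 */
	assert(!skip_resample(200, 90, 60));	/* 110 ticks elapsed */
	assert(!skip_resample(100, 0, 60));	/* never sampled yet */
	return 0;
}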
1463
1464void intel_guc_busyness_unpark(struct intel_gt *gt)
1465{
1466 struct intel_guc *guc = &gt->uc.guc;
1467 unsigned long flags;
1468 ktime_t unused;
1469
1470 if (!guc_submission_initialized(guc))
1471 return;
1472
1473 	spin_lock_irqsave(&guc->timestamp.lock, flags);
1474 	guc_update_pm_timestamp(guc, &unused);
1475 	spin_unlock_irqrestore(&guc->timestamp.lock, flags);
1476 mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
1477 guc->timestamp.ping_delay);
1478}
1479
1480static inline bool_Bool
1481submission_disabled(struct intel_guc *guc)
1482{
1483 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1484
1485 	return unlikely(!sched_engine ||
1486 			!__tasklet_is_enabled(&sched_engine->tasklet) ||
1487 			intel_gt_is_wedged(guc_to_gt(guc)));
1488}
1489
1490static void disable_submission(struct intel_guc *guc)
1491{
1492 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1493
1494 if (__tasklet_is_enabled(&sched_engine->tasklet)) {
1495 GEM_BUG_ON(!guc->ct.enabled)((void)0);
1496 __tasklet_disable_sync_once(&sched_engine->tasklet);
1497 sched_engine->tasklet.callback = NULL((void *)0);
1498 }
1499}
1500
1501static void enable_submission(struct intel_guc *guc)
1502{
1503 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1504 unsigned long flags;
1505
1506 	spin_lock_irqsave(&guc->sched_engine->lock, flags);
1507 sched_engine->tasklet.callback = guc_submission_tasklet;
1508 wmb()do { __asm volatile("sfence" ::: "memory"); } while (0); /* Make sure callback visible */
1509 if (!__tasklet_is_enabled(&sched_engine->tasklet) &&
1510 __tasklet_enable(&sched_engine->tasklet)) {
1511 GEM_BUG_ON(!guc->ct.enabled)((void)0);
1512
1513 /* And kick in case we missed a new request submission. */
1514 tasklet_hi_schedule(&sched_engine->tasklet);
1515 }
1516 	spin_unlock_irqrestore(&guc->sched_engine->lock, flags);
1517}
1518
1519static void guc_flush_submissions(struct intel_guc *guc)
1520{
1521 struct i915_sched_engine * const sched_engine = guc->sched_engine;
1522 unsigned long flags;
1523
1524 	spin_lock_irqsave(&sched_engine->lock, flags);
1525 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1526}
1527
1528static void guc_flush_destroyed_contexts(struct intel_guc *guc);
1529
1530void intel_guc_submission_reset_prepare(struct intel_guc *guc)
1531{
1532 if (unlikely(!guc_submission_initialized(guc))__builtin_expect(!!(!guc_submission_initialized(guc)), 0)) {
1533 /* Reset called during driver load? GuC not yet initialised! */
1534 return;
1535 }
1536
1537 intel_gt_park_heartbeats(guc_to_gt(guc));
1538 disable_submission(guc);
1539 guc->interrupts.disable(guc);
1540 __reset_guc_busyness_stats(guc);
1541
1542 /* Flush IRQ handler */
1543 spin_lock_irq(guc_to_gt(guc)->irq_lock)mtx_enter(guc_to_gt(guc)->irq_lock);
1544 spin_unlock_irq(guc_to_gt(guc)->irq_lock)mtx_leave(guc_to_gt(guc)->irq_lock);
1545
1546 guc_flush_submissions(guc);
1547 guc_flush_destroyed_contexts(guc);
1548 flush_work(&guc->ct.requests.worker);
1549
1550 scrub_guc_desc_for_outstanding_g2h(guc);
1551}
1552
1553static struct intel_engine_cs *
1554guc_virtual_get_sibling(struct intel_engine_cs *ve, unsigned int sibling)
1555{
1556 struct intel_engine_cs *engine;
1557 intel_engine_mask_t tmp, mask = ve->mask;
1558 unsigned int num_siblings = 0;
1559
1560 	for_each_engine_masked(engine, ve->gt, mask, tmp)
1561 if (num_siblings++ == sibling)
1562 return engine;
1563
1564 return NULL((void *)0);
1565}
1566
1567static inline struct intel_engine_cs *
1568__context_to_physical_engine(struct intel_context *ce)
1569{
1570 struct intel_engine_cs *engine = ce->engine;
1571
1572 if (intel_engine_is_virtual(engine))
1573 engine = guc_virtual_get_sibling(engine, 0);
1574
1575 return engine;
1576}
1577
1578static void guc_reset_state(struct intel_context *ce, u32 head, bool_Bool scrub)
1579{
1580 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
1581
1582 if (!intel_context_is_schedulable(ce))
1583 return;
1584
1585 GEM_BUG_ON(!intel_context_is_pinned(ce))((void)0);
1586
1587 /*
1588 * We want a simple context + ring to execute the breadcrumb update.
1589 * We cannot rely on the context being intact across the GPU hang,
1590 * so clear it and rebuild just what we need for the breadcrumb.
1591 * All pending requests for this context will be zapped, and any
1592 * future request will be after userspace has had the opportunity
1593 * to recreate its own state.
1594 */
1595 if (scrub)
1596 lrc_init_regs(ce, engine, true1);
1597
1598 /* Rerun the request; its payload has been neutered (if guilty). */
1599 lrc_update_regs(ce, engine, head);
1600}
1601
1602static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
1603{
1604 	if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
1605 return;
1606
1607 intel_engine_stop_cs(engine);
1608
1609 /*
1610 * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
1611 * to wait for any pending mi force wakeups
1612 */
1613 intel_engine_wait_for_pending_mi_fw(engine);
1614}
1615
1616static void guc_reset_nop(struct intel_engine_cs *engine)
1617{
1618}
1619
1620static void guc_rewind_nop(struct intel_engine_cs *engine, bool_Bool stalled)
1621{
1622}
1623
1624static void
1625__unwind_incomplete_requests(struct intel_context *ce)
1626{
1627 struct i915_request *rq, *rn;
1628 struct list_head *pl;
1629 int prio = I915_PRIORITY_INVALID((-0x7fffffff-1));
1630 struct i915_sched_engine * const sched_engine =
1631 ce->engine->sched_engine;
1632 unsigned long flags;
1633
1634 	spin_lock_irqsave(&sched_engine->lock, flags);
1635 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
1636 	list_for_each_entry_safe_reverse(rq, rn,
1637 					 &ce->guc_state.requests,
1638 					 sched.link) {
1639 if (i915_request_completed(rq))
1640 continue;
1641
1642 list_del_init(&rq->sched.link);
1643 __i915_request_unsubmit(rq);
1644
1645 /* Push the request back into the queue for later resubmission. */
1646 GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID)((void)0);
1647 if (rq_prio(rq) != prio) {
1648 prio = rq_prio(rq);
1649 pl = i915_sched_lookup_priolist(sched_engine, prio);
1650 }
1651 GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine))((void)0);
1652
1653 list_add(&rq->sched.link, pl);
1654 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1655 }
1656 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
1657 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1658}
1659
1660static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
1661{
1662 bool_Bool guilty;
1663 struct i915_request *rq;
1664 unsigned long flags;
1665 u32 head;
1666 int i, number_children = ce->parallel.number_children;
1667 struct intel_context *parent = ce;
1668
1669 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
1670
1671 intel_context_get(ce);
1672
1673 /*
1674 * GuC will implicitly mark the context as non-schedulable when it sends
1675 * the reset notification. Make sure our state reflects this change. The
1676 * context will be marked enabled on resubmission.
1677 */
1678 	spin_lock_irqsave(&ce->guc_state.lock, flags);
1679 	clr_context_enabled(ce);
1680 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
1681
1682 /*
1683 * For each context in the relationship find the hanging request
1684 * resetting each context / request as needed
1685 */
1686 for (i = 0; i < number_children + 1; ++i) {
1687 if (!intel_context_is_pinned(ce))
1688 goto next_context;
1689
1690 guilty = false0;
1691 rq = intel_context_get_active_request(ce);
1692 if (!rq) {
1693 head = ce->ring->tail;
1694 goto out_replay;
1695 }
1696
1697 if (i915_request_started(rq))
1698 guilty = stalled & ce->engine->mask;
1699
1700 GEM_BUG_ON(i915_active_is_idle(&ce->active))((void)0);
1701 head = intel_ring_wrap(ce->ring, rq->head);
1702
1703 __i915_request_reset(rq, guilty);
1704 i915_request_put(rq);
1705out_replay:
1706 guc_reset_state(ce, head, guilty);
1707next_context:
1708 if (i != number_children)
1709 			ce = list_next_entry(ce, parallel.child_link);
1710 }
1711
1712 __unwind_incomplete_requests(parent);
1713 intel_context_put(parent);
1714}
1715
1716void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
1717{
1718 struct intel_context *ce;
1719 unsigned long index;
1720 unsigned long flags;
1721
1722 if (unlikely(!guc_submission_initialized(guc))__builtin_expect(!!(!guc_submission_initialized(guc)), 0)) {
1723 /* Reset called during driver load? GuC not yet initialised! */
1724 return;
1725 }
1726
1727 	xa_lock_irqsave(&guc->context_lookup, flags);
1728 	xa_for_each(&guc->context_lookup, index, ce) {
1729 		if (!kref_get_unless_zero(&ce->ref))
1730 			continue;
1731 
1732 		xa_unlock(&guc->context_lookup);
1733 
1734 		if (intel_context_is_pinned(ce) &&
1735 		    !intel_context_is_child(ce))
1736 			__guc_reset_context(ce, stalled);
1737 
1738 		intel_context_put(ce);
1739 
1740 		xa_lock(&guc->context_lookup);
1741 	}
1742 	xa_unlock_irqrestore(&guc->context_lookup, flags);
1743
1744 /* GuC is blown away, drop all references to contexts */
1745 xa_destroy(&guc->context_lookup);
1746}
1747
1748static void guc_cancel_context_requests(struct intel_context *ce)
1749{
1750 struct i915_sched_engine *sched_engine = ce_to_guc(ce)->sched_engine;
1751 struct i915_request *rq;
1752 unsigned long flags;
1753
1754 /* Mark all executing requests as skipped. */
1755 	spin_lock_irqsave(&sched_engine->lock, flags);
1756 	spin_lock(&ce->guc_state.lock);
1757 	list_for_each_entry(rq, &ce->guc_state.requests, sched.link)
1758 		i915_request_put(i915_request_mark_eio(rq));
1759 	spin_unlock(&ce->guc_state.lock);
1760 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1761}
1762
1763static void
1764guc_cancel_sched_engine_requests(struct i915_sched_engine *sched_engine)
1765{
1766 struct i915_request *rq, *rn;
1767 struct rb_node *rb;
1768 unsigned long flags;
1769
1770 /* Can be called during boot if GuC fails to load */
1771 if (!sched_engine)
1772 return;
1773
1774 /*
1775 * Before we call engine->cancel_requests(), we should have exclusive
1776 * access to the submission state. This is arranged for us by the
1777 * caller disabling the interrupt generation, the tasklet and other
1778 * threads that may then access the same state, giving us a free hand
1779 * to reset state. However, we still need to let lockdep be aware that
1780 * we know this state may be accessed in hardirq context, so we
1781 * disable the irq around this manipulation and we want to keep
1782 * the spinlock focused on its duties and not accidentally conflate
1783 * coverage to the submission's irq state. (Similarly, although we
1784 * shouldn't need to disable irq around the manipulation of the
1785 * submission's irq state, we also wish to remind ourselves that
1786 * it is irq state.)
1787 */
1788 	spin_lock_irqsave(&sched_engine->lock, flags);
1789 
1790 	/* Flush the queued requests to the timeline list (for retiring). */
1791 	while ((rb = rb_first_cached(&sched_engine->queue))) {
1792 		struct i915_priolist *p = to_priolist(rb);
1793 
1794 		priolist_for_each_request_consume(rq, rn, p) {
1795 			list_del_init(&rq->sched.link);
1796 
1797 			__i915_request_submit(rq);
1798 
1799 			i915_request_put(i915_request_mark_eio(rq));
1800 		}
1801 
1802 		rb_erase_cached(&p->node, &sched_engine->queue);
1803 		i915_priolist_free(p);
1804 	}
1805 
1806 	/* Remaining _unready_ requests will be nop'ed when submitted */
1807 
1808 	sched_engine->queue_priority_hint = INT_MIN;
1809 	sched_engine->queue = RB_ROOT_CACHED;
1810 
1811 	spin_unlock_irqrestore(&sched_engine->lock, flags);
1812}
1813
1814void intel_guc_submission_cancel_requests(struct intel_guc *guc)
1815{
1816 struct intel_context *ce;
1817 unsigned long index;
1818 unsigned long flags;
1819
1820 	xa_lock_irqsave(&guc->context_lookup, flags);
1821 	xa_for_each(&guc->context_lookup, index, ce) {
1822 		if (!kref_get_unless_zero(&ce->ref))
1823 			continue;
1824 
1825 		xa_unlock(&guc->context_lookup);
1826 
1827 		if (intel_context_is_pinned(ce) &&
1828 		    !intel_context_is_child(ce))
1829 			guc_cancel_context_requests(ce);
1830 
1831 		intel_context_put(ce);
1832 
1833 		xa_lock(&guc->context_lookup);
1834 	}
1835 	xa_unlock_irqrestore(&guc->context_lookup, flags);
1836
1837 guc_cancel_sched_engine_requests(guc->sched_engine);
1838
1839 /* GuC is blown away, drop all references to contexts */
1840 xa_destroy(&guc->context_lookup);
1841}
1842
1843void intel_guc_submission_reset_finish(struct intel_guc *guc)
1844{
1845 /* Reset called during driver load or during wedge? */
1846 	if (unlikely(!guc_submission_initialized(guc) ||
1847 		     intel_gt_is_wedged(guc_to_gt(guc)))) {
1848 return;
1849 }
1850
1851 /*
1852 * Technically possible for either of these values to be non-zero here,
1853 * but very unlikely + harmless. Regardless let's add a warn so we can
1854 * see in CI if this happens frequently / a precursor to taking down the
1855 * machine.
1856 */
1857 	GEM_WARN_ON(atomic_read(&guc->outstanding_submission_g2h));
1858 	atomic_set(&guc->outstanding_submission_g2h, 0);
1859
1860 intel_guc_global_policies_update(guc);
1861 enable_submission(guc);
1862 intel_gt_unpark_heartbeats(guc_to_gt(guc));
1863}
1864
1865static void destroyed_worker_func(struct work_struct *w);
1866static void reset_fail_worker_func(struct work_struct *w);
1867
1868/*
1869 * Set up the memory resources to be shared with the GuC (via the GGTT)
1870 * at firmware loading time.
1871 */
1872int intel_guc_submission_init(struct intel_guc *guc)
1873{
1874 struct intel_gt *gt = guc_to_gt(guc);
1875 int ret;
1876
1877 if (guc->submission_initialized)
1878 return 0;
1879
1880 	if (GET_UC_VER(guc) < MAKE_UC_VER(70, 0, 0)) {
1881 ret = guc_lrc_desc_pool_create_v69(guc);
1882 if (ret)
1883 return ret;
1884 }
1885
1886 guc->submission_state.guc_ids_bitmap =
1887 bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc)((guc)->submission_state.num_guc_ids / 16), GFP_KERNEL(0x0001 | 0x0004));
1888 if (!guc->submission_state.guc_ids_bitmap) {
1889 ret = -ENOMEM12;
1890 goto destroy_pool;
1891 }
1892
1893 guc->timestamp.ping_delay = (POLL_TIME_CLKS(0xffffffffU >> 3) / gt->clock_frequency + 1) * HZhz;
1894 guc->timestamp.shift = gpm_timestamp_shift(gt);
1895 guc->submission_initialized = true1;
1896
1897 return 0;
1898
1899destroy_pool:
1900 guc_lrc_desc_pool_destroy_v69(guc);
1901
1902 return ret;
1903}
1904
1905void intel_guc_submission_fini(struct intel_guc *guc)
1906{
1907 if (!guc->submission_initialized)
1908 return;
1909
1910 guc_flush_destroyed_contexts(guc);
1911 guc_lrc_desc_pool_destroy_v69(guc);
1912 i915_sched_engine_put(guc->sched_engine);
1913 bitmap_free(guc->submission_state.guc_ids_bitmap);
1914 guc->submission_initialized = false0;
1915}
1916
1917static inline void queue_request(struct i915_sched_engine *sched_engine,
1918 struct i915_request *rq,
1919 int prio)
1920{
1921 GEM_BUG_ON(!list_empty(&rq->sched.link))((void)0);
1922 list_add_tail(&rq->sched.link,
1923 i915_sched_lookup_priolist(sched_engine, prio));
1924 set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
1925 tasklet_hi_schedule(&sched_engine->tasklet);
1926}
1927
1928static int guc_bypass_tasklet_submit(struct intel_guc *guc,
1929 struct i915_request *rq)
1930{
1931 int ret = 0;
1932
1933 __i915_request_submit(rq);
1934
1935 trace_i915_request_in(rq, 0);
1936
1937 if (is_multi_lrc_rq(rq)) {
1938 if (multi_lrc_submit(rq)) {
1939 ret = guc_wq_item_append(guc, rq);
1940 if (!ret)
1941 ret = guc_add_request(guc, rq);
1942 }
1943 } else {
1944 guc_set_lrc_tail(rq);
1945 ret = guc_add_request(guc, rq);
1946 }
1947
1948 if (unlikely(ret == -EPIPE)__builtin_expect(!!(ret == -32), 0))
1949 disable_submission(guc);
1950
1951 return ret;
1952}
1953
1954static bool_Bool need_tasklet(struct intel_guc *guc, struct i915_request *rq)
1955{
1956 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1957 struct intel_context *ce = request_to_scheduling_context(rq);
1958
1959 return submission_disabled(guc) || guc->stalled_request ||
1960 !i915_sched_engine_is_empty(sched_engine) ||
1961 !ctx_id_mapped(guc, ce->guc_id.id);
1962}
1963
1964static void guc_submit_request(struct i915_request *rq)
1965{
1966 struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
1967 struct intel_guc *guc = &rq->engine->gt->uc.guc;
1968 unsigned long flags;
1969
1970 /* Will be called from irq-context when using foreign fences. */
1971 spin_lock_irqsave(&sched_engine->lock, flags)do { flags = 0; mtx_enter(&sched_engine->lock); } while
(0)
;
1972
1973 if (need_tasklet(guc, rq))
1974 queue_request(sched_engine, rq, rq_prio(rq));
1975 else if (guc_bypass_tasklet_submit(guc, rq) == -EBUSY16)
1976 tasklet_hi_schedule(&sched_engine->tasklet);
1977
1978 spin_unlock_irqrestore(&sched_engine->lock, flags)do { (void)(flags); mtx_leave(&sched_engine->lock); } while
(0)
;
1979}
1980
1981static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
1982{
1983 STUB()do { printf("%s: stub\n", __func__); } while(0);
1984 return -ENOSYS78;
1985#ifdef notyet
1986 int ret;
1987
1988 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
1989
1990 if (intel_context_is_parent(ce))
1991 		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
1992 					      NUMBER_MULTI_LRC_GUC_ID(guc),
1993 					      order_base_2(ce->parallel.number_children
1994 							   + 1));
1995 	else
1996 		ret = ida_simple_get(&guc->submission_state.guc_ids,
1997 				     NUMBER_MULTI_LRC_GUC_ID(guc),
1998 				     guc->submission_state.num_guc_ids,
1999 				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
2000 				     __GFP_NOWARN);
2001 if (unlikely(ret < 0)__builtin_expect(!!(ret < 0), 0))
2002 return ret;
2003
2004 ce->guc_id.id = ret;
2005 return 0;
2006#endif
2007}
2008
2009static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2010{
2011 STUB()do { printf("%s: stub\n", __func__); } while(0);
2012#ifdef notyet
2013 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2014
2015 if (!context_guc_id_invalid(ce)) {
2016 if (intel_context_is_parent(ce))
2017 			bitmap_release_region(guc->submission_state.guc_ids_bitmap,
2018 					      ce->guc_id.id,
2019 					      order_base_2(ce->parallel.number_children
2020 							   + 1));
2021 else
2022 ida_simple_remove(&guc->submission_state.guc_ids,
2023 ce->guc_id.id);
2024 clr_ctx_id_mapping(guc, ce->guc_id.id);
2025 set_context_guc_id_invalid(ce);
2026 }
2027 if (!list_empty(&ce->guc_id.link))
2028 list_del_init(&ce->guc_id.link);
2029#endif
2030}
2031
2032static void release_guc_id(struct intel_guc *guc, struct intel_context *ce)
2033{
2034 unsigned long flags;
2035
2036 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2037 	__release_guc_id(guc, ce);
2038 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2039}
2040
2041static int steal_guc_id(struct intel_guc *guc, struct intel_context *ce)
2042{
2043 struct intel_context *cn;
2044
2045 lockdep_assert_held(&guc->submission_state.lock)do { (void)(&guc->submission_state.lock); } while(0);
2046 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2047 GEM_BUG_ON(intel_context_is_parent(ce))((void)0);
2048
2049 if (!list_empty(&guc->submission_state.guc_id_list)) {
2050 		cn = list_first_entry(&guc->submission_state.guc_id_list,
2051 				      struct intel_context,
2052 				      guc_id.link);
2053
2054 GEM_BUG_ON(atomic_read(&cn->guc_id.ref))((void)0);
2055 GEM_BUG_ON(context_guc_id_invalid(cn))((void)0);
2056 GEM_BUG_ON(intel_context_is_child(cn))((void)0);
2057 GEM_BUG_ON(intel_context_is_parent(cn))((void)0);
2058
2059 list_del_init(&cn->guc_id.link);
2060 ce->guc_id.id = cn->guc_id.id;
2061
2062 spin_lock(&cn->guc_state.lock)mtx_enter(&cn->guc_state.lock);
2063 clr_context_registered(cn);
2064 spin_unlock(&cn->guc_state.lock)mtx_leave(&cn->guc_state.lock);
2065
2066 set_context_guc_id_invalid(cn);
2067
2068#ifdef CONFIG_DRM_I915_SELFTEST
2069 guc->number_guc_id_stolen++;
2070#endif
2071
2072 return 0;
2073 } else {
2074 return -EAGAIN35;
2075 }
2076}
2077
2078static int assign_guc_id(struct intel_guc *guc, struct intel_context *ce)
2079{
2080 int ret;
2081
2082 lockdep_assert_held(&guc->submission_state.lock)do { (void)(&guc->submission_state.lock); } while(0);
2083 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2084
2085 ret = new_guc_id(guc, ce);
2086 if (unlikely(ret < 0)__builtin_expect(!!(ret < 0), 0)) {
2087 if (intel_context_is_parent(ce))
2088 return -ENOSPC28;
2089
2090 ret = steal_guc_id(guc, ce);
2091 if (ret < 0)
2092 return ret;
2093 }
2094
2095 if (intel_context_is_parent(ce)) {
2096 struct intel_context *child;
2097 int i = 1;
2098
2099 		for_each_child(ce, child)
2100 child->guc_id.id = ce->guc_id.id + i++;
2101 }
2102
2103 return 0;
2104}
2105
2106#define PIN_GUC_ID_TRIES4 4
2107static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2108{
2109 int ret = 0;
2110 unsigned long flags, tries = PIN_GUC_ID_TRIES4;
2111
2112 GEM_BUG_ON(atomic_read(&ce->guc_id.ref))((void)0);
2113
2114try_again:
2115 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2116
2117 might_lock(&ce->guc_state.lock);
2118
2119 if (context_guc_id_invalid(ce)) {
2120 ret = assign_guc_id(guc, ce);
2121 if (ret)
2122 goto out_unlock;
2123 		ret = 1; /* Indicates newly assigned guc_id */
2124 }
2125 if (!list_empty(&ce->guc_id.link))
2126 list_del_init(&ce->guc_id.link);
2127 atomic_inc(&ce->guc_id.ref)__sync_fetch_and_add(&ce->guc_id.ref, 1);
2128
2129out_unlock:
2130 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2131
2132 /*
2133 * -EAGAIN indicates no guc_id are available, let's retire any
2134 * outstanding requests to see if that frees up a guc_id. If the first
2135 * retire didn't help, insert a sleep with the timeslice duration before
2136 * attempting to retire more requests. Double the sleep period each
2137 * subsequent pass before finally giving up. The sleep period has max of
2138 * 100ms and minimum of 1ms.
2139 */
2140 if (ret == -EAGAIN35 && --tries) {
2141 if (PIN_GUC_ID_TRIES4 - tries > 1) {
2142 unsigned int timeslice_shifted =
2143 ce->engine->props.timeslice_duration_ms <<
2144 (PIN_GUC_ID_TRIES4 - tries - 2);
2145 			unsigned int max = min_t(unsigned int, 100,
2146 						 timeslice_shifted);
2147 
2148 			drm_msleep(max_t(unsigned int, max, 1));	/* mdelay() on OpenBSD */
2149 }
2150 intel_gt_retire_requests(guc_to_gt(guc));
2151 goto try_again;
2152 }
2153
2154 return ret;
2155}
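The retry loop above backs off as described in the comment: the first retry only retires requests, later retries sleep for the timeslice duration doubled on each pass, clamped to the 1-100 ms range. A standalone sketch of that schedule, assuming the same PIN_GUC_ID_TRIES value (the backoff_ms helper name is made up):

#include <stdio.h>

#define PIN_GUC_ID_TRIES 4

/* Reproduce the sleep schedule used above for a given timeslice (in ms). */
static unsigned int backoff_ms(unsigned int timeslice_ms, unsigned long tries_left)
{
	unsigned int shifted, ms;

	if (PIN_GUC_ID_TRIES - tries_left <= 1)
		return 0;			/* first retry: just retire and try again */

	shifted = timeslice_ms << (PIN_GUC_ID_TRIES - tries_left - 2);
	ms = shifted < 100 ? shifted : 100;	/* cap at 100ms */
	return ms > 1 ? ms : 1;			/* floor at 1ms */
}

int main(void)
{
	unsigned long tries;

	/* e.g. a 5ms timeslice gives sleeps of 0, 5 and 10 ms before giving up */
	for (tries = PIN_GUC_ID_TRIES - 1; tries > 0; tries--)
		printf("attempt %lu: sleep %ums\n",
		       PIN_GUC_ID_TRIES - tries, backoff_ms(5, tries));
	return 0;
}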
2156
2157static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
2158{
2159 unsigned long flags;
2160
2161 GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0)((void)0);
2162 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2163
2164 	if (unlikely(context_guc_id_invalid(ce) ||
2165 		     intel_context_is_parent(ce)))
2166 return;
2167
2168 	spin_lock_irqsave(&guc->submission_state.lock, flags);
2169 	if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) &&
2170 	    !atomic_read(&ce->guc_id.ref))
2171 		list_add_tail(&ce->guc_id.link,
2172 			      &guc->submission_state.guc_id_list);
2173 	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
2174}
2175
2176static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
2177 struct intel_context *ce,
2178 u32 guc_id,
2179 u32 offset,
2180 bool_Bool loop)
2181{
2182 struct intel_context *child;
2183 u32 action[4 + MAX_ENGINE_INSTANCE8];
2184 int len = 0;
2185
2186 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE)((void)0);
2187
2188 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2189 action[len++] = guc_id;
2190 action[len++] = ce->parallel.number_children + 1;
2191 action[len++] = offset;
2192 	for_each_child(ce, child) {
2193 offset += sizeof(struct guc_lrc_desc_v69);
2194 action[len++] = offset;
2195 }
2196
2197 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2198}
2199
2200static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
2201 struct intel_context *ce,
2202 struct guc_ctxt_registration_info *info,
2203 bool_Bool loop)
2204{
2205 struct intel_context *child;
2206 u32 action[13 + (MAX_ENGINE_INSTANCE8 * 2)];
2207 int len = 0;
2208 u32 next_id;
2209
2210 GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE)((void)0);
2211
2212 action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
2213 action[len++] = info->flags;
2214 action[len++] = info->context_idx;
2215 action[len++] = info->engine_class;
2216 action[len++] = info->engine_submit_mask;
2217 action[len++] = info->wq_desc_lo;
2218 action[len++] = info->wq_desc_hi;
2219 action[len++] = info->wq_base_lo;
2220 action[len++] = info->wq_base_hi;
2221 action[len++] = info->wq_size;
2222 action[len++] = ce->parallel.number_children + 1;
2223 action[len++] = info->hwlrca_lo;
2224 action[len++] = info->hwlrca_hi;
2225
2226 next_id = info->context_idx + 1;
2227 	for_each_child(ce, child) {
2228 GEM_BUG_ON(next_id++ != child->guc_id.id)((void)0);
2229
2230 /*
2231 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2232 * only supports 32 bit currently.
2233 */
2234 action[len++] = lower_32_bits(child->lrc.lrca)((u32)(child->lrc.lrca));
2235 action[len++] = upper_32_bits(child->lrc.lrca)((u32)(((child->lrc.lrca) >> 16) >> 16));
2236 }
2237
2238 GEM_BUG_ON(len > ARRAY_SIZE(action))((void)0);
2239
2240 return guc_submission_send_busy_loop(guc, action, len, 0, loop);
2241}
2242
2243static int __guc_action_register_context_v69(struct intel_guc *guc,
2244 u32 guc_id,
2245 u32 offset,
2246 bool_Bool loop)
2247{
2248 u32 action[] = {
2249 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2250 guc_id,
2251 offset,
2252 };
2253
2254 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])),
2255 0, loop);
2256}
2257
2258static int __guc_action_register_context_v70(struct intel_guc *guc,
2259 struct guc_ctxt_registration_info *info,
2260 bool_Bool loop)
2261{
2262 u32 action[] = {
2263 INTEL_GUC_ACTION_REGISTER_CONTEXT,
2264 info->flags,
2265 info->context_idx,
2266 info->engine_class,
2267 info->engine_submit_mask,
2268 info->wq_desc_lo,
2269 info->wq_desc_hi,
2270 info->wq_base_lo,
2271 info->wq_base_hi,
2272 info->wq_size,
2273 info->hwlrca_lo,
2274 info->hwlrca_hi,
2275 };
2276
2277 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])),
2278 0, loop);
2279}
2280
2281static void prepare_context_registration_info_v69(struct intel_context *ce);
2282static void prepare_context_registration_info_v70(struct intel_context *ce,
2283 struct guc_ctxt_registration_info *info);
2284
2285static int
2286register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool_Bool loop)
2287{
2288 u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
2289 ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
2290
2291 prepare_context_registration_info_v69(ce);
2292
2293 if (intel_context_is_parent(ce))
2294 return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
2295 offset, loop);
2296 else
2297 return __guc_action_register_context_v69(guc, ce->guc_id.id,
2298 offset, loop);
2299}
2300
2301static int
2302register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool_Bool loop)
2303{
2304 struct guc_ctxt_registration_info info;
2305
2306 prepare_context_registration_info_v70(ce, &info);
2307
2308 if (intel_context_is_parent(ce))
2309 return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
2310 else
2311 return __guc_action_register_context_v70(guc, &info, loop);
2312}
2313
2314static int register_context(struct intel_context *ce, bool_Bool loop)
2315{
2316 struct intel_guc *guc = ce_to_guc(ce);
2317 int ret;
2318
2319 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2320 trace_intel_context_register(ce);
2321
2322 	if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2323 ret = register_context_v70(guc, ce, loop);
2324 else
2325 ret = register_context_v69(guc, ce, loop);
2326
2327 if (likely(!ret)__builtin_expect(!!(!ret), 1)) {
2328 unsigned long flags;
2329
2330 		spin_lock_irqsave(&ce->guc_state.lock, flags);
2331 		set_context_registered(ce);
2332 		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2333 
2334 		if (GET_UC_VER(guc) >= MAKE_UC_VER(70, 0, 0))
2335 guc_context_policy_init_v70(ce, loop);
2336 }
2337
2338 return ret;
2339}
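The version check above relies on the MAKE_UC_VER()/GET_UC_VER() packing visible in the expansions: patch in bits 7:0, minor in bits 15:8, major in bits 23:16, so a plain integer comparison orders firmware versions (assuming minor and patch each fit in a byte). A standalone sketch, with example firmware version values:

#include <assert.h>
#include <stdint.h>

/* Same packing as the MAKE_UC_VER()/GET_UC_VER() expansions shown above. */
#define MAKE_UC_VER(maj, min, pat)	((pat) | ((min) << 8) | ((maj) << 16))

int main(void)
{
	uint32_t fw_69_0_3 = MAKE_UC_VER(69, 0, 3);
	uint32_t fw_70_1_1 = MAKE_UC_VER(70, 1, 1);

	/* major occupies the most significant packed bits, so integer compare works */
	assert(fw_69_0_3 <  MAKE_UC_VER(70, 0, 0));	/* selects the v69 paths */
	assert(fw_70_1_1 >= MAKE_UC_VER(70, 0, 0));	/* selects the v70 paths */
	return 0;
}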
2340
2341static int __guc_action_deregister_context(struct intel_guc *guc,
2342 u32 guc_id)
2343{
2344 u32 action[] = {
2345 INTEL_GUC_ACTION_DEREGISTER_CONTEXT,
2346 guc_id,
2347 };
2348
2349 return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])),
2350 G2H_LEN_DW_DEREGISTER_CONTEXT1,
2351 true1);
2352}
2353
2354static int deregister_context(struct intel_context *ce, u32 guc_id)
2355{
2356 struct intel_guc *guc = ce_to_guc(ce);
2357
2358 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2359 trace_intel_context_deregister(ce);
2360
2361 return __guc_action_deregister_context(guc, guc_id);
2362}
2363
2364static inline void clear_children_join_go_memory(struct intel_context *ce)
2365{
2366 struct parent_scratch *ps = __get_parent_scratch(ce);
2367 int i;
2368
2369 ps->go.semaphore = 0;
2370 for (i = 0; i < ce->parallel.number_children + 1; ++i)
2371 ps->join[i].semaphore = 0;
2372}
2373
2374static inline u32 get_children_go_value(struct intel_context *ce)
2375{
2376 return __get_parent_scratch(ce)->go.semaphore;
2377}
2378
2379static inline u32 get_children_join_value(struct intel_context *ce,
2380 u8 child_index)
2381{
2382 return __get_parent_scratch(ce)->join[child_index].semaphore;
2383}
2384
2385struct context_policy {
2386 u32 count;
2387 struct guc_update_context_policy h2g;
2388};
2389
2390static u32 __guc_context_policy_action_size(struct context_policy *policy)
2391{
2392 size_t bytes = sizeof(policy->h2g.header) +
2393 (sizeof(policy->h2g.klv[0]) * policy->count);
2394
2395 return bytes / sizeof(u32);
2396}
2397
2398static void __guc_context_policy_start_klv(struct context_policy *policy, u16 guc_id)
2399{
2400 policy->h2g.header.action = INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
2401 policy->h2g.header.ctx_id = guc_id;
2402 policy->count = 0;
2403}
2404
2405#define MAKE_CONTEXT_POLICY_ADD(func, id) \
2406static void __guc_context_policy_add_##func(struct context_policy *policy, u32 data) \
2407{ \
2408 GEM_BUG_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS)((void)0); \
2409 policy->h2g.klv[policy->count].kl = \
2410 		FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
2411 		FIELD_PREP(GUC_KLV_0_LEN, 1); \
2412 policy->h2g.klv[policy->count].value = data; \
2413 policy->count++; \
2414}
2415
2416MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
2417MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
2418MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
2419MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
2420
2421#undef MAKE_CONTEXT_POLICY_ADD
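Each policy update is encoded as a KLV (key-length-value) entry; the FIELD_PREP() expansions above place the key in bits 31:16 of the first dword and the length in bits 15:0. A standalone sketch of that packing (the key id used here is hypothetical):

#include <assert.h>
#include <stdint.h>

/* Key/length layout taken from the FIELD_PREP() expansions above. */
#define GUC_KLV_0_KEY	(0xffffu << 16)
#define GUC_KLV_0_LEN	(0xffffu << 0)

static uint32_t klv_header(uint16_t key, uint16_t len)
{
	return ((uint32_t)key << 16) | len;	/* key in bits 31:16, length in bits 15:0 */
}

int main(void)
{
	uint32_t kl = klv_header(0x2001, 1);	/* hypothetical key id, 1 dword of payload */

	assert((kl & GUC_KLV_0_KEY) >> 16 == 0x2001);
	assert((kl & GUC_KLV_0_LEN) == 1);
	return 0;
}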
2422
2423static int __guc_context_set_context_policies(struct intel_guc *guc,
2424 struct context_policy *policy,
2425 bool_Bool loop)
2426{
2427 return guc_submission_send_busy_loop(guc, (u32 *)&policy->h2g,
2428 __guc_context_policy_action_size(policy),
2429 0, loop);
2430}
2431
2432static int guc_context_policy_init_v70(struct intel_context *ce, bool_Bool loop)
2433{
2434 struct intel_engine_cs *engine = ce->engine;
2435 struct intel_guc *guc = &engine->gt->uc.guc;
2436 struct context_policy policy;
2437 u32 execution_quantum;
2438 u32 preemption_timeout;
2439 unsigned long flags;
2440 int ret;
2441
2442 /* NB: For both of these, zero means disabled. */
2443 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,((void)0)
2444 execution_quantum))((void)0);
2445 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,((void)0)
2446 preemption_timeout))((void)0);
2447 execution_quantum = engine->props.timeslice_duration_ms * 1000;
2448 preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2449
2450 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
2451
2452 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
2453 __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
2454 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2455
2456 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION(1UL << (8)))
2457 __guc_context_policy_add_preempt_to_idle(&policy, 1);
2458
2459 ret = __guc_context_set_context_policies(guc, &policy, loop);
2460
2461 	spin_lock_irqsave(&ce->guc_state.lock, flags);
2462 	if (ret != 0)
2463 		set_context_policy_required(ce);
2464 	else
2465 		clr_context_policy_required(ce);
2466 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
2467
2468 return ret;
2469}
2470
2471static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
2472 struct guc_lrc_desc_v69 *desc)
2473{
2474 desc->policy_flags = 0;
2475
2476 if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION(1UL << (8)))
2477 desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69(1UL << (0));
2478
2479 /* NB: For both of these, zero means disabled. */
2480 GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,((void)0)
2481 desc->execution_quantum))((void)0);
2482 GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,((void)0)
2483 desc->preemption_timeout))((void)0);
2484 desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
2485 desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
2486}
2487
2488static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
2489{
2490 /*
2491 * this matches the mapping we do in map_i915_prio_to_guc_prio()
2492 * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
2493 */
2494 	switch (prio) {
2495 	default:
2496 		MISSING_CASE(prio);
2497 		fallthrough;
2498 	case GUC_CLIENT_PRIORITY_KMD_NORMAL:
2499 		return GEN12_CTX_PRIORITY_NORMAL;
2500 	case GUC_CLIENT_PRIORITY_NORMAL:
2501 		return GEN12_CTX_PRIORITY_LOW;
2502 	case GUC_CLIENT_PRIORITY_HIGH:
2503 	case GUC_CLIENT_PRIORITY_KMD_HIGH:
2504 		return GEN12_CTX_PRIORITY_HIGH;
2505 }
2506}
2507
2508static void prepare_context_registration_info_v69(struct intel_context *ce)
2509{
2510 struct intel_engine_cs *engine = ce->engine;
2511 struct intel_guc *guc = &engine->gt->uc.guc;
2512 u32 ctx_id = ce->guc_id.id;
2513 struct guc_lrc_desc_v69 *desc;
2514 struct intel_context *child;
2515
2516 GEM_BUG_ON(!engine->mask)((void)0);
2517
2518 /*
2519 	 * Ensure LRC + CT vmas are in the same region as the write barrier is done
2520 * based on CT vma region.
2521 */
2522 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=((void)0)
2523 i915_gem_object_is_lmem(ce->ring->vma->obj))((void)0);
2524
2525 desc = __get_lrc_desc_v69(guc, ctx_id);
2526 desc->engine_class = engine_class_to_guc_class(engine->class);
2527 desc->engine_submit_mask = engine->logical_mask;
2528 desc->hw_context_desc = ce->lrc.lrca;
2529 desc->priority = ce->guc_state.prio;
2530 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD(1UL << (0));
2531 guc_context_policy_init_v69(engine, desc);
2532
2533 /*
2534 * If context is a parent, we need to register a process descriptor
2535 * describing a work queue and register all child contexts.
2536 */
2537 if (intel_context_is_parent(ce)) {
2538 struct guc_process_desc_v69 *pdesc;
2539
2540 ce->parallel.guc.wqi_tail = 0;
2541 ce->parallel.guc.wqi_head = 0;
2542
2543 desc->process_desc = i915_ggtt_offset(ce->state) +
2544 __get_parent_scratch_offset(ce);
2545 desc->wq_addr = i915_ggtt_offset(ce->state) +
2546 __get_wq_offset(ce);
2547 desc->wq_size = WQ_SIZE((1 << 12) / 2);
2548
2549 pdesc = __get_process_desc_v69(ce);
2550 memset(pdesc, 0, sizeof(*(pdesc)))__builtin_memset((pdesc), (0), (sizeof(*(pdesc))));
2551 pdesc->stage_id = ce->guc_id.id;
2552 pdesc->wq_base_addr = desc->wq_addr;
2553 pdesc->wq_size_bytes = desc->wq_size;
2554 pdesc->wq_status = WQ_STATUS_ACTIVE1;
2555
2556 ce->parallel.guc.wq_head = &pdesc->head;
2557 ce->parallel.guc.wq_tail = &pdesc->tail;
2558 ce->parallel.guc.wq_status = &pdesc->wq_status;
2559
2560 		for_each_child(ce, child) {
2561 desc = __get_lrc_desc_v69(guc, child->guc_id.id);
2562
2563 desc->engine_class =
2564 engine_class_to_guc_class(engine->class);
2565 desc->hw_context_desc = child->lrc.lrca;
2566 desc->priority = ce->guc_state.prio;
2567 desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD(1UL << (0));
2568 guc_context_policy_init_v69(engine, desc);
2569 }
2570
2571 clear_children_join_go_memory(ce);
2572 }
2573}
2574
2575static void prepare_context_registration_info_v70(struct intel_context *ce,
2576 struct guc_ctxt_registration_info *info)
2577{
2578 struct intel_engine_cs *engine = ce->engine;
2579 struct intel_guc *guc = &engine->gt->uc.guc;
Value stored to 'guc' during its initialization is never read
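	/*
	 * Analyzer note: in this kernel configuration GEM_BUG_ON() compiles to
	 * ((void)0) (see the expansion at line 2588 below), so 'guc' is never
	 * read after this initialization and clang flags the store as dead.
	 * The store itself looks harmless; the variable only feeds that
	 * compiled-out assertion.
	 */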
2580 u32 ctx_id = ce->guc_id.id;
2581
2582 GEM_BUG_ON(!engine->mask)((void)0);
2583
2584 /*
2585 	 * Ensure LRC + CT vmas are in the same region as the write barrier is done
2586 * based on CT vma region.
2587 */
2588 GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=((void)0)
2589 i915_gem_object_is_lmem(ce->ring->vma->obj))((void)0);
2590
2591 memset(info, 0, sizeof(*info))__builtin_memset((info), (0), (sizeof(*info)));
2592 info->context_idx = ctx_id;
2593 info->engine_class = engine_class_to_guc_class(engine->class);
2594 info->engine_submit_mask = engine->logical_mask;
2595 /*
2596 * NB: GuC interface supports 64 bit LRCA even though i915/HW
2597 * only supports 32 bit currently.
2598 */
2599 info->hwlrca_lo = lower_32_bits(ce->lrc.lrca)((u32)(ce->lrc.lrca));
2600 info->hwlrca_hi = upper_32_bits(ce->lrc.lrca)((u32)(((ce->lrc.lrca) >> 16) >> 16));
2601 if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY(1UL << (10)))
2602 info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
2603 info->flags = CONTEXT_REGISTRATION_FLAG_KMD(1UL << (0));
2604
2605 /*
2606 * If context is a parent, we need to register a process descriptor
2607 * describing a work queue and register all child contexts.
2608 */
2609 if (intel_context_is_parent(ce)) {
2610 struct guc_sched_wq_desc *wq_desc;
2611 u64 wq_desc_offset, wq_base_offset;
2612
2613 ce->parallel.guc.wqi_tail = 0;
2614 ce->parallel.guc.wqi_head = 0;
2615
2616 wq_desc_offset = i915_ggtt_offset(ce->state) +
2617 __get_parent_scratch_offset(ce);
2618 wq_base_offset = i915_ggtt_offset(ce->state) +
2619 __get_wq_offset(ce);
2620 info->wq_desc_lo = lower_32_bits(wq_desc_offset)((u32)(wq_desc_offset));
2621 info->wq_desc_hi = upper_32_bits(wq_desc_offset)((u32)(((wq_desc_offset) >> 16) >> 16));
2622 info->wq_base_lo = lower_32_bits(wq_base_offset)((u32)(wq_base_offset));
2623 info->wq_base_hi = upper_32_bits(wq_base_offset)((u32)(((wq_base_offset) >> 16) >> 16));
2624 info->wq_size = WQ_SIZE((1 << 12) / 2);
2625
2626 wq_desc = __get_wq_desc_v70(ce);
2627 memset(wq_desc, 0, sizeof(*wq_desc))__builtin_memset((wq_desc), (0), (sizeof(*wq_desc)));
2628 wq_desc->wq_status = WQ_STATUS_ACTIVE1;
2629
2630 ce->parallel.guc.wq_head = &wq_desc->head;
2631 ce->parallel.guc.wq_tail = &wq_desc->tail;
2632 ce->parallel.guc.wq_status = &wq_desc->wq_status;
2633
2634 clear_children_join_go_memory(ce);
2635 }
2636}
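The dead-store warning reported at line 2579 above follows directly from the macro expansions visible in this listing: GEM_BUG_ON() expands to ((void)0) in this build, so the only read of 'guc' (the GEM_BUG_ON at line 2588) is compiled away and the initialization is left unread. A minimal, self-contained sketch of the same effect; the stub types and names below are illustrative, not the driver's:

/* When the assert macro compiles away, the only read of 'guc' goes with it
 * and the analyzer reports the initialization as a dead store. */
#include <stdio.h>

#define GEM_BUG_ON(expr) ((void)0)	/* as expanded throughout this listing */

struct intel_guc_stub { int is_lmem; };
struct engine_stub { struct intel_guc_stub guc; };

static void prepare_info(struct engine_stub *engine)
{
	struct intel_guc_stub *guc = &engine->guc;	/* flagged: never read below */

	GEM_BUG_ON(guc->is_lmem != 0);	/* expands to ((void)0) */
	printf("registration info prepared\n");
}

int main(void)
{
	struct engine_stub e = { { 0 } };

	prepare_info(&e);
	return 0;
}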
2637
2638static int try_context_registration(struct intel_context *ce, bool_Bool loop)
2639{
2640 struct intel_engine_cs *engine = ce->engine;
2641 struct intel_runtime_pm *runtime_pm = engine->uncore->rpm;
2642 struct intel_guc *guc = &engine->gt->uc.guc;
2643 intel_wakeref_t wakeref;
2644 u32 ctx_id = ce->guc_id.id;
2645 bool_Bool context_registered;
2646 int ret = 0;
2647
2648 GEM_BUG_ON(!sched_state_is_init(ce))((void)0);
2649
2650 context_registered = ctx_id_mapped(guc, ctx_id);
2651
2652 clr_ctx_id_mapping(guc, ctx_id);
2653 set_ctx_id_mapping(guc, ctx_id, ce);
2654
2655 /*
2656 * The context_lookup xarray is used to determine if the hardware
2657 * context is currently registered. There are two cases in which it
2658 * could be registered: either the guc_id has been stolen from another
2659 * context, or the lrc descriptor address of this context has changed. In
2660 * either case the context needs to be deregistered with the GuC before
2661 * registering this context.
2662 */
2663 if (context_registered) {
2664 bool_Bool disabled;
2665 unsigned long flags;
2666
2667 trace_intel_context_steal_guc_id(ce);
2668 GEM_BUG_ON(!loop)((void)0);
2669
2670 /* Seal race with Reset */
2671 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
2672 disabled = submission_disabled(guc);
2673 if (likely(!disabled)__builtin_expect(!!(!disabled), 1)) {
2674 set_context_wait_for_deregister_to_register(ce);
2675 intel_context_get(ce);
2676 }
2677 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
2678 if (unlikely(disabled)__builtin_expect(!!(disabled), 0)) {
2679 clr_ctx_id_mapping(guc, ctx_id);
2680 return 0; /* Will get registered later */
2681 }
2682
2683 /*
2684 * If stealing the guc_id, this ce has the same guc_id as the
2685 * context whose guc_id was stolen.
2686 */
2687 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
2688 ret = deregister_context(ce, ce->guc_id.id);
2689 if (unlikely(ret == -ENODEV)__builtin_expect(!!(ret == -19), 0))
2690 ret = 0; /* Will get registered later */
2691 } else {
2692 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
2693 ret = register_context(ce, loop);
2694 if (unlikely(ret == -EBUSY)__builtin_expect(!!(ret == -16), 0)) {
2695 clr_ctx_id_mapping(guc, ctx_id);
2696 } else if (unlikely(ret == -ENODEV)__builtin_expect(!!(ret == -19), 0)) {
2697 clr_ctx_id_mapping(guc, ctx_id);
2698 ret = 0; /* Will get registered later */
2699 }
2700 }
2701
2702 return ret;
2703}
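The comment in try_context_registration() above explains when a deregister must precede a register. A compressed, purely illustrative sketch of that decision; ctx_id_mapped() and the flow below only mimic the listing, they are not the driver's helpers:

#include <stdbool.h>
#include <stdio.h>

/* Pretend id 7 is still mapped to an older context (stolen guc_id or changed LRC). */
static bool ctx_id_mapped(unsigned int id) { return id == 7; }

static int try_registration(unsigned int id)
{
	if (ctx_id_mapped(id)) {
		printf("ctx %u: deregister the stale entry before registering\n", id);
		return 0;	/* the real code completes registration from the G2H handler */
	}
	printf("ctx %u: register directly\n", id);
	return 0;
}

int main(void)
{
	try_registration(7);
	try_registration(9);
	return 0;
}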
2704
2705static int __guc_context_pre_pin(struct intel_context *ce,
2706 struct intel_engine_cs *engine,
2707 struct i915_gem_ww_ctx *ww,
2708 void **vaddr)
2709{
2710 return lrc_pre_pin(ce, engine, ww, vaddr);
2711}
2712
2713static int __guc_context_pin(struct intel_context *ce,
2714 struct intel_engine_cs *engine,
2715 void *vaddr)
2716{
2717 if (i915_ggtt_offset(ce->state) !=
2718 (ce->lrc.lrca & CTX_GTT_ADDRESS_MASK(((~0UL) >> (64 - (31) - 1)) & ((~0UL) << (12
)))
))
2719 set_bit(CONTEXT_LRCA_DIRTY9, &ce->flags);
2720
2721 /*
2722 * GuC context gets pinned in guc_request_alloc. See that function for
2723 * an explanation of why.
2724 */
2725
2726 return lrc_pin(ce, engine, vaddr);
2727}
2728
2729static int guc_context_pre_pin(struct intel_context *ce,
2730 struct i915_gem_ww_ctx *ww,
2731 void **vaddr)
2732{
2733 return __guc_context_pre_pin(ce, ce->engine, ww, vaddr);
2734}
2735
2736static int guc_context_pin(struct intel_context *ce, void *vaddr)
2737{
2738 int ret = __guc_context_pin(ce, ce->engine, vaddr);
2739
2740 if (likely(!ret && !intel_context_is_barrier(ce))__builtin_expect(!!(!ret && !intel_context_is_barrier
(ce)), 1)
)
2741 intel_engine_pm_get(ce->engine);
2742
2743 return ret;
2744}
2745
2746static void guc_context_unpin(struct intel_context *ce)
2747{
2748 struct intel_guc *guc = ce_to_guc(ce);
2749
2750 unpin_guc_id(guc, ce);
2751 lrc_unpin(ce);
2752
2753 if (likely(!intel_context_is_barrier(ce))__builtin_expect(!!(!intel_context_is_barrier(ce)), 1))
2754 intel_engine_pm_put_async(ce->engine);
2755}
2756
2757static void guc_context_post_unpin(struct intel_context *ce)
2758{
2759 lrc_post_unpin(ce);
2760}
2761
2762static void __guc_context_sched_enable(struct intel_guc *guc,
2763 struct intel_context *ce)
2764{
2765 u32 action[] = {
2766 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2767 ce->guc_id.id,
2768 GUC_CONTEXT_ENABLE1
2769 };
2770
2771 trace_intel_context_sched_enable(ce);
2772
2773 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])),
2774 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET2, true1);
2775}
2776
2777static void __guc_context_sched_disable(struct intel_guc *guc,
2778 struct intel_context *ce,
2779 u16 guc_id)
2780{
2781 u32 action[] = {
2782 INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET,
2783 guc_id, /* ce->guc_id.id not stable */
2784 GUC_CONTEXT_DISABLE0
2785 };
2786
2787 GEM_BUG_ON(guc_id == GUC_INVALID_CONTEXT_ID)((void)0);
2788
2789 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2790 trace_intel_context_sched_disable(ce);
2791
2792 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])),
2793 G2H_LEN_DW_SCHED_CONTEXT_MODE_SET2, true1);
2794}
2795
2796static void guc_blocked_fence_complete(struct intel_context *ce)
2797{
2798 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
2799
2800 if (!i915_sw_fence_done(&ce->guc_state.blocked))
2801 i915_sw_fence_complete(&ce->guc_state.blocked);
2802}
2803
2804static void guc_blocked_fence_reinit(struct intel_context *ce)
2805{
2806 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
2807 GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked))((void)0);
2808
2809 /*
2810 * This fence is always complete unless a pending schedule disable is
2811 * outstanding. We arm the fence here and complete it when we receive
2812 * the pending schedule disable complete message.
2813 */
2814 i915_sw_fence_fini(&ce->guc_state.blocked);
2815 i915_sw_fence_reinit(&ce->guc_state.blocked);
2816 i915_sw_fence_await(&ce->guc_state.blocked);
2817 i915_sw_fence_commit(&ce->guc_state.blocked);
2818}
2819
2820static u16 prep_context_pending_disable(struct intel_context *ce)
2821{
2822 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
2823
2824 set_context_pending_disable(ce);
2825 clr_context_enabled(ce);
2826 guc_blocked_fence_reinit(ce);
2827 intel_context_get(ce);
2828
2829 return ce->guc_id.id;
2830}
2831
2832static struct i915_sw_fence *guc_context_block(struct intel_context *ce)
2833{
2834 struct intel_guc *guc = ce_to_guc(ce);
2835 unsigned long flags;
2836 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2837 intel_wakeref_t wakeref;
2838 u16 guc_id;
2839 bool_Bool enabled;
2840
2841 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2842
2843 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
2844
2845 incr_context_blocked(ce);
2846
2847 enabled = context_enabled(ce);
2848 if (unlikely(!enabled || submission_disabled(guc))__builtin_expect(!!(!enabled || submission_disabled(guc)), 0)) {
2849 if (enabled)
2850 clr_context_enabled(ce);
2851 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
2852 return &ce->guc_state.blocked;
2853 }
2854
2855 /*
2856 * We add +2 here as the schedule disable complete CTB handler calls
2857 * intel_context_sched_disable_unpin (-2 to pin_count).
2858 */
2859 atomic_add(2, &ce->pin_count)__sync_fetch_and_add(&ce->pin_count, 2);
2860
2861 guc_id = prep_context_pending_disable(ce);
2862
2863 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
2864
2865 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
2866 __guc_context_sched_disable(guc, ce, guc_id);
2867
2868 return &ce->guc_state.blocked;
2869}
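The +2 in the comment inside guc_context_block() above is balanced by the -2 that intel_context_sched_disable_unpin() performs when the schedule-disable-complete G2H arrives. A small userland sketch of that bookkeeping, with made-up helper names:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int pin_count = 1;	/* context already pinned once by its user */

static void block_context(void)      { atomic_fetch_add(&pin_count, 2); }
static void sched_disable_done(void) { atomic_fetch_sub(&pin_count, 2); }

int main(void)
{
	block_context();	/* +2 while the schedule-disable reply is outstanding */
	printf("pinned while blocked: %d\n", atomic_load(&pin_count));	/* 3 */

	sched_disable_done();	/* completion handler drops the temporary pins */
	printf("after completion:     %d\n", atomic_load(&pin_count));	/* 1 */
	return 0;
}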
2870
2871#define SCHED_STATE_MULTI_BLOCKED_MASK((0xfff << 8) & ~(1UL << (8))) \
2872 (SCHED_STATE_BLOCKED_MASK(0xfff << 8) & ~SCHED_STATE_BLOCKED(1UL << (8)))
2873#define SCHED_STATE_NO_UNBLOCK(((0xfff << 8) & ~(1UL << (8))) | (1UL <<
(2)) | (1UL << (3)))
\
2874 (SCHED_STATE_MULTI_BLOCKED_MASK((0xfff << 8) & ~(1UL << (8))) | \
2875 SCHED_STATE_PENDING_DISABLE(1UL << (2)) | \
2876 SCHED_STATE_BANNED(1UL << (3)))
2877
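A worked example of the masks defined above: the blocked count occupies bits 8..19, bit 8 alone means "blocked once", and SCHED_STATE_MULTI_BLOCKED_MASK keeps only the higher bits, so it is non-zero exactly when the context is blocked more than once. The small program below just evaluates the bit arithmetic:

#include <stdio.h>

#define SCHED_STATE_BLOCKED		(1UL << 8)
#define SCHED_STATE_BLOCKED_MASK	(0xfffUL << 8)
#define SCHED_STATE_MULTI_BLOCKED_MASK \
	(SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED)

int main(void)
{
	unsigned long once  = 1UL << 8;	/* blocked one time  */
	unsigned long twice = 2UL << 8;	/* blocked two times */

	printf("once:  multi bits = %#lx\n", once  & SCHED_STATE_MULTI_BLOCKED_MASK);	/* 0     */
	printf("twice: multi bits = %#lx\n", twice & SCHED_STATE_MULTI_BLOCKED_MASK);	/* 0x200 */
	return 0;
}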
2878static bool_Bool context_cant_unblock(struct intel_context *ce)
2879{
2880 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
2881
2882 return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK(((0xfff << 8) & ~(1UL << (8))) | (1UL <<
(2)) | (1UL << (3)))
) ||
2883 context_guc_id_invalid(ce) ||
2884 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id) ||
2885 !intel_context_is_pinned(ce);
2886}
2887
2888static void guc_context_unblock(struct intel_context *ce)
2889{
2890 struct intel_guc *guc = ce_to_guc(ce);
2891 unsigned long flags;
2892 struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm;
2893 intel_wakeref_t wakeref;
2894 bool_Bool enable;
2895
2896 GEM_BUG_ON(context_enabled(ce))((void)0);
2897 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2898
2899 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
2900
2901 if (unlikely(submission_disabled(guc) ||__builtin_expect(!!(submission_disabled(guc) || context_cant_unblock
(ce)), 0)
2902 context_cant_unblock(ce))__builtin_expect(!!(submission_disabled(guc) || context_cant_unblock
(ce)), 0)
) {
2903 enable = false0;
2904 } else {
2905 enable = true1;
2906 set_context_pending_enable(ce);
2907 set_context_enabled(ce);
2908 intel_context_get(ce);
2909 }
2910
2911 decr_context_blocked(ce);
2912
2913 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
2914
2915 if (enable) {
2916 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
2917 __guc_context_sched_enable(guc, ce);
2918 }
2919}
2920
2921static void guc_context_cancel_request(struct intel_context *ce,
2922 struct i915_request *rq)
2923{
2924 struct intel_context *block_context =
2925 request_to_scheduling_context(rq);
2926
2927 if (i915_sw_fence_signaled(&rq->submit)) {
2928 struct i915_sw_fence *fence;
2929
2930 intel_context_get(ce);
2931 fence = guc_context_block(block_context);
2932 i915_sw_fence_wait(fence);
2933 if (!i915_request_completed(rq)) {
2934 __i915_request_skip(rq);
2935 guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head),
2936 true1);
2937 }
2938
2939 guc_context_unblock(block_context);
2940 intel_context_put(ce);
2941 }
2942}
2943
2944static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
2945 u16 guc_id,
2946 u32 preemption_timeout)
2947{
2948 if (GET_UC_VER(guc)((((guc)->fw.file_selected.patch_ver) | (((guc)->fw.file_selected
.minor_ver) << 8) | (((guc)->fw.file_selected.major_ver
) << 16)))
>= MAKE_UC_VER(70, 0, 0)((0) | ((0) << 8) | ((70) << 16))) {
2949 struct context_policy policy;
2950
2951 __guc_context_policy_start_klv(&policy, guc_id);
2952 __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
2953 __guc_context_set_context_policies(guc, &policy, true1);
2954 } else {
2955 u32 action[] = {
2956 INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
2957 guc_id,
2958 preemption_timeout
2959 };
2960
2961 intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])), 0, true1);
2962 }
2963}
2964
2965static void
2966guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
2967 unsigned int preempt_timeout_ms)
2968{
2969 struct intel_guc *guc = ce_to_guc(ce);
2970 struct intel_runtime_pm *runtime_pm =
2971 &ce->engine->gt->i915->runtime_pm;
2972 intel_wakeref_t wakeref;
2973 unsigned long flags;
2974
2975 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
2976
2977 guc_flush_submissions(guc);
2978
2979 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
2980 set_context_banned(ce);
2981
2982 if (submission_disabled(guc) ||
2983 (!context_enabled(ce) && !context_pending_disable(ce))) {
2984 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
2985
2986 guc_cancel_context_requests(ce);
2987 intel_engine_signal_breadcrumbs(ce->engine);
2988 } else if (!context_pending_disable(ce)) {
2989 u16 guc_id;
2990
2991 /*
2992 * We add +2 here as the schedule disable complete CTB handler
2993 * calls intel_context_sched_disable_unpin (-2 to pin_count).
2994 */
2995 atomic_add(2, &ce->pin_count)__sync_fetch_and_add(&ce->pin_count, 2);
2996
2997 guc_id = prep_context_pending_disable(ce);
2998 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
2999
3000 /*
3001 * In addition to disabling scheduling, set the preemption
3002 * timeout to the minimum value (1 us) so the banned context
3003 * gets kicked off the HW ASAP.
3004 */
3005 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
{
3006 __guc_context_set_preemption_timeout(guc, guc_id,
3007 preempt_timeout_ms);
3008 __guc_context_sched_disable(guc, ce, guc_id);
3009 }
3010 } else {
3011 if (!context_guc_id_invalid(ce))
3012 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
3013 __guc_context_set_preemption_timeout(guc,
3014 ce->guc_id.id,
3015 preempt_timeout_ms);
3016 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
3017 }
3018}
3019
3020static void guc_context_sched_disable(struct intel_context *ce)
3021{
3022 struct intel_guc *guc = ce_to_guc(ce);
3023 unsigned long flags;
3024 struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
3025 intel_wakeref_t wakeref;
3026 u16 guc_id;
3027
3028 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
3029
3030 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
3031
3032 /*
3033 * We have to check if the context has been disabled by another thread,
3034 * check if submission has been disabled to seal a race with reset, and
3035 * finally check if any more requests have been committed to the
3036 * context, ensuring that a request doesn't slip through the
3037 * 'context_pending_disable' fence.
3038 */
3039 if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||__builtin_expect(!!(!context_enabled(ce) || submission_disabled
(guc) || context_has_committed_requests(ce)), 0)
3040 context_has_committed_requests(ce))__builtin_expect(!!(!context_enabled(ce) || submission_disabled
(guc) || context_has_committed_requests(ce)), 0)
) {
3041 clr_context_enabled(ce);
3042 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
3043 goto unpin;
3044 }
3045 guc_id = prep_context_pending_disable(ce);
3046
3047 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
3048
3049 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
3050 __guc_context_sched_disable(guc, ce, guc_id);
3051
3052 return;
3053unpin:
3054 intel_context_sched_disable_unpin(ce);
3055}
3056
3057static inline void guc_lrc_desc_unpin(struct intel_context *ce)
3058{
3059 struct intel_guc *guc = ce_to_guc(ce);
3060 struct intel_gt *gt = guc_to_gt(guc);
3061 unsigned long flags;
3062 bool_Bool disabled;
3063
3064 GEM_BUG_ON(!intel_gt_pm_is_awake(gt))((void)0);
3065 GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id))((void)0);
3066 GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id))((void)0);
3067 GEM_BUG_ON(context_enabled(ce))((void)0);
3068
3069 /* Seal race with Reset */
3070 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
3071 disabled = submission_disabled(guc);
3072 if (likely(!disabled)__builtin_expect(!!(!disabled), 1)) {
3073 __intel_gt_pm_get(gt);
3074 set_context_destroyed(ce);
3075 clr_context_registered(ce);
3076 }
3077 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
3078 if (unlikely(disabled)__builtin_expect(!!(disabled), 0)) {
3079 release_guc_id(guc, ce);
3080 __guc_context_destroy(ce);
3081 return;
3082 }
3083
3084 deregister_context(ce, ce->guc_id.id);
3085}
3086
3087static void __guc_context_destroy(struct intel_context *ce)
3088{
3089 GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] ||((void)0)
3090 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||((void)0)
3091 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||((void)0)
3092 ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL])((void)0);
3093 GEM_BUG_ON(ce->guc_state.number_committed_requests)((void)0);
3094
3095 lrc_fini(ce);
3096 intel_context_fini(ce);
3097
3098 if (intel_engine_is_virtual(ce->engine)) {
3099 struct guc_virtual_engine *ve =
3100 container_of(ce, typeof(*ve), context)({ const __typeof( ((typeof(*ve) *)0)->context ) *__mptr =
(ce); (typeof(*ve) *)( (char *)__mptr - __builtin_offsetof(typeof
(*ve), context) );})
;
3101
3102 if (ve->base.breadcrumbs)
3103 intel_breadcrumbs_put(ve->base.breadcrumbs);
3104
3105 kfree(ve);
3106 } else {
3107 intel_context_free(ce);
3108 }
3109}
3110
3111static void guc_flush_destroyed_contexts(struct intel_guc *guc)
3112{
3113 struct intel_context *ce;
3114 unsigned long flags;
3115
3116 GEM_BUG_ON(!submission_disabled(guc) &&((void)0)
3117 guc_submission_initialized(guc))((void)0);
3118
3119 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3120 spin_lock_irqsave(&guc->submission_state.lock, flags)do { flags = 0; mtx_enter(&guc->submission_state.lock)
; } while (0)
;
3121 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,(list_empty(&guc->submission_state.destroyed_contexts)
? ((void *)0) : ({ const __typeof( ((struct intel_context *)
0)->destroyed_link ) *__mptr = ((&guc->submission_state
.destroyed_contexts)->next); (struct intel_context *)( (char
*)__mptr - __builtin_offsetof(struct intel_context, destroyed_link
) );}))
3122 struct intel_context,(list_empty(&guc->submission_state.destroyed_contexts)
? ((void *)0) : ({ const __typeof( ((struct intel_context *)
0)->destroyed_link ) *__mptr = ((&guc->submission_state
.destroyed_contexts)->next); (struct intel_context *)( (char
*)__mptr - __builtin_offsetof(struct intel_context, destroyed_link
) );}))
3123 destroyed_link)(list_empty(&guc->submission_state.destroyed_contexts)
? ((void *)0) : ({ const __typeof( ((struct intel_context *)
0)->destroyed_link ) *__mptr = ((&guc->submission_state
.destroyed_contexts)->next); (struct intel_context *)( (char
*)__mptr - __builtin_offsetof(struct intel_context, destroyed_link
) );}))
;
3124 if (ce)
3125 list_del_init(&ce->destroyed_link);
3126 spin_unlock_irqrestore(&guc->submission_state.lock, flags)do { (void)(flags); mtx_leave(&guc->submission_state.lock
); } while (0)
;
3127
3128 if (!ce)
3129 break;
3130
3131 release_guc_id(guc, ce);
3132 __guc_context_destroy(ce);
3133 }
3134}
3135
3136static void deregister_destroyed_contexts(struct intel_guc *guc)
3137{
3138 struct intel_context *ce;
3139 unsigned long flags;
3140
3141 while (!list_empty(&guc->submission_state.destroyed_contexts)) {
3142 spin_lock_irqsave(&guc->submission_state.lock, flags)do { flags = 0; mtx_enter(&guc->submission_state.lock)
; } while (0)
;
3143 ce = list_first_entry_or_null(&guc->submission_state.destroyed_contexts,(list_empty(&guc->submission_state.destroyed_contexts)
? ((void *)0) : ({ const __typeof( ((struct intel_context *)
0)->destroyed_link ) *__mptr = ((&guc->submission_state
.destroyed_contexts)->next); (struct intel_context *)( (char
*)__mptr - __builtin_offsetof(struct intel_context, destroyed_link
) );}))
3144 struct intel_context,(list_empty(&guc->submission_state.destroyed_contexts)
? ((void *)0) : ({ const __typeof( ((struct intel_context *)
0)->destroyed_link ) *__mptr = ((&guc->submission_state
.destroyed_contexts)->next); (struct intel_context *)( (char
*)__mptr - __builtin_offsetof(struct intel_context, destroyed_link
) );}))
3145 destroyed_link)(list_empty(&guc->submission_state.destroyed_contexts)
? ((void *)0) : ({ const __typeof( ((struct intel_context *)
0)->destroyed_link ) *__mptr = ((&guc->submission_state
.destroyed_contexts)->next); (struct intel_context *)( (char
*)__mptr - __builtin_offsetof(struct intel_context, destroyed_link
) );}))
;
3146 if (ce)
3147 list_del_init(&ce->destroyed_link);
3148 spin_unlock_irqrestore(&guc->submission_state.lock, flags)do { (void)(flags); mtx_leave(&guc->submission_state.lock
); } while (0)
;
3149
3150 if (!ce)
3151 break;
3152
3153 guc_lrc_desc_unpin(ce);
3154 }
3155}
3156
3157static void destroyed_worker_func(struct work_struct *w)
3158{
3159 struct intel_guc *guc = container_of(w, struct intel_guc,({ const __typeof( ((struct intel_guc *)0)->submission_state
.destroyed_worker ) *__mptr = (w); (struct intel_guc *)( (char
*)__mptr - __builtin_offsetof(struct intel_guc, submission_state
.destroyed_worker) );})
3160 submission_state.destroyed_worker)({ const __typeof( ((struct intel_guc *)0)->submission_state
.destroyed_worker ) *__mptr = (w); (struct intel_guc *)( (char
*)__mptr - __builtin_offsetof(struct intel_guc, submission_state
.destroyed_worker) );})
;
3161 struct intel_gt *gt = guc_to_gt(guc);
3162 int tmp;
3163
3164 with_intel_gt_pm(gt, tmp)for (tmp = 1, intel_gt_pm_get(gt); tmp; intel_gt_pm_put(gt), tmp
= 0)
3165 deregister_destroyed_contexts(guc);
3166}
3167
3168static void guc_context_destroy(struct kref *kref)
3169{
3170 struct intel_context *ce = container_of(kref, typeof(*ce), ref)({ const __typeof( ((typeof(*ce) *)0)->ref ) *__mptr = (kref
); (typeof(*ce) *)( (char *)__mptr - __builtin_offsetof(typeof
(*ce), ref) );})
;
3171 struct intel_guc *guc = ce_to_guc(ce);
3172 unsigned long flags;
3173 bool_Bool destroy;
3174
3175 /*
3176 * If the guc_id is invalid this context has been stolen and we can free
3177 * it immediately. It can also be freed immediately if the context is not
3178 * registered with the GuC or the GuC is in the middle of a reset.
3179 */
3180 spin_lock_irqsave(&guc->submission_state.lock, flags)do { flags = 0; mtx_enter(&guc->submission_state.lock)
; } while (0)
;
3181 destroy = submission_disabled(guc) || context_guc_id_invalid(ce) ||
3182 !ctx_id_mapped(guc, ce->guc_id.id);
3183 if (likely(!destroy)__builtin_expect(!!(!destroy), 1)) {
3184 if (!list_empty(&ce->guc_id.link))
3185 list_del_init(&ce->guc_id.link);
3186 list_add_tail(&ce->destroyed_link,
3187 &guc->submission_state.destroyed_contexts);
3188 } else {
3189 __release_guc_id(guc, ce);
3190 }
3191 spin_unlock_irqrestore(&guc->submission_state.lock, flags)do { (void)(flags); mtx_leave(&guc->submission_state.lock
); } while (0)
;
3192 if (unlikely(destroy)__builtin_expect(!!(destroy), 0)) {
3193 __guc_context_destroy(ce);
3194 return;
3195 }
3196
3197 /*
3198 * We use a worker to issue the H2G to deregister the context as we can
3199 * take the GT PM for the first time which isn't allowed from an atomic
3200 * context.
3201 */
3202 queue_work(system_unbound_wq, &guc->submission_state.destroyed_worker);
3203}
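The comment above gives the reason for the worker: guc_context_destroy() can run in atomic context, while taking GT PM for the deregister may sleep, so the sleeping part is deferred. A userland sketch of the same deferral pattern, with a thread standing in for the workqueue (names are illustrative only):

#include <pthread.h>
#include <stdio.h>

static int destroyed_pending;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *destroyed_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (destroyed_pending) {
		destroyed_pending--;
		pthread_mutex_unlock(&lock);
		printf("worker: deregistering one context (may sleep here)\n");
		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_mutex_lock(&lock);	/* "atomic" caller only queues the work */
	destroyed_pending = 2;
	pthread_mutex_unlock(&lock);

	pthread_create(&tid, NULL, destroyed_worker, NULL);
	pthread_join(tid, NULL);
	return 0;
}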
3204
3205static int guc_context_alloc(struct intel_context *ce)
3206{
3207 return lrc_alloc(ce, ce->engine);
3208}
3209
3210static void __guc_context_set_prio(struct intel_guc *guc,
3211 struct intel_context *ce)
3212{
3213 if (GET_UC_VER(guc)((((guc)->fw.file_selected.patch_ver) | (((guc)->fw.file_selected
.minor_ver) << 8) | (((guc)->fw.file_selected.major_ver
) << 16)))
>= MAKE_UC_VER(70, 0, 0)((0) | ((0) << 8) | ((70) << 16))) {
3214 struct context_policy policy;
3215
3216 __guc_context_policy_start_klv(&policy, ce->guc_id.id);
3217 __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
3218 __guc_context_set_context_policies(guc, &policy, true1);
3219 } else {
3220 u32 action[] = {
3221 INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
3222 ce->guc_id.id,
3223 ce->guc_state.prio,
3224 };
3225
3226 guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action)(sizeof((action)) / sizeof((action)[0])), 0, true1);
3227 }
3228}
3229
3230static void guc_context_set_prio(struct intel_guc *guc,
3231 struct intel_context *ce,
3232 u8 prio)
3233{
3234 GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH ||((void)0)
3235 prio > GUC_CLIENT_PRIORITY_NORMAL)((void)0);
3236 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
3237
3238 if (ce->guc_state.prio == prio || submission_disabled(guc) ||
3239 !context_registered(ce)) {
3240 ce->guc_state.prio = prio;
3241 return;
3242 }
3243
3244 ce->guc_state.prio = prio;
3245 __guc_context_set_prio(guc, ce);
3246
3247 trace_intel_context_set_prio(ce);
3248}
3249
3250static inline u8 map_i915_prio_to_guc_prio(int prio)
3251{
3252 if (prio == I915_PRIORITY_NORMAL)
3253 return GUC_CLIENT_PRIORITY_KMD_NORMAL2;
3254 else if (prio < I915_PRIORITY_NORMAL)
3255 return GUC_CLIENT_PRIORITY_NORMAL3;
3256 else if (prio < I915_PRIORITY_DISPLAY)
3257 return GUC_CLIENT_PRIORITY_HIGH1;
3258 else
3259 return GUC_CLIENT_PRIORITY_KMD_HIGH0;
3260}
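A worked example of the mapping above, using the convention noted further down that a lower GuC value means a higher priority. The I915_PRIORITY_* thresholds below are assumed placeholder values for illustration; only the ordering matters here:

#include <stdio.h>

enum { GUC_KMD_HIGH = 0, GUC_HIGH = 1, GUC_KMD_NORMAL = 2, GUC_NORMAL = 3 };
enum { I915_PRIORITY_NORMAL = 0, I915_PRIORITY_DISPLAY = 1024 };	/* assumed values */

static unsigned char map_prio(int prio)
{
	if (prio == I915_PRIORITY_NORMAL)
		return GUC_KMD_NORMAL;
	else if (prio < I915_PRIORITY_NORMAL)
		return GUC_NORMAL;
	else if (prio < I915_PRIORITY_DISPLAY)
		return GUC_HIGH;
	else
		return GUC_KMD_HIGH;
}

int main(void)
{
	int samples[] = { -512, 0, 512, 2048 };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("i915 prio %5d -> guc prio %u\n", samples[i], map_prio(samples[i]));
	return 0;
}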
3261
3262static inline void add_context_inflight_prio(struct intel_context *ce,
3263 u8 guc_prio)
3264{
3265 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
3266 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count))((void)0);
3267
3268 ++ce->guc_state.prio_count[guc_prio];
3269
3270 /* Overflow protection */
3271 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio])({ __builtin_expect(!!(!!(!ce->guc_state.prio_count[guc_prio
])), 0); })
;
3272}
3273
3274static inline void sub_context_inflight_prio(struct intel_context *ce,
3275 u8 guc_prio)
3276{
3277 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
3278 GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count))((void)0);
3279
3280 /* Underflow protection */
3281 GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio])({ __builtin_expect(!!(!!(!ce->guc_state.prio_count[guc_prio
])), 0); })
;
3282
3283 --ce->guc_state.prio_count[guc_prio];
3284}
3285
3286static inline void update_context_prio(struct intel_context *ce)
3287{
3288 struct intel_guc *guc = &ce->engine->gt->uc.guc;
3289 int i;
3290
3291 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0)extern char _ctassert[(!(0 != 0)) ? 1 : -1 ] __attribute__((__unused__
))
;
3292 BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL)extern char _ctassert[(!(0 > 3)) ? 1 : -1 ] __attribute__(
(__unused__))
;
3293
3294 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
3295
3296 for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count)(sizeof((ce->guc_state.prio_count)) / sizeof((ce->guc_state
.prio_count)[0]))
; ++i) {
3297 if (ce->guc_state.prio_count[i]) {
3298 guc_context_set_prio(guc, ce, i);
3299 break;
3300 }
3301 }
3302}
3303
3304static inline bool_Bool new_guc_prio_higher(u8 old_guc_prio, u8 new_guc_prio)
3305{
3306 /* Lower value is higher priority */
3307 return new_guc_prio < old_guc_prio;
3308}
3309
3310static void add_to_context(struct i915_request *rq)
3311{
3312 struct intel_context *ce = request_to_scheduling_context(rq);
3313 u8 new_guc_prio = map_i915_prio_to_guc_prio(rq_prio(rq));
3314
3315 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
3316 GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI)((void)0);
3317
3318 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
3319 list_move_tail(&rq->sched.link, &ce->guc_state.requests);
3320
3321 if (rq->guc_prio == GUC_PRIO_INIT0xff) {
3322 rq->guc_prio = new_guc_prio;
3323 add_context_inflight_prio(ce, rq->guc_prio);
3324 } else if (new_guc_prio_higher(rq->guc_prio, new_guc_prio)) {
3325 sub_context_inflight_prio(ce, rq->guc_prio);
3326 rq->guc_prio = new_guc_prio;
3327 add_context_inflight_prio(ce, rq->guc_prio);
3328 }
3329 update_context_prio(ce);
3330
3331 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
3332}
3333
3334static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce)
3335{
3336 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
3337
3338 if (rq->guc_prio != GUC_PRIO_INIT0xff &&
3339 rq->guc_prio != GUC_PRIO_FINI0xfe) {
3340 sub_context_inflight_prio(ce, rq->guc_prio);
3341 update_context_prio(ce);
3342 }
3343 rq->guc_prio = GUC_PRIO_FINI0xfe;
3344}
3345
3346static void remove_from_context(struct i915_request *rq)
3347{
3348 struct intel_context *ce = request_to_scheduling_context(rq);
3349
3350 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
3351
3352 spin_lock_irq(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
3353
3354 list_del_init(&rq->sched.link);
3355 clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
3356
3357 /* Prevent further __await_execution() registering a cb, then flush */
3358 set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
3359
3360 guc_prio_fini(rq, ce);
3361
3362 decr_context_committed_requests(ce);
3363
3364 spin_unlock_irq(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
3365
3366 atomic_dec(&ce->guc_id.ref)__sync_fetch_and_sub(&ce->guc_id.ref, 1);
3367 i915_request_notify_execute_cb_imm(rq);
3368}
3369
3370static const struct intel_context_ops guc_context_ops = {
3371 .alloc = guc_context_alloc,
3372
3373 .pre_pin = guc_context_pre_pin,
3374 .pin = guc_context_pin,
3375 .unpin = guc_context_unpin,
3376 .post_unpin = guc_context_post_unpin,
3377
3378 .revoke = guc_context_revoke,
3379
3380 .cancel_request = guc_context_cancel_request,
3381
3382 .enter = intel_context_enter_engine,
3383 .exit = intel_context_exit_engine,
3384
3385 .sched_disable = guc_context_sched_disable,
3386
3387 .reset = lrc_reset,
3388 .destroy = guc_context_destroy,
3389
3390 .create_virtual = guc_create_virtual,
3391 .create_parallel = guc_create_parallel,
3392};
3393
3394static void submit_work_cb(struct irq_work *wrk)
3395{
3396 struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work)({ const __typeof( ((typeof(*rq) *)0)->submit_work ) *__mptr
= (wrk); (typeof(*rq) *)( (char *)__mptr - __builtin_offsetof
(typeof(*rq), submit_work) );})
;
3397
3398 might_lock(&rq->engine->sched_engine->lock);
3399 i915_sw_fence_complete(&rq->submit);
3400}
3401
3402static void __guc_signal_context_fence(struct intel_context *ce)
3403{
3404 struct i915_request *rq, *rn;
3405
3406 lockdep_assert_held(&ce->guc_state.lock)do { (void)(&ce->guc_state.lock); } while(0);
3407
3408 if (!list_empty(&ce->guc_state.fences))
3409 trace_intel_context_fence_release(ce);
3410
3411 /*
3412 * Use an IRQ to ensure locking order of sched_engine->lock ->
3413 * ce->guc_state.lock is preserved.
3414 */
3415 list_for_each_entry_safe(rq, rn, &ce->guc_state.fences,for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->guc_fence_link
) *__mptr = ((&ce->guc_state.fences)->next); (__typeof
(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), guc_fence_link
) );}), rn = ({ const __typeof( ((__typeof(*rq) *)0)->guc_fence_link
) *__mptr = (rq->guc_fence_link.next); (__typeof(*rq) *)(
(char *)__mptr - __builtin_offsetof(__typeof(*rq), guc_fence_link
) );}); &rq->guc_fence_link != (&ce->guc_state.
fences); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0
)->guc_fence_link ) *__mptr = (rn->guc_fence_link.next)
; (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*rn), guc_fence_link) );}))
3416 guc_fence_link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->guc_fence_link
) *__mptr = ((&ce->guc_state.fences)->next); (__typeof
(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), guc_fence_link
) );}), rn = ({ const __typeof( ((__typeof(*rq) *)0)->guc_fence_link
) *__mptr = (rq->guc_fence_link.next); (__typeof(*rq) *)(
(char *)__mptr - __builtin_offsetof(__typeof(*rq), guc_fence_link
) );}); &rq->guc_fence_link != (&ce->guc_state.
fences); rq = rn, rn = ({ const __typeof( ((__typeof(*rn) *)0
)->guc_fence_link ) *__mptr = (rn->guc_fence_link.next)
; (__typeof(*rn) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*rn), guc_fence_link) );}))
{
3417 list_del(&rq->guc_fence_link);
3418 irq_work_queue(&rq->submit_work);
3419 }
3420
3421 INIT_LIST_HEAD(&ce->guc_state.fences);
3422}
3423
3424static void guc_signal_context_fence(struct intel_context *ce)
3425{
3426 unsigned long flags;
3427
3428 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
3429
3430 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
3431 clr_context_wait_for_deregister_to_register(ce);
3432 __guc_signal_context_fence(ce);
3433 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
3434}
3435
3436static bool_Bool context_needs_register(struct intel_context *ce, bool_Bool new_guc_id)
3437{
3438 return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY9, &ce->flags) ||
3439 !ctx_id_mapped(ce_to_guc(ce), ce->guc_id.id)) &&
3440 !submission_disabled(ce_to_guc(ce));
3441}
3442
3443static void guc_context_init(struct intel_context *ce)
3444{
3445 const struct i915_gem_context *ctx;
3446 int prio = I915_CONTEXT_DEFAULT_PRIORITY0;
3447
3448 rcu_read_lock();
3449 ctx = rcu_dereference(ce->gem_context)(ce->gem_context);
3450 if (ctx)
3451 prio = ctx->sched.priority;
3452 rcu_read_unlock();
3453
3454 ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
3455 set_bit(CONTEXT_GUC_INIT10, &ce->flags);
3456}
3457
3458static int guc_request_alloc(struct i915_request *rq)
3459{
3460 struct intel_context *ce = request_to_scheduling_context(rq);
3461 struct intel_guc *guc = ce_to_guc(ce);
3462 unsigned long flags;
3463 int ret;
3464
3465 GEM_BUG_ON(!intel_context_is_pinned(rq->context))((void)0);
3466
3467 /*
3468 * Flush enough space to reduce the likelihood of waiting after
3469 * we start building the request - in which case we will just
3470 * have to repeat work.
3471 */
3472 rq->reserved_space += GUC_REQUEST_SIZE64;
3473
3474 /*
3475 * Note that after this point, we have committed to using
3476 * this request as it is being used to both track the
3477 * state of engine initialisation and liveness of the
3478 * golden renderstate above. Think twice before you try
3479 * to cancel/unwind this request now.
3480 */
3481
3482 /* Unconditionally invalidate GPU caches and TLBs. */
3483 ret = rq->engine->emit_flush(rq, EMIT_INVALIDATE(1UL << (0)));
3484 if (ret)
3485 return ret;
3486
3487 rq->reserved_space -= GUC_REQUEST_SIZE64;
3488
3489 if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))__builtin_expect(!!(!test_bit(10, &ce->flags)), 0))
3490 guc_context_init(ce);
3491
3492 /*
3493 * Call pin_guc_id here rather than in the pinning step as with
3494 * dma_resv, contexts can be repeatedly pinned / unpinned, thrashing the
3495 * guc_id and creating horrible race conditions. This is especially bad
3496 * when guc_ids are being stolen due to over subscription. By the time
3497 * this function is reached, it is guaranteed that the guc_id will be
3498 * persistent until the generated request is retired, thus sealing these
3499 * race conditions. It is still safe to fail here if guc_ids are
3500 * exhausted and return -EAGAIN to the user, indicating that they can try
3501 * again in the future.
3502 *
3503 * There is no need for a lock here as the timeline mutex ensures at
3504 * most one context can be executing this code path at once. The
3505 * guc_id_ref is incremented once for every request in flight and
3506 * decremented on each retire. When it is zero, a lock around the
3507 * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id.
3508 */
3509 if (atomic_add_unless(&ce->guc_id.ref, 1, 0))
3510 goto out;
3511
3512 ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */
3513 if (unlikely(ret < 0)__builtin_expect(!!(ret < 0), 0))
3514 return ret;
3515 if (context_needs_register(ce, !!ret)) {
3516 ret = try_context_registration(ce, true1);
3517 if (unlikely(ret)__builtin_expect(!!(ret), 0)) { /* unwind */
3518 if (ret == -EPIPE32) {
3519 disable_submission(guc);
3520 goto out; /* GPU will be reset */
3521 }
3522 atomic_dec(&ce->guc_id.ref)__sync_fetch_and_sub(&ce->guc_id.ref, 1);
3523 unpin_guc_id(guc, ce);
3524 return ret;
3525 }
3526 }
3527
3528 clear_bit(CONTEXT_LRCA_DIRTY9, &ce->flags);
3529
3530out:
3531 /*
3532 * We block all requests on this context if a G2H is pending for a
3533 * schedule disable or context deregistration as the GuC will fail a
3534 * schedule enable or context registration if either G2H is pending
3535 * respectively. Once a G2H returns, the fence is released that is
3536 * blocking these requests (see guc_signal_context_fence).
3537 */
3538 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
3539 if (context_wait_for_deregister_to_register(ce) ||
3540 context_pending_disable(ce)) {
3541 init_irq_work(&rq->submit_work, submit_work_cb);
3542 i915_sw_fence_await(&rq->submit);
3543
3544 list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
3545 }
3546 incr_context_committed_requests(ce);
3547 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
3548
3549 return 0;
3550}
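The long comment in guc_request_alloc() above hinges on atomic_add_unless(&ce->guc_id.ref, 1, 0): take an extra reference only if one already exists, otherwise fall through to the locked pin_guc_id() path for the first reference. A self-contained sketch of that pattern; the fallback below is simulated and does not reproduce the driver's locking:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Add 'a' to '*v' unless '*v' equals 'u'; returns true if the add happened. */
static bool add_unless(atomic_int *v, int a, int u)
{
	int c = atomic_load(v);

	while (c != u) {
		if (atomic_compare_exchange_weak(v, &c, c + a))
			return true;
	}
	return false;
}

int main(void)
{
	atomic_int ref = 0;

	if (!add_unless(&ref, 1, 0))
		atomic_store(&ref, 1);	/* stands in for the locked pin_guc_id() path */
	printf("after first request:  %d\n", atomic_load(&ref));

	if (add_unless(&ref, 1, 0))
		printf("after second request: %d\n", atomic_load(&ref));
	return 0;
}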
3551
3552static int guc_virtual_context_pre_pin(struct intel_context *ce,
3553 struct i915_gem_ww_ctx *ww,
3554 void **vaddr)
3555{
3556 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3557
3558 return __guc_context_pre_pin(ce, engine, ww, vaddr);
3559}
3560
3561static int guc_virtual_context_pin(struct intel_context *ce, void *vaddr)
3562{
3563 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3564 int ret = __guc_context_pin(ce, engine, vaddr);
3565 intel_engine_mask_t tmp, mask = ce->engine->mask;
3566
3567 if (likely(!ret)__builtin_expect(!!(!ret), 1))
3568 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)for ((tmp) = (mask) & (ce->engine->gt)->info.engine_mask
; (tmp) ? ((engine) = (ce->engine->gt)->engine[({ int
__idx = ffs(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx
; })]), 1 : 0;)
3569 intel_engine_pm_get(engine);
3570
3571 return ret;
3572}
3573
3574static void guc_virtual_context_unpin(struct intel_context *ce)
3575{
3576 intel_engine_mask_t tmp, mask = ce->engine->mask;
3577 struct intel_engine_cs *engine;
3578 struct intel_guc *guc = ce_to_guc(ce);
3579
3580 GEM_BUG_ON(context_enabled(ce))((void)0);
3581 GEM_BUG_ON(intel_context_is_barrier(ce))((void)0);
3582
3583 unpin_guc_id(guc, ce);
3584 lrc_unpin(ce);
3585
3586 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)for ((tmp) = (mask) & (ce->engine->gt)->info.engine_mask
; (tmp) ? ((engine) = (ce->engine->gt)->engine[({ int
__idx = ffs(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx
; })]), 1 : 0;)
3587 intel_engine_pm_put_async(engine);
3588}
3589
3590static void guc_virtual_context_enter(struct intel_context *ce)
3591{
3592 intel_engine_mask_t tmp, mask = ce->engine->mask;
3593 struct intel_engine_cs *engine;
3594
3595 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)for ((tmp) = (mask) & (ce->engine->gt)->info.engine_mask
; (tmp) ? ((engine) = (ce->engine->gt)->engine[({ int
__idx = ffs(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx
; })]), 1 : 0;)
3596 intel_engine_pm_get(engine);
3597
3598 intel_timeline_enter(ce->timeline);
3599}
3600
3601static void guc_virtual_context_exit(struct intel_context *ce)
3602{
3603 intel_engine_mask_t tmp, mask = ce->engine->mask;
3604 struct intel_engine_cs *engine;
3605
3606 for_each_engine_masked(engine, ce->engine->gt, mask, tmp)for ((tmp) = (mask) & (ce->engine->gt)->info.engine_mask
; (tmp) ? ((engine) = (ce->engine->gt)->engine[({ int
__idx = ffs(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx
; })]), 1 : 0;)
3607 intel_engine_pm_put(engine);
3608
3609 intel_timeline_exit(ce->timeline);
3610}
3611
3612static int guc_virtual_context_alloc(struct intel_context *ce)
3613{
3614 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3615
3616 return lrc_alloc(ce, engine);
3617}
3618
3619static const struct intel_context_ops virtual_guc_context_ops = {
3620 .alloc = guc_virtual_context_alloc,
3621
3622 .pre_pin = guc_virtual_context_pre_pin,
3623 .pin = guc_virtual_context_pin,
3624 .unpin = guc_virtual_context_unpin,
3625 .post_unpin = guc_context_post_unpin,
3626
3627 .revoke = guc_context_revoke,
3628
3629 .cancel_request = guc_context_cancel_request,
3630
3631 .enter = guc_virtual_context_enter,
3632 .exit = guc_virtual_context_exit,
3633
3634 .sched_disable = guc_context_sched_disable,
3635
3636 .destroy = guc_context_destroy,
3637
3638 .get_sibling = guc_virtual_get_sibling,
3639};
3640
3641static int guc_parent_context_pin(struct intel_context *ce, void *vaddr)
3642{
3643 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3644 struct intel_guc *guc = ce_to_guc(ce);
3645 int ret;
3646
3647 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
3648 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine))((void)0);
3649
3650 ret = pin_guc_id(guc, ce);
3651 if (unlikely(ret < 0)__builtin_expect(!!(ret < 0), 0))
3652 return ret;
3653
3654 return __guc_context_pin(ce, engine, vaddr);
3655}
3656
3657static int guc_child_context_pin(struct intel_context *ce, void *vaddr)
3658{
3659 struct intel_engine_cs *engine = guc_virtual_get_sibling(ce->engine, 0);
3660
3661 GEM_BUG_ON(!intel_context_is_child(ce))((void)0);
3662 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine))((void)0);
3663
3664 __intel_context_pin(ce->parallel.parent);
3665 return __guc_context_pin(ce, engine, vaddr);
3666}
3667
3668static void guc_parent_context_unpin(struct intel_context *ce)
3669{
3670 struct intel_guc *guc = ce_to_guc(ce);
3671
3672 GEM_BUG_ON(context_enabled(ce))((void)0);
3673 GEM_BUG_ON(intel_context_is_barrier(ce))((void)0);
3674 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
3675 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine))((void)0);
3676
3677 unpin_guc_id(guc, ce);
3678 lrc_unpin(ce);
3679}
3680
3681static void guc_child_context_unpin(struct intel_context *ce)
3682{
3683 GEM_BUG_ON(context_enabled(ce))((void)0);
3684 GEM_BUG_ON(intel_context_is_barrier(ce))((void)0);
3685 GEM_BUG_ON(!intel_context_is_child(ce))((void)0);
3686 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine))((void)0);
3687
3688 lrc_unpin(ce);
3689}
3690
3691static void guc_child_context_post_unpin(struct intel_context *ce)
3692{
3693 GEM_BUG_ON(!intel_context_is_child(ce))((void)0);
3694 GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent))((void)0);
3695 GEM_BUG_ON(!intel_engine_is_virtual(ce->engine))((void)0);
3696
3697 lrc_post_unpin(ce);
3698 intel_context_unpin(ce->parallel.parent);
3699}
3700
3701static void guc_child_context_destroy(struct kref *kref)
3702{
3703 struct intel_context *ce = container_of(kref, typeof(*ce), ref)({ const __typeof( ((typeof(*ce) *)0)->ref ) *__mptr = (kref
); (typeof(*ce) *)( (char *)__mptr - __builtin_offsetof(typeof
(*ce), ref) );})
;
3704
3705 __guc_context_destroy(ce);
3706}
3707
3708static const struct intel_context_ops virtual_parent_context_ops = {
3709 .alloc = guc_virtual_context_alloc,
3710
3711 .pre_pin = guc_context_pre_pin,
3712 .pin = guc_parent_context_pin,
3713 .unpin = guc_parent_context_unpin,
3714 .post_unpin = guc_context_post_unpin,
3715
3716 .revoke = guc_context_revoke,
3717
3718 .cancel_request = guc_context_cancel_request,
3719
3720 .enter = guc_virtual_context_enter,
3721 .exit = guc_virtual_context_exit,
3722
3723 .sched_disable = guc_context_sched_disable,
3724
3725 .destroy = guc_context_destroy,
3726
3727 .get_sibling = guc_virtual_get_sibling,
3728};
3729
3730static const struct intel_context_ops virtual_child_context_ops = {
3731 .alloc = guc_virtual_context_alloc,
3732
3733 .pre_pin = guc_context_pre_pin,
3734 .pin = guc_child_context_pin,
3735 .unpin = guc_child_context_unpin,
3736 .post_unpin = guc_child_context_post_unpin,
3737
3738 .cancel_request = guc_context_cancel_request,
3739
3740 .enter = guc_virtual_context_enter,
3741 .exit = guc_virtual_context_exit,
3742
3743 .destroy = guc_child_context_destroy,
3744
3745 .get_sibling = guc_virtual_get_sibling,
3746};
3747
3748/*
3749 * The below override of the breadcrumbs is enabled when the user configures a
3750 * context for parallel submission (multi-lrc, parent-child).
3751 *
3752 * The overridden breadcrumbs implements an algorithm which allows the GuC to
3753 * safely preempt all the hw contexts configured for parallel submission
3754 * between each BB. The contract between the i915 and GuC is that if the parent
3755 * context can be preempted, all the children can be preempted, and the GuC will
3756 * always try to preempt the parent before the children. A handshake between the
3757 * parent / children breadcrumbs ensures the i915 holds up its end of the deal,
3758 * creating a window to preempt between each set of BBs.
3759 */
3760static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
3761 u64 offset, u32 len,
3762 const unsigned int flags);
3763static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
3764 u64 offset, u32 len,
3765 const unsigned int flags);
3766static u32 *
3767emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
3768 u32 *cs);
3769static u32 *
3770emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
3771 u32 *cs);
3772
3773static struct intel_context *
3774guc_create_parallel(struct intel_engine_cs **engines,
3775 unsigned int num_siblings,
3776 unsigned int width)
3777{
3778 struct intel_engine_cs **siblings = NULL((void *)0);
3779 struct intel_context *parent = NULL((void *)0), *ce, *err;
3780 int i, j;
3781
3782 siblings = kmalloc_array(num_siblings,
3783 sizeof(*siblings),
3784 GFP_KERNEL(0x0001 | 0x0004));
3785 if (!siblings)
3786 return ERR_PTR(-ENOMEM12);
3787
3788 for (i = 0; i < width; ++i) {
3789 for (j = 0; j < num_siblings; ++j)
3790 siblings[j] = engines[i * num_siblings + j];
3791
3792 ce = intel_engine_create_virtual(siblings, num_siblings,
3793 FORCE_VIRTUAL(1UL << (0)));
3794 if (IS_ERR(ce)) {
3795 err = ERR_CAST(ce);
3796 goto unwind;
3797 }
3798
3799 if (i == 0) {
3800 parent = ce;
3801 parent->ops = &virtual_parent_context_ops;
3802 } else {
3803 ce->ops = &virtual_child_context_ops;
3804 intel_context_bind_parent_child(parent, ce);
3805 }
3806 }
3807
3808 parent->parallel.fence_context = dma_fence_context_alloc(1);
3809
3810 parent->engine->emit_bb_start =
3811 emit_bb_start_parent_no_preempt_mid_batch;
3812 parent->engine->emit_fini_breadcrumb =
3813 emit_fini_breadcrumb_parent_no_preempt_mid_batch;
3814 parent->engine->emit_fini_breadcrumb_dw =
3815 12 + 4 * parent->parallel.number_children;
3816 for_each_child(parent, ce)for (ce = ({ const __typeof( ((__typeof(*ce) *)0)->parallel
.child_link ) *__mptr = ((&(parent)->parallel.child_list
)->next); (__typeof(*ce) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*ce), parallel.child_link) );}); &ce->parallel
.child_link != (&(parent)->parallel.child_list); ce = (
{ const __typeof( ((__typeof(*ce) *)0)->parallel.child_link
) *__mptr = (ce->parallel.child_link.next); (__typeof(*ce
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*ce), parallel
.child_link) );}))
{
3817 ce->engine->emit_bb_start =
3818 emit_bb_start_child_no_preempt_mid_batch;
3819 ce->engine->emit_fini_breadcrumb =
3820 emit_fini_breadcrumb_child_no_preempt_mid_batch;
3821 ce->engine->emit_fini_breadcrumb_dw = 16;
3822 }
3823
3824 kfree(siblings);
3825 return parent;
3826
3827unwind:
3828 if (parent)
3829 intel_context_put(parent);
3830 kfree(siblings);
3831 return err;
3832}
3833
3834static bool_Bool
3835guc_irq_enable_breadcrumbs(struct intel_breadcrumbs *b)
3836{
3837 struct intel_engine_cs *sibling;
3838 intel_engine_mask_t tmp, mask = b->engine_mask;
3839 bool_Bool result = false0;
3840
3841 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)for ((tmp) = (mask) & (b->irq_engine->gt)->info.
engine_mask; (tmp) ? ((sibling) = (b->irq_engine->gt)->
engine[({ int __idx = ffs(tmp) - 1; tmp &= ~(1UL <<
(__idx)); __idx; })]), 1 : 0;)
3842 result |= intel_engine_irq_enable(sibling);
3843
3844 return result;
3845}
3846
3847static void
3848guc_irq_disable_breadcrumbs(struct intel_breadcrumbs *b)
3849{
3850 struct intel_engine_cs *sibling;
3851 intel_engine_mask_t tmp, mask = b->engine_mask;
3852
3853 for_each_engine_masked(sibling, b->irq_engine->gt, mask, tmp)for ((tmp) = (mask) & (b->irq_engine->gt)->info.
engine_mask; (tmp) ? ((sibling) = (b->irq_engine->gt)->
engine[({ int __idx = ffs(tmp) - 1; tmp &= ~(1UL <<
(__idx)); __idx; })]), 1 : 0;)
3854 intel_engine_irq_disable(sibling);
3855}
3856
3857static void guc_init_breadcrumbs(struct intel_engine_cs *engine)
3858{
3859 int i;
3860
3861 /*
3862 * In GuC submission mode we do not know which physical engine a request
3863 * will be scheduled on; this creates a problem because the breadcrumb
3864 * interrupt is per physical engine. To work around this we attach
3865 * requests and direct all breadcrumb interrupts to the first instance
3866 * of an engine per class. In addition all breadcrumb interrupts are
3867 * enabled / disabled across an engine class in unison.
3868 */
3869 for (i = 0; i < MAX_ENGINE_INSTANCE8; ++i) {
3870 struct intel_engine_cs *sibling =
3871 engine->gt->engine_class[engine->class][i];
3872
3873 if (sibling) {
3874 if (engine->breadcrumbs != sibling->breadcrumbs) {
3875 intel_breadcrumbs_put(engine->breadcrumbs);
3876 engine->breadcrumbs =
3877 intel_breadcrumbs_get(sibling->breadcrumbs);
3878 }
3879 break;
3880 }
3881 }
3882
3883 if (engine->breadcrumbs) {
3884 engine->breadcrumbs->engine_mask |= engine->mask;
3885 engine->breadcrumbs->irq_enable = guc_irq_enable_breadcrumbs;
3886 engine->breadcrumbs->irq_disable = guc_irq_disable_breadcrumbs;
3887 }
3888}
3889
3890static void guc_bump_inflight_request_prio(struct i915_request *rq,
3891 int prio)
3892{
3893 struct intel_context *ce = request_to_scheduling_context(rq);
3894 u8 new_guc_prio = map_i915_prio_to_guc_prio(prio);
3895
3896 /* Short circuit function */
3897 if (prio < I915_PRIORITY_NORMAL ||
3898 rq->guc_prio == GUC_PRIO_FINI0xfe ||
3899 (rq->guc_prio != GUC_PRIO_INIT0xff &&
3900 !new_guc_prio_higher(rq->guc_prio, new_guc_prio)))
3901 return;
3902
3903 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
3904 if (rq->guc_prio != GUC_PRIO_FINI0xfe) {
3905 if (rq->guc_prio != GUC_PRIO_INIT0xff)
3906 sub_context_inflight_prio(ce, rq->guc_prio);
3907 rq->guc_prio = new_guc_prio;
3908 add_context_inflight_prio(ce, rq->guc_prio);
3909 update_context_prio(ce);
3910 }
3911 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
3912}
3913
3914static void guc_retire_inflight_request_prio(struct i915_request *rq)
3915{
3916 struct intel_context *ce = request_to_scheduling_context(rq);
3917
3918 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
3919 guc_prio_fini(rq, ce);
3920 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
3921}
3922
3923static void sanitize_hwsp(struct intel_engine_cs *engine)
3924{
3925 struct intel_timeline *tl;
3926
3927 list_for_each_entry(tl, &engine->status_page.timelines, engine_link)for (tl = ({ const __typeof( ((__typeof(*tl) *)0)->engine_link
) *__mptr = ((&engine->status_page.timelines)->next
); (__typeof(*tl) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tl), engine_link) );}); &tl->engine_link != (&engine
->status_page.timelines); tl = ({ const __typeof( ((__typeof
(*tl) *)0)->engine_link ) *__mptr = (tl->engine_link.next
); (__typeof(*tl) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*tl), engine_link) );}))
3928 intel_timeline_reset_seqno(tl);
3929}
3930
3931static void guc_sanitize(struct intel_engine_cs *engine)
3932{
3933 /*
3934 * Poison residual state on resume, in case the suspend didn't!
3935 *
3936 * We have to assume that across suspend/resume (or other loss
3937 * of control) the contents of our pinned buffers have been
3938 * lost, replaced by garbage. Since this doesn't always happen,
3939 * let's poison such state so that we more quickly spot when
3940 * we falsely assume it has been preserved.
3941 */
3942 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)0)
3943 memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE)__builtin_memset((engine->status_page.addr), (0xdb), ((1 <<
12)))
;
3944
3945 /*
3946 * The kernel_context HWSP is stored in the status_page. As above,
3947 * that may be lost on resume/initialisation, and so we need to
3948 * reset the value in the HWSP.
3949 */
3950 sanitize_hwsp(engine);
3951
3952 /* And scrub the dirty cachelines for the HWSP */
3953 drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE(1 << 12));
3954
3955 intel_engine_reset_pinned_contexts(engine);
3956}
3957
3958static void setup_hwsp(struct intel_engine_cs *engine)
3959{
3960 intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
3961
3962 ENGINE_WRITE_FW(engine,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset
(engine->status_page.vma)))
3963 RING_HWS_PGA,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset
(engine->status_page.vma)))
3964 i915_ggtt_offset(engine->status_page.vma))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x80) }), (i915_ggtt_offset
(engine->status_page.vma)))
;
3965}
3966
3967static void start_engine(struct intel_engine_cs *engine)
3968{
3969 ENGINE_WRITE_FW(engine,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x29c) }), (({ typeof((
1 << 3)) _a = ((1 << 3)); ({ if (__builtin_constant_p
(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while
(0); if (__builtin_constant_p(_a) && __builtin_constant_p
(_a)) do { } while (0); ((_a) << 16 | (_a)); }); })))
3970 RING_MODE_GEN7,__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x29c) }), (({ typeof((
1 << 3)) _a = ((1 << 3)); ({ if (__builtin_constant_p
(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while
(0); if (__builtin_constant_p(_a) && __builtin_constant_p
(_a)) do { } while (0); ((_a) << 16 | (_a)); }); })))
3971 _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x29c) }), (({ typeof((
1 << 3)) _a = ((1 << 3)); ({ if (__builtin_constant_p
(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while
(0); if (__builtin_constant_p(_a) && __builtin_constant_p
(_a)) do { } while (0); ((_a) << 16 | (_a)); }); })))
;
3972
3973 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING))__raw_uncore_write32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x9c) }), ((({ if (__builtin_constant_p
((((u32)((1UL << (8)) + 0))))) do { } while (0); if (__builtin_constant_p
(0)) do { } while (0); if (__builtin_constant_p((((u32)((1UL <<
(8)) + 0)))) && __builtin_constant_p(0)) do { } while
(0); (((((u32)((1UL << (8)) + 0)))) << 16 | (0))
; }))))
;
3974 ENGINE_POSTING_READ(engine, RING_MI_MODE)((void)__raw_uncore_read32(((engine))->uncore, ((const i915_reg_t
){ .reg = (((engine)->mmio_base) + 0x9c) })))
;
3975}
3976
3977static int guc_resume(struct intel_engine_cs *engine)
3978{
3979 assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
3980
3981 intel_mocs_init_engine(engine);
3982
3983 intel_breadcrumbs_reset(engine->breadcrumbs);
3984
3985 setup_hwsp(engine);
3986 start_engine(engine);
3987
3988 if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE(1UL << (11)))
3989 xehp_enable_ccs_engines(engine);
3990
3991 return 0;
3992}
3993
3994static bool_Bool guc_sched_engine_disabled(struct i915_sched_engine *sched_engine)
3995{
3996 return !sched_engine->tasklet.callback;
3997}
3998
3999static void guc_set_default_submission(struct intel_engine_cs *engine)
4000{
4001 engine->submit_request = guc_submit_request;
4002}
4003
4004static inline void guc_kernel_context_pin(struct intel_guc *guc,
4005 struct intel_context *ce)
4006{
4007 /*
4008 * Note: we purposefully do not check the returns below because
4009 * the registration can only fail if a reset is just starting.
4010 * This is called at the end of reset so presumably another reset
4011 * isn't happening and even if it did this code would be run again.
4012 */
4013
4014 if (context_guc_id_invalid(ce))
4015 pin_guc_id(guc, ce);
4016
4017 try_context_registration(ce, true1);
4018}
4019
4020static inline void guc_init_lrc_mapping(struct intel_guc *guc)
4021{
4022 struct intel_gt *gt = guc_to_gt(guc);
4023 struct intel_engine_cs *engine;
4024 enum intel_engine_id id;
4025
4026 /* make sure all descriptors are clean... */
4027 xa_destroy(&guc->context_lookup);
4028
4029 /*
4030 * A reset might have occurred while we had a pending stalled request,
4031 * so make sure we clean that up.
4032 */
4033 guc->stalled_request = NULL((void *)0);
4034 guc->submission_stall_reason = STALL_NONE;
4035
4036 /*
4037 * Some contexts might have been pinned before we enabled GuC
4038 * submission, so we need to add them to the GuC bookkeeping.
4039 * Also, after a reset of the GuC we want to make sure that the
4040 * information shared with GuC is properly reset. The kernel LRCs are
4041 * not attached to the gem_context, so they need to be added separately.
4042 */
4043 for_each_engine(engine, gt, id)for ((id) = 0; (id) < I915_NUM_ENGINES; (id)++) if (!((engine
) = (gt)->engine[(id)])) {} else
{
4044 struct intel_context *ce;
4045
4046 list_for_each_entry(ce, &engine->pinned_contexts_list,for (ce = ({ const __typeof( ((__typeof(*ce) *)0)->pinned_contexts_link
) *__mptr = ((&engine->pinned_contexts_list)->next
); (__typeof(*ce) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*ce), pinned_contexts_link) );}); &ce->pinned_contexts_link
!= (&engine->pinned_contexts_list); ce = ({ const __typeof
( ((__typeof(*ce) *)0)->pinned_contexts_link ) *__mptr = (
ce->pinned_contexts_link.next); (__typeof(*ce) *)( (char *
)__mptr - __builtin_offsetof(__typeof(*ce), pinned_contexts_link
) );}))
4047 pinned_contexts_link)for (ce = ({ const __typeof( ((__typeof(*ce) *)0)->pinned_contexts_link
) *__mptr = ((&engine->pinned_contexts_list)->next
); (__typeof(*ce) *)( (char *)__mptr - __builtin_offsetof(__typeof
(*ce), pinned_contexts_link) );}); &ce->pinned_contexts_link
!= (&engine->pinned_contexts_list); ce = ({ const __typeof
( ((__typeof(*ce) *)0)->pinned_contexts_link ) *__mptr = (
ce->pinned_contexts_link.next); (__typeof(*ce) *)( (char *
)__mptr - __builtin_offsetof(__typeof(*ce), pinned_contexts_link
) );}))
4048 guc_kernel_context_pin(guc, ce);
4049 }
4050}
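
The expanded list_for_each_entry() above is container_of() applied to each
embedded pinned_contexts_link node: walk the links and subtract the member
offset to recover the enclosing context. A self-contained sketch of that
pattern with simplified stand-in types (not the driver's structures):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Same trick the expanded macro uses: member pointer -> containing struct. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct list_head { struct list_head *next, *prev; };

/* Simplified stand-in for intel_context with its embedded list link. */
struct context {
	int id;
	struct list_head pinned_contexts_link;
};

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

int main(void)
{
	struct list_head pinned = { &pinned, &pinned };
	struct context a = { .id = 1 }, b = { .id = 2 };
	struct list_head *pos;

	list_add_tail(&a.pinned_contexts_link, &pinned);
	list_add_tail(&b.pinned_contexts_link, &pinned);

	/* Equivalent of list_for_each_entry(ce, &pinned, pinned_contexts_link) */
	for (pos = pinned.next; pos != &pinned; pos = pos->next) {
		struct context *ce =
			container_of(pos, struct context, pinned_contexts_link);
		printf("registering context %d\n", ce->id);
	}

	assert(container_of(pinned.next, struct context,
			    pinned_contexts_link) == &a);
	return 0;
}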
4051
4052static void guc_release(struct intel_engine_cs *engine)
4053{
4054 engine->sanitize = NULL((void *)0); /* no longer in control, nothing to sanitize */
4055
4056 intel_engine_cleanup_common(engine);
4057 lrc_fini_wa_ctx(engine);
4058}
4059
4060static void virtual_guc_bump_serial(struct intel_engine_cs *engine)
4061{
4062 struct intel_engine_cs *e;
4063 intel_engine_mask_t tmp, mask = engine->mask;
4064
4065 for_each_engine_masked(e, engine->gt, mask, tmp)for ((tmp) = (mask) & (engine->gt)->info.engine_mask
; (tmp) ? ((e) = (engine->gt)->engine[({ int __idx = ffs
(tmp) - 1; tmp &= ~(1UL << (__idx)); __idx; })]), 1
: 0;)
4066 e->serial++;
4067}
4068
4069static void guc_default_vfuncs(struct intel_engine_cs *engine)
4070{
4071 /* Default vfuncs which can be overridden by each engine. */
4072
4073 engine->resume = guc_resume;
4074
4075 engine->cops = &guc_context_ops;
4076 engine->request_alloc = guc_request_alloc;
4077 engine->add_active_request = add_to_context;
4078 engine->remove_active_request = remove_from_context;
4079
4080 engine->sched_engine->schedule = i915_schedule;
4081
4082 engine->reset.prepare = guc_engine_reset_prepare;
4083 engine->reset.rewind = guc_rewind_nop;
4084 engine->reset.cancel = guc_reset_nop;
4085 engine->reset.finish = guc_reset_nop;
4086
4087 engine->emit_flush = gen8_emit_flush_xcs;
4088 engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
4089 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
4090 if (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver) >= 12) {
4091 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
4092 engine->emit_flush = gen12_emit_flush_xcs;
4093 }
4094 engine->set_default_submission = guc_set_default_submission;
4095 engine->busyness = guc_engine_busyness;
4096
4097 engine->flags |= I915_ENGINE_SUPPORTS_STATS(1UL << (1));
4098 engine->flags |= I915_ENGINE_HAS_PREEMPTION(1UL << (2));
4099 engine->flags |= I915_ENGINE_HAS_TIMESLICES(1UL << (4));
4100
4101 /* Wa_14014475959:dg2 */
4102 if (IS_DG2(engine->i915)IS_PLATFORM(engine->i915, INTEL_DG2) && engine->class == COMPUTE_CLASS5)
4103 engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT(1UL << (12));
4104
4105 /*
4106 * TODO: GuC supports timeslicing and semaphores as well, but they're
4107 * handled by the firmware so some minor tweaks are required before
4108 * enabling.
4109 *
4110 * engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
4111 */
4112
4113 engine->emit_bb_start = gen8_emit_bb_start;
4114 if (GRAPHICS_VER_FULL(engine->i915)(((&(engine->i915)->__runtime)->graphics.ip.ver)
<< 8 | ((&(engine->i915)->__runtime)->graphics
.ip.rel))
>= IP_VER(12, 50)((12) << 8 | (50)))
4115 engine->emit_bb_start = gen125_emit_bb_start;
4116}
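
guc_default_vfuncs() picks emit_bb_start by comparing GRAPHICS_VER_FULL()
against IP_VER(12, 50); the expansion shows both sides pack the version as
(ver << 8) | rel, so a plain integer comparison orders them correctly. A tiny
sketch of that packing (the concrete version numbers are illustrative only):

#include <assert.h>
#include <stdint.h>

#define IP_VER(ver, rel) (((ver) << 8) | (rel))

int main(void)
{
	uint32_t newer = IP_VER(12, 55);	/* assumed example version */
	uint32_t older = IP_VER(12, 0);		/* assumed example version */

	assert(newer >= IP_VER(12, 50));	/* picks gen125_emit_bb_start */
	assert(!(older >= IP_VER(12, 50)));	/* keeps gen8_emit_bb_start */
	return 0;
}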
4117
4118static void rcs_submission_override(struct intel_engine_cs *engine)
4119{
4120 switch (GRAPHICS_VER(engine->i915)((&(engine->i915)->__runtime)->graphics.ip.ver)) {
4121 case 12:
4122 engine->emit_flush = gen12_emit_flush_rcs;
4123 engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
4124 break;
4125 case 11:
4126 engine->emit_flush = gen11_emit_flush_rcs;
4127 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
4128 break;
4129 default:
4130 engine->emit_flush = gen8_emit_flush_rcs;
4131 engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
4132 break;
4133 }
4134}
4135
4136static inline void guc_default_irqs(struct intel_engine_cs *engine)
4137{
4138 engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT(1 << 0);
4139 intel_engine_set_irq_handler(engine, cs_irq_handler);
4140}
4141
4142static void guc_sched_engine_destroy(struct kref *kref)
4143{
4144 struct i915_sched_engine *sched_engine =
4145 container_of(kref, typeof(*sched_engine), ref)({ const __typeof( ((typeof(*sched_engine) *)0)->ref ) *__mptr
= (kref); (typeof(*sched_engine) *)( (char *)__mptr - __builtin_offsetof
(typeof(*sched_engine), ref) );})
;
4146 struct intel_guc *guc = sched_engine->private_data;
4147
4148 guc->sched_engine = NULL((void *)0);
4149 tasklet_kill(&sched_engine->tasklet); /* flush the callback */
4150 kfree(sched_engine);
4151}
4152
4153int intel_guc_submission_setup(struct intel_engine_cs *engine)
4154{
4155 struct drm_i915_privateinteldrm_softc *i915 = engine->i915;
4156 struct intel_guc *guc = &engine->gt->uc.guc;
4157
4158 /*
4159 * The setup relies on several assumptions (e.g. irqs always enabled)
4160 * that are only valid on gen11+
4161 */
4162 GEM_BUG_ON(GRAPHICS_VER(i915) < 11)((void)0);
4163
4164 if (!guc->sched_engine) {
4165 guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL2);
4166 if (!guc->sched_engine)
4167 return -ENOMEM12;
4168
4169 guc->sched_engine->schedule = i915_schedule;
4170 guc->sched_engine->disabled = guc_sched_engine_disabled;
4171 guc->sched_engine->private_data = guc;
4172 guc->sched_engine->destroy = guc_sched_engine_destroy;
4173 guc->sched_engine->bump_inflight_request_prio =
4174 guc_bump_inflight_request_prio;
4175 guc->sched_engine->retire_inflight_request_prio =
4176 guc_retire_inflight_request_prio;
4177 tasklet_setup(&guc->sched_engine->tasklet,
4178 guc_submission_tasklet);
4179 }
4180 i915_sched_engine_put(engine->sched_engine);
4181 engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
4182
4183 guc_default_vfuncs(engine);
4184 guc_default_irqs(engine);
4185 guc_init_breadcrumbs(engine);
4186
4187 if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE(1UL << (9)))
4188 rcs_submission_override(engine);
4189
4190 lrc_init_wa_ctx(engine);
4191
4192 /* Finally, take ownership and responsibility for cleanup! */
4193 engine->sanitize = guc_sanitize;
4194 engine->release = guc_release;
4195
4196 return 0;
4197}
4198
4199void intel_guc_submission_enable(struct intel_guc *guc)
4200{
4201 struct intel_gt *gt = guc_to_gt(guc);
4202
4203 /* Enable and route to GuC */
4204 if (GRAPHICS_VER(gt->i915)((&(gt->i915)->__runtime)->graphics.ip.ver) >= 12)
4205 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES((const i915_reg_t){ .reg = (0xc71c) }),
4206 GUC_SEM_INTR_ROUTE_TO_GUC(1UL << (31)) |
4207 GUC_SEM_INTR_ENABLE_ALL(0xff));
4208
4209 guc_init_lrc_mapping(guc);
4210 guc_init_engine_stats(guc);
4211}
4212
4213void intel_guc_submission_disable(struct intel_guc *guc)
4214{
4215 struct intel_gt *gt = guc_to_gt(guc);
4216
4217 /* Note: By the time we're here, GuC may have already been reset */
4218
4219 /* Disable and route to host */
4220 if (GRAPHICS_VER(gt->i915)((&(gt->i915)->__runtime)->graphics.ip.ver) >= 12)
4221 intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES((const i915_reg_t){ .reg = (0xc71c) }), 0x0);
4222}
4223
4224static bool_Bool __guc_submission_supported(struct intel_guc *guc)
4225{
4226 /* GuC submission is unavailable for pre-Gen11 */
4227 return intel_guc_is_supported(guc) &&
4228 GRAPHICS_VER(guc_to_gt(guc)->i915)((&(guc_to_gt(guc)->i915)->__runtime)->graphics.
ip.ver)
>= 11;
4229}
4230
4231static bool_Bool __guc_submission_selected(struct intel_guc *guc)
4232{
4233 struct drm_i915_privateinteldrm_softc *i915 = guc_to_gt(guc)->i915;
4234
4235 if (!intel_guc_submission_is_supported(guc))
4236 return false0;
4237
4238 return i915->params.enable_guc & ENABLE_GUC_SUBMISSION(1UL << (0));
4239}
4240
4241void intel_guc_submission_init_early(struct intel_guc *guc)
4242{
4243 xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ4);
4244
4245 mtx_init(&guc->submission_state.lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&guc->
submission_state.lock), ((((0x9)) > 0x0 && ((0x9))
< 0x9) ? 0x9 : ((0x9)))); } while (0)
;
4246 INIT_LIST_HEAD(&guc->submission_state.guc_id_list);
4247 ida_init(&guc->submission_state.guc_ids);
4248 INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
4249 INIT_WORK(&guc->submission_state.destroyed_worker,
4250 destroyed_worker_func);
4251 INIT_WORK(&guc->submission_state.reset_fail_worker,
4252 reset_fail_worker_func);
4253
4254 mtx_init(&guc->timestamp.lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&guc->
timestamp.lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9
) ? 0x9 : ((0x9)))); } while (0)
;
4255 INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
4256
4257 guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID65535;
4258 guc->submission_supported = __guc_submission_supported(guc);
4259 guc->submission_selected = __guc_submission_selected(guc);
4260}
4261
4262static inline struct intel_context *
4263g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
4264{
4265 struct intel_context *ce;
4266
4267 if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)__builtin_expect(!!(ctx_id >= 65535), 0)) {
4268 drm_err(&guc_to_gt(guc)->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ctx_id
)
4269 "Invalid ctx_id %u\n", ctx_id)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ctx_id
)
;
4270 return NULL((void *)0);
4271 }
4272
4273 ce = __get_context(guc, ctx_id);
4274 if (unlikely(!ce)__builtin_expect(!!(!ce), 0)) {
4275 drm_err(&guc_to_gt(guc)->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Context is NULL, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ctx_id
)
4276 "Context is NULL, ctx_id %u\n", ctx_id)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Context is NULL, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ctx_id
)
;
4277 return NULL((void *)0);
4278 }
4279
4280 if (unlikely(intel_context_is_child(ce))__builtin_expect(!!(intel_context_is_child(ce)), 0)) {
4281 drm_err(&guc_to_gt(guc)->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Context is child, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ctx_id
)
4282 "Context is child, ctx_id %u\n", ctx_id)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Context is child, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ctx_id
)
;
4283 return NULL((void *)0);
4284 }
4285
4286 return ce;
4287}
4288
4289int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
4290 const u32 *msg,
4291 u32 len)
4292{
4293 struct intel_context *ce;
4294 u32 ctx_id;
4295
4296 if (unlikely(len < 1)__builtin_expect(!!(len < 1), 0)) {
4297 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid length %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , len)
;
4298 return -EPROTO95;
4299 }
4300 ctx_id = msg[0];
4301
4302 ce = g2h_context_lookup(guc, ctx_id);
4303 if (unlikely(!ce)__builtin_expect(!!(!ce), 0))
4304 return -EPROTO95;
4305
4306 trace_intel_context_deregister_done(ce);
4307
4308#ifdef CONFIG_DRM_I915_SELFTEST
4309 if (unlikely(ce->drop_deregister)__builtin_expect(!!(ce->drop_deregister), 0)) {
4310 ce->drop_deregister = false0;
4311 return 0;
4312 }
4313#endif
4314
4315 if (context_wait_for_deregister_to_register(ce)) {
4316 struct intel_runtime_pm *runtime_pm =
4317 &ce->engine->gt->i915->runtime_pm;
4318 intel_wakeref_t wakeref;
4319
4320 /*
4321 * The previous owner of this guc_id has been deregistered, so it is
4322 * now safe to register this context.
4323 */
4324 with_intel_runtime_pm(runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(runtime_pm); (wakeref);
intel_runtime_pm_put((runtime_pm), (wakeref)), (wakeref) = 0
)
4325 register_context(ce, true1);
4326 guc_signal_context_fence(ce);
4327 intel_context_put(ce);
4328 } else if (context_destroyed(ce)) {
4329 /* Context has been destroyed */
4330 intel_gt_pm_put_async(guc_to_gt(guc));
4331 release_guc_id(guc, ce);
4332 __guc_context_destroy(ce);
4333 }
4334
4335 decr_outstanding_submission_g2h(guc);
4336
4337 return 0;
4338}
4339
4340int intel_guc_sched_done_process_msg(struct intel_guc *guc,
4341 const u32 *msg,
4342 u32 len)
4343{
4344 struct intel_context *ce;
4345 unsigned long flags;
4346 u32 ctx_id;
4347
4348 if (unlikely(len < 2)__builtin_expect(!!(len < 2), 0)) {
4349 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid length %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , len)
;
4350 return -EPROTO95;
4351 }
4352 ctx_id = msg[0];
4353
4354 ce = g2h_context_lookup(guc, ctx_id);
4355 if (unlikely(!ce)__builtin_expect(!!(!ce), 0))
4356 return -EPROTO95;
4357
4358 if (unlikely(context_destroyed(ce) ||__builtin_expect(!!(context_destroyed(ce) || (!context_pending_enable
(ce) && !context_pending_disable(ce))), 0)
4359 (!context_pending_enable(ce) &&__builtin_expect(!!(context_destroyed(ce) || (!context_pending_enable
(ce) && !context_pending_disable(ce))), 0)
4360 !context_pending_disable(ce)))__builtin_expect(!!(context_destroyed(ce) || (!context_pending_enable
(ce) && !context_pending_disable(ce))), 0)
) {
4361 drm_err(&guc_to_gt(guc)->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Bad context sched_state 0x%x, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ce->
guc_state.sched_state, ctx_id)
4362 "Bad context sched_state 0x%x, ctx_id %u\n",printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Bad context sched_state 0x%x, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ce->
guc_state.sched_state, ctx_id)
4363 ce->guc_state.sched_state, ctx_id)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Bad context sched_state 0x%x, ctx_id %u\n"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , ce->
guc_state.sched_state, ctx_id)
;
4364 return -EPROTO95;
4365 }
4366
4367 trace_intel_context_sched_done(ce);
4368
4369 if (context_pending_enable(ce)) {
4370#ifdef CONFIG_DRM_I915_SELFTEST
4371 if (unlikely(ce->drop_schedule_enable)__builtin_expect(!!(ce->drop_schedule_enable), 0)) {
4372 ce->drop_schedule_enable = false0;
4373 return 0;
4374 }
4375#endif
4376
4377 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
4378 clr_context_pending_enable(ce);
4379 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
4380 } else if (context_pending_disable(ce)) {
4381 bool_Bool banned;
4382
4383#ifdef CONFIG_DRM_I915_SELFTEST
4384 if (unlikely(ce->drop_schedule_disable)__builtin_expect(!!(ce->drop_schedule_disable), 0)) {
4385 ce->drop_schedule_disable = false0;
4386 return 0;
4387 }
4388#endif
4389
4390 /*
4391 * Unpin must be done before __guc_signal_context_fence,
4392 * otherwise a race exists where requests get submitted and
4393 * retired before this unpin completes, resulting in the
4394 * pin_count going to zero while the context is still
4395 * enabled.
4396 */
4397 intel_context_sched_disable_unpin(ce);
4398
4399 spin_lock_irqsave(&ce->guc_state.lock, flags)do { flags = 0; mtx_enter(&ce->guc_state.lock); } while
(0)
;
4400 banned = context_banned(ce);
4401 clr_context_banned(ce);
4402 clr_context_pending_disable(ce);
4403 __guc_signal_context_fence(ce);
4404 guc_blocked_fence_complete(ce);
4405 spin_unlock_irqrestore(&ce->guc_state.lock, flags)do { (void)(flags); mtx_leave(&ce->guc_state.lock); } while
(0)
;
4406
4407 if (banned) {
4408 guc_cancel_context_requests(ce);
4409 intel_engine_signal_breadcrumbs(ce->engine);
4410 }
4411 }
4412
4413 decr_outstanding_submission_g2h(guc);
4414 intel_context_put(ce);
4415
4416 return 0;
4417}
4418
4419static void capture_error_state(struct intel_guc *guc,
4420 struct intel_context *ce)
4421{
4422 struct intel_gt *gt = guc_to_gt(guc);
4423 struct drm_i915_privateinteldrm_softc *i915 = gt->i915;
4424 struct intel_engine_cs *engine = __context_to_physical_engine(ce);
4425 intel_wakeref_t wakeref;
4426
4427 intel_engine_set_hung_context(engine, ce);
4428 with_intel_runtime_pm(&i915->runtime_pm, wakeref)for ((wakeref) = intel_runtime_pm_get(&i915->runtime_pm
); (wakeref); intel_runtime_pm_put((&i915->runtime_pm)
, (wakeref)), (wakeref) = 0)
4429 i915_capture_error_state(gt, engine->mask, CORE_DUMP_FLAG_IS_GUC_CAPTURE(1UL << (0)));
4430 atomic_inc(&i915->gpu_error.reset_engine_count[engine->uabi_class])__sync_fetch_and_add(&i915->gpu_error.reset_engine_count
[engine->uabi_class], 1)
;
4431}
4432
4433static void guc_context_replay(struct intel_context *ce)
4434{
4435 struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
4436
4437 __guc_reset_context(ce, ce->engine->mask);
4438 tasklet_hi_schedule(&sched_engine->tasklet);
4439}
4440
4441static void guc_handle_context_reset(struct intel_guc *guc,
4442 struct intel_context *ce)
4443{
4444 trace_intel_context_reset(ce);
4445
4446 if (likely(intel_context_is_schedulable(ce))__builtin_expect(!!(intel_context_is_schedulable(ce)), 1)) {
4447 capture_error_state(guc, ce);
4448 guc_context_replay(ce);
4449 } else {
4450 drm_info(&guc_to_gt(guc)->i915->drm,do { } while(0)
4451 "Ignoring context reset notification of exiting context 0x%04X on %s",do { } while(0)
4452 ce->guc_id.id, ce->engine->name)do { } while(0);
4453 }
4454}
4455
4456int intel_guc_context_reset_process_msg(struct intel_guc *guc,
4457 const u32 *msg, u32 len)
4458{
4459 struct intel_context *ce;
4460 unsigned long flags;
4461 int ctx_id;
4462
4463 if (unlikely(len != 1)__builtin_expect(!!(len != 1), 0)) {
4464 drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid length %u"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , len)
;
4465 return -EPROTO95;
4466 }
4467
4468 ctx_id = msg[0];
4469
4470 /*
4471 * The context lookup uses the xarray, but lookups only require an RCU lock,
4472 * not the full spinlock. So take the lock explicitly and keep it until a
4473 * reference count has been taken on the context, ensuring it can't be
4474 * destroyed asynchronously until the reset is done.
4475 */
4476 xa_lock_irqsave(&guc->context_lookup, flags)do { flags = 0; mtx_enter(&(&guc->context_lookup)->
xa_lock); } while (0)
;
4477 ce = g2h_context_lookup(guc, ctx_id);
4478 if (ce)
4479 intel_context_get(ce);
4480 xa_unlock_irqrestore(&guc->context_lookup, flags)do { (void)(flags); mtx_leave(&(&guc->context_lookup
)->xa_lock); } while (0)
;
4481
4482 if (unlikely(!ce)__builtin_expect(!!(!ce), 0))
4483 return -EPROTO95;
4484
4485 guc_handle_context_reset(guc, ce);
4486 intel_context_put(ce);
4487
4488 return 0;
4489}
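
The comment above spells out the lookup discipline: hold the xarray lock only
long enough to find the context and take a reference, then let the reference,
not the lock, keep the context alive for the rest of the reset handling. A
hedged userspace sketch of that pattern; the fixed-size table, the names and
the plain integer refcount are stand-ins, not the driver's data structures.

#include <assert.h>
#include <pthread.h>
#include <stddef.h>

struct context {
	int refcount;	/* stands in for ce->ref (a kref) */
};

static pthread_mutex_t lookup_lock = PTHREAD_MUTEX_INITIALIZER;
static struct context *table[16];	/* stands in for guc->context_lookup */

static struct context *lookup_and_get(unsigned int id)
{
	struct context *ce = NULL;

	pthread_mutex_lock(&lookup_lock);
	if (id < 16 && table[id] && table[id]->refcount > 0) {
		ce = table[id];
		ce->refcount++;	/* intel_context_get() equivalent */
	}
	pthread_mutex_unlock(&lookup_lock);

	return ce;	/* caller must drop this reference when done */
}

int main(void)
{
	static struct context ce = { .refcount = 1 };

	table[3] = &ce;
	assert(lookup_and_get(3) == &ce && ce.refcount == 2);
	assert(lookup_and_get(7) == NULL);
	return 0;
}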
4490
4491int intel_guc_error_capture_process_msg(struct intel_guc *guc,
4492 const u32 *msg, u32 len)
4493{
4494 u32 status;
4495
4496 if (unlikely(len != 1)__builtin_expect(!!(len != 1), 0)) {
4497 drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len)__drm_dev_dbg(((void *)0), (&guc_to_gt(guc)->i915->
drm) ? (&guc_to_gt(guc)->i915->drm)->dev : ((void
*)0), DRM_UT_DRIVER, "Invalid length %u", len)
;
4498 return -EPROTO95;
4499 }
4500
4501 status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK0x000000FF;
4502 if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
4503 drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space")printf("drm:pid%d:%s *WARNING* " "[drm] " "G2H-Error capture no space"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__)
;
4504
4505 intel_guc_capture_process(guc);
4506
4507 return 0;
4508}
4509
4510struct intel_engine_cs *
4511intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
4512{
4513 struct intel_gt *gt = guc_to_gt(guc);
4514 u8 engine_class = guc_class_to_engine_class(guc_class);
4515
4516 /* Class index is checked in class converter */
4517 GEM_BUG_ON(instance > MAX_ENGINE_INSTANCE)((void)0);
4518
4519 return gt->engine_class[engine_class][instance];
4520}
4521
4522static void reset_fail_worker_func(struct work_struct *w)
4523{
4524 struct intel_guc *guc = container_of(w, struct intel_guc,({ const __typeof( ((struct intel_guc *)0)->submission_state
.reset_fail_worker ) *__mptr = (w); (struct intel_guc *)( (char
*)__mptr - __builtin_offsetof(struct intel_guc, submission_state
.reset_fail_worker) );})
4525 submission_state.reset_fail_worker)({ const __typeof( ((struct intel_guc *)0)->submission_state
.reset_fail_worker ) *__mptr = (w); (struct intel_guc *)( (char
*)__mptr - __builtin_offsetof(struct intel_guc, submission_state
.reset_fail_worker) );})
;
4526 struct intel_gt *gt = guc_to_gt(guc);
4527 intel_engine_mask_t reset_fail_mask;
4528 unsigned long flags;
4529
4530 spin_lock_irqsave(&guc->submission_state.lock, flags)do { flags = 0; mtx_enter(&guc->submission_state.lock)
; } while (0)
;
4531 reset_fail_mask = guc->submission_state.reset_fail_mask;
4532 guc->submission_state.reset_fail_mask = 0;
4533 spin_unlock_irqrestore(&guc->submission_state.lock, flags)do { (void)(flags); mtx_leave(&guc->submission_state.lock
); } while (0)
;
4534
4535 if (likely(reset_fail_mask)__builtin_expect(!!(reset_fail_mask), 1))
4536 intel_gt_handle_error(gt, reset_fail_mask,
4537 I915_ERROR_CAPTURE(1UL << (0)),
4538 "GuC failed to reset engine mask=0x%x\n",
4539 reset_fail_mask);
4540}
4541
4542int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
4543 const u32 *msg, u32 len)
4544{
4545 struct intel_engine_cs *engine;
4546 struct intel_gt *gt = guc_to_gt(guc);
4547 u8 guc_class, instance;
4548 u32 reason;
4549 unsigned long flags;
4550
4551 if (unlikely(len != 3)__builtin_expect(!!(len != 3), 0)) {
4552 drm_err(&gt->i915->drm, "Invalid length %u", len)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid length %u"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , len)
;
4553 return -EPROTO95;
4554 }
4555
4556 guc_class = msg[0];
4557 instance = msg[1];
4558 reason = msg[2];
4559
4560 engine = intel_guc_lookup_engine(guc, guc_class, instance);
4561 if (unlikely(!engine)__builtin_expect(!!(!engine), 0)) {
4562 drm_err(&gt->i915->drm,printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid engine %d:%d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , guc_class
, instance)
4563 "Invalid engine %d:%d", guc_class, instance)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "Invalid engine %d:%d"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , guc_class
, instance)
;
4564 return -EPROTO95;
4565 }
4566
4567 /*
4568 * This is an unexpected failure of a hardware feature. So, log a real
4569 * error message, not just the informational one that comes with the reset.
4570 */
4571 drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "GuC engine reset request failed on %d:%d (%s) because 0x%08X"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , guc_class
, instance, engine->name, reason)
4572 guc_class, instance, engine->name, reason)printf("drm:pid%d:%s *ERROR* " "[drm] " "*ERROR* " "GuC engine reset request failed on %d:%d (%s) because 0x%08X"
, ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc->p_p->ps_pid, __func__ , guc_class
, instance, engine->name, reason)
;
4573
4574 spin_lock_irqsave(&guc->submission_state.lock, flags)do { flags = 0; mtx_enter(&guc->submission_state.lock)
; } while (0)
;
4575 guc->submission_state.reset_fail_mask |= engine->mask;
4576 spin_unlock_irqrestore(&guc->submission_state.lock, flags)do { (void)(flags); mtx_leave(&guc->submission_state.lock
); } while (0)
;
4577
4578 /*
4579 * A GT reset flushes this worker queue (G2H handler) so we must use
4580 * another worker to trigger a GT reset.
4581 */
4582 queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
4583
4584 return 0;
4585}
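
As the comment above explains, the G2H handler only records which engines
failed and queues separate work; the GT reset itself runs from
reset_fail_worker_func() because a GT reset would flush the handler's own
workqueue. A minimal sketch of that record-and-hand-off pattern (pthread
locking stands in for the kernel spinlock; all names are illustrative):

#include <assert.h>
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static uint32_t reset_fail_mask;

static void engine_failure_handler(uint32_t engine_mask)
{
	pthread_mutex_lock(&lock);
	reset_fail_mask |= engine_mask;	/* just record the failure... */
	pthread_mutex_unlock(&lock);
	/* ...and queue the reset worker instead of resetting here. */
}

static uint32_t reset_fail_worker(void)
{
	uint32_t mask;

	/* Take and clear the accumulated mask in one critical section. */
	pthread_mutex_lock(&lock);
	mask = reset_fail_mask;
	reset_fail_mask = 0;
	pthread_mutex_unlock(&lock);

	return mask;	/* a non-zero mask would trigger the GT reset */
}

int main(void)
{
	engine_failure_handler(1u << 0);
	engine_failure_handler(1u << 2);
	assert(reset_fail_worker() == 0x5);
	assert(reset_fail_worker() == 0);
	return 0;
}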
4586
4587void intel_guc_find_hung_context(struct intel_engine_cs *engine)
4588{
4589 struct intel_guc *guc = &engine->gt->uc.guc;
4590 struct intel_context *ce;
4591 struct i915_request *rq;
4592 unsigned long index;
4593 unsigned long flags;
4594
4595 /* Reset called during driver load? GuC not yet initialised! */
4596 if (unlikely(!guc_submission_initialized(guc))__builtin_expect(!!(!guc_submission_initialized(guc)), 0))
4597 return;
4598
4599 xa_lock_irqsave(&guc->context_lookup, flags)do { flags = 0; mtx_enter(&(&guc->context_lookup)->
xa_lock); } while (0)
;
4600 xa_for_each(&guc->context_lookup, index, ce)for (index = 0; ((ce) = xa_get_next(&guc->context_lookup
, &(index))) != ((void *)0); index++)
{
4601 bool_Bool found;
4602
4603 if (!kref_get_unless_zero(&ce->ref))
4604 continue;
4605
4606 xa_unlock(&guc->context_lookup)do { mtx_leave(&(&guc->context_lookup)->xa_lock
); } while (0)
;
4607
4608 if (!intel_context_is_pinned(ce))
4609 goto next;
4610
4611 if (intel_engine_is_virtual(ce->engine)) {
4612 if (!(ce->engine->mask & engine->mask))
4613 goto next;
4614 } else {
4615 if (ce->engine != engine)
4616 goto next;
4617 }
4618
4619 found = false0;
4620 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
4621 list_for_each_entry(rq, &ce->guc_state.requests, sched.link)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&ce->guc_state.requests)->next); (__typeof
(*rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}); &rq->sched.link != (&ce->guc_state
.requests); rq = ({ const __typeof( ((__typeof(*rq) *)0)->
sched.link ) *__mptr = (rq->sched.link.next); (__typeof(*rq
) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}))
{
4622 if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
4623 continue;
4624
4625 found = true1;
4626 break;
4627 }
4628 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
4629
4630 if (found) {
4631 intel_engine_set_hung_context(engine, ce);
4632
4633 /* Can only cope with one hang at a time... */
4634 intel_context_put(ce);
4635 xa_lock(&guc->context_lookup)do { mtx_enter(&(&guc->context_lookup)->xa_lock
); } while (0)
;
4636 goto done;
4637 }
4638
4639next:
4640 intel_context_put(ce);
4641 xa_lock(&guc->context_lookup)do { mtx_enter(&(&guc->context_lookup)->xa_lock
); } while (0)
;
4642 }
4643done:
4644 xa_unlock_irqrestore(&guc->context_lookup, flags)do { (void)(flags); mtx_leave(&(&guc->context_lookup
)->xa_lock); } while (0)
;
4645}
4646
4647void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
4648 struct i915_request *hung_rq,
4649 struct drm_printer *m)
4650{
4651 struct intel_guc *guc = &engine->gt->uc.guc;
4652 struct intel_context *ce;
4653 unsigned long index;
4654 unsigned long flags;
4655
4656 /* Reset called during driver load? GuC not yet initialised! */
4657 if (unlikely(!guc_submission_initialized(guc))__builtin_expect(!!(!guc_submission_initialized(guc)), 0))
4658 return;
4659
4660 xa_lock_irqsave(&guc->context_lookup, flags)do { flags = 0; mtx_enter(&(&guc->context_lookup)->
xa_lock); } while (0)
;
4661 xa_for_each(&guc->context_lookup, index, ce)for (index = 0; ((ce) = xa_get_next(&guc->context_lookup
, &(index))) != ((void *)0); index++)
{
4662 if (!kref_get_unless_zero(&ce->ref))
4663 continue;
4664
4665 xa_unlock(&guc->context_lookup)do { mtx_leave(&(&guc->context_lookup)->xa_lock
); } while (0)
;
4666
4667 if (!intel_context_is_pinned(ce))
4668 goto next;
4669
4670 if (intel_engine_is_virtual(ce->engine)) {
4671 if (!(ce->engine->mask & engine->mask))
4672 goto next;
4673 } else {
4674 if (ce->engine != engine)
4675 goto next;
4676 }
4677
4678 spin_lock(&ce->guc_state.lock)mtx_enter(&ce->guc_state.lock);
4679 intel_engine_dump_active_requests(&ce->guc_state.requests,
4680 hung_rq, m);
4681 spin_unlock(&ce->guc_state.lock)mtx_leave(&ce->guc_state.lock);
4682
4683next:
4684 intel_context_put(ce);
4685 xa_lock(&guc->context_lookup)do { mtx_enter(&(&guc->context_lookup)->xa_lock
); } while (0)
;
4686 }
4687 xa_unlock_irqrestore(&guc->context_lookup, flags)do { (void)(flags); mtx_leave(&(&guc->context_lookup
)->xa_lock); } while (0)
;
4688}
4689
4690void intel_guc_submission_print_info(struct intel_guc *guc,
4691 struct drm_printer *p)
4692{
4693 struct i915_sched_engine *sched_engine = guc->sched_engine;
4694 struct rb_node *rb;
4695 unsigned long flags;
4696
4697 if (!sched_engine)
4698 return;
4699
4700 drm_printf(p, "GuC Number Outstanding Submission G2H: %u\n",
4701 atomic_read(&guc->outstanding_submission_g2h)({ typeof(*(&guc->outstanding_submission_g2h)) __tmp =
*(volatile typeof(*(&guc->outstanding_submission_g2h)
) *)&(*(&guc->outstanding_submission_g2h)); membar_datadep_consumer
(); __tmp; })
);
4702 drm_printf(p, "GuC tasklet count: %u\n\n",
4703 atomic_read(&sched_engine->tasklet.count)({ typeof(*(&sched_engine->tasklet.count)) __tmp = *(volatile
typeof(*(&sched_engine->tasklet.count)) *)&(*(&
sched_engine->tasklet.count)); membar_datadep_consumer(); __tmp
; })
);
4704
4705 spin_lock_irqsave(&sched_engine->lock, flags)do { flags = 0; mtx_enter(&sched_engine->lock); } while
(0)
;
4706 drm_printf(p, "Requests in GuC submit tasklet:\n");
4707 for (rb = rb_first_cached(&sched_engine->queue)linux_root_RB_MINMAX((struct linux_root *)(&(&sched_engine
->queue)->rb_root), -1)
; rb; rb = rb_next(rb)linux_root_RB_NEXT((rb))) {
4708 struct i915_priolist *pl = to_priolist(rb);
4709 struct i915_request *rq;
4710
4711 priolist_for_each_request(rq, pl)for (rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = ((&(pl)->requests)->next); (__typeof(*
rq) *)( (char *)__mptr - __builtin_offsetof(__typeof(*rq), sched
.link) );}); &rq->sched.link != (&(pl)->requests
); rq = ({ const __typeof( ((__typeof(*rq) *)0)->sched.link
) *__mptr = (rq->sched.link.next); (__typeof(*rq) *)( (char
*)__mptr - __builtin_offsetof(__typeof(*rq), sched.link) );}
))
4712 drm_printf(p, "guc_id=%u, seqno=%llu\n",
4713 rq->context->guc_id.id,
4714 rq->fence.seqno);
4715 }
4716 spin_unlock_irqrestore(&sched_engine->lock, flags)do { (void)(flags); mtx_leave(&sched_engine->lock); } while
(0)
;
4717 drm_printf(p, "\n");
4718}
4719
4720static inline void guc_log_context_priority(struct drm_printer *p,
4721 struct intel_context *ce)
4722{
4723 int i;
4724
4725 drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio);
4726 drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n");
4727 for (i = GUC_CLIENT_PRIORITY_KMD_HIGH0;
4728 i < GUC_CLIENT_PRIORITY_NUM4; ++i) {
4729 drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n",
4730 i, ce->guc_state.prio_count[i]);
4731 }
4732 drm_printf(p, "\n");
4733}
4734
4735static inline void guc_log_context(struct drm_printer *p,
4736 struct intel_context *ce)
4737{
4738 drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id);
4739 drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca);
4740 drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n",
4741 ce->ring->head,
4742 ce->lrc_reg_state[CTX_RING_HEAD(0x04 + 1)]);
4743 drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n",
4744 ce->ring->tail,
4745 ce->lrc_reg_state[CTX_RING_TAIL(0x06 + 1)]);
4746 drm_printf(p, "\t\tContext Pin Count: %u\n",
4747 atomic_read(&ce->pin_count)({ typeof(*(&ce->pin_count)) __tmp = *(volatile typeof
(*(&ce->pin_count)) *)&(*(&ce->pin_count));
membar_datadep_consumer(); __tmp; })
);
4748 drm_printf(p, "\t\tGuC ID Ref Count: %u\n",
4749 atomic_read(&ce->guc_id.ref)({ typeof(*(&ce->guc_id.ref)) __tmp = *(volatile typeof
(*(&ce->guc_id.ref)) *)&(*(&ce->guc_id.ref)
); membar_datadep_consumer(); __tmp; })
);
4750 drm_printf(p, "\t\tSchedule State: 0x%x\n\n",
4751 ce->guc_state.sched_state);
4752}
4753
4754void intel_guc_submission_print_context_info(struct intel_guc *guc,
4755 struct drm_printer *p)
4756{
4757 struct intel_context *ce;
4758 unsigned long index;
4759 unsigned long flags;
4760
4761 xa_lock_irqsave(&guc->context_lookup, flags)do { flags = 0; mtx_enter(&(&guc->context_lookup)->
xa_lock); } while (0)
;
4762 xa_for_each(&guc->context_lookup, index, ce)for (index = 0; ((ce) = xa_get_next(&guc->context_lookup
, &(index))) != ((void *)0); index++)
{
4763 GEM_BUG_ON(intel_context_is_child(ce))((void)0);
4764
4765 guc_log_context(p, ce);
4766 guc_log_context_priority(p, ce);
4767
4768 if (intel_context_is_parent(ce)) {
4769 struct intel_context *child;
4770
4771 drm_printf(p, "\t\tNumber children: %u\n",
4772 ce->parallel.number_children);
4773
4774 if (ce->parallel.guc.wq_status) {
4775 drm_printf(p, "\t\tWQI Head: %u\n",
4776 READ_ONCE(*ce->parallel.guc.wq_head)({ typeof(*ce->parallel.guc.wq_head) __tmp = *(volatile typeof
(*ce->parallel.guc.wq_head) *)&(*ce->parallel.guc.wq_head
); membar_datadep_consumer(); __tmp; })
);
4777 drm_printf(p, "\t\tWQI Tail: %u\n",
4778 READ_ONCE(*ce->parallel.guc.wq_tail)({ typeof(*ce->parallel.guc.wq_tail) __tmp = *(volatile typeof
(*ce->parallel.guc.wq_tail) *)&(*ce->parallel.guc.wq_tail
); membar_datadep_consumer(); __tmp; })
);
4779 drm_printf(p, "\t\tWQI Status: %u\n\n",
4780 READ_ONCE(*ce->parallel.guc.wq_status)({ typeof(*ce->parallel.guc.wq_status) __tmp = *(volatile typeof
(*ce->parallel.guc.wq_status) *)&(*ce->parallel.guc
.wq_status); membar_datadep_consumer(); __tmp; })
);
4781 }
4782
4783 if (ce->engine->emit_bb_start ==
4784 emit_bb_start_parent_no_preempt_mid_batch) {
4785 u8 i;
4786
4787 drm_printf(p, "\t\tChildren Go: %u\n\n",
4788 get_children_go_value(ce));
4789 for (i = 0; i < ce->parallel.number_children; ++i)
4790 drm_printf(p, "\t\tChildren Join: %u\n",
4791 get_children_join_value(ce, i));
4792 }
4793
4794 for_each_child(ce, child)for (child = ({ const __typeof( ((__typeof(*child) *)0)->parallel
.child_link ) *__mptr = ((&(ce)->parallel.child_list)->
next); (__typeof(*child) *)( (char *)__mptr - __builtin_offsetof
(__typeof(*child), parallel.child_link) );}); &child->
parallel.child_link != (&(ce)->parallel.child_list); child
= ({ const __typeof( ((__typeof(*child) *)0)->parallel.child_link
) *__mptr = (child->parallel.child_link.next); (__typeof(
*child) *)( (char *)__mptr - __builtin_offsetof(__typeof(*child
), parallel.child_link) );}))
4795 guc_log_context(p, child);
4796 }
4797 }
4798 xa_unlock_irqrestore(&guc->context_lookup, flags)do { (void)(flags); mtx_leave(&(&guc->context_lookup
)->xa_lock); } while (0)
;
4799}
4800
4801static inline u32 get_children_go_addr(struct intel_context *ce)
4802{
4803 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
4804
4805 return i915_ggtt_offset(ce->state) +
4806 __get_parent_scratch_offset(ce) +
4807 offsetof(struct parent_scratch, go.semaphore)__builtin_offsetof(struct parent_scratch, go.semaphore);
4808}
4809
4810static inline u32 get_children_join_addr(struct intel_context *ce,
4811 u8 child_index)
4812{
4813 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
4814
4815 return i915_ggtt_offset(ce->state) +
4816 __get_parent_scratch_offset(ce) +
4817 offsetof(struct parent_scratch, join[child_index].semaphore)__builtin_offsetof(struct parent_scratch, join[child_index].semaphore
)
;
4818}
4819
4820#define PARENT_GO_BB1 1
4821#define PARENT_GO_FINI_BREADCRUMB0 0
4822#define CHILD_GO_BB1 1
4823#define CHILD_GO_FINI_BREADCRUMB0 0
4824static int emit_bb_start_parent_no_preempt_mid_batch(struct i915_request *rq,
4825 u64 offset, u32 len,
4826 const unsigned int flags)
4827{
4828 struct intel_context *ce = rq->context;
4829 u32 *cs;
4830 u8 i;
4831
4832 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
4833
4834 cs = intel_ring_begin(rq, 10 + 4 * ce->parallel.number_children);
4835 if (IS_ERR(cs))
4836 return PTR_ERR(cs);
4837
4838 /* Wait on children */
4839 for (i = 0; i < ce->parallel.number_children; ++i) {
4840 *cs++ = (MI_SEMAPHORE_WAIT(((0x0) << 29) | (0x1c) << 23 | (2)) |
4841 MI_SEMAPHORE_GLOBAL_GTT(1<<22) |
4842 MI_SEMAPHORE_POLL(1 << 15) |
4843 MI_SEMAPHORE_SAD_EQ_SDD(4 << 12));
4844 *cs++ = PARENT_GO_BB1;
4845 *cs++ = get_children_join_addr(ce, i);
4846 *cs++ = 0;
4847 }
4848
4849 /* Turn off preemption */
4850 *cs++ = MI_ARB_ON_OFF(((0x0) << 29) | (0x08) << 23 | (0)) | MI_ARB_DISABLE(0<<0);
4851 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
4852
4853 /* Tell children go */
4854 cs = gen8_emit_ggtt_write(cs,
4855 CHILD_GO_BB1,
4856 get_children_go_addr(ce),
4857 0);
4858
4859 /* Jump to batch */
4860 *cs++ = MI_BATCH_BUFFER_START_GEN8(((0x0) << 29) | (0x31) << 23 | (1)) |
4861 (flags & I915_DISPATCH_SECURE(1UL << (0)) ? 0 : BIT(8)(1UL << (8)));
4862 *cs++ = lower_32_bits(offset)((u32)(offset));
4863 *cs++ = upper_32_bits(offset)((u32)(((offset) >> 16) >> 16));
4864 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
4865
4866 intel_ring_advance(rq, cs);
4867
4868 return 0;
4869}
4870
4871static int emit_bb_start_child_no_preempt_mid_batch(struct i915_request *rq,
4872 u64 offset, u32 len,
4873 const unsigned int flags)
4874{
4875 struct intel_context *ce = rq->context;
4876 struct intel_context *parent = intel_context_to_parent(ce);
4877 u32 *cs;
4878
4879 GEM_BUG_ON(!intel_context_is_child(ce))((void)0);
4880
4881 cs = intel_ring_begin(rq, 12);
4882 if (IS_ERR(cs))
4883 return PTR_ERR(cs);
4884
4885 /* Signal parent */
4886 cs = gen8_emit_ggtt_write(cs,
4887 PARENT_GO_BB1,
4888 get_children_join_addr(parent,
4889 ce->parallel.child_index),
4890 0);
4891
4892 /* Wait on parent for go */
4893 *cs++ = (MI_SEMAPHORE_WAIT(((0x0) << 29) | (0x1c) << 23 | (2)) |
4894 MI_SEMAPHORE_GLOBAL_GTT(1<<22) |
4895 MI_SEMAPHORE_POLL(1 << 15) |
4896 MI_SEMAPHORE_SAD_EQ_SDD(4 << 12));
4897 *cs++ = CHILD_GO_BB1;
4898 *cs++ = get_children_go_addr(parent);
4899 *cs++ = 0;
4900
4901 /* Turn off preemption */
4902 *cs++ = MI_ARB_ON_OFF(((0x0) << 29) | (0x08) << 23 | (0)) | MI_ARB_DISABLE(0<<0);
4903
4904 /* Jump to batch */
4905 *cs++ = MI_BATCH_BUFFER_START_GEN8(((0x0) << 29) | (0x31) << 23 | (1)) |
4906 (flags & I915_DISPATCH_SECURE(1UL << (0)) ? 0 : BIT(8)(1UL << (8)));
4907 *cs++ = lower_32_bits(offset)((u32)(offset));
4908 *cs++ = upper_32_bits(offset)((u32)(((offset) >> 16) >> 16));
4909
4910 intel_ring_advance(rq, cs);
4911
4912 return 0;
4913}
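
The parent and child emit_bb_start paths above implement a join/go handshake
through semaphores in the parent scratch page: each child writes PARENT_GO_BB
into its join slot, the parent polls every join slot with MI_SEMAPHORE_WAIT,
then writes CHILD_GO_BB into the go slot all children are polling, and only
then do the batches start with preemption disabled. Below is a hedged
userspace model of that handshake, with threads and C11 atomics standing in
for the engines and the semaphore waits (it does not model the preemption
on/off bracketing).

#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

#define NUM_CHILDREN 2
#define PARENT_GO_BB 1
#define CHILD_GO_BB  1

static atomic_int join_slot[NUM_CHILDREN];	/* children signal the parent */
static atomic_int go_slot;			/* parent releases the children */

static void *child_bb_start(void *arg)
{
	int idx = *(int *)arg;

	/* "Signal parent": write PARENT_GO_BB into this child's join slot. */
	atomic_store(&join_slot[idx], PARENT_GO_BB);

	/* "Wait on parent for go": poll the shared go slot. */
	while (atomic_load(&go_slot) != CHILD_GO_BB)
		;
	return NULL;	/* then the child would jump to its batch */
}

int main(void)
{
	pthread_t threads[NUM_CHILDREN];
	int ids[NUM_CHILDREN] = { 0, 1 };
	int i;

	for (i = 0; i < NUM_CHILDREN; i++)
		pthread_create(&threads[i], NULL, child_bb_start, &ids[i]);

	/* Parent: "Wait on children" - poll every join slot... */
	for (i = 0; i < NUM_CHILDREN; i++)
		while (atomic_load(&join_slot[i]) != PARENT_GO_BB)
			;

	/* ...then "Tell children go" and jump to the parent batch. */
	atomic_store(&go_slot, CHILD_GO_BB);

	for (i = 0; i < NUM_CHILDREN; i++)
		pthread_join(threads[i], NULL);
	return 0;
}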
4914
4915static u32 *
4916__emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4917 u32 *cs)
4918{
4919 struct intel_context *ce = rq->context;
4920 u8 i;
4921
4922 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
4923
4924 /* Wait on children */
4925 for (i = 0; i < ce->parallel.number_children; ++i) {
4926 *cs++ = (MI_SEMAPHORE_WAIT(((0x0) << 29) | (0x1c) << 23 | (2)) |
4927 MI_SEMAPHORE_GLOBAL_GTT(1<<22) |
4928 MI_SEMAPHORE_POLL(1 << 15) |
4929 MI_SEMAPHORE_SAD_EQ_SDD(4 << 12));
4930 *cs++ = PARENT_GO_FINI_BREADCRUMB0;
4931 *cs++ = get_children_join_addr(ce, i);
4932 *cs++ = 0;
4933 }
4934
4935 /* Turn on preemption */
4936 *cs++ = MI_ARB_ON_OFF(((0x0) << 29) | (0x08) << 23 | (0)) | MI_ARB_ENABLE(1<<0);
4937 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
4938
4939 /* Tell children go */
4940 cs = gen8_emit_ggtt_write(cs,
4941 CHILD_GO_FINI_BREADCRUMB0,
4942 get_children_go_addr(ce),
4943 0);
4944
4945 return cs;
4946}
4947
4948/*
4949 * If this is true, a submission of multi-lrc requests had an error and the
4950 * requests need to be skipped. The front end (execbuf IOCTL) should've called
4951 * i915_request_skip, which squashes the BB, but we still need to emit the fini
4952 * breadcrumb seqno write. At this point we don't know how many of the
4953 * requests in the multi-lrc submission were generated, so we can't do the
4954 * handshake between the parent and children (e.g. if 4 requests should be
4955 * generated but the 2nd hit an error, only 1 would be seen by the GuC backend).
4956 * Simply skip the handshake, but still emit the breadcrumb seqno, if an error
4957 * has occurred on any of the requests in submission / relationship.
4958 */
4959static inline bool_Bool skip_handshake(struct i915_request *rq)
4960{
4961 return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
4962}
4963
4964#define NON_SKIP_LEN 6
4965static u32 *
4966emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
4967 u32 *cs)
4968{
4969 struct intel_context *ce = rq->context;
4970 __maybe_unused__attribute__((__unused__)) u32 *before_fini_breadcrumb_user_interrupt_cs;
4971 __maybe_unused__attribute__((__unused__)) u32 *start_fini_breadcrumb_cs = cs;
4972
4973 GEM_BUG_ON(!intel_context_is_parent(ce))((void)0);
4974
4975 if (unlikely(skip_handshake(rq))__builtin_expect(!!(skip_handshake(rq)), 0)) {
4976 /*
4977 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
4978 * the NON_SKIP_LEN comes from the length of the emits below.
4979 */
4980 memset(cs, 0, sizeof(u32) *__builtin_memset((cs), (0), (sizeof(u32) * (ce->engine->
emit_fini_breadcrumb_dw - NON_SKIP_LEN)))
4981 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN))__builtin_memset((cs), (0), (sizeof(u32) * (ce->engine->
emit_fini_breadcrumb_dw - NON_SKIP_LEN)))
;
4982 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
4983 } else {
4984 cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
4985 }
4986
4987 /* Emit fini breadcrumb */
4988 before_fini_breadcrumb_user_interrupt_cs = cs;
4989 cs = gen8_emit_ggtt_write(cs,
4990 rq->fence.seqno,
4991 i915_request_active_timeline(rq)->hwsp_offset,
4992 0);
4993
4994 /* User interrupt */
4995 *cs++ = MI_USER_INTERRUPT(((0x0) << 29) | (0x02) << 23 | (0));
4996 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
4997
4998 /* Ensure our math for skip + emit is correct */
4999 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=((void)0)
5000 cs)((void)0);
5001 GEM_BUG_ON(start_fini_breadcrumb_cs +((void)0)
5002 ce->engine->emit_fini_breadcrumb_dw != cs)((void)0);
5003
5004 rq->tail = intel_ring_offset(rq, cs);
5005
5006 return cs;
5007}
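
The skip_handshake() path above NOPs (zeroes) everything except the final
NON_SKIP_LEN dwords, and the GEM_BUG_ONs check that the skipped span plus the
emitted tail add up to exactly emit_fini_breadcrumb_dw. A small sketch of that
dword accounting; the total of 16 dwords is an assumed value for illustration,
only NON_SKIP_LEN = 6 (seqno write + user interrupt + noop) comes from the
code above.

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define NON_SKIP_LEN 6

int main(void)
{
	const unsigned int emit_fini_breadcrumb_dw = 16;	/* assumed total */
	uint32_t ring[16] = { 0 };
	uint32_t *start = ring, *cs = ring;

	/* skip_handshake() case: NOP out the handshake portion... */
	memset(cs, 0, sizeof(uint32_t) *
	       (emit_fini_breadcrumb_dw - NON_SKIP_LEN));
	cs += emit_fini_breadcrumb_dw - NON_SKIP_LEN;

	/* ...then the non-skippable tail is always emitted. */
	cs += 4;	/* the fence seqno write (gen8_emit_ggtt_write) */
	cs += 2;	/* MI_USER_INTERRUPT + MI_NOOP */

	/* Same invariant the GEM_BUG_ONs assert: the lengths must add up. */
	assert((unsigned int)(cs - start) == emit_fini_breadcrumb_dw);
	return 0;
}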
5008
5009static u32 *
5010__emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5011 u32 *cs)
5012{
5013 struct intel_context *ce = rq->context;
5014 struct intel_context *parent = intel_context_to_parent(ce);
5015
5016 GEM_BUG_ON(!intel_context_is_child(ce))((void)0);
5017
5018 /* Turn on preemption */
5019 *cs++ = MI_ARB_ON_OFF(((0x0) << 29) | (0x08) << 23 | (0)) | MI_ARB_ENABLE(1<<0);
5020 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
5021
5022 /* Signal parent */
5023 cs = gen8_emit_ggtt_write(cs,
5024 PARENT_GO_FINI_BREADCRUMB0,
5025 get_children_join_addr(parent,
5026 ce->parallel.child_index),
5027 0);
5028
5029 /* Wait on parent for go */
5030 *cs++ = (MI_SEMAPHORE_WAIT(((0x0) << 29) | (0x1c) << 23 | (2)) |
5031 MI_SEMAPHORE_GLOBAL_GTT(1<<22) |
5032 MI_SEMAPHORE_POLL(1 << 15) |
5033 MI_SEMAPHORE_SAD_EQ_SDD(4 << 12));
5034 *cs++ = CHILD_GO_FINI_BREADCRUMB0;
5035 *cs++ = get_children_go_addr(parent);
5036 *cs++ = 0;
5037
5038 return cs;
5039}
5040
5041static u32 *
5042emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
5043 u32 *cs)
5044{
5045 struct intel_context *ce = rq->context;
5046 __maybe_unused__attribute__((__unused__)) u32 *before_fini_breadcrumb_user_interrupt_cs;
5047 __maybe_unused__attribute__((__unused__)) u32 *start_fini_breadcrumb_cs = cs;
5048
5049 GEM_BUG_ON(!intel_context_is_child(ce))((void)0);
5050
5051 if (unlikely(skip_handshake(rq))__builtin_expect(!!(skip_handshake(rq)), 0)) {
5052 /*
5053 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
5054 * the NON_SKIP_LEN comes from the length of the emits below.
5055 */
5056 memset(cs, 0, sizeof(u32) *__builtin_memset((cs), (0), (sizeof(u32) * (ce->engine->
emit_fini_breadcrumb_dw - NON_SKIP_LEN)))
5057 (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN))__builtin_memset((cs), (0), (sizeof(u32) * (ce->engine->
emit_fini_breadcrumb_dw - NON_SKIP_LEN)))
;
5058 cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
5059 } else {
5060 cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
5061 }
5062
5063 /* Emit fini breadcrumb */
5064 before_fini_breadcrumb_user_interrupt_cs = cs;
5065 cs = gen8_emit_ggtt_write(cs,
5066 rq->fence.seqno,
5067 i915_request_active_timeline(rq)->hwsp_offset,
5068 0);
5069
5070 /* User interrupt */
5071 *cs++ = MI_USER_INTERRUPT(((0x0) << 29) | (0x02) << 23 | (0));
5072 *cs++ = MI_NOOP(((0x0) << 29) | (0) << 23 | (0));
5073
5074 /* Ensure our math for skip + emit is correct */
5075 GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=((void)0)
5076 cs)((void)0);
5077 GEM_BUG_ON(start_fini_breadcrumb_cs +((void)0)
5078 ce->engine->emit_fini_breadcrumb_dw != cs)((void)0);
5079
5080 rq->tail = intel_ring_offset(rq, cs);
5081
5082 return cs;
5083}
5084
5085#undef NON_SKIP_LEN
5086
5087static struct intel_context *
5088guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
5089 unsigned long flags)
5090{
5091 struct guc_virtual_engine *ve;
5092 struct intel_guc *guc;
5093 unsigned int n;
5094 int err;
5095
5096 ve = kzalloc(sizeof(*ve), GFP_KERNEL(0x0001 | 0x0004));
5097 if (!ve)
5098 return ERR_PTR(-ENOMEM12);
5099
5100 guc = &siblings[0]->gt->uc.guc;
5101
5102 ve->base.i915 = siblings[0]->i915;
5103 ve->base.gt = siblings[0]->gt;
5104 ve->base.uncore = siblings[0]->uncore;
5105 ve->base.id = -1;
5106
5107 ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
5108 ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL-2;
5109 ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL-2;
5110 ve->base.saturated = ALL_ENGINES((intel_engine_mask_t)~0ul);
5111
5112 snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
5113
5114 ve->base.sched_engine = i915_sched_engine_get(guc->sched_engine);
5115
5116 ve->base.cops = &virtual_guc_context_ops;
5117 ve->base.request_alloc = guc_request_alloc;
5118 ve->base.bump_serial = virtual_guc_bump_serial;
5119
5120 ve->base.submit_request = guc_submit_request;
5121
5122 ve->base.flags = I915_ENGINE_IS_VIRTUAL(1UL << (5));
5123
5124#ifdef notyet
5125 BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES)extern char _ctassert[(!(((sizeof((1UL << ((8 * sizeof(
intel_engine_mask_t)) - 1))) <= 4) ? (fls((1UL << ((
8 * sizeof(intel_engine_mask_t)) - 1))) - 1) : (flsl((1UL <<
((8 * sizeof(intel_engine_mask_t)) - 1))) - 1)) < I915_NUM_ENGINES
)) ? 1 : -1 ] __attribute__((__unused__))
;
5126#endif
5127 ve->base.mask = VIRTUAL_ENGINES(1UL << ((8 * sizeof(intel_engine_mask_t)) - 1));
5128
5129 intel_context_init(&ve->context, &ve->base);
5130
5131 for (n = 0; n < count; n++) {
5132 struct intel_engine_cs *sibling = siblings[n];
5133
5134 GEM_BUG_ON(!is_power_of_2(sibling->mask))((void)0);
5135 if (sibling->mask & ve->base.mask) {
5136 DRM_DEBUG("duplicate %s entry in load balancer\n",___drm_dbg(((void *)0), DRM_UT_CORE, "duplicate %s entry in load balancer\n"
, sibling->name)
5137 sibling->name)___drm_dbg(((void *)0), DRM_UT_CORE, "duplicate %s entry in load balancer\n"
, sibling->name)
;
5138 err = -EINVAL22;
5139 goto err_put;
5140 }
5141
5142 ve->base.mask |= sibling->mask;
5143 ve->base.logical_mask |= sibling->logical_mask;
5144
5145 if (n != 0 && ve->base.class != sibling->class) {
5146 DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",___drm_dbg(((void *)0), DRM_UT_CORE, "invalid mixing of engine class, sibling %d, already %d\n"
, sibling->class, ve->base.class)
5147 sibling->class, ve->base.class)___drm_dbg(((void *)0), DRM_UT_CORE, "invalid mixing of engine class, sibling %d, already %d\n"
, sibling->class, ve->base.class)
;
5148 err = -EINVAL22;
5149 goto err_put;
5150 } else if (n == 0) {
5151 ve->base.class = sibling->class;
5152 ve->base.uabi_class = sibling->uabi_class;
5153 snprintf(ve->base.name, sizeof(ve->base.name),
5154 "v%dx%d", ve->base.class, count);
5155 ve->base.context_size = sibling->context_size;
5156
5157 ve->base.add_active_request =
5158 sibling->add_active_request;
5159 ve->base.remove_active_request =
5160 sibling->remove_active_request;
5161 ve->base.emit_bb_start = sibling->emit_bb_start;
5162 ve->base.emit_flush = sibling->emit_flush;
5163 ve->base.emit_init_breadcrumb =
5164 sibling->emit_init_breadcrumb;
5165 ve->base.emit_fini_breadcrumb =
5166 sibling->emit_fini_breadcrumb;
5167 ve->base.emit_fini_breadcrumb_dw =
5168 sibling->emit_fini_breadcrumb_dw;
5169 ve->base.breadcrumbs =
5170 intel_breadcrumbs_get(sibling->breadcrumbs);
5171
5172 ve->base.flags |= sibling->flags;
5173
5174 ve->base.props.timeslice_duration_ms =
5175 sibling->props.timeslice_duration_ms;
5176 ve->base.props.preempt_timeout_ms =
5177 sibling->props.preempt_timeout_ms;
5178 }
5179 }
5180
5181 return &ve->context;
5182
5183err_put:
5184 intel_context_put(&ve->context);
5185 return ERR_PTR(err);
5186}
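
guc_create_virtual() above requires every sibling to own exactly one engine
bit, rejects duplicates, and ORs the sibling masks into the virtual engine's
mask on top of the reserved VIRTUAL_ENGINES bit. A brief sketch of those mask
checks; the bit positions and the 32-bit mask width are illustrative
assumptions.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_power_of_2(uint32_t n)
{
	return n && !(n & (n - 1));
}

int main(void)
{
	const uint32_t VIRTUAL_ENGINES = 1u << 31;	/* reserved top bit */
	uint32_t siblings[] = { 1u << 1, 1u << 2 };	/* assumed engine bits */
	uint32_t mask = VIRTUAL_ENGINES;
	unsigned int n;

	for (n = 0; n < 2; n++) {
		/* GEM_BUG_ON(!is_power_of_2(sibling->mask)) equivalent */
		assert(is_power_of_2(siblings[n]));
		/* "duplicate entry in load balancer" check */
		assert(!(siblings[n] & mask));
		mask |= siblings[n];
	}

	assert(mask == (VIRTUAL_ENGINES | (1u << 1) | (1u << 2)));
	return 0;
}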
5187
5188bool_Bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
5189{
5190 struct intel_engine_cs *engine;
5191 intel_engine_mask_t tmp, mask = ve->mask;
5192
5193 for_each_engine_masked(engine, ve->gt, mask, tmp)for ((tmp) = (mask) & (ve->gt)->info.engine_mask; (
tmp) ? ((engine) = (ve->gt)->engine[({ int __idx = ffs(
tmp) - 1; tmp &= ~(1UL << (__idx)); __idx; })]), 1 :
0;)
5194 if (READ_ONCE(engine->props.heartbeat_interval_ms)({ typeof(engine->props.heartbeat_interval_ms) __tmp = *(volatile
typeof(engine->props.heartbeat_interval_ms) *)&(engine
->props.heartbeat_interval_ms); membar_datadep_consumer();
__tmp; })
)
5195 return true1;
5196
5197 return false0;
5198}
5199
5200#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0
5201#include "selftest_guc.c"
5202#include "selftest_guc_multi_lrc.c"
5203#include "selftest_guc_hangcheck.c"
5204#endif