/usr/src/sys/dev/pci/drm/i915/i915

Bug Summary

File:	dev/pci/drm/i915/i915_perf.c
Warning:	line 701, column 8 Although the value stored to 'taken' is used in the enclosing expression, the value is never actually read from 'taken'

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name i915_perf.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I 
/usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/dev/pci/drm/i915/i915_perf.c

1	/*
2	* Copyright © 2015-2016 Intel Corporation
3	*
4	* Permission is hereby granted, free of charge, to any person obtaining a
5	* copy of this software and associated documentation files (the "Software"),
6	* to deal in the Software without restriction, including without limitation
7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8	* and/or sell copies of the Software, and to permit persons to whom the
9	* Software is furnished to do so, subject to the following conditions:
10	*
11	* The above copyright notice and this permission notice (including the next
12	* paragraph) shall be included in all copies or substantial portions of the
13	* Software.
14	*
15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21	* IN THE SOFTWARE.
22	*
23	* Authors:
24	* Robert Bragg <robert@sixbynine.org>
25	*/
26
27
28	/**
29	* DOC: i915 Perf Overview
30	*
31	* Gen graphics supports a large number of performance counters that can help
32	* driver and application developers understand and optimize their use of the
33	* GPU.
34	*
35	* This i915 perf interface enables userspace to configure and open a file
36	* descriptor representing a stream of GPU metrics which can then be read() as
37	* a stream of sample records.
38	*
39	* The interface is particularly suited to exposing buffered metrics that are
40	* captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
41	*
42	* Streams representing a single context are accessible to applications with a
43	* corresponding drm file descriptor, such that OpenGL can use the interface
44	* without special privileges. Access to system-wide metrics requires root
45	* privileges by default, unless changed via the dev.i915.perf_stream_paranoid
46	* sysctl option.
47	*
48	*/
49
50	/**
51	* DOC: i915 Perf History and Comparison with Core Perf
52	*
53	* The interface was initially inspired by the core Perf infrastructure but
54	* some notable differences are:
55	*
56	* i915 perf file descriptors represent a "stream" instead of an "event"; where
57	* a perf event primarily corresponds to a single 64bit value, while a stream
58	* might sample sets of tightly-coupled counters, depending on the
59	* configuration. For example the Gen OA unit isn't designed to support
60	* orthogonal configurations of individual counters; it's configured for a set
61	* of related counters. Samples for an i915 perf stream capturing OA metrics
62	* will include a set of counter values packed in a compact HW specific format.
63	* The OA unit supports a number of different packing formats which can be
64	* selected by the user opening the stream. Perf has support for grouping
65	* events, but each event in the group is configured, validated and
66	* authenticated individually with separate system calls.
67	*
68	* i915 perf stream configurations are provided as an array of u64 (key,value)
69	* pairs, instead of a fixed struct with multiple miscellaneous config members,
70	* interleaved with event-type specific members.
71	*
72	* i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
73	* The supported metrics are being written to memory by the GPU unsynchronized
74	* with the CPU, using HW specific packing formats for counter sets. Sometimes
75	* the constraints on HW configuration require reports to be filtered before it
76	* would be acceptable to expose them to unprivileged applications - to hide
77	* the metrics of other processes/contexts. For these use cases a read() based
78	* interface is a good fit, and provides an opportunity to filter data as it
79	* gets copied from the GPU mapped buffers to userspace buffers.
80	*
81	*
82	* Issues hit with first prototype based on Core Perf
83	* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84	*
85	* The first prototype of this driver was based on the core perf
86	* infrastructure, and while we did make that mostly work, with some changes to
87	* perf, we found we were breaking or working around too many assumptions baked
88	* into perf's currently cpu centric design.
89	*
90	* In the end we didn't see a clear benefit to making perf's implementation and
91	* interface more complex by changing design assumptions while we knew we still
92	* wouldn't be able to use any existing perf based userspace tools.
93	*
94	* Also considering the Gen specific nature of the Observability hardware and
95	* how userspace will sometimes need to combine i915 perf OA metrics with
96	* side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
97	* expecting the interface to be used by a platform specific userspace such as
98	* OpenGL or tools. This is to say; we aren't inherently missing out on having
99	* a standard vendor/architecture agnostic interface by not using perf.
100	*
101	*
102	* For posterity, in case we might re-visit trying to adapt core perf to be
103	* better suited to exposing i915 metrics these were the main pain points we
104	* hit:
105	*
106	* - The perf based OA PMU driver broke some significant design assumptions:
107	*
108	* Existing perf pmus are used for profiling work on a cpu and we were
109	* introducing the idea of _IS_DEVICE pmus with different security
110	* implications, the need to fake cpu-related data (such as user/kernel
111	* registers) to fit with perf's current design, and adding _DEVICE records
112	* as a way to forward device-specific status records.
113	*
114	* The OA unit writes reports of counters into a circular buffer, without
115	* involvement from the CPU, making our PMU driver the first of a kind.
116	*
117	* Given the way we were periodically forwarding data from the GPU-mapped OA
118	* buffer to perf's buffer, those bursts of sample writes looked to perf like
119	* we were sampling too fast and so we had to subvert its throttling checks.
120	*
121	* Perf supports groups of counters and allows those to be read via
122	* transactions internally but transactions currently seem designed to be
123	* explicitly initiated from the cpu (say in response to a userspace read())
124	* and while we could pull a report out of the OA buffer we can't
125	* trigger a report from the cpu on demand.
126	*
127	* Related to being report based; the OA counters are configured in HW as a
128	* set while perf generally expects counter configurations to be orthogonal.
129	* Although counters can be associated with a group leader as they are
130	* opened, there's no clear precedent for being able to provide group-wide
131	* configuration attributes (for example we want to let userspace choose the
132	* OA unit report format used to capture all counters in a set, or specify a
133	* GPU context to filter metrics on). We avoided using perf's grouping
134	* feature and forwarded OA reports to userspace via perf's 'raw' sample
135	* field. This suited our userspace well considering how coupled the counters
136	* are when dealing with normalizing. It would be inconvenient to split
137	* counters up into separate events, only to require userspace to recombine
138	* them. For Mesa it's also convenient to be forwarded raw, periodic reports
139	* for combining with the side-band raw reports it captures using
140	* MI_REPORT_PERF_COUNT commands.
141	*
142	* - As a side note on perf's grouping feature; there was also some concern
143	* that using PERF_FORMAT_GROUP as a way to pack together counter values
144	* would quite drastically inflate our sample sizes, which would likely
145	* lower the effective sampling resolutions we could use when the available
146	* memory bandwidth is limited.
147	*
148	* With the OA unit's report formats, counters are packed together as 32
149	* or 40bit values, with the largest report size being 256 bytes.
150	*
151	* PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
152	* documented ordering to the values, implying PERF_FORMAT_ID must also be
153	* used to add a 64bit ID before each value; giving 16 bytes per counter.
154	*
155	* Related to counter orthogonality; we can't time share the OA unit, while
156	* event scheduling is a central design idea within perf for allowing
157	* userspace to open + enable more events than can be configured in HW at any
158	* one time. The OA unit is not designed to allow re-configuration while in
159	* use. We can't reconfigure the OA unit without losing internal OA unit
160	* state which we can't access explicitly to save and restore. Reconfiguring
161	* the OA unit is also relatively slow, involving ~100 register writes. From
162	* userspace Mesa also depends on a stable OA configuration when emitting
163	* MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
164	* disabled while there are outstanding MI_RPC commands lest we hang the
165	* command streamer.
166	*
167	* The contents of sample records aren't extensible by device drivers (i.e.
168	* the sample_type bits). As an example; Sourab Gupta had been looking to
169	* attach GPU timestamps to our OA samples. We were shoehorning OA reports
170	* into sample records by using the 'raw' field, but it's tricky to pack more
171	* than one thing into this field because events/core.c currently only lets a
172	* pmu give a single raw data pointer plus len which will be copied into the
173	* ring buffer. To include more than the OA report we'd have to copy the
174	* report into an intermediate larger buffer. I'd been considering allowing a
175	* vector of data+len values to be specified for copying the raw data, but
176	* it felt like a kludge to be using the raw field for this purpose.
177	*
178	* - It felt like our perf based PMU was making some technical compromises
179	* just for the sake of using perf:
180	*
181	* perf_event_open() requires events to either relate to a pid or a specific
182	* cpu core, while our device pmu related to neither. Events opened with a
183	* pid will be automatically enabled/disabled according to the scheduling of
184	* that process - so not appropriate for us. When an event is related to a
185	* cpu id, perf ensures pmu methods will be invoked via an inter process
186	* interrupt on that core. To avoid invasive changes our userspace opened OA
187	* perf events for a specific cpu. This was workable but it meant the
188	* majority of the OA driver ran in atomic context, including all OA report
189	* forwarding, which wasn't really necessary in our case and seems to make
190	* our locking requirements somewhat complex as we handled the interaction
191	* with the rest of the i915 driver.
192	*/
193
194	#include <linux/anon_inodes.h>
195	#include <linux/sizes.h>
196	#include <linux/uuid.h>
197
198	#include "gem/i915_gem_context.h"
199	#include "gt/intel_engine_pm.h"
200	#include "gt/intel_engine_user.h"
201	#include "gt/intel_gt.h"
202	#include "gt/intel_lrc_reg.h"
203	#include "gt/intel_ring.h"
204
205	#include "i915_drv.h"
206	#include "i915_perf.h"
207
208	/* HW requires this to be a power of two, between 128k and 16M, though the
209	* driver is currently generally designed assuming the largest 16M size is used
210	* such that the overflow cases are unlikely in normal operation.
211	*/
212	#define OA_BUFFER_SIZE(1024 * 1024 * 16) SZ_16M(1024 * 1024 * 16)
213
/* OA_TAKEN(tail, head): distance from head to tail, wrapped to the buffer
 * size by masking with (OA_BUFFER_SIZE - 1).
 *
 * NOTE(review): neither argument is parenthesized in the expansion, so a
 * compound argument such as `a - b` passed as head expands to
 * `tail - a - b`; see the expansion of the pollin computation in
 * oa_buffer_check_unlocked().
 */
214	#define OA_TAKEN(tail, head)((tail - head) & ((1024 * 1024 * 16) - 1)) ((tail - head) & (OA_BUFFER_SIZE(1024 * 1024 * 16) - 1))
215
216	/**
217	* DOC: OA Tail Pointer Race
218	*
219	* There's a HW race condition between OA unit tail pointer register updates and
220	* writes to memory whereby the tail pointer can sometimes get ahead of what's
221	* been written out to the OA buffer so far (in terms of what's visible to the
222	* CPU).
223	*
224	* Although this can be observed explicitly while copying reports to userspace
225	* by checking for a zeroed report-id field in tail reports, we want to account
226	* for this earlier, as part of the oa_buffer_check_unlocked to avoid lots of
227	* redundant read() attempts.
228	*
229	* We workaround this issue in oa_buffer_check_unlocked() by reading the reports
230	* in the OA buffer, starting from the tail reported by the HW until we find a
231	* report with its first 2 dwords not 0 meaning its previous report is
232	* completely in memory and ready to be read. Those dwords are also set to 0
233	* once read and the whole buffer is cleared upon OA buffer initialization. The
234	* first dword is the reason for this report while the second is the timestamp,
235	* making the chances of having those 2 fields at 0 fairly unlikely. A more
236	* detailed explanation is available in oa_buffer_check_unlocked().
237	*
238	* Most of the implementation details for this workaround are in
239	* oa_buffer_check_unlocked() and _append_oa_reports()
240	*
241	* Note for posterity: previously the driver used to define an effective tail
242	* pointer that lagged the real pointer by a 'tail margin' measured in bytes
243	* derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
244	* This was flawed considering that the OA unit may also automatically generate
245	* non-periodic reports (such as on context switch) or the OA unit may be
246	* enabled without any periodic sampling.
247	*/
/* OA_TAIL_MARGIN_NSEC: in oa_buffer_check_unlocked() an unchanged HW tail is
 * adopted as the readable tail once (now - aging_timestamp) exceeds this
 * value, where now comes from ktime_get_mono_fast_ns().
 */
248	#define OA_TAIL_MARGIN_NSEC100000ULL 100000ULL
249	#define INVALID_TAIL_PTR0xffffffff 0xffffffff
250
251	/* The default frequency for checking whether the OA unit has written new
252	* reports to the circular OA buffer...
	*
	* DEFAULT_POLL_PERIOD_NS is the same interval expressed in nanoseconds:
	* NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ = 1000000000 / 200 = 5000000.
253	*/
254	#define DEFAULT_POLL_FREQUENCY_HZ200 200
255	#define DEFAULT_POLL_PERIOD_NS(1000000000L / 200) (NSEC_PER_SEC1000000000L / DEFAULT_POLL_FREQUENCY_HZ200)
256
257	/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
258	static u32 i915_perf_stream_paranoid = true1;
259
260	/* The maximum exponent the hardware accepts is 63 (essentially it selects one
261	* of the 64bit timestamp bits to trigger reports from) but there's currently
262	* no known use case for sampling as infrequently as once per 47 thousand years.
263	*
264	* Since the timestamps included in OA reports are only 32bits it seems
265	* reasonable to limit the OA exponent where it's still possible to account for
266	* overflow in OA report timestamps.
267	*/
268	#define OA_EXPONENT_MAX31 31
269
270	#define INVALID_CTX_ID0xffffffff 0xffffffff
271
272	/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
273	#define OAREPORT_REASON_MASK0x3f 0x3f
274	#define OAREPORT_REASON_MASK_EXTENDED0x7f 0x7f
275	#define OAREPORT_REASON_SHIFT19 19
276	#define OAREPORT_REASON_TIMER(1<<0) (1<<0)
277	#define OAREPORT_REASON_CTX_SWITCH(1<<3) (1<<3)
278	#define OAREPORT_REASON_CLK_RATIO(1<<5) (1<<5)
279
280
281	/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
282	*
283	* The highest sampling frequency we can theoretically program the OA unit
284	* with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell.
285	*
286	* Initialized just before we register the sysctl parameter.
287	*/
288	static int oa_sample_rate_hard_limit;
289
290	/* Theoretically we can program the OA unit to sample every 160ns but don't
291	* allow that by default unless root...
292	*
293	* The default threshold of 100000Hz is based on perf's similar
294	* kernel.perf_event_max_sample_rate sysctl parameter.
295	*/
296	static u32 i915_oa_max_sample_rate = 100000;
297
298	/* XXX: beware if future OA HW adds new report formats that the current
299	* code assumes all reports have a power-of-two size and ~(size - 1) can
300	* be used as a mask to align the OA tail pointer.
	*
	* Each initializer is a positional pair; the second value is the report
	* size in bytes (see the "192 bytes" note below). The first value is
	* presumably the HW format code -- confirm against struct i915_oa_format.
	* Indices without an initializer are left zero-initialized.
301	*/
302	static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
303	[I915_OA_FORMAT_A13] = { 0, 64 },
304	[I915_OA_FORMAT_A29] = { 1, 128 },
305	[I915_OA_FORMAT_A13_B8_C8] = { 2, 128 },
306	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
307	[I915_OA_FORMAT_B4_C8] = { 4, 64 },
308	[I915_OA_FORMAT_A45_B8_C8] = { 5, 256 },
309	[I915_OA_FORMAT_B4_C8_A16] = { 6, 128 },
310	[I915_OA_FORMAT_C4_B8] = { 7, 64 },
311	};
312
/* OA report format table indexed by I915_OA_FORMAT_*; four formats are
 * populated, with sizes of 64, 128 and 256 (presumably bytes, as in
 * hsw_oa_formats -- confirm). All other indices are zero-initialized.
 */
313	static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
314	[I915_OA_FORMAT_A12] = { 0, 64 },
315	[I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
316	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
317	[I915_OA_FORMAT_C4_B8] = { 7, 64 },
318	};
319
/* OA report format table indexed by I915_OA_FORMAT_*; only
 * I915_OA_FORMAT_A32u40_A4u32_B8_C8 is populated, every other index is
 * zero-initialized.
 */
320	static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
321	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
322	};
323
324	#define SAMPLE_OA_REPORT(1<<0) (1<<0)
325
326	/**
327	* struct perf_open_properties - for validated properties given to open a stream
328	* @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
329	* @single_context: Whether a single or all gpu contexts should be monitored
330	* @hold_preemption: Whether the preemption is disabled for the filtered
331	* context
332	* @ctx_handle: A gem ctx handle for use with @single_context
333	* @metrics_set: An ID for an OA unit metric set advertised via sysfs
334	* @oa_format: An OA unit HW report format
335	* @oa_periodic: Whether to enable periodic OA unit sampling
336	* @oa_period_exponent: The OA unit sampling period is derived from this
337	* @engine: The engine (typically rcs0) being monitored by the OA unit
338	* @has_sseu: Whether @sseu was specified by userspace
339	* @sseu: internal SSEU configuration computed either from the userspace
340	* specified configuration in the opening parameters or a default value
341	* (see get_default_sseu_config())
342	* @poll_oa_period: The period in nanoseconds at which the CPU will check for OA
343	* data availability
344	*
345	* As read_properties_unlocked() enumerates and validates the properties given
346	* to open a stream of metrics the configuration is built up in the structure
347	* which starts out zero initialized.
348	*/
349	struct perf_open_properties {
350	u32 sample_flags;
351
	/* Two 1-bit flags declared as u64 bitfields, then a full u64 handle. */
352	u64 single_context:1;
353	u64 hold_preemption:1;
354	u64 ctx_handle;
355
356	/* OA sampling state */
357	int metrics_set;
358	int oa_format;
359	bool_Bool oa_periodic;
360	int oa_period_exponent;
361
362	struct intel_engine_cs *engine;
363
	/* has_sseu records whether sseu below was specified by userspace. */
364	bool_Bool has_sseu;
365	struct intel_sseu sseu;
366
	/* Polling period in nanoseconds (see the struct documentation above). */
367	u64 poll_oa_period;
368	};
369
/* Associates an OA config with a VMA and carries an llist node for linking.
 * free_oa_config_bo() calls i915_oa_config_put() and i915_vma_put() on the
 * two pointers before freeing the structure itself.
 */
370	struct i915_oa_config_bo {
371	struct llist_node node;
372
373	struct i915_oa_config *oa_config;
374	struct i915_vma *vma;
375	};
376
377	static struct ctl_table_header *sysctl_header;
378
379	static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
380
/* Release function for the kref embedded in struct i915_oa_config.
 *
 * Recovers the enclosing config from @ref via container_of(), frees its
 * flex_regs, b_counter_regs and mux_regs arrays, then frees the config.
 *
 * NOTE(review): in this build kfree_rcu() expands to an immediate free(),
 * i.e. without a deferred RCU free, while i915_perf_get_oa_config() looks
 * configs up under rcu_read_lock() -- confirm this is safe on this platform.
 */
381	void i915_oa_config_release(struct kref *ref)
382	{
383	struct i915_oa_config *oa_config =
384	container_of(ref, typeof(oa_config), ref)({ const __typeof( ((typeof(oa_config) )0)->ref ) __mptr = (ref); (typeof(oa_config) )( (char )__mptr - __builtin_offsetof (typeof(oa_config), ref) );});
385
386	kfree(oa_config->flex_regs);
387	kfree(oa_config->b_counter_regs);
388	kfree(oa_config->mux_regs);
389
390	kfree_rcu(oa_config, rcu)do { free((void *)oa_config, 145, 0); } while(0);
391	}
392
/* Look up the OA config registered under @metrics_set in perf->metrics_idr.
 *
 * The lookup and the i915_oa_config_get() call both happen inside an
 * rcu_read_lock()/rcu_read_unlock() section. Returns the result of
 * i915_oa_config_get() when an entry exists, otherwise the (false) result
 * of idr_find(). Presumably the caller owns a reference on success and must
 * release it with i915_oa_config_put() -- confirm.
 */
393	struct i915_oa_config *
394	i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
395	{
396	struct i915_oa_config *oa_config;
397
398	rcu_read_lock();
399	oa_config = idr_find(&perf->metrics_idr, metrics_set);
400	if (oa_config)
401	oa_config = i915_oa_config_get(oa_config);
402	rcu_read_unlock();
403
404	return oa_config;
405	}
406
/* Tear down an i915_oa_config_bo: put its oa_config and vma, then kfree()
 * the structure. @oa_bo must not be used afterwards.
 */
407	static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
408	{
409	i915_oa_config_put(oa_bo->oa_config);
410	i915_vma_put(oa_bo->vma);
411	kfree(oa_bo);
412	}
413
/* Read the GEN12_OAG_OATAILPTR register (0xdb04) through the stream's uncore
 * and return it masked with GEN12_OAG_OATAILPTR_MASK (0xffffffc0), i.e. with
 * the low six bits cleared.
 */
414	static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
415	{
416	struct intel_uncore *uncore = stream->uncore;
417
418	return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR((const i915_reg_t){ .reg = (0xdb04) })) &
419	GEN12_OAG_OATAILPTR_MASK0xffffffc0;
420	}
421
/* Read the GEN8_OATAILPTR register (0x2B10) through the stream's uncore and
 * return it masked with GEN8_OATAILPTR_MASK (0xffffffc0), i.e. with the low
 * six bits cleared.
 */
422	static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
423	{
424	struct intel_uncore *uncore = stream->uncore;
425
426	return intel_uncore_read(uncore, GEN8_OATAILPTR((const i915_reg_t){ .reg = (0x2B10) })) & GEN8_OATAILPTR_MASK0xffffffc0;
427	}
428
/* Read the GEN7_OASTATUS1 register (0x2364) through the stream's uncore and
 * return the tail bits selected by GEN7_OASTATUS1_TAIL_MASK (0xffffffc0);
 * the low six bits of the register value are discarded.
 */
429	static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
430	{
431	struct intel_uncore *uncore = stream->uncore;
432	u32 oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1((const i915_reg_t){ .reg = (0x2364) }));
433
434	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK0xffffffc0;
435	}
436
437	/**
438	* oa_buffer_check_unlocked - check for data and update tail ptr state
439	* @stream: i915 stream instance
440	*
441	* This is either called via fops (for blocking reads in user ctx) or the poll
442	* check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
443	* if there is data available for userspace to read.
444	*
445	* This function is central to providing a workaround for the OA unit tail
446	* pointer having a race with respect to what data is visible to the CPU.
447	* It is responsible for reading tail pointers from the hardware and giving
448	* the pointers time to 'age' before they are made available for reading.
449	* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
450	*
451	* Besides returning true when there is data available to read() this function
452	* also updates the tail, aging_tail and aging_timestamp in the oa_buffer
453	* object.
454	*
455	* Note: It's safe to read OA config state here unlocked, assuming that this is
456	* only called while the stream is enabled, while the global OA configuration
457	* can't be modified.
458	*
459	* Returns: %true if the OA buffer contains data, else %false
460	*/
461	static bool_Bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
462	{
/* In this tree the function is stubbed: STUB() prints "<function>: stub" and
 * the function always returns false, so callers never see data as available.
 * Everything below is compiled only when `notyet` is defined, and even then
 * it follows the unconditional return above.
 */
463	STUB()do { printf("%s: stub\n", __func__); } while(0);
464	return false0;
465	#ifdef notyet
466	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
467	int report_size = stream->oa_buffer.format_size;
468	unsigned long flags;
469	bool_Bool pollin;
470	u32 hw_tail;
471	u64 now;
472
473	/* We have to consider the (unlikely) possibility that read() errors
474	* could result in an OA buffer reset which might reset the head and
475	* tail state.
476	*/
477	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
478
479	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
480
481	/* The tail pointer increases in 64 byte increments,
482	* not in report_size steps...
483	*/
484	hw_tail &= ~(report_size - 1);
485
486	now = ktime_get_mono_fast_ns();
487
488	if (hw_tail == stream->oa_buffer.aging_tail &&
489	(now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC100000ULL) {
490	/* If the HW tail hasn't moved since the last check and the HW
491	* tail has been aging for long enough, declare it the new
492	* tail.
493	*/
494	stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
495	} else {
496	u32 head, tail, aged_tail;
497
498	/* NB: The head we observe here might effectively be a little
499	* out of date. If a read() is in progress, the head could be
500	* anywhere between this head and stream->oa_buffer.tail.
501	*/
502	head = stream->oa_buffer.head - gtt_offset;
503	aged_tail = stream->oa_buffer.tail - gtt_offset;
504
505	hw_tail -= gtt_offset;
506	tail = hw_tail;
507
508	/* Walk the stream backward until we find a report with dword 0
509	* & 1 not at 0. Since the circular buffer pointers progress by
510	* increments of 64 bytes and that reports can be up to 256
511	* bytes long, we can't tell whether a report has fully landed
512	* in memory before the first 2 dwords of the following report
513	* have effectively landed.
514	*
515	* This is assuming that the writes of the OA unit land in
516	* memory in the order they were written to.
517	* If not : (╯°□°）╯︵ ┻━┻
518	*/
519	while (OA_TAKEN(tail, aged_tail)((tail - aged_tail) & ((1024 * 1024 * 16) - 1)) >= report_size) {
520	u32 report32 = (void )(stream->oa_buffer.vaddr + tail);
521
522	if (report32[0] != 0 \|\| report32[1] != 0)
523	break;
524
525	tail = (tail - report_size) & (OA_BUFFER_SIZE(1024 * 1024 * 16) - 1);
526	}
527
528	if (OA_TAKEN(hw_tail, tail)((hw_tail - tail) & ((1024 * 1024 * 16) - 1)) > report_size &&
529	__ratelimit(&stream->perf->tail_pointer_race)(1))
530	DRM_NOTE("unlanded report(s) head=0x%x "printk("\0015" "[" "drm" "] " "unlanded report(s) head=0x%x " "tail=0x%x hw_tail=0x%x\n", head, tail, hw_tail)
531	"tail=0x%x hw_tail=0x%x\n",printk("\0015" "[" "drm" "] " "unlanded report(s) head=0x%x " "tail=0x%x hw_tail=0x%x\n", head, tail, hw_tail)
532	head, tail, hw_tail)printk("\0015" "[" "drm" "] " "unlanded report(s) head=0x%x " "tail=0x%x hw_tail=0x%x\n", head, tail, hw_tail);
533
534	stream->oa_buffer.tail = gtt_offset + tail;
535	stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
536	stream->oa_buffer.aging_timestamp = now;
537	}
538
/* NOTE(review): OA_TAKEN() does not parenthesize its arguments, so the
 * expansion shown below computes (tail - gtt_offset - head - gtt_offset),
 * subtracting gtt_offset twice instead of cancelling it. After masking this
 * equals (tail - head) only if 2 * gtt_offset is a multiple of
 * OA_BUFFER_SIZE -- confirm the buffer's GGTT alignment guarantees that.
 */
539	pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,((stream->oa_buffer.tail - gtt_offset - stream->oa_buffer .head - gtt_offset) & ((1024 * 1024 * 16) - 1))
540	stream->oa_buffer.head - gtt_offset)((stream->oa_buffer.tail - gtt_offset - stream->oa_buffer .head - gtt_offset) & ((1024 * 1024 * 16) - 1)) >= report_size;
541
542	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
543
544	return pollin;
545	#endif
546	}
547
548	/**
549	* append_oa_status - Appends a status record to a userspace read() buffer.
550	* @stream: An i915-perf stream opened for OA metrics
551	* @buf: destination buffer given by userspace
552	* @count: the number of bytes userspace wants to read
553	* @offset: (inout): the current position for writing into @buf
554	* @type: The kind of status to report to userspace
555	*
556	* Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
557	* into the userspace read() buffer.
558	*
559	* The @buf @offset will only be updated on success.
560	*
561	* Returns: 0 on success, negative error code on failure.
562	*/
563	static int append_oa_status(struct i915_perf_stream *stream,
564	char __user *buf,
565	size_t count,
566	size_t *offset,
567	enum drm_i915_perf_record_type type)
568	{
/* A status record is just a header. The initializer is positional;
 * presumably the fields are type, pad and size in that order (the order in
 * which append_oa_sample() assigns them) -- confirm against the struct
 * declaration. @stream is not referenced in this function.
 */
569	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
570
/* Refuse to write a partial header.
 * NOTE(review): this is a size_t subtraction, so it assumes
 * *offset <= count; otherwise the difference wraps to a large value.
 */
571	if ((count - *offset) < header.size)
572	return -ENOSPC28;
573
574	if (copy_to_user(buf + *offset, &header, sizeof(header)))
575	return -EFAULT14;
576
/* Advance the write position only after the copy succeeded. */
577	(*offset) += header.size;
578
579	return 0;
580	}
581
582	/**
583	* append_oa_sample - Copies single OA report into userspace read() buffer.
584	* @stream: An i915-perf stream opened for OA metrics
585	* @buf: destination buffer given by userspace
586	* @count: the number of bytes userspace wants to read
587	* @offset: (inout): the current position for writing into @buf
588	* @report: A single OA report to (optionally) include as part of the sample
589	*
590	* The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
591	* properties when opening a stream, tracked as `stream->sample_flags`. This
592	* function copies the requested components of a single sample to the given
593	* read() @buf.
594	*
595	* The @buf @offset will only be updated on success.
596	*
597	* Returns: 0 on success, negative error code on failure.
598	*/
599	static int append_oa_sample(struct i915_perf_stream *stream,
600	char __user *buf,
601	size_t count,
602	size_t *offset,
603	const u8 *report)
604	{
/* In this tree the function is stubbed: STUB() prints "<function>: stub" and
 * the function returns false, i.e. 0 -- the documented success value --
 * without copying anything to @buf or advancing *offset. The implementation
 * below is compiled only when `notyet` is defined, and even then it follows
 * the unconditional return.
 */
605	STUB()do { printf("%s: stub\n", __func__); } while(0);
606	return false0;
607	#ifdef notyet
608	int report_size = stream->oa_buffer.format_size;
609	struct drm_i915_perf_record_header header;
610
611	header.type = DRM_I915_PERF_RECORD_SAMPLE;
612	header.pad = 0;
613	header.size = stream->sample_size;
614
/* Space check covers the whole sample (header.size), not just the header. */
615	if ((count - *offset) < header.size)
616	return -ENOSPC28;
617
/* Write the header at the current offset, then the raw report right after. */
618	buf += *offset;
619	if (copy_to_user(buf, &header, sizeof(header)))
620	return -EFAULT14;
621	buf += sizeof(header);
622
623	if (copy_to_user(buf, report, report_size))
624	return -EFAULT14;
625
/* *offset advances only once both copies have succeeded. */
626	(*offset) += header.size;
627
628	return 0;
629	#endif
630	}
631
632	/**
633	* Copies all buffered OA reports into userspace read() buffer.
634	* @stream: An i915-perf stream opened for OA metrics
635	* @buf: destination buffer given by userspace
636	* @count: the number of bytes userspace wants to read
637	* @offset: (inout): the current position for writing into @buf
638	*
639	* Notably any error condition resulting in a short read (-%ENOSPC or
640	* -%EFAULT) will be returned even though one or more records may
641	* have been successfully copied. In this case it's up to the caller
642	* to decide if the error should be squashed before returning to
643	* userspace.
644	*
645	* Note: reports are consumed from the head, and appended to the
646	* tail, so the tail chases the head?... If you think that's mad
647	* and back-to-front you're not alone, but this follows the
648	* Gen PRM naming convention.
649	*
650	* Returns: 0 on success, negative error code on failure.
651	*/
652	static int gen8_append_oa_reports(struct i915_perf_stream *stream,
653	char __user *buf,
654	size_t count,
655	size_t *offset)
656	{
657	struct intel_uncore *uncore = stream->uncore;
658	int report_size = stream->oa_buffer.format_size;
659	u8 *oa_buf_base = stream->oa_buffer.vaddr;
660	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
661	u32 mask = (OA_BUFFER_SIZE(1024 * 1024 * 16) - 1);
662	size_t start_offset = *offset;
663	unsigned long flags;
664	u32 head, tail;
665	u32 taken;
666	int ret = 0;
667
668	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled)({ int __ret = !!((!stream->enabled)); if (__ret) printf("%s %s: " "%s", dev_driver_string(((&uncore->i915->drm))-> dev), "", "drm_WARN_ON(" "!stream->enabled" ")"); __builtin_expect (!!(__ret), 0); }))
669	return -EIO5;
670
671	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
672
673	head = stream->oa_buffer.head;
674	tail = stream->oa_buffer.tail;
675
676	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
677
678	/*
679	* NB: oa_buffer.head/tail include the gtt_offset which we don't want
680	* while indexing relative to oa_buf_base.
681	*/
682	head -= gtt_offset;
683	tail -= gtt_offset;
684
685	/*
686	* An out of bounds or misaligned head or tail pointer implies a driver
687	* bug since we validate + align the tail pointers we read from the
688	* hardware and we are in full control of the head pointer which should
689	* only be incremented by multiples of the report size (notably also
690	* all a power of two).
691	*/
692	if (drm_WARN_ONCE(&uncore->i915->drm,({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
693	head > OA_BUFFER_SIZE \|\| head % report_size \|\|({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
694	tail > OA_BUFFER_SIZE \|\| tail % report_size,({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
695	"Inconsistent OA buffer pointers: head = %u, tail = %u\n",({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
696	head, tail)({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; }))
697	return -EIO5;
698
699
700	for (/* none */;
701	(taken = OA_TAKEN(tail, head)((tail - head) & ((1024 * 1024 * 16) - 1)));
	Although the value stored to 'taken' is used in the enclosing expression, the value is never actually read from 'taken'
702	head = (head + report_size) & mask) {
703	u8 *report = oa_buf_base + head;
704	u32 report32 = (void )report;
705	u32 ctx_id;
706	u32 reason;
707
708	/*
709	* All the report sizes factor neatly into the buffer
710	* size so we never expect to see a report split
711	* between the beginning and end of the buffer.
712	*
713	* Given the initial alignment check a misalignment
714	* here would imply a driver bug that would result
715	* in an overrun.
716	*/
717	if (drm_WARN_ON(&uncore->i915->drm,({ int __ret = !!((((1024 * 1024 * 16) - head) < report_size )); if (__ret) printf("%s %s: " "%s", dev_driver_string(((& uncore->i915->drm))->dev), "", "drm_WARN_ON(" "((1024 * 1024 * 16) - head) < report_size" ")"); __builtin_expect(!!(__ret), 0); })
718	(OA_BUFFER_SIZE - head) < report_size)({ int __ret = !!((((1024 * 1024 * 16) - head) < report_size )); if (__ret) printf("%s %s: " "%s", dev_driver_string(((& uncore->i915->drm))->dev), "", "drm_WARN_ON(" "((1024 * 1024 * 16) - head) < report_size" ")"); __builtin_expect(!!(__ret), 0); })) {
719	drm_err(&uncore->i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Spurious OA head ptr: non-integral report offset\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
720	"Spurious OA head ptr: non-integral report offset\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Spurious OA head ptr: non-integral report offset\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
721	break;
722	}
723
724	/*
725	* The reason field includes flags identifying what
726	* triggered this specific report (mostly timer
727	* triggered or e.g. due to a context switch).
728	*
729	* This field is never expected to be zero so we can
730	* check that the report isn't invalid before copying
731	* it to userspace...
732	*/
733	reason = ((report32[0] >> OAREPORT_REASON_SHIFT19) &
734	(IS_GEN(stream->perf->i915, 12)(0 + (&(stream->perf->i915)->__info)->gen == ( 12)) ?
735	OAREPORT_REASON_MASK_EXTENDED0x7f :
736	OAREPORT_REASON_MASK0x3f));
737	if (reason == 0) {
738	if (__ratelimit(&stream->perf->spurious_report_rs)(1))
739	DRM_NOTE("Skipping spurious, invalid OA report\n")printk("\0015" "[" "drm" "] " "Skipping spurious, invalid OA report\n" );
740	continue;
741	}
742
743	ctx_id = report32[2] & stream->specific_ctx_id_mask;
744
745	/*
746	* Squash whatever is in the CTX_ID field if it's marked as
747	* invalid to be sure we avoid false-positive, single-context
748	* filtering below...
749	*
750	* Note: that we don't clear the valid_ctx_bit so userspace can
751	* understand that the ID has been squashed by the kernel.
752	*/
753	if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
754	INTEL_GEN(stream->perf->i915)((&(stream->perf->i915)->__info)->gen) <= 11)
755	ctx_id = report32[2] = INVALID_CTX_ID0xffffffff;
756
757	/*
758	* NB: For Gen 8 the OA unit no longer supports clock gating
759	* off for a specific context and the kernel can't securely
760	* stop the counters from updating as system-wide / global
761	* values.
762	*
763	* Automatic reports now include a context ID so reports can be
764	* filtered on the cpu but it's not worth trying to
765	* automatically subtract/hide counter progress for other
766	* contexts while filtering since we can't stop userspace
767	* issuing MI_REPORT_PERF_COUNT commands which would still
768	* provide a side-band view of the real values.
769	*
770	* To allow userspace (such as Mesa/GL_INTEL_performance_query)
771	* to normalize counters for a single filtered context then it
772	* needs be forwarded bookend context-switch reports so that it
773	* can track switches in between MI_REPORT_PERF_COUNT commands
774	* and can itself subtract/ignore the progress of counters
775	* associated with other contexts. Note that the hardware
776	* automatically triggers reports when switching to a new
777	* context which are tagged with the ID of the newly active
778	* context. To avoid the complexity (and likely fragility) of
779	* reading ahead while parsing reports to try and minimize
780	* forwarding redundant context switch reports (i.e. between
781	* other, unrelated contexts) we simply elect to forward them
782	* all.
783	*
784	* We don't rely solely on the reason field to identify context
785	* switches since it's not-uncommon for periodic samples to
786	* identify a switch before any 'context switch' report.
787	*/
788	if (!stream->perf->exclusive_stream->ctx \|\|
789	stream->specific_ctx_id == ctx_id \|\|
790	stream->oa_buffer.last_ctx_id == stream->specific_ctx_id \|\|
791	reason & OAREPORT_REASON_CTX_SWITCH(1<<3)) {
792
793	/*
794	* While filtering for a single context we avoid
795	* leaking the IDs of other contexts.
796	*/
797	if (stream->perf->exclusive_stream->ctx &&
798	stream->specific_ctx_id != ctx_id) {
799	report32[2] = INVALID_CTX_ID0xffffffff;
800	}
801
802	ret = append_oa_sample(stream, buf, count, offset,
803	report);
804	if (ret)
805	break;
806
807	stream->oa_buffer.last_ctx_id = ctx_id;
808	}
809
810	/*
811	* Clear out the first 2 dword as a mean to detect unlanded
812	* reports.
813	*/
814	report32[0] = 0;
815	report32[1] = 0;
816	}
817
818	if (start_offset != *offset) {
819	i915_reg_t oaheadptr;
820
821	oaheadptr = IS_GEN(stream->perf->i915, 12)(0 + (&(stream->perf->i915)->__info)->gen == ( 12)) ?
822	GEN12_OAG_OAHEADPTR((const i915_reg_t){ .reg = (0xdb00) }) : GEN8_OAHEADPTR((const i915_reg_t){ .reg = (0x2B0C) });
823
824	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
825
826	/*
827	* We removed the gtt_offset for the copy loop above, indexing
828	* relative to oa_buf_base so put back here...
829	*/
830	head += gtt_offset;
831	intel_uncore_write(uncore, oaheadptr,
832	head & GEN12_OAG_OAHEADPTR_MASK0xffffffc0);
833	stream->oa_buffer.head = head;
834
835	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
836	}
837
838	return ret;
839	}
840
841	/**
842	* gen8_oa_read - copy status records then buffered OA reports
843	* @stream: An i915-perf stream opened for OA metrics
844	* @buf: destination buffer given by userspace
845	* @count: the number of bytes userspace wants to read
846	* @offset: (inout): the current position for writing into @buf
847	*
848	* Checks OA unit status registers and if necessary appends corresponding
849	* status records for userspace (such as for a buffer full condition) and then
850	* initiate appending any buffered OA reports.
851	*
852	* Updates @offset according to the number of bytes successfully copied into
853	* the userspace buffer.
854	*
855	* NB: some data may be successfully copied to the userspace buffer
856	* even if an error is returned, and this is reflected in the
857	* updated @offset.
858	*
859	* Returns: zero on success or a negative error code
860	*/
861	static int gen8_oa_read(struct i915_perf_stream *stream,
862	char __user *buf,
863	size_t count,
864	size_t *offset)
865	{
866	struct intel_uncore *uncore = stream->uncore;
867	u32 oastatus;
868	i915_reg_t oastatus_reg;
869	int ret;
870
871	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr)({ int __ret = !!((!stream->oa_buffer.vaddr)); if (__ret) printf ("%s %s: " "%s", dev_driver_string(((&uncore->i915-> drm))->dev), "", "drm_WARN_ON(" "!stream->oa_buffer.vaddr" ")"); __builtin_expect(!!(__ret), 0); }))
872	return -EIO5;
873
874	oastatus_reg = IS_GEN(stream->perf->i915, 12)(0 + (&(stream->perf->i915)->__info)->gen == ( 12)) ?
875	GEN12_OAG_OASTATUS((const i915_reg_t){ .reg = (0xdafc) }) : GEN8_OASTATUS((const i915_reg_t){ .reg = (0x2b08) });
876
877	oastatus = intel_uncore_read(uncore, oastatus_reg);
878
879	/*
880	* We treat OABUFFER_OVERFLOW as a significant error:
881	*
882	* Although theoretically we could handle this more gracefully
883	* sometimes, some Gens don't correctly suppress certain
884	* automatically triggered reports in this condition and so we
885	* have to assume that old reports are now being trampled
886	* over.
887	*
888	* Considering how we don't currently give userspace control
889	* over the OA buffer size and always configure a large 16MB
890	* buffer, then a buffer overflow does anyway likely indicate
891	* that something has gone quite badly wrong.
892	*/
893	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW(1 << 1)) {
894	ret = append_oa_status(stream, buf, count, offset,
895	DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
896	if (ret)
897	return ret;
898
899	DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",__drm_dbg(DRM_UT_CORE, "OA buffer overflow (exponent = %d): force restart\n" , stream->period_exponent)
900	stream->period_exponent)__drm_dbg(DRM_UT_CORE, "OA buffer overflow (exponent = %d): force restart\n" , stream->period_exponent);
901
902	stream->perf->ops.oa_disable(stream);
903	stream->perf->ops.oa_enable(stream);
904
905	/*
906	* Note: .oa_enable() is expected to re-init the oabuffer and
907	* reset GEN8_OASTATUS for us
908	*/
909	oastatus = intel_uncore_read(uncore, oastatus_reg);
910	}
911
912	if (oastatus & GEN8_OASTATUS_REPORT_LOST(1 << 0)) {
913	ret = append_oa_status(stream, buf, count, offset,
914	DRM_I915_PERF_RECORD_OA_REPORT_LOST);
915	if (ret)
916	return ret;
917
918	intel_uncore_rmw(uncore, oastatus_reg,
919	GEN8_OASTATUS_COUNTER_OVERFLOW(1 << 2) \|
920	GEN8_OASTATUS_REPORT_LOST(1 << 0),
921	IS_GEN_RANGE(uncore->i915, 8, 10)(!!((&(uncore->i915)->__info)->gen_mask & ( 0 + 0 + (((~0UL) >> (64 - (((10)) - 1) - 1)) & ((~0UL ) << (((8)) - 1)))))) ?
922	(GEN8_OASTATUS_HEAD_POINTER_WRAP(1 << 16) \|
923	GEN8_OASTATUS_TAIL_POINTER_WRAP(1 << 17)) : 0);
924	}
925
926	return gen8_append_oa_reports(stream, buf, count, offset);
927	}
928
929	/**
930	* Copies all buffered OA reports into userspace read() buffer.
931	* @stream: An i915-perf stream opened for OA metrics
932	* @buf: destination buffer given by userspace
933	* @count: the number of bytes userspace wants to read
934	* @offset: (inout): the current position for writing into @buf
935	*
936	* Notably any error condition resulting in a short read (-%ENOSPC or
937	* -%EFAULT) will be returned even though one or more records may
938	* have been successfully copied. In this case it's up to the caller
939	* to decide if the error should be squashed before returning to
940	* userspace.
941	*
942	* Note: reports are consumed from the head, and appended to the
943	* tail, so the tail chases the head?... If you think that's mad
944	* and back-to-front you're not alone, but this follows the
945	* Gen PRM naming convention.
946	*
947	* Returns: 0 on success, negative error code on failure.
948	*/
949	static int gen7_append_oa_reports(struct i915_perf_stream *stream,
950	char __user *buf,
951	size_t count,
952	size_t *offset)
953	{
954	struct intel_uncore *uncore = stream->uncore;
955	int report_size = stream->oa_buffer.format_size;
956	u8 *oa_buf_base = stream->oa_buffer.vaddr;
957	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
958	u32 mask = (OA_BUFFER_SIZE(1024 * 1024 * 16) - 1);
959	size_t start_offset = *offset;
960	unsigned long flags;
961	u32 head, tail;
962	u32 taken;
963	int ret = 0;
964
965	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled)({ int __ret = !!((!stream->enabled)); if (__ret) printf("%s %s: " "%s", dev_driver_string(((&uncore->i915->drm))-> dev), "", "drm_WARN_ON(" "!stream->enabled" ")"); __builtin_expect (!!(__ret), 0); }))
966	return -EIO5;
967
968	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
969
970	head = stream->oa_buffer.head;
971	tail = stream->oa_buffer.tail;
972
973	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
974
975	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
976	* while indexing relative to oa_buf_base.
977	*/
978	head -= gtt_offset;
979	tail -= gtt_offset;
980
981	/* An out of bounds or misaligned head or tail pointer implies a driver
982	* bug since we validate + align the tail pointers we read from the
983	* hardware and we are in full control of the head pointer which should
984	* only be incremented by multiples of the report size (notably also
985	* all a power of two).
986	*/
987	if (drm_WARN_ONCE(&uncore->i915->drm,({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
988	head > OA_BUFFER_SIZE \|\| head % report_size \|\|({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
989	tail > OA_BUFFER_SIZE \|\| tail % report_size,({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
990	"Inconsistent OA buffer pointers: head = %u, tail = %u\n",({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; })
991	head, tail)({ static int __warned; int __ret = !!(head > (1024 * 1024 * 16) \|\| head % report_size \|\| tail > (1024 * 1024 * 16) \|\| tail % report_size); if (__ret && !__warned) { printf ("%s %s: " "Inconsistent OA buffer pointers: head = %u, tail = %u\n" , dev_driver_string((&uncore->i915->drm)->dev), "" , head, tail); __warned = 1; } __builtin_expect(!!(__ret), 0) ; }))
992	return -EIO5;
993
994
995	for (/* none */;
996	(taken = OA_TAKEN(tail, head)((tail - head) & ((1024 * 1024 * 16) - 1)));
997	head = (head + report_size) & mask) {
998	u8 *report = oa_buf_base + head;
999	u32 report32 = (void )report;
1000
1001	/* All the report sizes factor neatly into the buffer
1002	* size so we never expect to see a report split
1003	* between the beginning and end of the buffer.
1004	*
1005	* Given the initial alignment check a misalignment
1006	* here would imply a driver bug that would result
1007	* in an overrun.
1008	*/
1009	if (drm_WARN_ON(&uncore->i915->drm,({ int __ret = !!((((1024 * 1024 * 16) - head) < report_size )); if (__ret) printf("%s %s: " "%s", dev_driver_string(((& uncore->i915->drm))->dev), "", "drm_WARN_ON(" "((1024 * 1024 * 16) - head) < report_size" ")"); __builtin_expect(!!(__ret), 0); })
1010	(OA_BUFFER_SIZE - head) < report_size)({ int __ret = !!((((1024 * 1024 * 16) - head) < report_size )); if (__ret) printf("%s %s: " "%s", dev_driver_string(((& uncore->i915->drm))->dev), "", "drm_WARN_ON(" "((1024 * 1024 * 16) - head) < report_size" ")"); __builtin_expect(!!(__ret), 0); })) {
1011	drm_err(&uncore->i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Spurious OA head ptr: non-integral report offset\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
1012	"Spurious OA head ptr: non-integral report offset\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Spurious OA head ptr: non-integral report offset\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1013	break;
1014	}
1015
1016	/* The report-ID field for periodic samples includes
1017	* some undocumented flags related to what triggered
1018	* the report and is never expected to be zero so we
1019	* can check that the report isn't invalid before
1020	* copying it to userspace...
1021	*/
1022	if (report32[0] == 0) {
1023	if (__ratelimit(&stream->perf->spurious_report_rs)(1))
1024	DRM_NOTE("Skipping spurious, invalid OA report\n")printk("\0015" "[" "drm" "] " "Skipping spurious, invalid OA report\n" );
1025	continue;
1026	}
1027
1028	ret = append_oa_sample(stream, buf, count, offset, report);
1029	if (ret)
1030	break;
1031
1032	/* Clear out the first 2 dwords as a mean to detect unlanded
1033	* reports.
1034	*/
1035	report32[0] = 0;
1036	report32[1] = 0;
1037	}
1038
1039	if (start_offset != *offset) {
1040	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
1041
1042	/* We removed the gtt_offset for the copy loop above, indexing
1043	* relative to oa_buf_base so put back here...
1044	*/
1045	head += gtt_offset;
1046
1047	intel_uncore_write(uncore, GEN7_OASTATUS2((const i915_reg_t){ .reg = (0x2368) }),
1048	(head & GEN7_OASTATUS2_HEAD_MASK0xffffffc0) \|
1049	GEN7_OASTATUS2_MEM_SELECT_GGTT(1 << 0));
1050	stream->oa_buffer.head = head;
1051
1052	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
1053	}
1054
1055	return ret;
1056	}
1057
1058	/**
1059	* gen7_oa_read - copy status records then buffered OA reports
1060	* @stream: An i915-perf stream opened for OA metrics
1061	* @buf: destination buffer given by userspace
1062	* @count: the number of bytes userspace wants to read
1063	* @offset: (inout): the current position for writing into @buf
1064	*
1065	* Checks Gen 7 specific OA unit status registers and if necessary appends
1066	* corresponding status records for userspace (such as for a buffer full
1067	* condition) and then initiate appending any buffered OA reports.
1068	*
1069	* Updates @offset according to the number of bytes successfully copied into
1070	* the userspace buffer.
1071	*
1072	* Returns: zero on success or a negative error code
1073	*/
1074	static int gen7_oa_read(struct i915_perf_stream *stream,
1075	char __user *buf,
1076	size_t count,
1077	size_t *offset)
1078	{
1079	struct intel_uncore *uncore = stream->uncore;
1080	u32 oastatus1;
1081	int ret;
1082
1083	if (drm_WARN_ON(&uncore->i915->drm, !stream->oa_buffer.vaddr)({ int __ret = !!((!stream->oa_buffer.vaddr)); if (__ret) printf ("%s %s: " "%s", dev_driver_string(((&uncore->i915-> drm))->dev), "", "drm_WARN_ON(" "!stream->oa_buffer.vaddr" ")"); __builtin_expect(!!(__ret), 0); }))
1084	return -EIO5;
1085
1086	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1((const i915_reg_t){ .reg = (0x2364) }));
1087
1088	/* XXX: On Haswell we don't have a safe way to clear oastatus1
1089	* bits while the OA unit is enabled (while the tail pointer
1090	* may be updated asynchronously) so we ignore status bits
1091	* that have already been reported to userspace.
1092	*/
1093	oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
1094
1095	/* We treat OABUFFER_OVERFLOW as a significant error:
1096	*
1097	* - The status can be interpreted to mean that the buffer is
1098	* currently full (with a higher precedence than OA_TAKEN()
1099	* which will start to report a near-empty buffer after an
1100	* overflow) but it's awkward that we can't clear the status
1101	* on Haswell, so without a reset we won't be able to catch
1102	* the state again.
1103	*
1104	* - Since it also implies the HW has started overwriting old
1105	* reports it may also affect our sanity checks for invalid
1106	* reports when copying to userspace that assume new reports
1107	* are being written to cleared memory.
1108	*
1109	* - In the future we may want to introduce a flight recorder
1110	* mode where the driver will automatically maintain a safe
1111	* guard band between head/tail, avoiding this overflow
1112	* condition, but we avoid the added driver complexity for
1113	* now.
1114	*/
1115	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)__builtin_expect(!!(oastatus1 & (1 << 1)), 0)) {
1116	ret = append_oa_status(stream, buf, count, offset,
1117	DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
1118	if (ret)
1119	return ret;
1120
1121	DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",__drm_dbg(DRM_UT_CORE, "OA buffer overflow (exponent = %d): force restart\n" , stream->period_exponent)
1122	stream->period_exponent)__drm_dbg(DRM_UT_CORE, "OA buffer overflow (exponent = %d): force restart\n" , stream->period_exponent);
1123
1124	stream->perf->ops.oa_disable(stream);
1125	stream->perf->ops.oa_enable(stream);
1126
1127	oastatus1 = intel_uncore_read(uncore, GEN7_OASTATUS1((const i915_reg_t){ .reg = (0x2364) }));
1128	}
1129
1130	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)__builtin_expect(!!(oastatus1 & (1 << 0)), 0)) {
1131	ret = append_oa_status(stream, buf, count, offset,
1132	DRM_I915_PERF_RECORD_OA_REPORT_LOST);
1133	if (ret)
1134	return ret;
1135	stream->perf->gen7_latched_oastatus1 \|=
1136	GEN7_OASTATUS1_REPORT_LOST(1 << 0);
1137	}
1138
1139	return gen7_append_oa_reports(stream, buf, count, offset);
1140	}
1141
1142	/**
1143	* i915_oa_wait_unlocked - handles blocking IO until OA data available
1144	* @stream: An i915-perf stream opened for OA metrics
1145	*
1146	* Called when userspace tries to read() from a blocking stream FD opened
1147	* for OA metrics. It waits until the hrtimer callback finds a non-empty
1148	* OA buffer and wakes us.
1149	*
1150	* Note: it's acceptable to have this return with some false positives
1151	* since any subsequent read handling will return -EAGAIN if there isn't
1152	* really data ready for userspace yet.
1153	*
1154	* Returns: zero on success or a negative error code
1155	*/
1156	static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
1157	{
1158	/* We would wait indefinitely if periodic sampling is not enabled */
1159	if (!stream->periodic)
1160	return -EIO5;
1161
1162	return wait_event_interruptible(stream->poll_wq,({ int __ret = 0; if (!(oa_buffer_check_unlocked(stream))) __ret = ({ long ret = 0; do { int __error; unsigned long deadline; ((!cold) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/dev/pci/drm/i915/i915_perf.c" , 1163, "!cold")); mtx_enter(&sch_mtx); deadline = jiffies + ret; __error = msleep(&stream->poll_wq, &sch_mtx , 0x100, "drmweti", ret); ret = deadline - jiffies; if (__error == -1 \|\| __error == 4) { ret = -4; mtx_leave(&sch_mtx); break ; } if ((0) > 0 && (ret <= 0 \|\| __error == 35)) { mtx_leave(&sch_mtx); ret = ((oa_buffer_check_unlocked( stream))) ? 1 : 0; break; } mtx_leave(&sch_mtx); } while ( ret > 0 && !(oa_buffer_check_unlocked(stream))); ret ; }); __ret; })
1163	oa_buffer_check_unlocked(stream))({ int __ret = 0; if (!(oa_buffer_check_unlocked(stream))) __ret = ({ long ret = 0; do { int __error; unsigned long deadline; ((!cold) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/dev/pci/drm/i915/i915_perf.c" , 1163, "!cold")); mtx_enter(&sch_mtx); deadline = jiffies + ret; __error = msleep(&stream->poll_wq, &sch_mtx , 0x100, "drmweti", ret); ret = deadline - jiffies; if (__error == -1 \|\| __error == 4) { ret = -4; mtx_leave(&sch_mtx); break ; } if ((0) > 0 && (ret <= 0 \|\| __error == 35)) { mtx_leave(&sch_mtx); ret = ((oa_buffer_check_unlocked( stream))) ? 1 : 0; break; } mtx_leave(&sch_mtx); } while ( ret > 0 && !(oa_buffer_check_unlocked(stream))); ret ; }); __ret; });
1164	}
1165
#ifdef notyet
/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * Registers @wait on the stream's poll wait queue so that userspace polling
 * an i915 perf stream opened for OA metrics is woken through that queue
 * (the same queue i915_oa_wait_unlocked() sleeps on).
 *
 * Only built when `notyet` is defined.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	poll_wait(file, &stream->poll_wq, wait);
}
#endif
1184
1185	/**
1186	* i915_oa_read - just calls through to &i915_oa_ops->read
1187	* @stream: An i915-perf stream opened for OA metrics
1188	* @buf: destination buffer given by userspace
1189	* @count: the number of bytes userspace wants to read
1190	* @offset: (inout): the current position for writing into @buf
1191	*
1192	* Updates @offset according to the number of bytes successfully copied into
1193	* the userspace buffer.
1194	*
1195	* Returns: zero on success or a negative error code
1196	*/
1197	static int i915_oa_read(struct i915_perf_stream *stream,
1198	char __user *buf,
1199	size_t count,
1200	size_t *offset)
1201	{
1202	return stream->perf->ops.read(stream, buf, count, offset);
1203	}
1204
1205	static struct intel_context oa_pin_context(struct i915_perf_stream stream)
1206	{
1207	struct i915_gem_engines_iter it;
1208	struct i915_gem_context *ctx = stream->ctx;
1209	struct intel_context *ce;
1210	struct i915_gem_ww_ctx ww;
1211	int err = -ENODEV19;
1212
1213	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)for (i915_gem_engines_iter_init(&(it), (i915_gem_context_lock_engines (ctx))); ((ce) = i915_gem_engines_iter_next(&(it)));) {
1214	if (ce->engine != stream->engine) /* first match! */
1215	continue;
1216
1217	err = 0;
1218	break;
1219	}
1220	i915_gem_context_unlock_engines(ctx);
1221
1222	if (err)
1223	return ERR_PTR(err);
1224
1225	i915_gem_ww_ctx_init(&ww, true1);
1226	retry:
1227	/*
1228	* As the ID is the gtt offset of the context's vma we
1229	* pin the vma to ensure the ID remains fixed.
1230	*/
1231	err = intel_context_pin_ww(ce, &ww);
1232	if (err == -EDEADLK11) {
1233	err = i915_gem_ww_ctx_backoff(&ww);
1234	if (!err)
1235	goto retry;
1236	}
1237	i915_gem_ww_ctx_fini(&ww);
1238
1239	if (err)
1240	return ERR_PTR(err);
1241
1242	stream->pinned_ctx = ce;
1243	return stream->pinned_ctx;
1244	}
1245
1246	/**
1247	* oa_get_render_ctx_id - determine and hold ctx hw id
1248	* @stream: An i915-perf stream opened for OA metrics
1249	*
1250	* Determine the render context hw id, and ensure it remains fixed for the
1251	* lifetime of the stream. This ensures that we don't have to worry about
1252	* updating the context ID in OACONTROL on the fly.
1253	*
1254	* Returns: zero on success or a negative error code
1255	*/
1256	static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
1257	{
1258	struct intel_context *ce;
1259
1260	ce = oa_pin_context(stream);
1261	if (IS_ERR(ce))
1262	return PTR_ERR(ce);
1263
1264	switch (INTEL_GEN(ce->engine->i915)((&(ce->engine->i915)->__info)->gen)) {
1265	case 7: {
1266	/*
1267	* On Haswell we don't do any post processing of the reports
1268	* and don't need to use the mask.
1269	*/
1270	stream->specific_ctx_id = i915_ggtt_offset(ce->state);
1271	stream->specific_ctx_id_mask = 0;
1272	break;
1273	}
1274
1275	case 8:
1276	case 9:
1277	case 10:
1278	if (intel_engine_in_execlists_submission_mode(ce->engine)) {
1279	stream->specific_ctx_id_mask =
1280	(1U << GEN8_CTX_ID_WIDTH21) - 1;
1281	stream->specific_ctx_id = stream->specific_ctx_id_mask;
1282	} else {
1283	/*
1284	* When using GuC, the context descriptor we write in
1285	* i915 is read by GuC and rewritten before it's
1286	* actually written into the hardware. The LRCA is
1287	* what is put into the context id field of the
1288	* context descriptor by GuC. Because it's aligned to
1289	* a page, the lower 12bits are always at 0 and
1290	* dropped by GuC. They won't be part of the context
1291	* ID in the OA reports, so squash those lower bits.
1292	*/
1293	stream->specific_ctx_id = ce->lrc.lrca >> 12;
1294
1295	/*
1296	* GuC uses the top bit to signal proxy submission, so
1297	* ignore that bit.
1298	*/
1299	stream->specific_ctx_id_mask =
1300	(1U << (GEN8_CTX_ID_WIDTH21 - 1)) - 1;
1301	}
1302	break;
1303
1304	case 11:
1305	case 12: {
1306	stream->specific_ctx_id_mask =
1307	((1U << GEN11_SW_CTX_ID_WIDTH11) - 1) << (GEN11_SW_CTX_ID_SHIFT37 - 32);
1308	/*
1309	* Pick an unused context id
1310	* 0 - BITS_PER_LONG are used by other contexts
1311	* GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
1312	*/
1313	stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID((1<<11) - 1) - 1) << (GEN11_SW_CTX_ID_SHIFT37 - 32);
1314	break;
1315	}
1316
1317	default:
1318	MISSING_CASE(INTEL_GEN(ce->engine->i915))({ int __ret = !!(1); if (__ret) printf("Missing case (%s == %ld)\n" , "((&(ce->engine->i915)->__info)->gen)", (long )(((&(ce->engine->i915)->__info)->gen))); __builtin_expect (!!(__ret), 0); });
1319	}
1320
1321	ce->tag = stream->specific_ctx_id;
1322
1323	drm_dbg(&stream->perf->i915->drm,drm_dev_dbg((&stream->perf->i915->drm)->dev, DRM_UT_DRIVER , "filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id , stream->specific_ctx_id_mask)
1324	"filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",drm_dev_dbg((&stream->perf->i915->drm)->dev, DRM_UT_DRIVER , "filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id , stream->specific_ctx_id_mask)
1325	stream->specific_ctx_id,drm_dev_dbg((&stream->perf->i915->drm)->dev, DRM_UT_DRIVER , "filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id , stream->specific_ctx_id_mask)
1326	stream->specific_ctx_id_mask)drm_dev_dbg((&stream->perf->i915->drm)->dev, DRM_UT_DRIVER , "filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", stream->specific_ctx_id , stream->specific_ctx_id_mask);
1327
1328	return 0;
1329	}
1330
1331	/**
1332	* oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
1333	* @stream: An i915-perf stream opened for OA metrics
1334	*
1335	* In case anything needed doing to ensure the context HW ID would remain valid
1336	* for the lifetime of the stream, then that can be undone here.
1337	*/
1338	static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
1339	{
1340	struct intel_context *ce;
1341
1342	ce = fetch_and_zero(&stream->pinned_ctx)({ typeof(&stream->pinned_ctx) __T = (&stream-> pinned_ctx); (&stream->pinned_ctx) = (typeof(&stream ->pinned_ctx))0; __T; });
1343	if (ce) {
1344	ce->tag = 0; /* recomputed on next submission after parking */
1345	intel_context_unpin(ce);
1346	}
1347
1348	stream->specific_ctx_id = INVALID_CTX_ID0xffffffff;
1349	stream->specific_ctx_id_mask = 0;
1350	}
1351
1352	static void
1353	free_oa_buffer(struct i915_perf_stream *stream)
1354	{
1355	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
1356	I915_VMA_RELEASE_MAP(1UL << (0)));
1357
1358	stream->oa_buffer.vaddr = NULL((void *)0);
1359	}
1360
1361	static void
1362	free_oa_configs(struct i915_perf_stream *stream)
1363	{
1364	struct i915_oa_config_bo oa_bo, tmp;
1365
1366	i915_oa_config_put(stream->oa_config);
1367	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)for (oa_bo = (((stream->oa_config_bos.first)) ? ({ const __typeof ( ((__typeof(oa_bo) )0)->node ) __mptr = ((stream->oa_config_bos .first)); (__typeof(oa_bo) )( (char )__mptr - __builtin_offsetof (__typeof(oa_bo), node) );}) : ((void )0)); oa_bo != ((void )0) && (tmp = ((oa_bo->node.next) ? ({ const __typeof ( ((__typeof(oa_bo) )0)->node ) __mptr = (oa_bo->node .next); (__typeof(oa_bo) )( (char )__mptr - __builtin_offsetof (__typeof(oa_bo), node) );}) : ((void *)0)), oa_bo); oa_bo = tmp)
1368	free_oa_config_bo(oa_bo);
1369	}
1370
1371	static void
1372	free_noa_wait(struct i915_perf_stream *stream)
1373	{
1374	i915_vma_unpin_and_release(&stream->noa_wait, 0);
1375	}
1376
/*
 * Tear down an OA stream.
 *
 * In this port the function is a stub: only STUB() runs. The real teardown
 * sequence below is compiled only when `notyet` is defined.
 */
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	STUB();
#ifdef notyet
	struct i915_perf *perf = stream->perf;

	BUG_ON(stream != perf->exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 *
	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
	 */
	WRITE_ONCE(perf->exclusive_stream, NULL);
	perf->ops.disable_metric_set(stream);

	free_oa_buffer(stream);

	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
	intel_engine_pm_put(stream->engine);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	free_oa_configs(stream);
	free_noa_wait(stream);

	if (perf->spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 perf->spurious_report_rs.missed);
	}
#endif
}
1411
1412	static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
1413	{
1414	struct intel_uncore *uncore = stream->uncore;
1415	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1416	unsigned long flags;
1417
1418	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
1419
1420	/* Pre-DevBDW: OABUFFER must be set with counters off,
1421	* before OASTATUS1, but after OASTATUS2
1422	*/
1423	intel_uncore_write(uncore, GEN7_OASTATUS2((const i915_reg_t){ .reg = (0x2368) }), /* head */
1424	gtt_offset \| GEN7_OASTATUS2_MEM_SELECT_GGTT(1 << 0));
1425	stream->oa_buffer.head = gtt_offset;
1426
1427	intel_uncore_write(uncore, GEN7_OABUFFER((const i915_reg_t){ .reg = (0x23B0) }), gtt_offset);
1428
1429	intel_uncore_write(uncore, GEN7_OASTATUS1((const i915_reg_t){ .reg = (0x2364) }), /* tail */
1430	gtt_offset \| OABUFFER_SIZE_16M(7 << 3));
1431
1432	/* Mark that we need updated tail pointers to read from... */
1433	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR0xffffffff;
1434	stream->oa_buffer.tail = gtt_offset;
1435
1436	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
1437
1438	/* On Haswell we have to track which OASTATUS1 flags we've
1439	* already seen since they can't be cleared while periodic
1440	* sampling is enabled.
1441	*/
1442	stream->perf->gen7_latched_oastatus1 = 0;
1443
1444	/* NB: although the OA buffer will initially be allocated
1445	* zeroed via shmfs (and so this memset is redundant when
1446	* first allocating), we may re-init the OA buffer, either
1447	* when re-enabling a stream or in error/reset paths.
1448	*
1449	* The reason we clear the buffer for each re-init is for the
1450	* sanity check in gen7_append_oa_reports() that looks at the
1451	* report-id field to make sure it's non-zero which relies on
1452	* the assumption that new reports are being written to zeroed
1453	* memory...
1454	*/
1455	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE)__builtin_memset((stream->oa_buffer.vaddr), (0), ((1024 * 1024 * 16)));
1456	}
1457
1458	static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
1459	{
1460	struct intel_uncore *uncore = stream->uncore;
1461	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1462	unsigned long flags;
1463
1464	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
1465
1466	intel_uncore_write(uncore, GEN8_OASTATUS((const i915_reg_t){ .reg = (0x2b08) }), 0);
1467	intel_uncore_write(uncore, GEN8_OAHEADPTR((const i915_reg_t){ .reg = (0x2B0C) }), gtt_offset);
1468	stream->oa_buffer.head = gtt_offset;
1469
1470	intel_uncore_write(uncore, GEN8_OABUFFER_UDW((const i915_reg_t){ .reg = (0x23b4) }), 0);
1471
1472	/*
1473	* PRM says:
1474	*
1475	* "This MMIO must be set before the OATAILPTR
1476	* register and after the OAHEADPTR register. This is
1477	* to enable proper functionality of the overflow
1478	* bit."
1479	*/
1480	intel_uncore_write(uncore, GEN8_OABUFFER((const i915_reg_t){ .reg = (0x2b14) }), gtt_offset \|
1481	OABUFFER_SIZE_16M(7 << 3) \| GEN8_OABUFFER_MEM_SELECT_GGTT(1 << 0));
1482	intel_uncore_write(uncore, GEN8_OATAILPTR((const i915_reg_t){ .reg = (0x2B10) }), gtt_offset & GEN8_OATAILPTR_MASK0xffffffc0);
1483
1484	/* Mark that we need updated tail pointers to read from... */
1485	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR0xffffffff;
1486	stream->oa_buffer.tail = gtt_offset;
1487
1488	/*
1489	* Reset state used to recognise context switches, affecting which
1490	* reports we will forward to userspace while filtering for a single
1491	* context.
1492	*/
1493	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID0xffffffff;
1494
1495	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
1496
1497	/*
1498	* NB: although the OA buffer will initially be allocated
1499	* zeroed via shmfs (and so this memset is redundant when
1500	* first allocating), we may re-init the OA buffer, either
1501	* when re-enabling a stream or in error/reset paths.
1502	*
1503	* The reason we clear the buffer for each re-init is for the
1504	* sanity check in gen8_append_oa_reports() that looks at the
1505	* reason field to make sure it's non-zero which relies on
1506	* the assumption that new reports are being written to zeroed
1507	* memory...
1508	*/
1509	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE)__builtin_memset((stream->oa_buffer.vaddr), (0), ((1024 * 1024 * 16)));
1510	}
1511
1512	static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
1513	{
1514	struct intel_uncore *uncore = stream->uncore;
1515	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
1516	unsigned long flags;
1517
1518	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags)do { flags = 0; mtx_enter(&stream->oa_buffer.ptr_lock) ; } while (0);
1519
1520	intel_uncore_write(uncore, GEN12_OAG_OASTATUS((const i915_reg_t){ .reg = (0xdafc) }), 0);
1521	intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR((const i915_reg_t){ .reg = (0xdb00) }),
1522	gtt_offset & GEN12_OAG_OAHEADPTR_MASK0xffffffc0);
1523	stream->oa_buffer.head = gtt_offset;
1524
1525	/*
1526	* PRM says:
1527	*
1528	* "This MMIO must be set before the OATAILPTR
1529	* register and after the OAHEADPTR register. This is
1530	* to enable proper functionality of the overflow
1531	* bit."
1532	*/
1533	intel_uncore_write(uncore, GEN12_OAG_OABUFFER((const i915_reg_t){ .reg = (0xdb08) }), gtt_offset \|
1534	OABUFFER_SIZE_16M(7 << 3) \| GEN8_OABUFFER_MEM_SELECT_GGTT(1 << 0));
1535	intel_uncore_write(uncore, GEN12_OAG_OATAILPTR((const i915_reg_t){ .reg = (0xdb04) }),
1536	gtt_offset & GEN12_OAG_OATAILPTR_MASK0xffffffc0);
1537
1538	/* Mark that we need updated tail pointers to read from... */
1539	stream->oa_buffer.aging_tail = INVALID_TAIL_PTR0xffffffff;
1540	stream->oa_buffer.tail = gtt_offset;
1541
1542	/*
1543	* Reset state used to recognise context switches, affecting which
1544	* reports we will forward to userspace while filtering for a single
1545	* context.
1546	*/
1547	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID0xffffffff;
1548
1549	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags)do { (void)(flags); mtx_leave(&stream->oa_buffer.ptr_lock ); } while (0);
1550
1551	/*
1552	* NB: although the OA buffer will initially be allocated
1553	* zeroed via shmfs (and so this memset is redundant when
1554	* first allocating), we may re-init the OA buffer, either
1555	* when re-enabling a stream or in error/reset paths.
1556	*
1557	* The reason we clear the buffer for each re-init is for the
1558	* sanity check in gen8_append_oa_reports() that looks at the
1559	* reason field to make sure it's non-zero which relies on
1560	* the assumption that new reports are being written to zeroed
1561	* memory...
1562	*/
1563	memset(stream->oa_buffer.vaddr, 0,__builtin_memset((stream->oa_buffer.vaddr), (0), (stream-> oa_buffer.vma->size))
1564	stream->oa_buffer.vma->size)__builtin_memset((stream->oa_buffer.vaddr), (0), (stream-> oa_buffer.vma->size));
1565	}
1566
1567	static int alloc_oa_buffer(struct i915_perf_stream *stream)
1568	{
1569	struct drm_i915_privateinteldrm_softc *i915 = stream->perf->i915;
1570	struct drm_i915_gem_object *bo;
1571	struct i915_vma *vma;
1572	int ret;
1573
1574	if (drm_WARN_ON(&i915->drm, stream->oa_buffer.vma)({ int __ret = !!((stream->oa_buffer.vma)); if (__ret) printf ("%s %s: " "%s", dev_driver_string(((&i915->drm))-> dev), "", "drm_WARN_ON(" "stream->oa_buffer.vma" ")"); __builtin_expect (!!(__ret), 0); }))
1575	return -ENODEV19;
1576
1577	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE)0;
1578	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K \|\| OA_BUFFER_SIZE > SZ_16M)extern char _ctassert[(!((1024 * 1024 * 16) < (1024 * 128) \|\| (1024 * 1024 * 16) > (1024 * 1024 * 16))) ? 1 : -1 ] __attribute__ ((__unused__));
1579
1580	bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE(1024 * 1024 * 16));
1581	if (IS_ERR(bo)) {
1582	drm_err(&i915->drm, "Failed to allocate OA buffer\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Failed to allocate OA buffer\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1583	return PTR_ERR(bo);
1584	}
1585
1586	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
1587
1588	/* PreHSW required 512K alignment, HSW requires 16M */
1589	vma = i915_gem_object_ggtt_pin(bo, NULL((void )0), 0, SZ_16M(1024 1024 * 16), 0);
1590	if (IS_ERR(vma)) {
1591	ret = PTR_ERR(vma);
1592	goto err_unref;
1593	}
1594	stream->oa_buffer.vma = vma;
1595
1596	stream->oa_buffer.vaddr =
1597	i915_gem_object_pin_map(bo, I915_MAP_WB);
1598	if (IS_ERR(stream->oa_buffer.vaddr)) {
1599	ret = PTR_ERR(stream->oa_buffer.vaddr);
1600	goto err_unpin;
1601	}
1602
1603	return 0;
1604
1605	err_unpin:
1606	__i915_vma_unpin(vma);
1607
1608	err_unref:
1609	i915_gem_object_put(bo);
1610
1611	stream->oa_buffer.vaddr = NULL((void *)0);
1612	stream->oa_buffer.vma = NULL((void *)0);
1613
1614	return ret;
1615	}
1616
1617	static u32 save_restore_register(struct i915_perf_stream stream, u32 *cs,
1618	bool_Bool save, i915_reg_t reg, u32 offset,
1619	u32 dword_count)
1620	{
1621	u32 cmd;
1622	u32 d;
1623
1624	cmd = save ? MI_STORE_REGISTER_MEM(((0x24) << 23) \| (1)) : MI_LOAD_REGISTER_MEM(((0x29) << 23) \| (1));
1625	cmd \|= MI_SRM_LRM_GLOBAL_GTT(1<<22);
1626	if (INTEL_GEN(stream->perf->i915)((&(stream->perf->i915)->__info)->gen) >= 8)
1627	cmd++;
1628
1629	for (d = 0; d < dword_count; d++) {
1630	*cs++ = cmd;
1631	cs++ = i915_mmio_reg_offset(reg) + 4 d;
1632	*cs++ = intel_gt_scratch_offset(stream->engine->gt,
1633	offset) + 4 * d;
1634	*cs++ = 0;
1635	}
1636
1637	return cs;
1638	}
1639
1640	static int alloc_noa_wait(struct i915_perf_stream *stream)
1641	{
1642	struct drm_i915_privateinteldrm_softc *i915 = stream->perf->i915;
1643	struct drm_i915_gem_object *bo;
1644	struct i915_vma *vma;
1645	const u64 delay_ticks = 0xffffffffffffffff -
1646	i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay)({ typeof((&stream->perf->noa_programming_delay)) __tmp = (volatile typeof((&stream->perf->noa_programming_delay )) )&(*(&stream->perf->noa_programming_delay)) ; membar_datadep_consumer(); __tmp; }));
1647	const u32 base = stream->engine->mmio_base;
1648	#define CS_GPR(x)((const i915_reg_t){ .reg = ((base) + 0x600 + (x) * 8) }) GEN8_RING_CS_GPR(base, x)((const i915_reg_t){ .reg = ((base) + 0x600 + (x) * 8) })
1649	u32 batch, ts0, cs, jump;
1650	int ret, i;
1651	enum {
1652	START_TS,
1653	NOW_TS,
1654	DELTA_TS,
1655	JUMP_PREDICATE,
1656	DELTA_TARGET,
1657	N_CS_GPR
1658	};
1659
1660	bo = i915_gem_object_create_internal(i915, 4096);
1661	if (IS_ERR(bo)) {
1662	drm_err(&i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Failed to allocate NOA wait batchbuffer\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
1663	"Failed to allocate NOA wait batchbuffer\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "Failed to allocate NOA wait batchbuffer\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
1664	return PTR_ERR(bo);
1665	}
1666
1667	/*
1668	* We pin in GGTT because we jump into this buffer now because
1669	* multiple OA config BOs will have a jump to this address and it
1670	* needs to be fixed during the lifetime of the i915/perf stream.
1671	*/
1672	vma = i915_gem_object_ggtt_pin(bo, NULL((void *)0), 0, 0, PIN_HIGH(1ULL << (5)));
1673	if (IS_ERR(vma)) {
1674	ret = PTR_ERR(vma);
1675	goto err_unref;
1676	}
1677
1678	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
1679	if (IS_ERR(batch)) {
1680	ret = PTR_ERR(batch);
1681	goto err_unpin;
1682	}
1683
1684	/* Save registers. */
1685	for (i = 0; i < N_CS_GPR; i++)
1686	cs = save_restore_register(
1687	stream, cs, true1 /* save /, CS_GPR(i)((const i915_reg_t){ .reg = ((base) + 0x600 + (i) 8) }),
1688	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
1689	cs = save_restore_register(
1690	stream, cs, true1 /* save */, MI_PREDICATE_RESULT_1((const i915_reg_t){ .reg = (0x241c) }),
1691	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
1692
1693	/* First timestamp snapshot location. */
1694	ts0 = cs;
1695
1696	/*
1697	* Initial snapshot of the timestamp register to implement the wait.
1698	* We work with 32b values, so clear out the top 32b bits of the
1699	* register because the ALU works 64bits.
1700	*/
1701	cs++ = MI_LOAD_REGISTER_IMM(1)(((0x22) << 23) \| (2(1)-1));
1702	cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)((const i915_reg_t){ .reg = ((base) + 0x600 + (START_TS) 8) })) + 4;
1703	*cs++ = 0;
1704	*cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) \| (1)) \| (3 - 2);
1705	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)((const i915_reg_t){ .reg = ((base) + 0x358) }));
1706	cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)((const i915_reg_t){ .reg = ((base) + 0x600 + (START_TS) 8) }));
1707
1708	/*
1709	* This is the location we're going to jump back into until the
1710	* required amount of time has passed.
1711	*/
1712	jump = cs;
1713
1714	/*
1715	* Take another snapshot of the timestamp register. Take care to clear
1716	* up the top 32bits of CS_GPR(1) as we're using it for other
1717	* operations below.
1718	*/
1719	cs++ = MI_LOAD_REGISTER_IMM(1)(((0x22) << 23) \| (2(1)-1));
1720	cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)((const i915_reg_t){ .reg = ((base) + 0x600 + (NOW_TS) 8) } )) + 4;
1721	*cs++ = 0;
1722	*cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) \| (1)) \| (3 - 2);
1723	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base)((const i915_reg_t){ .reg = ((base) + 0x358) }));
1724	cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)((const i915_reg_t){ .reg = ((base) + 0x600 + (NOW_TS) 8) } ));
1725
1726	/*
1727	* Do a diff between the 2 timestamps and store the result back into
1728	* CS_GPR(1).
1729	*/
1730	*cs++ = MI_MATH(5)(((0x1a) << 23) \| ((5) - 1));
1731	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS))((0x080) << 20 \| (0x20) << 10 \| ((NOW_TS)));
1732	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS))((0x080) << 20 \| (0x21) << 10 \| ((START_TS)));
1733	*cs++ = MI_MATH_SUB((0x101) << 20 \| (0x0) << 10 \| (0x0));
1734	*cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU)((0x180) << 20 \| ((DELTA_TS)) << 10 \| (0x31));
1735	*cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF)((0x180) << 20 \| ((JUMP_PREDICATE)) << 10 \| (0x33 ));
1736
1737	/*
1738	* Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
1739	* timestamp have rolled over the 32bits) into the predicate register
1740	* to be used for the predicated jump.
1741	*/
1742	*cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) \| (1)) \| (3 - 2);
1743	cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)((const i915_reg_t){ .reg = ((base) + 0x600 + (JUMP_PREDICATE ) 8) }));
1744	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1((const i915_reg_t){ .reg = (0x241c) }));
1745
1746	/* Restart from the beginning if we had timestamps roll over. */
1747	*cs++ = (INTEL_GEN(i915)((&(i915)->__info)->gen) < 8 ?
1748	MI_BATCH_BUFFER_START(((0x31) << 23) \| (0)) :
1749	MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) \| (1))) \|
1750	MI_BATCH_PREDICATE((u32)((1UL << (15)) + 0));
1751	cs++ = i915_ggtt_offset(vma) + (ts0 - batch) 4;
1752	*cs++ = 0;
1753
1754	/*
1755	* Now add the diff between to previous timestamps and add it to :
1756	* (((1 * << 64) - 1) - delay_ns)
1757	*
1758	* When the Carry Flag contains 1 this means the elapsed time is
1759	* longer than the expected delay, and we can exit the wait loop.
1760	*/
1761	cs++ = MI_LOAD_REGISTER_IMM(2)(((0x22) << 23) \| (2(2)-1));
1762	cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)((const i915_reg_t){ .reg = ((base) + 0x600 + (DELTA_TARGET) 8) }));
1763	*cs++ = lower_32_bits(delay_ticks)((u32)(delay_ticks));
1764	cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)((const i915_reg_t){ .reg = ((base) + 0x600 + (DELTA_TARGET) 8) })) + 4;
1765	*cs++ = upper_32_bits(delay_ticks)((u32)(((delay_ticks) >> 16) >> 16));
1766
1767	*cs++ = MI_MATH(4)(((0x1a) << 23) \| ((4) - 1));
1768	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS))((0x080) << 20 \| (0x20) << 10 \| ((DELTA_TS)));
1769	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET))((0x080) << 20 \| (0x21) << 10 \| ((DELTA_TARGET)));
1770	*cs++ = MI_MATH_ADD((0x100) << 20 \| (0x0) << 10 \| (0x0));
1771	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF)((0x580) << 20 \| ((JUMP_PREDICATE)) << 10 \| (0x33 ));
1772
1773	*cs++ = MI_ARB_CHECK(((0x05) << 23) \| (0));
1774
1775	/*
1776	* Transfer the result into the predicate register to be used for the
1777	* predicated jump.
1778	*/
1779	*cs++ = MI_LOAD_REGISTER_REG(((0x2A) << 23) \| (1)) \| (3 - 2);
1780	cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE)((const i915_reg_t){ .reg = ((base) + 0x600 + (JUMP_PREDICATE ) 8) }));
1781	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1((const i915_reg_t){ .reg = (0x241c) }));
1782
1783	/* Predicate the jump. */
1784	*cs++ = (INTEL_GEN(i915)((&(i915)->__info)->gen) < 8 ?
1785	MI_BATCH_BUFFER_START(((0x31) << 23) \| (0)) :
1786	MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) \| (1))) \|
1787	MI_BATCH_PREDICATE((u32)((1UL << (15)) + 0));
1788	cs++ = i915_ggtt_offset(vma) + (jump - batch) 4;
1789	*cs++ = 0;
1790
1791	/* Restore registers. */
1792	for (i = 0; i < N_CS_GPR; i++)
1793	cs = save_restore_register(
1794	stream, cs, false0 /* restore /, CS_GPR(i)((const i915_reg_t){ .reg = ((base) + 0x600 + (i) 8) }),
1795	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
1796	cs = save_restore_register(
1797	stream, cs, false0 /* restore */, MI_PREDICATE_RESULT_1((const i915_reg_t){ .reg = (0x241c) }),
1798	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
1799
1800	/* And return to the ring. */
1801	*cs++ = MI_BATCH_BUFFER_END(((0x0a) << 23) \| (0));
1802
1803	GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch))((void)0);
1804
1805	i915_gem_object_flush_map(bo);
1806	__i915_gem_object_release_map(bo);
1807
1808	stream->noa_wait = vma;
1809	return 0;
1810
1811	err_unpin:
1812	i915_vma_unpin_and_release(&vma, 0);
1813	err_unref:
1814	i915_gem_object_put(bo);
1815	return ret;
1816	}
1817
1818	static u32 write_cs_mi_lri(u32 cs,
1819	const struct i915_oa_reg *reg_data,
1820	u32 n_regs)
1821	{
1822	u32 i;
1823
1824	for (i = 0; i < n_regs; i++) {
1825	if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS(126)) == 0) {
1826	u32 n_lri = min_t(u32,({ u32 __min_a = (n_regs - i); u32 __min_b = ((126)); __min_a < __min_b ? __min_a : __min_b; })
1827	n_regs - i,({ u32 __min_a = (n_regs - i); u32 __min_b = ((126)); __min_a < __min_b ? __min_a : __min_b; })
1828	MI_LOAD_REGISTER_IMM_MAX_REGS)({ u32 __min_a = (n_regs - i); u32 __min_b = ((126)); __min_a < __min_b ? __min_a : __min_b; });
1829
1830	cs++ = MI_LOAD_REGISTER_IMM(n_lri)(((0x22) << 23) \| (2(n_lri)-1));
1831	}
1832	*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
1833	*cs++ = reg_data[i].value;
1834	}
1835
1836	return cs;
1837	}
1838
1839	static int num_lri_dwords(int num_regs)
1840	{
1841	int count = 0;
1842
1843	if (num_regs > 0) {
1844	count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS)(((num_regs) + (((126)) - 1)) / ((126)));
1845	count += num_regs * 2;
1846	}
1847
1848	return count;
1849	}
1850
1851	static struct i915_oa_config_bo *
1852	alloc_oa_config_buffer(struct i915_perf_stream *stream,
1853	struct i915_oa_config *oa_config)
1854	{
1855	struct drm_i915_gem_object *obj;
1856	struct i915_oa_config_bo *oa_bo;
1857	size_t config_length = 0;
1858	u32 *cs;
1859	int err;
1860
1861	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL(0x0001 \| 0x0004));
1862	if (!oa_bo)
1863	return ERR_PTR(-ENOMEM12);
1864
1865	config_length += num_lri_dwords(oa_config->mux_regs_len);
1866	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
1867	config_length += num_lri_dwords(oa_config->flex_regs_len);
1868	config_length += 3; /* MI_BATCH_BUFFER_START */
1869	config_length = roundup2(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE)(((sizeof(u32) * config_length) + (((1ULL << (12))) - 1 )) & (~((__typeof(sizeof(u32) * config_length))((1ULL << (12))) - 1)));
1870
1871	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
1872	if (IS_ERR(obj)) {
1873	err = PTR_ERR(obj);
1874	goto err_free;
1875	}
1876
1877	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
1878	if (IS_ERR(cs)) {
1879	err = PTR_ERR(cs);
1880	goto err_oa_bo;
1881	}
1882
1883	cs = write_cs_mi_lri(cs,
1884	oa_config->mux_regs,
1885	oa_config->mux_regs_len);
1886	cs = write_cs_mi_lri(cs,
1887	oa_config->b_counter_regs,
1888	oa_config->b_counter_regs_len);
1889	cs = write_cs_mi_lri(cs,
1890	oa_config->flex_regs,
1891	oa_config->flex_regs_len);
1892
1893	/* Jump into the active wait. */
1894	*cs++ = (INTEL_GEN(stream->perf->i915)((&(stream->perf->i915)->__info)->gen) < 8 ?
1895	MI_BATCH_BUFFER_START(((0x31) << 23) \| (0)) :
1896	MI_BATCH_BUFFER_START_GEN8(((0x31) << 23) \| (1)));
1897	*cs++ = i915_ggtt_offset(stream->noa_wait);
1898	*cs++ = 0;
1899
1900	i915_gem_object_flush_map(obj);
1901	__i915_gem_object_release_map(obj);
1902
1903	oa_bo->vma = i915_vma_instance(obj,
1904	&stream->engine->gt->ggtt->vm,
1905	NULL((void *)0));
1906	if (IS_ERR(oa_bo->vma)) {
1907	err = PTR_ERR(oa_bo->vma);
1908	goto err_oa_bo;
1909	}
1910
1911	oa_bo->oa_config = i915_oa_config_get(oa_config);
1912	llist_add(&oa_bo->node, &stream->oa_config_bos);
1913
1914	return oa_bo;
1915
1916	err_oa_bo:
1917	i915_gem_object_put(obj);
1918	err_free:
1919	kfree(oa_bo);
1920	return ERR_PTR(err);
1921	}
1922
1923	static struct i915_vma *
1924	get_oa_vma(struct i915_perf_stream stream, struct i915_oa_config oa_config)
1925	{
1926	struct i915_oa_config_bo *oa_bo;
1927
1928	/*
1929	* Look for the buffer in the already allocated BOs attached
1930	* to the stream.
1931	*/
1932	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node)for ((oa_bo) = (((stream->oa_config_bos.first)) ? ({ const __typeof( ((__typeof((oa_bo)) )0)->node ) __mptr = ((stream ->oa_config_bos.first)); (__typeof((oa_bo)) )( (char )__mptr - __builtin_offsetof(__typeof((oa_bo)), node) );}) : ((void )0)); (oa_bo) != ((void )0); (oa_bo) = (((oa_bo)->node. next) ? ({ const __typeof( ((__typeof((oa_bo)) )0)->node ) __mptr = ((oa_bo)->node.next); (__typeof((oa_bo)) )( (char )__mptr - __builtin_offsetof(__typeof((oa_bo)), node ) );}) : ((void *)0))) {
1933	if (oa_bo->oa_config == oa_config &&
1934	memcmp(oa_bo->oa_config->uuid,__builtin_memcmp((oa_bo->oa_config->uuid), (oa_config-> uuid), (sizeof(oa_config->uuid)))
1935	oa_config->uuid,__builtin_memcmp((oa_bo->oa_config->uuid), (oa_config-> uuid), (sizeof(oa_config->uuid)))
1936	sizeof(oa_config->uuid))__builtin_memcmp((oa_bo->oa_config->uuid), (oa_config-> uuid), (sizeof(oa_config->uuid))) == 0)
1937	goto out;
1938	}
1939
1940	oa_bo = alloc_oa_config_buffer(stream, oa_config);
1941	if (IS_ERR(oa_bo))
1942	return ERR_CAST(oa_bo);
1943
1944	out:
1945	return i915_vma_get(oa_bo->vma);
1946	}
1947
1948	static int
1949	emit_oa_config(struct i915_perf_stream *stream,
1950	struct i915_oa_config *oa_config,
1951	struct intel_context *ce,
1952	struct i915_active *active)
1953	{
1954	struct i915_request *rq;
1955	struct i915_vma *vma;
1956	struct i915_gem_ww_ctx ww;
1957	int err;
1958
1959	vma = get_oa_vma(stream, oa_config);
1960	if (IS_ERR(vma))
1961	return PTR_ERR(vma);
1962
1963	i915_gem_ww_ctx_init(&ww, true1);
1964	retry:
1965	err = i915_gem_object_lock(vma->obj, &ww);
1966	if (err)
1967	goto err;
1968
1969	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL(1ULL << (10)) \| PIN_HIGH(1ULL << (5)));
1970	if (err)
1971	goto err;
1972
1973	intel_engine_pm_get(ce->engine);
1974	rq = i915_request_create(ce);
1975	intel_engine_pm_put(ce->engine);
1976	if (IS_ERR(rq)) {
1977	err = PTR_ERR(rq);
1978	goto err_vma_unpin;
1979	}
1980
1981	if (!IS_ERR_OR_NULL(active)) {
1982	/* After all individual context modifications */
1983	err = i915_request_await_active(rq, active,
1984	I915_ACTIVE_AWAIT_ACTIVE(1UL << (1)));
1985	if (err)
1986	goto err_add_request;
1987
1988	err = i915_active_add_request(active, rq);
1989	if (err)
1990	goto err_add_request;
1991	}
1992
1993	err = i915_request_await_object(rq, vma->obj, 0);
1994	if (!err)
1995	err = i915_vma_move_to_active(vma, rq, 0);
1996	if (err)
1997	goto err_add_request;
1998
1999	err = rq->engine->emit_bb_start(rq,
2000	vma->node.start, 0,
2001	I915_DISPATCH_SECURE(1UL << (0)));
2002	if (err)
2003	goto err_add_request;
2004
2005	err_add_request:
2006	i915_request_add(rq);
2007	err_vma_unpin:
2008	i915_vma_unpin(vma);
2009	err:
2010	if (err == -EDEADLK11) {
2011	err = i915_gem_ww_ctx_backoff(&ww);
2012	if (!err)
2013	goto retry;
2014	}
2015
2016	i915_gem_ww_ctx_fini(&ww);
2017	i915_vma_put(vma);
2018	return err;
2019	}
2020
2021	static struct intel_context oa_context(struct i915_perf_stream stream)
2022	{
2023	return stream->pinned_ctx ?: stream->engine->kernel_context;
2024	}
2025
2026	static int
2027	hsw_enable_metric_set(struct i915_perf_stream *stream,
2028	struct i915_active *active)
2029	{
2030	struct intel_uncore *uncore = stream->uncore;
2031
2032	/*
2033	* PRM:
2034	*
2035	* OA unit is using “crclk” for its functionality. When trunk
2036	* level clock gating takes place, OA clock would be gated,
2037	* unable to count the events from non-render clock domain.
2038	* Render clock gating must be disabled when OA is enabled to
2039	* count the events from non-render domain. Unit level clock
2040	* gating for RCS should also be disabled.
2041	*/
2042	intel_uncore_rmw(uncore, GEN7_MISCCPCTL((const i915_reg_t){ .reg = (0x9424) }),
2043	GEN7_DOP_CLOCK_GATE_ENABLE(1 << 0), 0);
2044	intel_uncore_rmw(uncore, GEN6_UCGCTL1((const i915_reg_t){ .reg = (0x9400) }),
2045	0, GEN6_CSUNIT_CLOCK_GATE_DISABLE(1 << 7));
2046
2047	return emit_oa_config(stream,
2048	stream->oa_config, oa_context(stream),
2049	active);
2050	}
2051
2052	static void hsw_disable_metric_set(struct i915_perf_stream *stream)
2053	{
2054	struct intel_uncore *uncore = stream->uncore;
2055
2056	intel_uncore_rmw(uncore, GEN6_UCGCTL1((const i915_reg_t){ .reg = (0x9400) }),
2057	GEN6_CSUNIT_CLOCK_GATE_DISABLE(1 << 7), 0);
2058	intel_uncore_rmw(uncore, GEN7_MISCCPCTL((const i915_reg_t){ .reg = (0x9424) }),
2059	0, GEN7_DOP_CLOCK_GATE_ENABLE(1 << 0));
2060
2061	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS((const i915_reg_t){ .reg = (0x9840) }), GT_NOA_ENABLE0x00000080, 0);
2062	}
2063
2064	static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
2065	i915_reg_t reg)
2066	{
2067	u32 mmio = i915_mmio_reg_offset(reg);
2068	int i;
2069
2070	/*
2071	* This arbitrary default will select the 'EU FPU0 Pipeline
2072	* Active' event. In the future it's anticipated that there
2073	* will be an explicit 'No Event' we can select, but not yet...
2074	*/
2075	if (!oa_config)
2076	return 0;
2077
2078	for (i = 0; i < oa_config->flex_regs_len; i++) {
2079	if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
2080	return oa_config->flex_regs[i].value;
2081	}
2082
2083	return 0;
2084	}
2085	/*
2086	* NB: It must always remain pointer safe to run this even if the OA unit
2087	* has been disabled.
2088	*
2089	* It's fine to put out-of-date values into these per-context registers
2090	* in the case that the OA unit has been disabled.
2091	*/
2092	static void
2093	gen8_update_reg_state_unlocked(const struct intel_context *ce,
2094	const struct i915_perf_stream *stream)
2095	{
2096	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2097	u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2098	/* The MMIO offsets for Flex EU registers aren't contiguous */
2099	i915_reg_t flex_regs[] = {
2100	EU_PERF_CNTL0((const i915_reg_t){ .reg = (0xe458) }),
2101	EU_PERF_CNTL1((const i915_reg_t){ .reg = (0xe558) }),
2102	EU_PERF_CNTL2((const i915_reg_t){ .reg = (0xe658) }),
2103	EU_PERF_CNTL3((const i915_reg_t){ .reg = (0xe758) }),
2104	EU_PERF_CNTL4((const i915_reg_t){ .reg = (0xe45c) }),
2105	EU_PERF_CNTL5((const i915_reg_t){ .reg = (0xe55c) }),
2106	EU_PERF_CNTL6((const i915_reg_t){ .reg = (0xe65c) }),
2107	};
2108	u32 *reg_state = ce->lrc_reg_state;
2109	int i;
2110
2111	reg_state[ctx_oactxctrl + 1] =
2112	(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT2) \|
2113	(stream->periodic ? GEN8_OA_TIMER_ENABLE(1 << 1) : 0) \|
2114	GEN8_OA_COUNTER_RESUME(1 << 0);
2115
2116	for (i = 0; i < ARRAY_SIZE(flex_regs)(sizeof((flex_regs)) / sizeof((flex_regs)[0])); i++)
2117	reg_state[ctx_flexeu0 + i * 2 + 1] =
2118	oa_config_flex_reg(stream->oa_config, flex_regs[i]);
2119	}
2120
2121	struct flex {
2122	i915_reg_t reg;
2123	u32 offset;
2124	u32 value;
2125	};
2126
2127	static int
2128	gen8_store_flex(struct i915_request *rq,
2129	struct intel_context *ce,
2130	const struct flex *flex, unsigned int count)
2131	{
2132	u32 offset;
2133	u32 *cs;
2134
2135	cs = intel_ring_begin(rq, 4 * count);
2136	if (IS_ERR(cs))
2137	return PTR_ERR(cs);
2138
2139	offset = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET(((0) + (1)) * (1 << 12));
2140	do {
2141	*cs++ = MI_STORE_DWORD_IMM_GEN4(((0x20) << 23) \| (2)) \| MI_USE_GGTT(1 << 22);
2142	cs++ = offset + flex->offset sizeof(u32);
2143	*cs++ = 0;
2144	*cs++ = flex->value;
2145	} while (flex++, --count);
2146
2147	intel_ring_advance(rq, cs);
2148
2149	return 0;
2150	}
2151
2152	static int
2153	gen8_load_flex(struct i915_request *rq,
2154	struct intel_context *ce,
2155	const struct flex *flex, unsigned int count)
2156	{
2157	u32 *cs;
2158
2159	GEM_BUG_ON(!count \|\| count > 63)((void)0);
2160
2161	cs = intel_ring_begin(rq, 2 * count + 2);
2162	if (IS_ERR(cs))
2163	return PTR_ERR(cs);
2164
2165	cs++ = MI_LOAD_REGISTER_IMM(count)(((0x22) << 23) \| (2(count)-1));
2166	do {
2167	*cs++ = i915_mmio_reg_offset(flex->reg);
2168	*cs++ = flex->value;
2169	} while (flex++, --count);
2170	*cs++ = MI_NOOP(((0) << 23) \| (0));
2171
2172	intel_ring_advance(rq, cs);
2173
2174	return 0;
2175	}
2176
2177	static int gen8_modify_context(struct intel_context *ce,
2178	const struct flex *flex, unsigned int count)
2179	{
2180	struct i915_request *rq;
2181	int err;
2182
2183	rq = intel_engine_create_kernel_request(ce->engine);
2184	if (IS_ERR(rq))
2185	return PTR_ERR(rq);
2186
2187	/* Serialise with the remote context */
2188	err = intel_context_prepare_remote_request(ce, rq);
2189	if (err == 0)
2190	err = gen8_store_flex(rq, ce, flex, count);
2191
2192	i915_request_add(rq);
2193	return err;
2194	}
2195
2196	static int
2197	gen8_modify_self(struct intel_context *ce,
2198	const struct flex *flex, unsigned int count,
2199	struct i915_active *active)
2200	{
2201	struct i915_request *rq;
2202	int err;
2203
2204	intel_engine_pm_get(ce->engine);
2205	rq = i915_request_create(ce);
2206	intel_engine_pm_put(ce->engine);
2207	if (IS_ERR(rq))
2208	return PTR_ERR(rq);
2209
2210	if (!IS_ERR_OR_NULL(active)) {
2211	err = i915_active_add_request(active, rq);
2212	if (err)
2213	goto err_add_request;
2214	}
2215
2216	err = gen8_load_flex(rq, ce, flex, count);
2217	if (err)
2218	goto err_add_request;
2219
2220	err_add_request:
2221	i915_request_add(rq);
2222	return err;
2223	}
2224
2225	static int gen8_configure_context(struct i915_gem_context *ctx,
2226	struct flex *flex, unsigned int count)
2227	{
2228	struct i915_gem_engines_iter it;
2229	struct intel_context *ce;
2230	int err = 0;
2231
2232	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)for (i915_gem_engines_iter_init(&(it), (i915_gem_context_lock_engines (ctx))); ((ce) = i915_gem_engines_iter_next(&(it)));) {
2233	GEM_BUG_ON(ce == ce->engine->kernel_context)((void)0);
2234
2235	if (ce->engine->class != RENDER_CLASS0)
2236	continue;
2237
2238	/* Otherwise OA settings will be set upon first use */
2239	if (!intel_context_pin_if_active(ce))
2240	continue;
2241
2242	flex->value = intel_sseu_make_rpcs(ce->engine->gt, &ce->sseu);
2243	err = gen8_modify_context(ce, flex, count);
2244
2245	intel_context_unpin(ce);
2246	if (err)
2247	break;
2248	}
2249	i915_gem_context_unlock_engines(ctx);
2250
2251	return err;
2252	}
2253
2254	static int gen12_configure_oar_context(struct i915_perf_stream *stream,
2255	struct i915_active *active)
2256	{
2257	int err;
2258	struct intel_context *ce = stream->pinned_ctx;
2259	u32 format = stream->oa_buffer.format;
2260	struct flex regs_context[] = {
2261	{
2262	GEN8_OACTXCONTROL((const i915_reg_t){ .reg = (0x2360) }),
2263	stream->perf->ctx_oactxctrl_offset + 1,
2264	active ? GEN8_OA_COUNTER_RESUME(1 << 0) : 0,
2265	},
2266	};
2267	/* Offsets in regs_lri are not used since this configuration is only
2268	* applied using LRI. Initialize the correct offsets for posterity.
2269	*/
2270	#define GEN12_OAR_OACONTROL_OFFSET0x5B0 0x5B0
2271	struct flex regs_lri[] = {
2272	{
2273	GEN12_OAR_OACONTROL((const i915_reg_t){ .reg = (0x2960) }),
2274	GEN12_OAR_OACONTROL_OFFSET0x5B0 + 1,
2275	(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT1) \|
2276	(active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE(1 << 0) : 0)
2277	},
2278	{
2279	RING_CONTEXT_CONTROL(ce->engine->mmio_base)((const i915_reg_t){ .reg = ((ce->engine->mmio_base) + 0x244 ) }),
2280	CTX_CONTEXT_CONTROL(0x02 + 1),
2281	_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,({ if (__builtin_constant_p((1 << 8))) do { } while (0) ; if (__builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); if (__builtin_constant_p((1 << 8)) && __builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); (((1 << 8)) << 16 \| (active ? (1 << 8 ) : 0)); })
2282	active ?({ if (__builtin_constant_p((1 << 8))) do { } while (0) ; if (__builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); if (__builtin_constant_p((1 << 8)) && __builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); (((1 << 8)) << 16 \| (active ? (1 << 8 ) : 0)); })
2283	GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :({ if (__builtin_constant_p((1 << 8))) do { } while (0) ; if (__builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); if (__builtin_constant_p((1 << 8)) && __builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); (((1 << 8)) << 16 \| (active ? (1 << 8 ) : 0)); })
2284	0)({ if (__builtin_constant_p((1 << 8))) do { } while (0) ; if (__builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); if (__builtin_constant_p((1 << 8)) && __builtin_constant_p(active ? (1 << 8) : 0)) do { } while (0); (((1 << 8)) << 16 \| (active ? (1 << 8 ) : 0)); })
2285	},
2286	};
2287
2288	/* Modify the context image of pinned context with regs_context*/
2289	err = intel_context_lock_pinned(ce);
2290	if (err)
2291	return err;
2292
2293	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)(sizeof((regs_context)) / sizeof((regs_context)[0])));
2294	intel_context_unlock_pinned(ce);
2295	if (err)
2296	return err;
2297
2298	/* Apply regs_lri using LRI with pinned context */
2299	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri)(sizeof((regs_lri)) / sizeof((regs_lri)[0])), active);
2300	}
2301
2302	/*
2303	* Manages updating the per-context aspects of the OA stream
2304	* configuration across all contexts.
2305	*
2306	* The awkward consideration here is that OACTXCONTROL controls the
2307	* exponent for periodic sampling which is primarily used for system
2308	* wide profiling where we'd like a consistent sampling period even in
2309	* the face of context switches.
2310	*
2311	* Our approach of updating the register state context (as opposed to
2312	* say using a workaround batch buffer) ensures that the hardware
2313	* won't automatically reload an out-of-date timer exponent even
2314	* transiently before a WA BB could be parsed.
2315	*
2316	* This function needs to:
2317	* - Ensure the currently running context's per-context OA state is
2318	* updated
2319	* - Ensure that all existing contexts will have the correct per-context
2320	* OA state if they are scheduled for use.
2321	* - Ensure any new contexts will be initialized with the correct
2322	* per-context OA state.
2323	*
2324	* Note: it's only the RCS/Render context that has any OA state.
2325	* Note: the first flex register passed must always be R_PWR_CLK_STATE
2326	*/
2327	static int
2328	oa_configure_all_contexts(struct i915_perf_stream *stream,
2329	struct flex *regs,
2330	size_t num_regs,
2331	struct i915_active *active)
2332	{
2333	struct drm_i915_privateinteldrm_softc *i915 = stream->perf->i915;
2334	struct intel_engine_cs *engine;
2335	struct i915_gem_context ctx, cn;
2336	int err;
2337
2338	lockdep_assert_held(&stream->perf->lock)do { (void)(&stream->perf->lock); } while(0);
2339
2340	/*
2341	* The OA register config is setup through the context image. This image
2342	* might be written to by the GPU on context switch (in particular on
2343	* lite-restore). This means we can't safely update a context's image,
2344	* if this context is scheduled/submitted to run on the GPU.
2345	*
2346	* We could emit the OA register config through the batch buffer but
2347	* this might leave small interval of time where the OA unit is
2348	* configured at an invalid sampling period.
2349	*
2350	* Note that since we emit all requests from a single ring, there
2351	* is still an implicit global barrier here that may cause a high
2352	* priority context to wait for an otherwise independent low priority
2353	* context. Contexts idle at the time of reconfiguration are not
2354	* trapped behind the barrier.
2355	*/
2356	spin_lock(&i915->gem.contexts.lock)mtx_enter(&i915->gem.contexts.lock);
2357	list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link)for (ctx = ({ const __typeof( ((__typeof(ctx) )0)->link ) __mptr = ((&i915->gem.contexts.list)->next); (__typeof (ctx) )( (char )__mptr - __builtin_offsetof(__typeof(ctx) , link) );}), cn = ({ const __typeof( ((__typeof(ctx) )0)-> link ) __mptr = (ctx->link.next); (__typeof(ctx) )( (char )__mptr - __builtin_offsetof(__typeof(ctx), link) );}); & ctx->link != (&i915->gem.contexts.list); ctx = cn, cn = ({ const __typeof( ((__typeof(cn) )0)->link ) __mptr = (cn->link.next); (__typeof(cn) )( (char )__mptr - __builtin_offsetof (__typeof(*cn), link) );})) {
2358	if (!kref_get_unless_zero(&ctx->ref))
2359	continue;
2360
2361	spin_unlock(&i915->gem.contexts.lock)mtx_leave(&i915->gem.contexts.lock);
2362
2363	err = gen8_configure_context(ctx, regs, num_regs);
2364	if (err) {
2365	i915_gem_context_put(ctx);
2366	return err;
2367	}
2368
2369	spin_lock(&i915->gem.contexts.lock)mtx_enter(&i915->gem.contexts.lock);
2370	list_safe_reset_next(ctx, cn, link)cn = ({ const __typeof( ((typeof((ctx)) )0)->link ) __mptr = (((ctx)->link.next)); (typeof((ctx)) )( (char )__mptr - __builtin_offsetof(typeof(*(ctx)), link) );});
2371	i915_gem_context_put(ctx);
2372	}
2373	spin_unlock(&i915->gem.contexts.lock)mtx_leave(&i915->gem.contexts.lock);
2374
2375	/*
2376	* After updating all other contexts, we need to modify ourselves.
2377	* If we don't modify the kernel_context, we do not get events while
2378	* idle.
2379	*/
2380	for_each_uabi_engine(engine, i915)for ((engine) = (linux_root_RB_MINMAX((struct linux_root )(& (i915)->uabi_engines), -1) ? ({ const __typeof( ((struct intel_engine_cs )0)->uabi_node ) __mptr = (linux_root_RB_MINMAX((struct linux_root )(&(i915)->uabi_engines), -1)); (struct intel_engine_cs )( (char )__mptr - __builtin_offsetof(struct intel_engine_cs , uabi_node) );}) : ((void )0)); (engine); (engine) = (linux_root_RB_NEXT ((&(engine)->uabi_node)) ? ({ const __typeof( ((struct intel_engine_cs )0)->uabi_node ) __mptr = (linux_root_RB_NEXT ((&(engine)->uabi_node))); (struct intel_engine_cs )( (char )__mptr - __builtin_offsetof(struct intel_engine_cs, uabi_node ) );}) : ((void )0))) {
2381	struct intel_context *ce = engine->kernel_context;
2382
2383	if (engine->class != RENDER_CLASS0)
2384	continue;
2385
2386	regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu);
2387
2388	err = gen8_modify_self(ce, regs, num_regs, active);
2389	if (err)
2390	return err;
2391	}
2392
2393	return 0;
2394	}
2395
2396	static int
2397	gen12_configure_all_contexts(struct i915_perf_stream *stream,
2398	const struct i915_oa_config *oa_config,
2399	struct i915_active *active)
2400	{
2401	struct flex regs[] = {
2402	{
2403	GEN8_R_PWR_CLK_STATE((const i915_reg_t){ .reg = (0x20C8) }),
2404	CTX_R_PWR_CLK_STATE(0x42 + 1),
2405	},
2406	};
2407
2408	return oa_configure_all_contexts(stream,
2409	regs, ARRAY_SIZE(regs)(sizeof((regs)) / sizeof((regs)[0])),
2410	active);
2411	}
2412
2413	static int
2414	lrc_configure_all_contexts(struct i915_perf_stream *stream,
2415	const struct i915_oa_config *oa_config,
2416	struct i915_active *active)
2417	{
2418	/* The MMIO offsets for Flex EU registers aren't contiguous */
2419	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2420	#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
2421	struct flex regs[] = {
2422	{
2423	GEN8_R_PWR_CLK_STATE((const i915_reg_t){ .reg = (0x20C8) }),
2424	CTX_R_PWR_CLK_STATE(0x42 + 1),
2425	},
2426	{
2427	GEN8_OACTXCONTROL((const i915_reg_t){ .reg = (0x2360) }),
2428	stream->perf->ctx_oactxctrl_offset + 1,
2429	},
2430	{ EU_PERF_CNTL0((const i915_reg_t){ .reg = (0xe458) }), ctx_flexeuN(0) },
2431	{ EU_PERF_CNTL1((const i915_reg_t){ .reg = (0xe558) }), ctx_flexeuN(1) },
2432	{ EU_PERF_CNTL2((const i915_reg_t){ .reg = (0xe658) }), ctx_flexeuN(2) },
2433	{ EU_PERF_CNTL3((const i915_reg_t){ .reg = (0xe758) }), ctx_flexeuN(3) },
2434	{ EU_PERF_CNTL4((const i915_reg_t){ .reg = (0xe45c) }), ctx_flexeuN(4) },
2435	{ EU_PERF_CNTL5((const i915_reg_t){ .reg = (0xe55c) }), ctx_flexeuN(5) },
2436	{ EU_PERF_CNTL6((const i915_reg_t){ .reg = (0xe65c) }), ctx_flexeuN(6) },
2437	};
2438	#undef ctx_flexeuN
2439	int i;
2440
2441	regs[1].value =
2442	(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT2) \|
2443	(stream->periodic ? GEN8_OA_TIMER_ENABLE(1 << 1) : 0) \|
2444	GEN8_OA_COUNTER_RESUME(1 << 0);
2445
2446	for (i = 2; i < ARRAY_SIZE(regs)(sizeof((regs)) / sizeof((regs)[0])); i++)
2447	regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
2448
2449	return oa_configure_all_contexts(stream,
2450	regs, ARRAY_SIZE(regs)(sizeof((regs)) / sizeof((regs)[0])),
2451	active);
2452	}
2453
2454	static int
2455	gen8_enable_metric_set(struct i915_perf_stream *stream,
2456	struct i915_active *active)
2457	{
2458	struct intel_uncore *uncore = stream->uncore;
2459	struct i915_oa_config *oa_config = stream->oa_config;
2460	int ret;
2461
2462	/*
2463	* We disable slice/unslice clock ratio change reports on SKL since
2464	* they are too noisy. The HW generates a lot of redundant reports
2465	* where the ratio hasn't really changed causing a lot of redundant
2466	* work to processes and increasing the chances we'll hit buffer
2467	* overruns.
2468	*
2469	* Although we don't currently use the 'disable overrun' OABUFFER
2470	* feature it's worth noting that clock ratio reports have to be
2471	* disabled before considering to use that feature since the HW doesn't
2472	* correctly block these reports.
2473	*
2474	* Currently none of the high-level metrics we have depend on knowing
2475	* this ratio to normalize.
2476	*
2477	* Note: This register is not power context saved and restored, but
2478	* that's OK considering that we disable RC6 while the OA unit is
2479	* enabled.
2480	*
2481	* The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
2482	* be read back from automatically triggered reports, as part of the
2483	* RPT_ID field.
2484	*/
2485	if (IS_GEN_RANGE(stream->perf->i915, 9, 11)(!!((&(stream->perf->i915)->__info)->gen_mask & ( 0 + 0 + (((~0UL) >> (64 - (((11)) - 1) - 1)) & ((~0UL) << (((9)) - 1))))))) {
2486	intel_uncore_write(uncore, GEN8_OA_DEBUG((const i915_reg_t){ .reg = (0x2B04) }),
2487	_MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS \|({ typeof((1 << 5) \| (1 << 6)) _a = ((1 << 5 ) \| (1 << 6)); ({ if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p (_a) && __builtin_constant_p(_a)) do { } while (0); ( (_a) << 16 \| (_a)); }); })
2488	GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)({ typeof((1 << 5) \| (1 << 6)) _a = ((1 << 5 ) \| (1 << 6)); ({ if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p (_a) && __builtin_constant_p(_a)) do { } while (0); ( (_a) << 16 \| (_a)); }); }));
2489	}
2490
2491	/*
2492	* Update all contexts prior writing the mux configurations as we need
2493	* to make sure all slices/subslices are ON before writing to NOA
2494	* registers.
2495	*/
2496	ret = lrc_configure_all_contexts(stream, oa_config, active);
2497	if (ret)
2498	return ret;
2499
2500	return emit_oa_config(stream,
2501	stream->oa_config, oa_context(stream),
2502	active);
2503	}
2504
2505	static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
2506	{
2507	return _MASKED_FIELD(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,({ if (__builtin_constant_p((1 << 1))) do { } while (0) ; if (__builtin_constant_p((stream->sample_flags & (1<< 0)) ? 0 : (1 << 1))) do { } while (0); if (__builtin_constant_p ((1 << 1)) && __builtin_constant_p((stream-> sample_flags & (1<<0)) ? 0 : (1 << 1))) do { } while (0); (((1 << 1)) << 16 \| ((stream->sample_flags & (1<<0)) ? 0 : (1 << 1))); })
2508	(stream->sample_flags & SAMPLE_OA_REPORT) ?({ if (__builtin_constant_p((1 << 1))) do { } while (0) ; if (__builtin_constant_p((stream->sample_flags & (1<< 0)) ? 0 : (1 << 1))) do { } while (0); if (__builtin_constant_p ((1 << 1)) && __builtin_constant_p((stream-> sample_flags & (1<<0)) ? 0 : (1 << 1))) do { } while (0); (((1 << 1)) << 16 \| ((stream->sample_flags & (1<<0)) ? 0 : (1 << 1))); })
2509	0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS)({ if (__builtin_constant_p((1 << 1))) do { } while (0) ; if (__builtin_constant_p((stream->sample_flags & (1<< 0)) ? 0 : (1 << 1))) do { } while (0); if (__builtin_constant_p ((1 << 1)) && __builtin_constant_p((stream-> sample_flags & (1<<0)) ? 0 : (1 << 1))) do { } while (0); (((1 << 1)) << 16 \| ((stream->sample_flags & (1<<0)) ? 0 : (1 << 1))); });
2510	}
2511
2512	static int
2513	gen12_enable_metric_set(struct i915_perf_stream *stream,
2514	struct i915_active *active)
2515	{
2516	struct intel_uncore *uncore = stream->uncore;
2517	struct i915_oa_config *oa_config = stream->oa_config;
2518	bool_Bool periodic = stream->periodic;
2519	u32 period_exponent = stream->period_exponent;
2520	int ret;
2521
2522	intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG((const i915_reg_t){ .reg = (0xdaf8) }),
2523	/* Disable clk ratio reports, like previous Gens. */
2524	_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS \|({ typeof((1 << 5) \| (1 << 6)) _a = ((1 << 5 ) \| (1 << 6)); ({ if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p (_a) && __builtin_constant_p(_a)) do { } while (0); ( (_a) << 16 \| (_a)); }); })
2525	GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO)({ typeof((1 << 5) \| (1 << 6)) _a = ((1 << 5 ) \| (1 << 6)); ({ if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p (_a) && __builtin_constant_p(_a)) do { } while (0); ( (_a) << 16 \| (_a)); }); }) \|
2526	/*
2527	* If the user didn't require OA reports, instruct
2528	* the hardware not to emit ctx switch reports.
2529	*/
2530	oag_report_ctx_switches(stream));
2531
2532	intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL((const i915_reg_t){ .reg = (0x2b28) }), periodic ?
2533	(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME(1 << 0) \|
2534	GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE(1 << 1) \|
2535	(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT2))
2536	: 0);
2537
2538	/*
2539	* Update all contexts prior writing the mux configurations as we need
2540	* to make sure all slices/subslices are ON before writing to NOA
2541	* registers.
2542	*/
2543	ret = gen12_configure_all_contexts(stream, oa_config, active);
2544	if (ret)
2545	return ret;
2546
2547	/*
2548	* For Gen12, performance counters are context
2549	* saved/restored. Only enable it for the context that
2550	* requested this.
2551	*/
2552	if (stream->ctx) {
2553	ret = gen12_configure_oar_context(stream, active);
2554	if (ret)
2555	return ret;
2556	}
2557
2558	return emit_oa_config(stream,
2559	stream->oa_config, oa_context(stream),
2560	active);
2561	}
2562
2563	static void gen8_disable_metric_set(struct i915_perf_stream *stream)
2564	{
2565	struct intel_uncore *uncore = stream->uncore;
2566
2567	/* Reset all contexts' slices/subslices configurations. */
2568	lrc_configure_all_contexts(stream, NULL((void )0), NULL((void )0));
2569
2570	intel_uncore_rmw(uncore, GDT_CHICKEN_BITS((const i915_reg_t){ .reg = (0x9840) }), GT_NOA_ENABLE0x00000080, 0);
2571	}
2572
2573	static void gen10_disable_metric_set(struct i915_perf_stream *stream)
2574	{
2575	struct intel_uncore *uncore = stream->uncore;
2576
2577	/* Reset all contexts' slices/subslices configurations. */
2578	lrc_configure_all_contexts(stream, NULL((void )0), NULL((void )0));
2579
2580	/* Make sure we disable noa to save power. */
2581	intel_uncore_rmw(uncore, RPM_CONFIG1((const i915_reg_t){ .reg = (0x0D04) }), GEN10_GT_NOA_ENABLE(1 << 9), 0);
2582	}
2583
2584	static void gen12_disable_metric_set(struct i915_perf_stream *stream)
2585	{
2586	struct intel_uncore *uncore = stream->uncore;
2587
2588	/* Reset all contexts' slices/subslices configurations. */
2589	gen12_configure_all_contexts(stream, NULL((void )0), NULL((void )0));
2590
2591	/* disable the context save/restore or OAR counters */
2592	if (stream->ctx)
2593	gen12_configure_oar_context(stream, NULL((void *)0));
2594
2595	/* Make sure we disable noa to save power. */
2596	intel_uncore_rmw(uncore, RPM_CONFIG1((const i915_reg_t){ .reg = (0x0D04) }), GEN10_GT_NOA_ENABLE(1 << 9), 0);
2597	}
2598
2599	static void gen7_oa_enable(struct i915_perf_stream *stream)
2600	{
2601	struct intel_uncore *uncore = stream->uncore;
2602	struct i915_gem_context *ctx = stream->ctx;
2603	u32 ctx_id = stream->specific_ctx_id;
2604	bool_Bool periodic = stream->periodic;
2605	u32 period_exponent = stream->period_exponent;
2606	u32 report_format = stream->oa_buffer.format;
2607
2608	/*
2609	* Reset buf pointers so we don't forward reports from before now.
2610	*
2611	* Think carefully if considering trying to avoid this, since it
2612	* also ensures status flags and the buffer itself are cleared
2613	* in error paths, and we have checks for invalid reports based
2614	* on the assumption that certain fields are written to zeroed
2615	* memory which this helps maintains.
2616	*/
2617	gen7_init_oa_buffer(stream);
2618
2619	intel_uncore_write(uncore, GEN7_OACONTROL((const i915_reg_t){ .reg = (0x2360) }),
2620	(ctx_id & GEN7_OACONTROL_CTX_MASK0xFFFFF000) \|
2621	(period_exponent <<
2622	GEN7_OACONTROL_TIMER_PERIOD_SHIFT6) \|
2623	(periodic ? GEN7_OACONTROL_TIMER_ENABLE(1 << 5) : 0) \|
2624	(report_format << GEN7_OACONTROL_FORMAT_SHIFT2) \|
2625	(ctx ? GEN7_OACONTROL_PER_CTX_ENABLE(1 << 1) : 0) \|
2626	GEN7_OACONTROL_ENABLE(1 << 0));
2627	}
2628
2629	static void gen8_oa_enable(struct i915_perf_stream *stream)
2630	{
2631	struct intel_uncore *uncore = stream->uncore;
2632	u32 report_format = stream->oa_buffer.format;
2633
2634	/*
2635	* Reset buf pointers so we don't forward reports from before now.
2636	*
2637	* Think carefully if considering trying to avoid this, since it
2638	* also ensures status flags and the buffer itself are cleared
2639	* in error paths, and we have checks for invalid reports based
2640	* on the assumption that certain fields are written to zeroed
2641	* memory which this helps maintains.
2642	*/
2643	gen8_init_oa_buffer(stream);
2644
2645	/*
2646	* Note: we don't rely on the hardware to perform single context
2647	* filtering and instead filter on the cpu based on the context-id
2648	* field of reports
2649	*/
2650	intel_uncore_write(uncore, GEN8_OACONTROL((const i915_reg_t){ .reg = (0x2B00) }),
2651	(report_format << GEN8_OA_REPORT_FORMAT_SHIFT2) \|
2652	GEN8_OA_COUNTER_ENABLE(1 << 0));
2653	}
2654
2655	static void gen12_oa_enable(struct i915_perf_stream *stream)
2656	{
2657	struct intel_uncore *uncore = stream->uncore;
2658	u32 report_format = stream->oa_buffer.format;
2659
2660	/*
2661	* If we don't want OA reports from the OA buffer, then we don't even
2662	* need to program the OAG unit.
2663	*/
2664	if (!(stream->sample_flags & SAMPLE_OA_REPORT(1<<0)))
2665	return;
2666
2667	gen12_init_oa_buffer(stream);
2668
2669	intel_uncore_write(uncore, GEN12_OAG_OACONTROL((const i915_reg_t){ .reg = (0xdaf4) }),
2670	(report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT2) \|
2671	GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE(1 << 0));
2672	}
2673
/**
 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream. This also starts a hrtimer that will periodically
 * check for data in the circular OA buffer for notifying userspace (e.g.
 * during a read() or poll()).
 *
 * Note: in this port the implementation is compiled only when `notyet` is
 * defined; otherwise the function just reports itself through STUB().
 */
static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	STUB();
#ifdef notyet
	stream->pollin = false;

	stream->perf->ops.oa_enable(stream);

	if (stream->sample_flags & SAMPLE_OA_REPORT)
		hrtimer_start(&stream->poll_check_timer,
			      ns_to_ktime(stream->poll_oa_period),
			      HRTIMER_MODE_REL_PINNED);
#endif
}
2697
2698	static void gen7_oa_disable(struct i915_perf_stream *stream)
2699	{
2700	struct intel_uncore *uncore = stream->uncore;
2701
2702	intel_uncore_write(uncore, GEN7_OACONTROL((const i915_reg_t){ .reg = (0x2360) }), 0);
2703	if (intel_wait_for_register(uncore,
2704	GEN7_OACONTROL((const i915_reg_t){ .reg = (0x2360) }), GEN7_OACONTROL_ENABLE(1 << 0), 0,
2705	50))
2706	drm_err(&stream->perf->i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA to be disabled timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
2707	"wait for OA to be disabled timed out\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA to be disabled timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
2708	}
2709
2710	static void gen8_oa_disable(struct i915_perf_stream *stream)
2711	{
2712	struct intel_uncore *uncore = stream->uncore;
2713
2714	intel_uncore_write(uncore, GEN8_OACONTROL((const i915_reg_t){ .reg = (0x2B00) }), 0);
2715	if (intel_wait_for_register(uncore,
2716	GEN8_OACONTROL((const i915_reg_t){ .reg = (0x2B00) }), GEN8_OA_COUNTER_ENABLE(1 << 0), 0,
2717	50))
2718	drm_err(&stream->perf->i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA to be disabled timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
2719	"wait for OA to be disabled timed out\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA to be disabled timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
2720	}
2721
/*
 * Disable the Gen12 OAG unit.
 *
 * Writes 0 to GEN12_OAG_OACONTROL and waits for the OA_COUNTER_ENABLE bit
 * to read back as 0, then writes 1 to GEN12_OA_TLB_INV_CR and waits for
 * bit 0 of that register to read back as 0.  Both waits pass a timeout
 * argument of 50 (unit is defined by intel_wait_for_register(); presumably
 * milliseconds - TODO confirm).  A failed wait is only logged through
 * drm_err(); nothing is reported back to the caller.
 */
2722	static void gen12_oa_disable(struct i915_perf_stream *stream)
2723	{
2724	struct intel_uncore *uncore = stream->uncore;
2725
2726	intel_uncore_write(uncore, GEN12_OAG_OACONTROL((const i915_reg_t){ .reg = (0xdaf4) }), 0);
2727	if (intel_wait_for_register(uncore,
2728	GEN12_OAG_OACONTROL((const i915_reg_t){ .reg = (0xdaf4) }),
2729	GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE(1 << 0), 0,
2730	50))
2731	drm_err(&stream->perf->i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA to be disabled timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
2732	"wait for OA to be disabled timed out\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA to be disabled timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
2733
	/* Second step: request an OA TLB invalidation and wait for it to clear. */
2734	intel_uncore_write(uncore, GEN12_OA_TLB_INV_CR((const i915_reg_t){ .reg = (0xceec) }), 1);
2735	if (intel_wait_for_register(uncore,
2736	GEN12_OA_TLB_INV_CR((const i915_reg_t){ .reg = (0xceec) }),
2737	1, 0,
2738	50))
2739	drm_err(&stream->perf->i915->drm,printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA tlb invalidate timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__)
2740	"wait for OA tlb invalidate timed out\n")printf("drm:pid%d:%s ERROR " "[drm] " "ERROR " "wait for OA tlb invalidate timed out\n" , ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})->ci_curproc->p_p->ps_pid, __func__);
2741	}
2742
2743	/**
2744	* i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
2745	* @stream: An i915 perf stream opened for OA metrics
2746	*
2747	* Stops the OA unit from periodically writing counter reports into the
2748	* circular OA buffer. This also stops the hrtimer that periodically checks for
2749	* data in the circular OA buffer, for notifying userspace.
2750	*/
2751	static void i915_oa_stream_disable(struct i915_perf_stream *stream)
2752	{
	/*
	 * As built here this function only prints the STUB() message.  The
	 * real work (calling ops.oa_disable and, when OA report sampling is
	 * on, stopping poll_check_timer) is guarded by "#ifdef notyet" and
	 * is compiled only if that macro is defined.
	 */
2753	STUB()do { printf("%s: stub\n", __func__); } while(0);
2754	#ifdef notyet
2755	stream->perf->ops.oa_disable(stream);
2756
2757	if (stream->sample_flags & SAMPLE_OA_REPORT(1<<0))
2758	hrtimer_cancel(&stream->poll_check_timer)timeout_del(&stream->poll_check_timer);
2759	#endif
2760	}
2761
/*
 * Callback table for OA streams.  It is only compiled when "notyet" is
 * defined; i915_oa_stream_init() is the place that would install it in
 * stream->ops.
 */
2762	#ifdef notyet
2763	static const struct i915_perf_stream_ops i915_oa_stream_ops = {
2764	.destroy = i915_oa_stream_destroy,
2765	.enable = i915_oa_stream_enable,
2766	.disable = i915_oa_stream_disable,
2767	.wait_unlocked = i915_oa_wait_unlocked,
2768	.poll_wait = i915_oa_poll_wait,
2769	.read = i915_oa_read,
2770	};
2771	#endif
2772
/*
 * Enable the stream's metric set and wait for the enabling work to finish.
 *
 * A temporary i915_active is created and handed to ops.enable_metric_set();
 * when that call succeeds the function waits on the tracker with
 * TASK_UNINTERRUPTIBLE before dropping it.
 *
 * Returns -ENOMEM if the tracker cannot be created, otherwise the value
 * returned by ops.enable_metric_set().
 */
2773	static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
2774	{
2775	struct i915_active *active;
2776	int err;
2777
2778	active = i915_active_create();
2779	if (!active)
2780	return -ENOMEM12;
2781
2782	err = stream->perf->ops.enable_metric_set(stream, active);
2783	if (err == 0)
2784	__i915_active_wait(active, TASK_UNINTERRUPTIBLE0);
2785
	/* The tracker reference is dropped on both the success and error path. */
2786	i915_active_put(active);
2787	return err;
2788	}
2789
/*
 * Fill *out_sseu with the default SSEU configuration for @engine.
 *
 * The value starts as intel_sseu_from_device_info() applied to the sseu
 * info of the engine's GT.  On gen 11 the subslice mask is then replaced
 * by a mask of the low (popcount / 2) bits and the slice mask is forced
 * to 0x1.
 */
2790	static void
2791	get_default_sseu_config(struct intel_sseu *out_sseu,
2792	struct intel_engine_cs *engine)
2793	{
2794	const struct sseu_dev_info *devinfo_sseu = &engine->gt->info.sseu;
2795
2796	*out_sseu = intel_sseu_from_device_info(devinfo_sseu);
2797
2798	if (IS_GEN(engine->i915, 11)(0 + (&(engine->i915)->__info)->gen == (11))) {
2799	/*
2800	* We only need subslice count so it doesn't matter which ones
2801	* we select - just turn off low bits in the amount of half of
2802	* all available subslices per slice.
2803	*/
	/*
	 * NOTE(review): "~0" is a signed int with value -1; left-shifting a
	 * negative value is undefined in ISO C.  The shift count here is at
	 * most 4 (hweight8() / 2), so this presumably behaves as intended
	 * with the kernel's compiler flags - consider "~0U" to be explicit.
	 */
2804	out_sseu->subslice_mask =
2805	~(~0 << (hweight8(out_sseu->subslice_mask) / 2));
2806	out_sseu->slice_mask = 0x1;
2807	}
2808	}
2809
/*
 * Translate a userspace SSEU request into *out_sseu for @engine.
 *
 * Returns -EINVAL when the engine class or instance named in @drm_sseu
 * does not match @engine's uabi class/instance; otherwise returns whatever
 * i915_gem_user_to_context_sseu() returns.
 */
2810	static int
2811	get_sseu_config(struct intel_sseu *out_sseu,
2812	struct intel_engine_cs *engine,
2813	const struct drm_i915_gem_context_param_sseu *drm_sseu)
2814	{
2815	if (drm_sseu->engine.engine_class != engine->uabi_class ||
2816	drm_sseu->engine.engine_instance != engine->uabi_instance)
2817	return -EINVAL22;
2818
2819	return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
2820	}
2821
2822	/**
2823	* i915_oa_stream_init - validate combined props for OA stream and init
2824	* @stream: An i915 perf stream
2825	* @param: The open parameters passed to `DRM_I915_PERF_OPEN`
2826	* @props: The property state that configures stream (individually validated)
2827	*
2828	* While read_properties_unlocked() validates properties in isolation it
2829	* doesn't ensure that the combination necessarily makes sense.
2830	*
2831	* At this point it has been determined that userspace wants a stream of
2832	* OA metrics, but still we need to further validate the combined
2833	* properties are OK.
2834	*
2835	* If the configuration makes sense then we can allocate memory for
2836	* a circular OA buffer and apply the requested metric set configuration.
2837	*
2838	* Returns: zero on success or a negative error code.
2839	*/
2840	static int i915_oa_stream_init(struct i915_perf_stream *stream,
2841	struct drm_i915_perf_open_param *param,
2842	struct perf_open_properties *props)
2843	{
2844	STUB()do { printf("%s: stub\n", __func__); } while(0);
2845	return -ENOSYS78;
	/*
	 * As built here the function always returns -ENOSYS after printing
	 * the stub message.  Everything below is compiled only when "notyet"
	 * is defined.
	 * NOTE(review): even then the unconditional return above precedes the
	 * guarded code, so the two stub lines have to be removed before the
	 * body can run.
	 */
2846	#ifdef notyet
2847	struct drm_i915_privateinteldrm_softc *i915 = stream->perf->i915;
2848	struct i915_perf *perf = stream->perf;
2849	int format_size;
2850	int ret;
2851
2852	if (!props->engine) {
2853	DRM_DEBUG("OA engine not specified\n")__drm_dbg(DRM_UT_CORE, "OA engine not specified\n");
2854	return -EINVAL22;
2855	}
2856
2857	/*
2858	* If the sysfs metrics/ directory wasn't registered for some
2859	* reason then don't let userspace try their luck with config
2860	* IDs
2861	*/
2862	if (!perf->metrics_kobj) {
2863	DRM_DEBUG("OA metrics weren't advertised via sysfs\n")__drm_dbg(DRM_UT_CORE, "OA metrics weren't advertised via sysfs\n" );
2864	return -EINVAL22;
2865	}
2866
2867	if (!(props->sample_flags & SAMPLE_OA_REPORT(1<<0)) &&
2868	(INTEL_GEN(perf->i915)((&(perf->i915)->__info)->gen) < 12 || !stream->ctx)) {
2869	DRM_DEBUG("Only OA report sampling supported\n")__drm_dbg(DRM_UT_CORE, "Only OA report sampling supported\n");
2870	return -EINVAL22;
2871	}
2872
2873	if (!perf->ops.enable_metric_set) {
2874	DRM_DEBUG("OA unit not supported\n")__drm_dbg(DRM_UT_CORE, "OA unit not supported\n");
2875	return -ENODEV19;
2876	}
2877
2878	/*
2879	* To avoid the complexity of having to accurately filter
2880	* counter reports and marshal to the appropriate client
2881	* we currently only allow exclusive access
2882	*/
2883	if (perf->exclusive_stream) {
2884	DRM_DEBUG("OA unit already in use\n")__drm_dbg(DRM_UT_CORE, "OA unit already in use\n");
2885	return -EBUSY16;
2886	}
2887
2888	if (!props->oa_format) {
2889	DRM_DEBUG("OA report format not specified\n")__drm_dbg(DRM_UT_CORE, "OA report format not specified\n");
2890	return -EINVAL22;
2891	}
2892
2893	stream->engine = props->engine;
2894	stream->uncore = stream->engine->gt->uncore;
2895
2896	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
2897
2898	format_size = perf->oa_formats[props->oa_format].size;
2899
2900	stream->sample_flags = props->sample_flags;
2901	stream->sample_size += format_size;
2902
2903	stream->oa_buffer.format_size = format_size;
2904	if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0)({ int __ret = !!((stream->oa_buffer.format_size == 0)); if (__ret) printf("%s %s: " "%s", dev_driver_string(((&i915 ->drm))->dev), "", "drm_WARN_ON(" "stream->oa_buffer.format_size == 0" ")"); __builtin_expect(!!(__ret), 0); }))
2905	return -EINVAL22;
2906
2907	stream->hold_preemption = props->hold_preemption;
2908
2909	stream->oa_buffer.format =
2910	perf->oa_formats[props->oa_format].format;
2911
2912	stream->periodic = props->oa_periodic;
2913	if (stream->periodic)
2914	stream->period_exponent = props->oa_period_exponent;
2915
2916	if (stream->ctx) {
2917	ret = oa_get_render_ctx_id(stream);
2918	if (ret) {
2919	DRM_DEBUG("Invalid context id to filter with\n")__drm_dbg(DRM_UT_CORE, "Invalid context id to filter with\n");
2920	return ret;
2921	}
2922	}
2923
2924	ret = alloc_noa_wait(stream);
2925	if (ret) {
2926	DRM_DEBUG("Unable to allocate NOA wait batch buffer\n")__drm_dbg(DRM_UT_CORE, "Unable to allocate NOA wait batch buffer\n" );
2927	goto err_noa_wait_alloc;
2928	}
2929
2930	stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
2931	if (!stream->oa_config) {
2932	DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set)__drm_dbg(DRM_UT_CORE, "Invalid OA config id=%i\n", props-> metrics_set);
2933	ret = -EINVAL22;
2934	goto err_config;
2935	}
2936
2937	/* PRM - observability performance counters:
2938	*
2939	* OACONTROL, performance counter enable, note:
2940	*
2941	* "When this bit is set, in order to have coherent counts,
2942	* RC6 power state and trunk clock gating must be disabled.
2943	* This can be achieved by programming MMIO registers as
2944	* 0xA094=0 and 0xA090[31]=1"
2945	*
2946	* In our case we are expecting that taking pm + FORCEWAKE
2947	* references will effectively disable RC6.
2948	*/
2949	intel_engine_pm_get(stream->engine);
2950	intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
2951
2952	ret = alloc_oa_buffer(stream);
2953	if (ret)
2954	goto err_oa_buf_alloc;
2955
2956	stream->ops = &i915_oa_stream_ops;
2957
2958	perf->sseu = props->sseu;
2959	WRITE_ONCE(perf->exclusive_stream, stream)({ typeof(perf->exclusive_stream) __tmp = (stream); *(volatile typeof(perf->exclusive_stream) *)&(perf->exclusive_stream ) = __tmp; __tmp; });
2960
2961	ret = i915_perf_stream_enable_sync(stream);
2962	if (ret) {
2963	DRM_DEBUG("Unable to enable metric set\n")__drm_dbg(DRM_UT_CORE, "Unable to enable metric set\n");
2964	goto err_enable;
2965	}
2966
2967	DRM_DEBUG("opening stream oa config uuid=%s\n",__drm_dbg(DRM_UT_CORE, "opening stream oa config uuid=%s\n", stream ->oa_config->uuid)
2968	stream->oa_config->uuid)__drm_dbg(DRM_UT_CORE, "opening stream oa config uuid=%s\n", stream ->oa_config->uuid);
2969
2970	hrtimer_init(&stream->poll_check_timer,
2971	CLOCK_MONOTONIC3, HRTIMER_MODE_REL1);
2972	stream->poll_check_timer.function = oa_poll_check_timer_cb;
2973	init_waitqueue_head(&stream->poll_wq);
2974	mtx_init(&stream->oa_buffer.ptr_lock, IPL_TTY)do { (void)(((void *)0)); (void)(0); __mtx_init((&stream-> oa_buffer.ptr_lock), ((((0x9)) > 0x0 && ((0x9)) < 0x9) ? 0x9 : ((0x9)))); } while (0);
2975
2976	return 0;
2977
	/*
	 * Error unwinding: each label undoes the steps taken before the
	 * corresponding failure point and falls through to the next label.
	 */
2978	err_enable:
2979	WRITE_ONCE(perf->exclusive_stream, NULL)({ typeof(perf->exclusive_stream) __tmp = (((void *)0)); *(volatile typeof(perf->exclusive_stream) *)&(perf-> exclusive_stream) = __tmp; __tmp; });
2980	perf->ops.disable_metric_set(stream);
2981
2982	free_oa_buffer(stream);
2983
2984	err_oa_buf_alloc:
2985	free_oa_configs(stream);
2986
2987	intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
2988	intel_engine_pm_put(stream->engine);
2989
2990	err_config:
2991	free_noa_wait(stream);
2992
2993	err_noa_wait_alloc:
2994	if (stream->ctx)
2995	oa_put_render_ctx_id(stream);
2996
2997	return ret;
2998	#endif
2999	}
3000
/*
 * Apply the current OA stream's register state to context @ce.
 *
 * Does nothing unless @engine is of the render class.  Otherwise the
 * exclusive stream pointer is read once and, if a stream exists and the
 * device generation is below 12, gen8_update_reg_state_unlocked() is called
 * for @ce with that stream.
 */
3001	void i915_oa_init_reg_state(const struct intel_context *ce,
3002	const struct intel_engine_cs *engine)
3003	{
3004	struct i915_perf_stream *stream;
3005
3006	if (engine->class != RENDER_CLASS0)
3007	return;
3008
3009	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
3010	stream = READ_ONCE(engine->i915->perf.exclusive_stream)({ typeof(engine->i915->perf.exclusive_stream) __tmp = * (volatile typeof(engine->i915->perf.exclusive_stream) * )&(engine->i915->perf.exclusive_stream); membar_datadep_consumer (); __tmp; });
3011	if (stream && INTEL_GEN(stream->perf->i915)((&(stream->perf->i915)->__info)->gen) < 12)
3012	gen8_update_reg_state_unlocked(ce, stream);
3013	}
3014
3015	/**
3016	* i915_perf_read - handles read() FOP for i915 perf stream FDs
3017	* @file: An i915 perf stream file
3018	* @buf: destination buffer given by userspace
3019	* @count: the number of bytes userspace wants to read
3020	* @ppos: (inout) file seek position (unused)
3021	*
3022	* The entry point for handling a read() on a stream file descriptor from
3023	* userspace. Most of the work is left to the i915_perf_read_locked() and
3024	* &i915_perf_stream_ops->read but to save having stream implementations (of
3025	* which we might have multiple later) we handle blocking read here.
3026	*
3027	* We can also consistently treat trying to read from a disabled stream
3028	* as an IO error so implementations can assume the stream is enabled
3029	* while reading.
3030	*
3031	* Returns: The number of bytes copied or a negative error code on failure.
3032	*/
3033	static ssize_t i915_perf_read(struct file *file,
3034	char __user *buf,
3035	size_t count,
3036	loff_t *ppos)
3037	{
3038	STUB()do { printf("%s: stub\n", __func__); } while(0);
3039	return -ENOSYS78;
	/*
	 * As built here the function always returns -ENOSYS after printing
	 * the stub message; the read implementation below is compiled only
	 * when "notyet" is defined.
	 */
3040	#ifdef notyet
3041	struct i915_perf_stream *stream = file->private_data;
3042	struct i915_perf *perf = stream->perf;
3043	size_t offset = 0;
3044	int ret;
3045
3046	/* To ensure it's handled consistently we simply treat all reads of a
3047	* disabled stream as an error. In particular it might otherwise lead
3048	* to a deadlock for blocking file descriptors...
3049	*/
3050	if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT(1<<0)))
3051	return -EIO5;
3052
3053	if (!(file->f_flags & O_NONBLOCK0x0004)) {
3054	/* There's the small chance of false positives from
3055	* stream->ops->wait_unlocked.
3056	*
3057	* E.g. with single context filtering since we only wait until
3058	* oabuffer has >= 1 report we don't immediately know whether
3059	* any reports really belong to the current context
3060	*/
3061	do {
3062	ret = stream->ops->wait_unlocked(stream);
3063	if (ret)
3064	return ret;
3065
3066	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3067	ret = stream->ops->read(stream, buf, count, &offset);
3068	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3069	} while (!offset && !ret);
3070	} else {
3071	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3072	ret = stream->ops->read(stream, buf, count, &offset);
3073	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3074	}
3075
3076	/* We allow the poll checking to sometimes report false positive EPOLLIN
3077	* events where we might actually report EAGAIN on read() if there's
3078	* not really any data available. In this situation though we don't
3079	* want to enter a busy loop between poll() reporting a EPOLLIN event
3080	* and read() returning -EAGAIN. Clearing the oa.pollin state here
3081	* effectively ensures we back off until the next hrtimer callback
3082	* before reporting another EPOLLIN event.
3083	* The exception to this is if ops->read() returned -ENOSPC which means
3084	* that more OA data is available than could fit in the user provided
3085	* buffer. In this case we want the next poll() call to not block.
3086	*/
3087	if (ret != -ENOSPC28)
3088	stream->pollin = false0;
3089
3090	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
3091	return offset ?: (ret ?: -EAGAIN35);
3092	#endif
3093	}
3094
/*
 * Timer callback that checks the OA buffer for new data.
 *
 * As built here it only prints the stub message and returns 0.  The
 * guarded body (compiled only with "notyet") recovers the stream from the
 * embedded poll_check_timer, sets stream->pollin and wakes poll_wq when
 * oa_buffer_check_unlocked() reports data, and re-arms the timer by
 * stream->poll_oa_period.
 */
3095	static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
3096	{
3097	STUB()do { printf("%s: stub\n", __func__); } while(0);
3098	return 0;
3099	#ifdef notyet
3100	struct i915_perf_stream *stream =
3101	container_of(hrtimer, typeof(*stream), poll_check_timer)({ const __typeof( ((typeof(*stream) *)0)->poll_check_timer ) *__mptr = (hrtimer); (typeof(*stream) *)( (char *)__mptr - __builtin_offsetof(typeof(*stream), poll_check_timer) );});
3102
3103	if (oa_buffer_check_unlocked(stream)) {
3104	stream->pollin = true1;
3105	wake_up(&stream->poll_wq);
3106	}
3107
3108	hrtimer_forward_now(hrtimer,
3109	ns_to_ktime(stream->poll_oa_period));
3110
3111	return HRTIMER_RESTART;
3112	#endif
3113	}
3114
3115	#ifdef notyet
3116
3117	/**
3118	* i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
3119	* @stream: An i915 perf stream
3120	* @file: An i915 perf stream file
3121	* @wait: poll() state table
3122	*
3123	* For handling userspace polling on an i915 perf stream, this calls through to
3124	* &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
3125	* will be woken for new stream data.
3126	*
3127	* Note: The &perf->lock mutex has been taken to serialize
3128	* with any non-file-operation driver hooks.
3129	*
3130	* Returns: any poll events that are ready without sleeping
3131	*/
3132	static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
3133	struct file *file,
3134	poll_table *wait)
3135	{
	/* Event mask returned to the caller; EPOLLIN is the only bit set here. */
3136	__poll_t events = 0;
3137
3138	stream->ops->poll_wait(stream, file, wait);
3139
3140	/* Note: we don't explicitly check whether there's something to read
3141	* here since this path may be very hot depending on what else
3142	* userspace is polling, or on the timeout in use. We rely solely on
3143	* the hrtimer/oa_poll_check_timer_cb to notify us when there are
3144	* samples to read.
3145	*/
3146	if (stream->pollin)
3147	events |= EPOLLIN;
3148
3149	return events;
3150	}
3151
3152	/**
3153	* i915_perf_poll - call poll_wait() with a suitable wait queue for stream
3154	* @file: An i915 perf stream file
3155	* @wait: poll() state table
3156	*
3157	* For handling userspace polling on an i915 perf stream, this ensures
3158	* poll_wait() gets called with a wait queue that will be woken for new stream
3159	* data.
3160	*
3161	* Note: Implementation deferred to i915_perf_poll_locked()
3162	*
3163	* Returns: any poll events that are ready without sleeping
3164	*/
3165	static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
3166	{
3167	struct i915_perf_stream *stream = file->private_data;
3168	struct i915_perf *perf = stream->perf;
3169	__poll_t ret;
3170
	/* perf->lock is held only around the call to the _locked helper. */
3171	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3172	ret = i915_perf_poll_locked(stream, file, wait);
3173	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3174
3175	return ret;
3176	}
3177
3178	#endif /* notyet */
3179
3180	/**
3181	* i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
3182	* @stream: A disabled i915 perf stream
3183	*
3184	* [Re]enables the associated capture of data for this stream.
3185	*
3186	* If a stream was previously enabled then there's currently no intention
3187	* to provide userspace any guarantee about the preservation of previously
3188	* buffered data.
3189	*/
3190	static void i915_perf_enable_locked(struct i915_perf_stream *stream)
3191	{
	/* Already enabled: nothing to do. */
3192	if (stream->enabled)
3193	return;
3194
3195	/* Allow stream->ops->enable() to refer to this */
3196	stream->enabled = true1;
3197
	/* The enable hook is optional. */
3198	if (stream->ops->enable)
3199	stream->ops->enable(stream);
3200
	/* Applied after the enable hook; undone in i915_perf_disable_locked(). */
3201	if (stream->hold_preemption)
3202	intel_context_set_nopreempt(stream->pinned_ctx);
3203	}
3204
3205	/**
3206	* i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
3207	* @stream: An enabled i915 perf stream
3208	*
3209	* Disables the associated capture of data for this stream.
3210	*
3211	* The intention is that disabling and re-enabling a stream will ideally be
3212	* cheaper than destroying and re-opening a stream with the same configuration,
3213	* though there are no formal guarantees about what state or buffered data
3214	* must be retained between disabling and re-enabling a stream.
3215	*
3216	* Note: while a stream is disabled it's considered an error for userspace
3217	* to attempt to read from the stream (-EIO).
3218	*/
3219	static void i915_perf_disable_locked(struct i915_perf_stream *stream)
3220	{
	/* Already disabled: nothing to do. */
3221	if (!stream->enabled)
3222	return;
3223
3224	/* Allow stream->ops->disable() to refer to this */
3225	stream->enabled = false0;
3226
	/*
	 * Reverse order of i915_perf_enable_locked(): the no-preempt marking
	 * is cleared before the optional disable hook runs.
	 */
3227	if (stream->hold_preemption)
3228	intel_context_clear_nopreempt(stream->pinned_ctx);
3229
3230	if (stream->ops->disable)
3231	stream->ops->disable(stream);
3232	}
3233
/*
 * Switch @stream to the OA configuration identified by @metrics_set.
 *
 * Returns the id of the configuration that was installed on entry (it is
 * read before anything changes), -EINVAL when @metrics_set does not
 * resolve to a configuration, or the error returned by emit_oa_config().
 * When the requested configuration is already the current one no
 * reconfiguration is emitted.
 */
3234	static long i915_perf_config_locked(struct i915_perf_stream *stream,
3235	unsigned long metrics_set)
3236	{
3237	struct i915_oa_config *config;
3238	long ret = stream->oa_config->id;
3239
3240	config = i915_perf_get_oa_config(stream->perf, metrics_set);
3241	if (!config)
3242	return -EINVAL22;
3243
3244	if (config != stream->oa_config) {
3245	int err;
3246
3247	/*
3248	* If OA is bound to a specific context, emit the
3249	* reconfiguration inline from that context. The update
3250	* will then be ordered with respect to submission on that
3251	* context.
3252	*
3253	* When set globally, we use a low priority kernel context,
3254	* so it will effectively take effect when idle.
3255	*/
3256	err = emit_oa_config(stream, config, oa_context(stream), NULL((void *)0));
	/*
	 * On success the new config is swapped in and "config" now names
	 * the previous one, so the put below releases the old config.
	 * On failure (or when nothing changed) the put releases the
	 * object returned by the lookup above (presumably a reference
	 * taken by i915_perf_get_oa_config() - TODO confirm).
	 */
3257	if (!err)
3258	config = xchg(&stream->oa_config, config)__sync_lock_test_and_set(&stream->oa_config, config);
3259	else
3260	ret = err;
3261	}
3262
3263	i915_oa_config_put(config);
3264
3265	return ret;
3266	}
3267
3268	/**
3269	* i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
3270	* @stream: An i915 perf stream
3271	* @cmd: the ioctl request
3272	* @arg: the ioctl data
3273	*
3274	* Note: The &perf->lock mutex has been taken to serialize
3275	* with any non-file-operation driver hooks.
3276	*
3277	* Returns: zero on success or a negative error code. Returns -EINVAL for
3278	* an unknown ioctl request.
3279	*/
3280	static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
3281	unsigned int cmd,
3282	unsigned long arg)
3283	{
	/*
	 * ENABLE and DISABLE always report 0; CONFIG forwards @arg as the
	 * metrics set id and returns the helper's result.  Any other request
	 * falls out of the switch and yields -EINVAL.
	 */
3284	switch (cmd) {
3285	case I915_PERF_IOCTL_ENABLE((unsigned long)0x20000000 | ((0 & 0x1fff) << 16) | ((('i')) << 8) | ((0x0))):
3286	i915_perf_enable_locked(stream);
3287	return 0;
3288	case I915_PERF_IOCTL_DISABLE((unsigned long)0x20000000 | ((0 & 0x1fff) << 16) | ((('i')) << 8) | ((0x1))):
3289	i915_perf_disable_locked(stream);
3290	return 0;
3291	case I915_PERF_IOCTL_CONFIG((unsigned long)0x20000000 | ((0 & 0x1fff) << 16) | ((('i')) << 8) | ((0x2))):
3292	return i915_perf_config_locked(stream, arg);
3293	}
3294
3295	return -EINVAL22;
3296	}
3297
3298	/**
3299	* i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3300	* @file: An i915 perf stream file
3301	* @cmd: the ioctl request
3302	* @arg: the ioctl data
3303	*
3304	* Implementation deferred to i915_perf_ioctl_locked().
3305	*
3306	* Returns: zero on success or a negative error code. Returns -EINVAL for
3307	* an unknown ioctl request.
3308	*/
3309	static long i915_perf_ioctl(struct file *file,
3310	unsigned int cmd,
3311	unsigned long arg)
3312	{
3313	STUB()do { printf("%s: stub\n", __func__); } while(0);
3314	return -ENOSYS78;
	/*
	 * As built here the function always returns -ENOSYS after printing
	 * the stub message.  The guarded body below (compiled only with
	 * "notyet") takes perf->lock around i915_perf_ioctl_locked().
	 */
3315	#ifdef notyet
3316	struct i915_perf_stream *stream = file->private_data;
3317	struct i915_perf *perf = stream->perf;
3318	long ret;
3319
3320	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3321	ret = i915_perf_ioctl_locked(stream, cmd, arg);
3322	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3323
3324	return ret;
3325	#endif
3326	}
3327
3328	/**
3329	* i915_perf_destroy_locked - destroy an i915 perf stream
3330	* @stream: An i915 perf stream
3331	*
3332	* Frees all resources associated with the given i915 perf @stream, disabling
3333	* any associated data capture in the process.
3334	*
3335	* Note: The &perf->lock mutex has been taken to serialize
3336	* with any non-file-operation driver hooks.
3337	*/
3338	static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
3339	{
	/* Stop capture first so the destroy hook sees a disabled stream. */
3340	if (stream->enabled)
3341	i915_perf_disable_locked(stream);
3342
	/* The destroy hook is optional. */
3343	if (stream->ops->destroy)
3344	stream->ops->destroy(stream);
3345
	/* Drop the context reference held in stream->ctx, if there is one. */
3346	if (stream->ctx)
3347	i915_gem_context_put(stream->ctx);
3348
	/* @stream must not be used by the caller after this point. */
3349	kfree(stream);
3350	}
3351
3352	#ifdef notyet
3353
3354	/**
3355	* i915_perf_release - handles userspace close() of a stream file
3356	* @inode: anonymous inode associated with file
3357	* @file: An i915 perf stream file
3358	*
3359	* Cleans up any resources associated with an open i915 perf stream file.
3360	*
3361	* NB: close() can't really fail from the userspace point of view.
3362	*
3363	* Returns: zero on success or a negative error code.
3364	*/
3365	static int i915_perf_release(struct inode *inode, struct file *file)
3366	{
3367	struct i915_perf_stream *stream = file->private_data;
	/* Cached up front: @stream is freed by i915_perf_destroy_locked(). */
3368	struct i915_perf *perf = stream->perf;
3369
3370	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3371	i915_perf_destroy_locked(stream);
3372	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3373
3374	/* Release the reference the perf stream kept on the driver. */
3375	drm_dev_put(&perf->i915->drm);
3376
3377	return 0;
3378	}
3379
3380
/*
 * File operations for a perf stream file descriptor.  This table sits in a
 * region guarded by "#ifdef notyet"; i915_perf_open_ioctl_locked() passes
 * it to anon_inode_getfd().
 */
3381	static const struct file_operations fops = {
3382	.owner = THIS_MODULE((void *)0),
3383	.llseek = no_llseek,
3384	.release = i915_perf_release,
3385	.poll = i915_perf_poll,
3386	.read = i915_perf_read,
3387	.unlocked_ioctl = i915_perf_ioctl,
3388	/* Our ioctl have no arguments, so it's safe to use the same function
3389	* to handle 32bits compatibility.
3390	*/
3391	.compat_ioctl = i915_perf_ioctl,
3392	};
3393
3394	#endif /* notyet */
3395
3396	/**
3397	* i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
3398	* @perf: i915 perf instance
3399	* @param: The open parameters passed to `DRM_I915_PERF_OPEN`
3400	* @props: individually validated u64 property value pairs
3401	* @file: drm file
3402	*
3403	* See i915_perf_open_ioctl() for interface details.
3404	*
3405	* Implements further stream config validation and stream initialization on
3406	* behalf of i915_perf_open_ioctl() with the &perf->lock mutex
3407	* taken to serialize with any non-file-operation driver hooks.
3408	*
3409	* Note: at this point the @props have only been validated in isolation and
3410	* it's still necessary to validate that the combination of properties makes
3411	* sense.
3412	*
3413	* In the case where userspace is interested in OA unit metrics then further
3414	* config validation and stream initialization details will be handled by
3415	* i915_oa_stream_init(). The code here should only validate config state that
3416	* will be relevant to all stream types / backends.
3417	*
3418	* Returns: zero on success or a negative error code.
3419	*/
3420	static int
3421	i915_perf_open_ioctl_locked(struct i915_perf *perf,
3422	struct drm_i915_perf_open_param *param,
3423	struct perf_open_properties *props,
3424	struct drm_file *file)
3425	{
3426	STUB()do { printf("%s: stub\n", __func__); } while(0);
3427	return -ENOSYS78;
	/*
	 * As built here the function always returns -ENOSYS after printing
	 * the stub message; the implementation below is compiled only when
	 * "notyet" is defined.
	 */
3428	#ifdef notyet
3429	struct i915_gem_context *specific_ctx = NULL((void *)0);
3430	struct i915_perf_stream *stream = NULL((void *)0);
3431	unsigned long f_flags = 0;
3432	bool_Bool privileged_op = true1;
3433	int stream_fd;
3434	int ret;
3435
3436	if (props->single_context) {
3437	u32 ctx_handle = props->ctx_handle;
3438	struct drm_i915_file_private *file_priv = file->driver_priv;
3439
3440	specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
3441	if (!specific_ctx) {
3442	DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",__drm_dbg(DRM_UT_CORE, "Failed to look up context with ID %u for opening perf stream\n" , ctx_handle)
3443	ctx_handle)__drm_dbg(DRM_UT_CORE, "Failed to look up context with ID %u for opening perf stream\n" , ctx_handle);
3444	ret = -ENOENT2;
3445	goto err;
3446	}
3447	}
3448
3449	/*
3450	* On Haswell the OA unit supports clock gating off for a specific
3451	* context and in this mode there's no visibility of metrics for the
3452	* rest of the system, which we consider acceptable for a
3453	* non-privileged client.
3454	*
3455	* For Gen8->11 the OA unit no longer supports clock gating off for a
3456	* specific context and the kernel can't securely stop the counters
3457	* from updating as system-wide / global values. Even though we can
3458	* filter reports based on the included context ID we can't block
3459	* clients from seeing the raw / global counter values via
3460	* MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
3461	* enable the OA unit by default.
3462	*
3463	* For Gen12+ we gain a new OAR unit that only monitors the RCS on a
3464	* per context basis. So we can relax requirements there if the user
3465	* doesn't request global stream access (i.e. query based sampling
3466	* using MI_RECORD_PERF_COUNT).
3467	*/
3468	if (IS_HASWELL(perf->i915)IS_PLATFORM(perf->i915, INTEL_HASWELL) && specific_ctx)
3469	privileged_op = false0;
3470	else if (IS_GEN(perf->i915, 12)(0 + (&(perf->i915)->__info)->gen == (12)) && specific_ctx &&
3471	(props->sample_flags & SAMPLE_OA_REPORT(1<<0)) == 0)
3472	privileged_op = false0;
3473
3474	if (props->hold_preemption) {
3475	if (!props->single_context) {
3476	DRM_DEBUG("preemption disable with no context\n")__drm_dbg(DRM_UT_CORE, "preemption disable with no context\n" );
3477	ret = -EINVAL22;
3478	goto err;
3479	}
3480	privileged_op = true1;
3481	}
3482
3483	/*
3484	* Asking for SSEU configuration is a privileged operation.
3485	*/
3486	if (props->has_sseu)
3487	privileged_op = true1;
3488	else
3489	get_default_sseu_config(&props->sseu, props->engine);
3490
3491	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
3492	* we check a dev.i915.perf_stream_paranoid sysctl option
3493	* to determine if it's ok to access system wide OA counters
3494	* without CAP_PERFMON or CAP_SYS_ADMIN privileges.
3495	*/
3496	if (privileged_op &&
3497	i915_perf_stream_paranoid && !perfmon_capable()) {
3498	DRM_DEBUG("Insufficient privileges to open i915 perf stream\n")__drm_dbg(DRM_UT_CORE, "Insufficient privileges to open i915 perf stream\n" );
3499	ret = -EACCES13;
3500	goto err_ctx;
3501	}
3502
3503	stream = kzalloc(sizeof(*stream), GFP_KERNEL(0x0001 | 0x0004));
3504	if (!stream) {
3505	ret = -ENOMEM12;
3506	goto err_ctx;
3507	}
3508
3509	stream->perf = perf;
3510	stream->ctx = specific_ctx;
3511	stream->poll_oa_period = props->poll_oa_period;
3512
3513	ret = i915_oa_stream_init(stream, param, props);
3514	if (ret)
3515	goto err_alloc;
3516
3517	/* we avoid simply assigning stream->sample_flags = props->sample_flags
3518	* to have _stream_init check the combination of sample flags more
3519	* thoroughly, but still this is the expected result at this point.
3520	*/
3521	if (WARN_ON(stream->sample_flags != props->sample_flags)({ int __ret = !!((stream->sample_flags != props->sample_flags )); if (__ret) printf("%s", "WARN_ON(" "stream->sample_flags != props->sample_flags" ")"); __builtin_expect(!!(__ret), 0); })) {
3522	ret = -ENODEV19;
3523	goto err_flags;
3524	}
3525
3526	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC(1<<0))
3527	f_flags |= O_CLOEXEC0x10000;
3528	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK(1<<1))
3529	f_flags |= O_NONBLOCK0x0004;
3530
3531	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
3532	if (stream_fd < 0) {
3533	ret = stream_fd;
3534	goto err_flags;
3535	}
3536
3537	if (!(param->flags & I915_PERF_FLAG_DISABLED(1<<2)))
3538	i915_perf_enable_locked(stream);
3539
3540	/* Take a reference on the driver that will be kept with stream_fd
3541	* until its release.
3542	*/
3543	drm_dev_get(&perf->i915->drm);
3544
3545	return stream_fd;
3546
	/*
	 * Error unwinding: each label releases what was acquired before the
	 * corresponding failure point and falls through to the next label.
	 */
3547	err_flags:
3548	if (stream->ops->destroy)
3549	stream->ops->destroy(stream);
3550	err_alloc:
3551	kfree(stream);
3552	err_ctx:
3553	if (specific_ctx)
3554	i915_gem_context_put(specific_ctx);
3555	err:
3556	return ret;
3557	#endif
3558	}
3559
/*
 * Convert an OA timer exponent to a duration: the tick count passed to
 * i915_cs_timestamp_ticks_to_ns() is 2ULL << exponent, i.e.
 * 2^(exponent + 1) ticks.  The caller must keep @exponent below 63 so the
 * 64-bit shift stays defined.
 */
3560	static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
3561	{
3562	return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent);
3563	}
3564
3565	/**
3566	* read_properties_unlocked - validate + copy userspace stream open properties
3567	* @perf: i915 perf instance
3568	* @uprops: The array of u64 key value pairs given by userspace
3569	* @n_props: The number of key value pairs expected in @uprops
3570	* @props: The stream configuration built up while validating properties
3571	*
3572	* Note this function only validates properties in isolation it doesn't
3573	* validate that the combination of properties makes sense or that all
3574	* properties necessary for a particular kind of stream have been set.
3575	*
3576	* Note that there currently aren't any ordering requirements for properties so
3577	* we shouldn't validate or assume anything about ordering here. This doesn't
3578	* rule out defining new properties with ordering requirements in the future.
3579	*/
3580	static int read_properties_unlocked(struct i915_perf *perf,
3581	u64 __user *uprops,
3582	u32 n_props,
3583	struct perf_open_properties *props)
3584	{
3585	u64 __user *uprop = uprops;
3586	u32 i;
3587	int ret;
3588
3589	memset(props, 0, sizeof(struct perf_open_properties))__builtin_memset((props), (0), (sizeof(struct perf_open_properties )));
3590	props->poll_oa_period = DEFAULT_POLL_PERIOD_NS(1000000000L / 200);
3591
3592	if (!n_props) {
3593	DRM_DEBUG("No i915 perf properties given\n")__drm_dbg(DRM_UT_CORE, "No i915 perf properties given\n");
3594	return -EINVAL22;
3595	}
3596
3597	/* At the moment we only support using i915-perf on the RCS. */
3598	props->engine = intel_engine_lookup_user(perf->i915,
3599	I915_ENGINE_CLASS_RENDER,
3600	0);
3601	if (!props->engine) {
3602	DRM_DEBUG("No RENDER-capable engines\n")__drm_dbg(DRM_UT_CORE, "No RENDER-capable engines\n");
3603	return -EINVAL22;
3604	}
3605
3606	/* Considering that ID = 0 is reserved and assuming that we don't
3607	* (currently) expect any configurations to ever specify duplicate
3608	* values for a particular property ID then the last _PROP_MAX value is
3609	* one greater than the maximum number of properties we expect to get
3610	* from userspace.
3611	*/
3612	if (n_props >= DRM_I915_PERF_PROP_MAX) {
3613	DRM_DEBUG("More i915 perf properties specified than exist\n")__drm_dbg(DRM_UT_CORE, "More i915 perf properties specified than exist\n" );
3614	return -EINVAL22;
3615	}
3616
3617	for (i = 0; i < n_props; i++) {
3618	u64 oa_period, oa_freq_hz;
3619	u64 id, value;
3620
3621	ret = get_user(id, uprop)-copyin(uprop, &(id), sizeof(id));
3622	if (ret)
3623	return ret;
3624
3625	ret = get_user(value, uprop + 1)-copyin(uprop + 1, &(value), sizeof(value));
3626	if (ret)
3627	return ret;
3628
3629	if (id == 0 \|\| id >= DRM_I915_PERF_PROP_MAX) {
3630	DRM_DEBUG("Unknown i915 perf property ID\n")__drm_dbg(DRM_UT_CORE, "Unknown i915 perf property ID\n");
3631	return -EINVAL22;
3632	}
3633
3634	switch ((enum drm_i915_perf_property_id)id) {
3635	case DRM_I915_PERF_PROP_CTX_HANDLE:
3636	props->single_context = 1;
3637	props->ctx_handle = value;
3638	break;
3639	case DRM_I915_PERF_PROP_SAMPLE_OA:
3640	if (value)
3641	props->sample_flags \|= SAMPLE_OA_REPORT(1<<0);
3642	break;
3643	case DRM_I915_PERF_PROP_OA_METRICS_SET:
3644	if (value == 0) {
3645	DRM_DEBUG("Unknown OA metric set ID\n")__drm_dbg(DRM_UT_CORE, "Unknown OA metric set ID\n");
3646	return -EINVAL22;
3647	}
3648	props->metrics_set = value;
3649	break;
3650	case DRM_I915_PERF_PROP_OA_FORMAT:
3651	if (value == 0 \|\| value >= I915_OA_FORMAT_MAX) {
3652	DRM_DEBUG("Out-of-range OA report format %llu\n",__drm_dbg(DRM_UT_CORE, "Out-of-range OA report format %llu\n" , value)
3653	value)__drm_dbg(DRM_UT_CORE, "Out-of-range OA report format %llu\n" , value);
3654	return -EINVAL22;
3655	}
3656	if (!perf->oa_formats[value].size) {
3657	DRM_DEBUG("Unsupported OA report format %llu\n",__drm_dbg(DRM_UT_CORE, "Unsupported OA report format %llu\n", value)
3658	value)__drm_dbg(DRM_UT_CORE, "Unsupported OA report format %llu\n", value);
3659	return -EINVAL22;
3660	}
3661	props->oa_format = value;
3662	break;
3663	case DRM_I915_PERF_PROP_OA_EXPONENT:
3664	if (value > OA_EXPONENT_MAX31) {
3665	DRM_DEBUG("OA timer exponent too high (> %u)\n",__drm_dbg(DRM_UT_CORE, "OA timer exponent too high (> %u)\n" , 31)
3666	OA_EXPONENT_MAX)__drm_dbg(DRM_UT_CORE, "OA timer exponent too high (> %u)\n" , 31);
3667	return -EINVAL22;
3668	}
3669
3670	/* Theoretically we can program the OA unit to sample
3671	* e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
3672	* for BXT. We don't allow such high sampling
3673	* frequencies by default unless root.
3674	*/
3675
3676	BUILD_BUG_ON(sizeof(oa_period) != 8)extern char _ctassert[(!(sizeof(oa_period) != 8)) ? 1 : -1 ] __attribute__ ((__unused__));
3677	oa_period = oa_exponent_to_ns(perf, value);
3678
3679	/* This check is primarily to ensure that oa_period <=
3680	* UINT32_MAX (before passing to do_div which only
3681	* accepts a u32 denominator), but we can also skip
3682	* checking anything < 1Hz which implicitly can't be
3683	* limited via an integer oa_max_sample_rate.
3684	*/
3685	if (oa_period <= NSEC_PER_SEC1000000000L) {
3686	u64 tmp = NSEC_PER_SEC1000000000L;
3687	do_div(tmp, oa_period)({ uint32_t __base = (oa_period); uint32_t __rem = ((uint64_t )(tmp)) % __base; (tmp) = ((uint64_t)(tmp)) / __base; __rem; } );
3688	oa_freq_hz = tmp;
3689	} else
3690	oa_freq_hz = 0;
3691
3692	if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) {
3693	DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n",__drm_dbg(DRM_UT_CORE, "OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n" , i915_oa_max_sample_rate)
3694	i915_oa_max_sample_rate)__drm_dbg(DRM_UT_CORE, "OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n" , i915_oa_max_sample_rate);
3695	return -EACCES13;
3696	}
3697
3698	props->oa_periodic = true1;
3699	props->oa_period_exponent = value;
3700	break;
3701	case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
3702	props->hold_preemption = !!value;
3703	break;
3704	case DRM_I915_PERF_PROP_GLOBAL_SSEU: {
3705	struct drm_i915_gem_context_param_sseu user_sseu;
3706
3707	if (copy_from_user(&user_sseu,
3708	u64_to_user_ptr(value)((void *)(uintptr_t)(value)),
3709	sizeof(user_sseu))) {
3710	DRM_DEBUG("Unable to copy global sseu parameter\n")__drm_dbg(DRM_UT_CORE, "Unable to copy global sseu parameter\n" );
3711	return -EFAULT14;
3712	}
3713
3714	ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
3715	if (ret) {
3716	DRM_DEBUG("Invalid SSEU configuration\n")__drm_dbg(DRM_UT_CORE, "Invalid SSEU configuration\n");
3717	return ret;
3718	}
3719	props->has_sseu = true1;
3720	break;
3721	}
3722	case DRM_I915_PERF_PROP_POLL_OA_PERIOD:
3723	if (value < 100000 /* 100us */) {
3724	DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n",__drm_dbg(DRM_UT_CORE, "OA availability timer too small (%lluns < 100us)\n" , value)
3725	value)__drm_dbg(DRM_UT_CORE, "OA availability timer too small (%lluns < 100us)\n" , value);
3726	return -EINVAL22;
3727	}
3728	props->poll_oa_period = value;
3729	break;
3730	case DRM_I915_PERF_PROP_MAX:
3731	MISSING_CASE(id)({ int __ret = !!(1); if (__ret) printf("Missing case (%s == %ld)\n" , "id", (long)(id)); __builtin_expect(!!(__ret), 0); });
3732	return -EINVAL22;
3733	}
3734
3735	uprop += 2;
3736	}
3737
3738	return 0;
3739	}
3740
3741	/**
3742	* i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
3743	* @dev: drm device
3744	* @data: ioctl data copied from userspace (unvalidated)
3745	* @file: drm file
3746	*
3747	* Validates the stream open parameters given by userspace including flags
3748	* and an array of u64 key, value pair properties.
3749	*
3750	* Very little is assumed up front about the nature of the stream being
3751	* opened (for instance we don't assume it's for periodic OA unit metrics). An
3752	* i915-perf stream is expected to be a suitable interface for other forms of
3753	* buffered data written by the GPU besides periodic OA metrics.
3754	*
3755	* Note we copy the properties from userspace outside of the i915 perf
3756	* mutex to avoid an awkward lockdep with mmap_lock.
3757	*
3758	* Most of the implementation details are handled by
3759	* i915_perf_open_ioctl_locked() after taking the &perf->lock
3760	* mutex for serializing with any non-file-operation driver hooks.
3761	*
3762	* Return: A newly opened i915 Perf stream file descriptor or negative
3763	* error code on failure.
3764	*/
3765	int i915_perf_open_ioctl(struct drm_device dev, void data,
3766	struct drm_file *file)
3767	{
3768	struct i915_perf *perf = &to_i915(dev)->perf;
3769	struct drm_i915_perf_open_param *param = data;
3770	struct perf_open_properties props;
3771	u32 known_open_flags;
3772	int ret;
3773
3774	if (!perf->i915) {
3775	DRM_DEBUG("i915 perf interface not available for this system\n")__drm_dbg(DRM_UT_CORE, "i915 perf interface not available for this system\n" );
3776	return -ENOTSUPP91;
3777	}
3778
3779	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC(1<<0) \|
3780	I915_PERF_FLAG_FD_NONBLOCK(1<<1) \|
3781	I915_PERF_FLAG_DISABLED(1<<2);
3782	if (param->flags & ~known_open_flags) {
3783	DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n")__drm_dbg(DRM_UT_CORE, "Unknown drm_i915_perf_open_param flag\n" );
3784	return -EINVAL22;
3785	}
3786
3787	ret = read_properties_unlocked(perf,
3788	u64_to_user_ptr(param->properties_ptr)((void *)(uintptr_t)(param->properties_ptr)),
3789	param->num_properties,
3790	&props);
3791	if (ret)
3792	return ret;
3793
3794	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3795	ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
3796	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3797
3798	return ret;
3799	}
3800
3801	/**
3802	* i915_perf_register - exposes i915-perf to userspace
3803	* @i915: i915 device instance
3804	*
3805	* In particular OA metric sets are advertised under a sysfs metrics/
3806	* directory allowing userspace to enumerate valid IDs that can be
3807	* used to open an i915-perf stream.
3808	*/
3809	void i915_perf_register(struct drm_i915_privateinteldrm_softc *i915)
3810	{
3811	#ifdef __linux__
3812	struct i915_perf *perf = &i915->perf;
3813
3814	if (!perf->i915)
3815	return;
3816
3817	/* To be sure we're synchronized with an attempted
3818	* i915_perf_open_ioctl(); considering that we register after
3819	* being exposed to userspace.
3820	*/
3821	mutex_lock(&perf->lock)rw_enter_write(&perf->lock);
3822
3823	perf->metrics_kobj =
3824	kobject_create_and_add("metrics",
3825	&i915->drm.primary->kdev->kobj);
3826
3827	mutex_unlock(&perf->lock)rw_exit_write(&perf->lock);
3828	#endif
3829	}
3830
3831	/**
3832	* i915_perf_unregister - hide i915-perf from userspace
3833	* @i915: i915 device instance
3834	*
3835	* i915-perf state cleanup is split up into an 'unregister' and
3836	* 'deinit' phase where the interface is first hidden from
3837	* userspace by i915_perf_unregister() before cleaning up
3838	* remaining state in i915_perf_fini().
3839	*/
3840	void i915_perf_unregister(struct drm_i915_privateinteldrm_softc *i915)
3841	{
3842	struct i915_perf *perf = &i915->perf;
3843
3844	if (!perf->metrics_kobj)
3845	return;
3846
3847	kobject_put(perf->metrics_kobj);
3848	perf->metrics_kobj = NULL((void *)0);
3849	}
3850
3851	static bool_Bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
3852	{
3853	static const i915_reg_t flex_eu_regs[] = {
3854	EU_PERF_CNTL0((const i915_reg_t){ .reg = (0xe458) }),
3855	EU_PERF_CNTL1((const i915_reg_t){ .reg = (0xe558) }),
3856	EU_PERF_CNTL2((const i915_reg_t){ .reg = (0xe658) }),
3857	EU_PERF_CNTL3((const i915_reg_t){ .reg = (0xe758) }),
3858	EU_PERF_CNTL4((const i915_reg_t){ .reg = (0xe45c) }),
3859	EU_PERF_CNTL5((const i915_reg_t){ .reg = (0xe55c) }),
3860	EU_PERF_CNTL6((const i915_reg_t){ .reg = (0xe65c) }),
3861	};
3862	int i;
3863
3864	for (i = 0; i < ARRAY_SIZE(flex_eu_regs)(sizeof((flex_eu_regs)) / sizeof((flex_eu_regs)[0])); i++) {
3865	if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
3866	return true1;
3867	}
3868	return false0;
3869	}
3870
/* True when the raw offset addr lies in the inclusive range [start, end]. */
#define ADDR_IN_RANGE(addr, start, end) \
	((addr) >= (start) && \
	 (addr) <= (end))

/* Same inclusive range test, with the bounds given as i915_reg_t registers. */
#define REG_IN_RANGE(addr, start, end) \
	((addr) >= i915_mmio_reg_offset(start) && \
	 (addr) <= i915_mmio_reg_offset(end))

/* True when the raw offset addr equals the offset of register mmio. */
#define REG_EQUAL(addr, mmio) \
	((addr) == i915_mmio_reg_offset(mmio))
3881
3882	static bool_Bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3883	{
3884	return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x2710) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x272c) }))) \|\|
3885	REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x2740) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x275c) }))) \|\|
3886	REG_IN_RANGE(addr, OACEC0_0, OACEC7_1)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x2770) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x27ac) })));
3887	}
3888
3889	static bool_Bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3890	{
3891	return REG_EQUAL(addr, HALF_SLICE_CHICKEN2)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0xe180) }))) \|\|
3892	REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x9800) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x9888) }))) \|\|
3893	REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x91B8) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x91C4) }))) \|\|
3894	REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x91C8) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x91CC) })));
3895	}
3896
3897	static bool_Bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3898	{
3899	return gen7_is_valid_mux_addr(perf, addr) \|\|
3900	REG_EQUAL(addr, WAIT_FOR_RC6_EXIT)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x20CC) }))) \|\|
3901	REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8))((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x0D00) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x0D0C + (8) * 4) })));
3902	}
3903
3904	static bool_Bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3905	{
3906	return gen8_is_valid_mux_addr(perf, addr) \|\|
3907	REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x9884) }))) \|\|
3908	REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x91C8) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x91DC) })));
3909	}
3910
3911	static bool_Bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3912	{
3913	return gen7_is_valid_mux_addr(perf, addr) \|\|
3914	ADDR_IN_RANGE(addr, 0x25100, 0x2FF90)((addr) >= (0x25100) && (addr) <= (0x2FF90)) \|\|
3915	REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x9E80) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0x9EA4) }))) \|\|
3916	REG_EQUAL(addr, HSW_MBVID2_MISR0)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x9EC0) })));
3917	}
3918
3919	static bool_Bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3920	{
3921	return gen7_is_valid_mux_addr(perf, addr) \|\|
3922	ADDR_IN_RANGE(addr, 0x182300, 0x1823A4)((addr) >= (0x182300) && (addr) <= (0x1823A4));
3923	}
3924
3925	static bool_Bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
3926	{
3927	return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0xd900) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0xd91c) }))) \|\|
3928	REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0xd920) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0xd93c) }))) \|\|
3929	REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0xd940) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0xd97c) }))) \|\|
3930	REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1)((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0xdc00) })) && (addr) <= i915_mmio_reg_offset((( const i915_reg_t){ .reg = (0xdc3c) }))) \|\|
3931	REG_EQUAL(addr, GEN12_OAA_DBG_REG)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0xdc44) }))) \|\|
3932	REG_EQUAL(addr, GEN12_OAG_OA_PESS)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x2b2c) }))) \|\|
3933	REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0xdc40) })));
3934	}
3935
3936	static bool_Bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
3937	{
3938	return REG_EQUAL(addr, NOA_WRITE)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x9888) }))) \|\|
3939	REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x9884) }))) \|\|
3940	REG_EQUAL(addr, GDT_CHICKEN_BITS)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x9840) }))) \|\|
3941	REG_EQUAL(addr, WAIT_FOR_RC6_EXIT)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x20CC) }))) \|\|
3942	REG_EQUAL(addr, RPM_CONFIG0)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x0D00) }))) \|\|
3943	REG_EQUAL(addr, RPM_CONFIG1)((addr) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = ( 0x0D04) }))) \|\|
3944	REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8))((addr) >= i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x0D0C + (0) * 4) })) && (addr) <= i915_mmio_reg_offset (((const i915_reg_t){ .reg = (0x0D0C + (8) * 4) })));
3945	}
3946
3947	static u32 mask_reg_value(u32 reg, u32 val)
3948	{
3949	/* HALF_SLICE_CHICKEN2 is programmed with a the
3950	* WaDisableSTUnitPowerOptimization workaround. Make sure the value
3951	* programmed by userspace doesn't change this.
3952	*/
3953	if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)((reg) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0xe180 ) }))))
3954	val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE)({ typeof((1 << 13)) _a = ((1 << 13)); ({ if (__builtin_constant_p (_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a) && __builtin_constant_p (_a)) do { } while (0); ((_a) << 16 \| (_a)); }); });
3955
3956	/* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function
3957	* indicated by its name and a bunch of selection fields used by OA
3958	* configs.
3959	*/
3960	if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)((reg) == i915_mmio_reg_offset(((const i915_reg_t){ .reg = (0x20CC ) }))))
3961	val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE)({ typeof((1 << 0)) _a = ((1 << 0)); ({ if (__builtin_constant_p (_a)) do { } while (0); if (__builtin_constant_p(_a)) do { } while (0); if (__builtin_constant_p(_a) && __builtin_constant_p (_a)) do { } while (0); ((_a) << 16 \| (_a)); }); });
3962
3963	return val;
3964	}
3965
3966	static struct i915_oa_reg alloc_oa_regs(struct i915_perf perf,
3967	bool_Bool (is_valid)(struct i915_perf perf, u32 addr),
3968	u32 __user *regs,
3969	u32 n_regs)
3970	{
3971	struct i915_oa_reg *oa_regs;
3972	int err;
3973	u32 i;
3974
3975	if (!n_regs)
3976	return NULL((void *)0);
3977
3978	/* No is_valid function means we're not allowing any register to be programmed. */
3979	GEM_BUG_ON(!is_valid)((void)0);
3980	if (!is_valid)
3981	return ERR_PTR(-EINVAL22);
3982
3983	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL(0x0001 \| 0x0004));
3984	if (!oa_regs)
3985	return ERR_PTR(-ENOMEM12);
3986
3987	for (i = 0; i < n_regs; i++) {
3988	u32 addr, value;
3989
3990	err = get_user(addr, regs)-copyin(regs, &(addr), sizeof(addr));
3991	if (err)
3992	goto addr_err;
3993
3994	if (!is_valid(perf, addr)) {
3995	DRM_DEBUG("Invalid oa_reg address: %X\n", addr)__drm_dbg(DRM_UT_CORE, "Invalid oa_reg address: %X\n", addr);
3996	err = -EINVAL22;
3997	goto addr_err;
3998	}
3999
4000	err = get_user(value, regs + 1)-copyin(regs + 1, &(value), sizeof(value));
4001	if (err)
4002	goto addr_err;
4003
4004	oa_regs[i].addr = _MMIO(addr)((const i915_reg_t){ .reg = (addr) });
4005	oa_regs[i].value = mask_reg_value(addr, value);
4006
4007	regs += 2;
4008	}
4009
4010	return oa_regs;
4011
4012	addr_err:
4013	kfree(oa_regs);
4014	return ERR_PTR(err);
4015	}
4016
4017	static ssize_t show_dynamic_id(struct device *dev,
4018	struct device_attribute *attr,
4019	char *buf)
4020	{
4021	STUB()do { printf("%s: stub\n", __func__); } while(0);
4022	return -ENOSYS78;
4023	#ifdef notyet
4024	struct i915_oa_config *oa_config =
4025	container_of(attr, typeof(oa_config), sysfs_metric_id)({ const __typeof( ((typeof(oa_config) )0)->sysfs_metric_id ) __mptr = (attr); (typeof(oa_config) )( (char )__mptr - __builtin_offsetof(typeof(oa_config), sysfs_metric_id) );});
4026
4027	return sprintf(buf, "%d\n", oa_config->id);
4028	#endif
4029	}
4030
4031	static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
4032	struct i915_oa_config *oa_config)
4033	{
4034	STUB()do { printf("%s: stub\n", __func__); } while(0);
4035	return -ENOSYS78;
4036	#ifdef notyet
4037	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
4038	oa_config->sysfs_metric_id.attr.name = "id";
4039	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
4040	oa_config->sysfs_metric_id.show = show_dynamic_id;
4041	oa_config->sysfs_metric_id.store = NULL((void *)0);
4042
4043	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
4044	oa_config->attrs[1] = NULL((void *)0);
4045
4046	oa_config->sysfs_metric.name = oa_config->uuid;
4047	oa_config->sysfs_metric.attrs = oa_config->attrs;
4048
4049	return sysfs_create_group(perf->metrics_kobj,0
4050	&oa_config->sysfs_metric)0;
4051	#endif
4052	}
4053
4054	/**
4055	* i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
4056	* @dev: drm device
4057	* @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
4058	* userspace (unvalidated)
4059	* @file: drm file
4060	*
4061	* Validates the submitted OA register to be saved into a new OA config that
4062	* can then be used for programming the OA unit and its NOA network.
4063	*
4064	* Returns: A new allocated config number to be used with the perf open ioctl
4065	* or a negative error code on failure.
4066	*/
4067	int i915_perf_add_config_ioctl(struct drm_device dev, void data,
4068	struct drm_file *file)
4069	{
4070	STUB()do { printf("%s: stub\n", __func__); } while(0);
4071	return -ENOSYS78;
4072	#ifdef notyet
4073	struct i915_perf *perf = &to_i915(dev)->perf;
4074	struct drm_i915_perf_oa_config *args = data;
4075	struct i915_oa_config oa_config, tmp;
4076	struct i915_oa_reg *regs;
4077	int err, id;
4078
4079	if (!perf->i915) {
4080	DRM_DEBUG("i915 perf interface not available for this system\n")__drm_dbg(DRM_UT_CORE, "i915 perf interface not available for this system\n" );
4081	return -ENOTSUPP91;
4082	}
4083
4084	if (!perf->metrics_kobj) {
4085	DRM_DEBUG("OA metrics weren't advertised via sysfs\n")__drm_dbg(DRM_UT_CORE, "OA metrics weren't advertised via sysfs\n" );
4086	return -EINVAL22;
4087	}
4088
4089	if (i915_perf_stream_paranoid && !perfmon_capable()) {
4090	DRM_DEBUG("Insufficient privileges to add i915 OA config\n")__drm_dbg(DRM_UT_CORE, "Insufficient privileges to add i915 OA config\n" );
4091	return -EACCES13;
4092	}
4093
4094	if ((!args->mux_regs_ptr \|\| !args->n_mux_regs) &&
4095	(!args->boolean_regs_ptr \|\| !args->n_boolean_regs) &&
4096	(!args->flex_regs_ptr \|\| !args->n_flex_regs)) {
4097	DRM_DEBUG("No OA registers given\n")__drm_dbg(DRM_UT_CORE, "No OA registers given\n");
4098	return -EINVAL22;
4099	}
4100
4101	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL(0x0001 \| 0x0004));
4102	if (!oa_config) {
4103	DRM_DEBUG("Failed to allocate memory for the OA config\n")__drm_dbg(DRM_UT_CORE, "Failed to allocate memory for the OA config\n" );
4104	return -ENOMEM12;
4105	}
4106
4107	oa_config->perf = perf;
4108	kref_init(&oa_config->ref);
4109
4110	if (!uuid_is_valid(args->uuid)) {
4111	DRM_DEBUG("Invalid uuid format for OA config\n")__drm_dbg(DRM_UT_CORE, "Invalid uuid format for OA config\n");
4112	err = -EINVAL22;
4113	goto reg_err;
4114	}
4115
4116	/* Last character in oa_config->uuid will be 0 because oa_config is
4117	* kzalloc.
4118	*/
4119	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid))__builtin_memcpy((oa_config->uuid), (args->uuid), (sizeof (args->uuid)));
4120
4121	oa_config->mux_regs_len = args->n_mux_regs;
4122	regs = alloc_oa_regs(perf,
4123	perf->ops.is_valid_mux_reg,
4124	u64_to_user_ptr(args->mux_regs_ptr)((void *)(uintptr_t)(args->mux_regs_ptr)),
4125	args->n_mux_regs);
4126
4127	if (IS_ERR(regs)) {
4128	DRM_DEBUG("Failed to create OA config for mux_regs\n")__drm_dbg(DRM_UT_CORE, "Failed to create OA config for mux_regs\n" );
4129	err = PTR_ERR(regs);
4130	goto reg_err;
4131	}
4132	oa_config->mux_regs = regs;
4133
4134	oa_config->b_counter_regs_len = args->n_boolean_regs;
4135	regs = alloc_oa_regs(perf,
4136	perf->ops.is_valid_b_counter_reg,
4137	u64_to_user_ptr(args->boolean_regs_ptr)((void *)(uintptr_t)(args->boolean_regs_ptr)),
4138	args->n_boolean_regs);
4139
4140	if (IS_ERR(regs)) {
4141	DRM_DEBUG("Failed to create OA config for b_counter_regs\n")__drm_dbg(DRM_UT_CORE, "Failed to create OA config for b_counter_regs\n" );
4142	err = PTR_ERR(regs);
4143	goto reg_err;
4144	}
4145	oa_config->b_counter_regs = regs;
4146
4147	if (INTEL_GEN(perf->i915)((&(perf->i915)->__info)->gen) < 8) {
4148	if (args->n_flex_regs != 0) {
4149	err = -EINVAL22;
4150	goto reg_err;
4151	}
4152	} else {
4153	oa_config->flex_regs_len = args->n_flex_regs;
4154	regs = alloc_oa_regs(perf,
4155	perf->ops.is_valid_flex_reg,
4156	u64_to_user_ptr(args->flex_regs_ptr)((void *)(uintptr_t)(args->flex_regs_ptr)),
4157	args->n_flex_regs);
4158
4159	if (IS_ERR(regs)) {
4160	DRM_DEBUG("Failed to create OA config for flex_regs\n")__drm_dbg(DRM_UT_CORE, "Failed to create OA config for flex_regs\n" );
4161	err = PTR_ERR(regs);
4162	goto reg_err;
4163	}
4164	oa_config->flex_regs = regs;
4165	}
4166
4167	err = mutex_lock_interruptible(&perf->metrics_lock);
4168	if (err)
4169	goto reg_err;
4170
4171	/* We shouldn't have too many configs, so this iteration shouldn't be
4172	* too costly.
4173	*/
4174	idr_for_each_entry(&perf->metrics_idr, tmp, id)for (id = 0; ((tmp) = idr_get_next(&perf->metrics_idr, &(id))) != ((void *)0); id++) {
4175	if (!strcmp(tmp->uuid, oa_config->uuid)) {
4176	DRM_DEBUG("OA config already exists with this uuid\n")__drm_dbg(DRM_UT_CORE, "OA config already exists with this uuid\n" );
4177	err = -EADDRINUSE48;
4178	goto sysfs_err;
4179	}
4180	}
4181
4182	err = create_dynamic_oa_sysfs_entry(perf, oa_config);
4183	if (err) {
4184	DRM_DEBUG("Failed to create sysfs entry for OA config\n")__drm_dbg(DRM_UT_CORE, "Failed to create sysfs entry for OA config\n" );
4185	goto sysfs_err;
4186	}
4187
4188	/* Config id 0 is invalid, id 1 for kernel stored test config. */
4189	oa_config->id = idr_alloc(&perf->metrics_idr,
4190	oa_config, 2,
4191	0, GFP_KERNEL(0x0001 \| 0x0004));
4192	if (oa_config->id < 0) {
4193	DRM_DEBUG("Failed to create sysfs entry for OA config\n")__drm_dbg(DRM_UT_CORE, "Failed to create sysfs entry for OA config\n" );
4194	err = oa_config->id;
4195	goto sysfs_err;
4196	}
4197
4198	mutex_unlock(&perf->metrics_lock)rw_exit_write(&perf->metrics_lock);
4199
4200	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id)__drm_dbg(DRM_UT_CORE, "Added config %s id=%i\n", oa_config-> uuid, oa_config->id);
4201
4202	return oa_config->id;
4203
4204	sysfs_err:
4205	mutex_unlock(&perf->metrics_lock)rw_exit_write(&perf->metrics_lock);
4206	reg_err:
4207	i915_oa_config_put(oa_config);
4208	DRM_DEBUG("Failed to add new OA config\n")__drm_dbg(DRM_UT_CORE, "Failed to add new OA config\n");
4209	return err;
4210	#endif
4211	}
4212
4213	/**
4214	* i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
4215	* @dev: drm device
4216	* @data: ioctl data (pointer to u64 integer) copied from userspace
4217	* @file: drm file
4218	*
4219	* Configs can be removed while being used, the will stop appearing in sysfs
4220	* and their content will be freed when the stream using the config is closed.
4221	*
4222	* Returns: 0 on success or a negative error code on failure.
4223	*/
4224	int i915_perf_remove_config_ioctl(struct drm_device dev, void data,
4225	struct drm_file *file)
4226	{
4227	struct i915_perf *perf = &to_i915(dev)->perf;
4228	u64 *arg = data;
4229	struct i915_oa_config *oa_config;
4230	int ret;
4231
4232	if (!perf->i915) {
4233	DRM_DEBUG("i915 perf interface not available for this system\n")__drm_dbg(DRM_UT_CORE, "i915 perf interface not available for this system\n" );
4234	return -ENOTSUPP91;
4235	}
4236
4237	if (i915_perf_stream_paranoid && !perfmon_capable()) {
4238	DRM_DEBUG("Insufficient privileges to remove i915 OA config\n")__drm_dbg(DRM_UT_CORE, "Insufficient privileges to remove i915 OA config\n" );
4239	return -EACCES13;
4240	}
4241
4242	ret = mutex_lock_interruptible(&perf->metrics_lock);
4243	if (ret)
4244	return ret;
4245
4246	oa_config = idr_find(&perf->metrics_idr, *arg);
4247	if (!oa_config) {
4248	DRM_DEBUG("Failed to remove unknown OA config\n")__drm_dbg(DRM_UT_CORE, "Failed to remove unknown OA config\n" );
4249	ret = -ENOENT2;
4250	goto err_unlock;
4251	}
4252
4253	GEM_BUG_ON(*arg != oa_config->id)((void)0);
4254
4255	sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
4256
4257	idr_remove(&perf->metrics_idr, *arg);
4258
4259	mutex_unlock(&perf->metrics_lock)rw_exit_write(&perf->metrics_lock);
4260
4261	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id)__drm_dbg(DRM_UT_CORE, "Removed config %s id=%i\n", oa_config ->uuid, oa_config->id);
4262
4263	i915_oa_config_put(oa_config);
4264
4265	return 0;
4266
4267	err_unlock:
4268	mutex_unlock(&perf->metrics_lock)rw_exit_write(&perf->metrics_lock);
4269	return ret;
4270	}
4271
#ifdef notyet
/*
 * sysctl tables, nested as dev -> i915 -> the two entries in oa_table.
 * Not compiled: the whole section is under "notyet".
 */
static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = SYSCTL_ZERO,
	 .extra2 = SYSCTL_ONE,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = SYSCTL_ZERO,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};

static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};

static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};
#endif
4315
4316	/**
4317	* i915_perf_init - initialize i915-perf state on module bind
4318	* @i915: i915 device instance
4319	*
4320	* Initializes i915-perf state without exposing anything to userspace.
4321	*
4322	* Note: i915-perf initialization is split into an 'init' and 'register'
4323	* phase with the i915_perf_register() exposing state to userspace.
4324	*/
4325	void i915_perf_init(struct drm_i915_privateinteldrm_softc *i915)
4326	{
4327	struct i915_perf *perf = &i915->perf;
4328
4329	/* XXX const struct i915_perf_ops! */
4330
4331	if (IS_HASWELL(i915)IS_PLATFORM(i915, INTEL_HASWELL)) {
4332	perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
4333	perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
4334	perf->ops.is_valid_flex_reg = NULL((void *)0);
4335	perf->ops.enable_metric_set = hsw_enable_metric_set;
4336	perf->ops.disable_metric_set = hsw_disable_metric_set;
4337	perf->ops.oa_enable = gen7_oa_enable;
4338	perf->ops.oa_disable = gen7_oa_disable;
4339	perf->ops.read = gen7_oa_read;
4340	perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
4341
4342	perf->oa_formats = hsw_oa_formats;
4343	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)((&(i915)->__info)->has_logical_ring_contexts)) {
4344	/* Note: that although we could theoretically also support the
4345	* legacy ringbuffer mode on BDW (and earlier iterations of
4346	* this driver, before upstreaming did this) it didn't seem
4347	* worth the complexity to maintain now that BDW+ enable
4348	* execlist mode by default.
4349	*/
4350	perf->ops.read = gen8_oa_read;
4351
4352	if (IS_GEN_RANGE(i915, 8, 9)(!!((&(i915)->__info)->gen_mask & ( 0 + 0 + ((( ~0UL) >> (64 - (((9)) - 1) - 1)) & ((~0UL) << (((8)) - 1))))))) {
4353	perf->oa_formats = gen8_plus_oa_formats;
4354
4355	perf->ops.is_valid_b_counter_reg =
4356	gen7_is_valid_b_counter_addr;
4357	perf->ops.is_valid_mux_reg =
4358	gen8_is_valid_mux_addr;
4359	perf->ops.is_valid_flex_reg =
4360	gen8_is_valid_flex_addr;
4361
4362	if (IS_CHERRYVIEW(i915)IS_PLATFORM(i915, INTEL_CHERRYVIEW)) {
4363	perf->ops.is_valid_mux_reg =
4364	chv_is_valid_mux_addr;
4365	}
4366
4367	perf->ops.oa_enable = gen8_oa_enable;
4368	perf->ops.oa_disable = gen8_oa_disable;
4369	perf->ops.enable_metric_set = gen8_enable_metric_set;
4370	perf->ops.disable_metric_set = gen8_disable_metric_set;
4371	perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4372
4373	if (IS_GEN(i915, 8)(0 + (&(i915)->__info)->gen == (8))) {
4374	perf->ctx_oactxctrl_offset = 0x120;
4375	perf->ctx_flexeu0_offset = 0x2ce;
4376
4377	perf->gen8_valid_ctx_bit = BIT(25)(1UL << (25));
4378	} else {
4379	perf->ctx_oactxctrl_offset = 0x128;
4380	perf->ctx_flexeu0_offset = 0x3de;
4381
4382	perf->gen8_valid_ctx_bit = BIT(16)(1UL << (16));
4383	}
4384	} else if (IS_GEN_RANGE(i915, 10, 11)(!!((&(i915)->__info)->gen_mask & ( 0 + 0 + ((( ~0UL) >> (64 - (((11)) - 1) - 1)) & ((~0UL) << (((10)) - 1))))))) {
4385	perf->oa_formats = gen8_plus_oa_formats;
4386
4387	perf->ops.is_valid_b_counter_reg =
4388	gen7_is_valid_b_counter_addr;
4389	perf->ops.is_valid_mux_reg =
4390	gen10_is_valid_mux_addr;
4391	perf->ops.is_valid_flex_reg =
4392	gen8_is_valid_flex_addr;
4393
4394	perf->ops.oa_enable = gen8_oa_enable;
4395	perf->ops.oa_disable = gen8_oa_disable;
4396	perf->ops.enable_metric_set = gen8_enable_metric_set;
4397	perf->ops.disable_metric_set = gen10_disable_metric_set;
4398	perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
4399
4400	if (IS_GEN(i915, 10)(0 + (&(i915)->__info)->gen == (10))) {
4401	perf->ctx_oactxctrl_offset = 0x128;
4402	perf->ctx_flexeu0_offset = 0x3de;
4403	} else {
4404	perf->ctx_oactxctrl_offset = 0x124;
4405	perf->ctx_flexeu0_offset = 0x78e;
4406	}
4407	perf->gen8_valid_ctx_bit = BIT(16)(1UL << (16));
4408	} else if (IS_GEN(i915, 12)(0 + (&(i915)->__info)->gen == (12))) {
4409	perf->oa_formats = gen12_oa_formats;
4410
4411	perf->ops.is_valid_b_counter_reg =
4412	gen12_is_valid_b_counter_addr;
4413	perf->ops.is_valid_mux_reg =
4414	gen12_is_valid_mux_addr;
4415	perf->ops.is_valid_flex_reg =
4416	gen8_is_valid_flex_addr;
4417
4418	perf->ops.oa_enable = gen12_oa_enable;
4419	perf->ops.oa_disable = gen12_oa_disable;
4420	perf->ops.enable_metric_set = gen12_enable_metric_set;
4421	perf->ops.disable_metric_set = gen12_disable_metric_set;
4422	perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
4423
4424	perf->ctx_flexeu0_offset = 0;
4425	perf->ctx_oactxctrl_offset = 0x144;
4426	}
4427	}
4428
4429	if (perf->ops.enable_metric_set) {
4430	rw_init(&perf->lock, "perflk")_rw_init_flags(&perf->lock, "perflk", 0, ((void *)0));
4431
4432	oa_sample_rate_hard_limit =
4433	RUNTIME_INFO(i915)(&(i915)->__runtime)->cs_timestamp_frequency_hz / 2;
4434
4435	rw_init(&perf->metrics_lock, "metricslk")_rw_init_flags(&perf->metrics_lock, "metricslk", 0, (( void *)0));
4436	idr_init(&perf->metrics_idr);
4437
4438	/* We set up some ratelimit state to potentially throttle any
4439	* _NOTES about spurious, invalid OA reports which we don't
4440	* forward to userspace.
4441	*
4442	* We print a _NOTE about any throttling when closing the
4443	* stream instead of waiting until driver _fini which no one
4444	* would ever see.
4445	*
4446	* Using the same limiting factors as printk_ratelimit()
4447	*/
4448	#ifdef notyet
4449	ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
4450	/* Since we use a DRM_NOTE for spurious reports it would be
4451	* inconsistent to let __ratelimit() automatically print a
4452	* warning for throttling.
4453	*/
4454	ratelimit_set_flags(&perf->spurious_report_rs,
4455	RATELIMIT_MSG_ON_RELEASE);
4456	#endif
4457
4458	ratelimit_state_init(&perf->tail_pointer_race,
4459	5 * HZ, 10);
4460	ratelimit_set_flags(&perf->tail_pointer_race,
4461	RATELIMIT_MSG_ON_RELEASE);
4462
4463	atomic64_set(&perf->noa_programming_delay,({ typeof((&perf->noa_programming_delay)) __tmp = ((500 1000)); (volatile typeof((&perf->noa_programming_delay )) )&((&perf->noa_programming_delay)) = __tmp; __tmp ; })
4464	500 * 1000 /* 500us /)({ typeof((&perf->noa_programming_delay)) __tmp = ((500 * 1000)); (volatile typeof((&perf->noa_programming_delay )) )&((&perf->noa_programming_delay)) = __tmp; __tmp ; });
4465
4466	perf->i915 = i915;
4467	}
4468	}
4469
/*
 * destroy_config - idr_for_each() callback that releases one OA config
 * @id:   IDR key of the entry (unused)
 * @p:    the stored entry; handed unchanged to i915_oa_config_put()
 * @data: opaque cookie passed through idr_for_each() (unused)
 *
 * i915_perf_fini() runs this over every entry in perf->metrics_idr before
 * destroying the IDR.  Both pointer parameters must be `void *` to match the
 * callback prototype idr_for_each() expects.
 *
 * Always returns 0.
 */
static int destroy_config(int id, void *p, void *data)
{
	i915_oa_config_put(p);
	return 0;
}
4475
/*
 * i915_perf_sysctl_register - hook up the perf sysctl table
 *
 * The registration call is only compiled in when `notyet` is defined;
 * otherwise this function has an empty body.
 */
void i915_perf_sysctl_register(void)
{
#if defined(notyet)
	sysctl_header = register_sysctl_table(dev_root);
#endif /* notyet */
}
4482
/*
 * i915_perf_sysctl_unregister - tear down the perf sysctl table
 *
 * The unregistration call is only compiled in when `notyet` is defined;
 * otherwise this function has an empty body.
 */
void i915_perf_sysctl_unregister(void)
{
#if defined(notyet)
	unregister_sysctl_table(sysctl_header);
#endif /* notyet */
}
4489
4490	/**
4491	* i915_perf_fini - Counter part to i915_perf_init()
4492	* @i915: i915 device instance
4493	*/
4494	void i915_perf_fini(struct drm_i915_privateinteldrm_softc *i915)
4495	{
4496	struct i915_perf *perf = &i915->perf;
4497
4498	if (!perf->i915)
4499	return;
4500
4501	idr_for_each(&perf->metrics_idr, destroy_config, perf);
4502	idr_destroy(&perf->metrics_idr);
4503
4504	memset(&perf->ops, 0, sizeof(perf->ops))__builtin_memset((&perf->ops), (0), (sizeof(perf->ops )));
4505	perf->i915 = NULL((void *)0);
4506	}
4507
/**
 * i915_perf_ioctl_version - Version of the i915-perf subsystem
 *
 * Userspace uses this version number to detect which features are available.
 *
 * Return: the current i915-perf interface revision.
 */
int i915_perf_ioctl_version(void)
{
	/*
	 * Revision history:
	 *
	 * 1: Initial version
	 *   I915_PERF_IOCTL_ENABLE
	 *   I915_PERF_IOCTL_DISABLE
	 *
	 * 2: Added runtime modification of OA config.
	 *   I915_PERF_IOCTL_CONFIG
	 *
	 * 3: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
	 *    preemption on a particular context so that performance data is
	 *    accessible from a delta of MI_RPC reports without looking at the
	 *    OA buffer.
	 *
	 * 4: Add DRM_I915_PERF_PROP_ALLOWED_SSEU to limit what contexts can
	 *    be run for the duration of the performance recording based on
	 *    their SSEU configuration.
	 *
	 * 5: Add DRM_I915_PERF_PROP_POLL_OA_PERIOD parameter that controls the
	 *    interval for the hrtimer used to check for OA data.
	 */
	enum { perf_ioctl_revision = 5 };

	return perf_ioctl_revision;
}
4537
4538	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)0
4539	#include "selftests/i915_perf.c"
4540	#endif