Bug Summary

File: dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
Warning: line 4076, column 3
Value stored to 'TimeForFetchingMetaPTE' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name display_mode_vba_util_32.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -target-feature +sse -target-feature +sse2 -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I 
/usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o 
/home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
1/*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25#include "display_mode_vba_util_32.h"
26#include "../dml_inline_defs.h"
27#include "display_mode_vba_32.h"
28#include "../display_mode_lib.h"
29
/*
 * NOTE(review): presumably the FMT buffer width limit that applies to 4:2:0
 * output; the macro is not used inside this section, confirm at its use site.
 */
#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
32unsigned int dml32_dscceComputeDelay(
33 unsigned int bpc,
34 double BPP,
35 unsigned int sliceWidth,
36 unsigned int numSlices,
37 enum output_format_class pixelFormat,
38 enum output_encoder_class Output)
39{
40 // valid bpc = source bits per component in the set of {8, 10, 12}
41 // valid bpp = increments of 1/16 of a bit
42 // min = 6/7/8 in N420/N422/444, respectively
43 // max = such that compression is 1:1
44 //valid sliceWidth = number of pixels per slice line,
45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49 // fixed value
50 unsigned int rcModelSize = 8192;
51
52 // N422/N420 operate at 2 pixels per clock
53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 Delay, pixels;
55
56 if (pixelFormat == dm_420)
57 pixelsPerClock = 2;
58 else if (pixelFormat == dm_n422)
59 pixelsPerClock = 2;
60 // #all other modes operate at 1 pixel per clock
61 else
62 pixelsPerClock = 1;
63
64 //initial transmit delay as per PPS
65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67 //compute ssm delay
68 if (bpc == 8)
69 D = 81;
70 else if (bpc == 10)
71 D = 89;
72 else
73 D = 113;
74
75 //divide by pixel per cycle to compute slice width as seen by DSC
76 w = sliceWidth / pixelsPerClock;
77
78 //422 mode has an additional cycle of delay
79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 s = 0;
81 else
82 s = 1;
83
84 //main calculation for the dscce
85 ix = initalXmitDelay + 45;
86 wx = (w + 2) / 3;
87 p = 3 * wx - w;
88 l0 = ix / w;
89 a = ix + p * l0;
90 ax = (a + 2) / 3 + D + 6 + 1;
91 L = (ax + wx - 1) / wx;
92 if ((ix % w) == 0 && p != 0)
93 lstall = 1;
94 else
95 lstall = 0;
96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 pixels = Delay * 3 * pixelsPerClock;
100
101#ifdef __DML_VBA_DEBUG__
102 dml_print("DML::%s: bpc: %d\n", __func__, bpc){do { } while(0); };
103 dml_print("DML::%s: BPP: %f\n", __func__, BPP){do { } while(0); };
104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth){do { } while(0); };
105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices){do { } while(0); };
106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat){do { } while(0); };
107 dml_print("DML::%s: Output: %d\n", __func__, Output){do { } while(0); };
108 dml_print("DML::%s: pixels: %d\n", __func__, pixels){do { } while(0); };
109#endif
110
111 return pixels;
112}
113
114unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115{
116 unsigned int Delay = 0;
117
118 if (pixelFormat == dm_420) {
119 // sfr
120 Delay = Delay + 2;
121 // dsccif
122 Delay = Delay + 0;
123 // dscc - input deserializer
124 Delay = Delay + 3;
125 // dscc gets pixels every other cycle
126 Delay = Delay + 2;
127 // dscc - input cdc fifo
128 Delay = Delay + 12;
129 // dscc gets pixels every other cycle
130 Delay = Delay + 13;
131 // dscc - cdc uncertainty
132 Delay = Delay + 2;
133 // dscc - output cdc fifo
134 Delay = Delay + 7;
135 // dscc gets pixels every other cycle
136 Delay = Delay + 3;
137 // dscc - cdc uncertainty
138 Delay = Delay + 2;
139 // dscc - output serializer
140 Delay = Delay + 1;
141 // sft
142 Delay = Delay + 1;
143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 // sfr
145 Delay = Delay + 2;
146 // dsccif
147 Delay = Delay + 1;
148 // dscc - input deserializer
149 Delay = Delay + 5;
150 // dscc - input cdc fifo
151 Delay = Delay + 25;
152 // dscc - cdc uncertainty
153 Delay = Delay + 2;
154 // dscc - output cdc fifo
155 Delay = Delay + 10;
156 // dscc - cdc uncertainty
157 Delay = Delay + 2;
158 // dscc - output serializer
159 Delay = Delay + 1;
160 // sft
161 Delay = Delay + 1;
162 } else {
163 // sfr
164 Delay = Delay + 2;
165 // dsccif
166 Delay = Delay + 0;
167 // dscc - input deserializer
168 Delay = Delay + 3;
169 // dscc - input cdc fifo
170 Delay = Delay + 12;
171 // dscc - cdc uncertainty
172 Delay = Delay + 2;
173 // dscc - output cdc fifo
174 Delay = Delay + 7;
175 // dscc - output serializer
176 Delay = Delay + 1;
177 // dscc - cdc uncertainty
178 Delay = Delay + 2;
179 // sft
180 Delay = Delay + 1;
181 }
182
183 return Delay;
184}
185
186
187bool_Bool IsVertical(enum dm_rotation_angle Scan)
188{
189 bool_Bool is_vert = false0;
190
191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 is_vert = true1;
193 else
194 is_vert = false0;
195 return is_vert;
196}
197
198void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 double HRatio,
200 double HRatioChroma,
201 double VRatio,
202 double VRatioChroma,
203 double MaxDCHUBToPSCLThroughput,
204 double MaxPSCLToLBThroughput,
205 double PixelClock,
206 enum source_format_class SourcePixelFormat,
207 unsigned int HTaps,
208 unsigned int HTapsChroma,
209 unsigned int VTaps,
210 unsigned int VTapsChroma,
211
212 /* output */
213 double *PSCL_THROUGHPUT,
214 double *PSCL_THROUGHPUT_CHROMA,
215 double *DPPCLKUsingSingleDPP)
216{
217 double DPPCLKUsingSingleDPPLuma;
218 double DPPCLKUsingSingleDPPChroma;
219
220 if (HRatio > 1) {
221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 dml_ceil((double) HTaps / 6.0, 1.0));
223 } else {
224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 }
226
227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 *PSCL_THROUGHPUT, 1);
229
230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 SourcePixelFormat != dm_rgbe_alpha)) {
235 *PSCL_THROUGHPUT_CHROMA = 0;
236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 } else {
238 if (HRatioChroma > 1) {
239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 } else {
242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 }
244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 }
250}
251
252void dml32_CalculateBytePerPixelAndBlockSizes(
253 enum source_format_class SourcePixelFormat,
254 enum dm_swizzle_mode SurfaceTiling,
255
256 /* Output */
257 unsigned int *BytePerPixelY,
258 unsigned int *BytePerPixelC,
259 double *BytePerPixelDETY,
260 double *BytePerPixelDETC,
261 unsigned int *BlockHeight256BytesY,
262 unsigned int *BlockHeight256BytesC,
263 unsigned int *BlockWidth256BytesY,
264 unsigned int *BlockWidth256BytesC,
265 unsigned int *MacroTileHeightY,
266 unsigned int *MacroTileHeightC,
267 unsigned int *MacroTileWidthY,
268 unsigned int *MacroTileWidthC)
269{
270 if (SourcePixelFormat == dm_444_64) {
271 *BytePerPixelDETY = 8;
272 *BytePerPixelDETC = 0;
273 *BytePerPixelY = 8;
274 *BytePerPixelC = 0;
275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 *BytePerPixelDETY = 4;
277 *BytePerPixelDETC = 0;
278 *BytePerPixelY = 4;
279 *BytePerPixelC = 0;
280 } else if (SourcePixelFormat == dm_444_16) {
281 *BytePerPixelDETY = 2;
282 *BytePerPixelDETC = 0;
283 *BytePerPixelY = 2;
284 *BytePerPixelC = 0;
285 } else if (SourcePixelFormat == dm_444_8) {
286 *BytePerPixelDETY = 1;
287 *BytePerPixelDETC = 0;
288 *BytePerPixelY = 1;
289 *BytePerPixelC = 0;
290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
291 *BytePerPixelDETY = 4;
292 *BytePerPixelDETC = 1;
293 *BytePerPixelY = 4;
294 *BytePerPixelC = 1;
295 } else if (SourcePixelFormat == dm_420_8) {
296 *BytePerPixelDETY = 1;
297 *BytePerPixelDETC = 2;
298 *BytePerPixelY = 1;
299 *BytePerPixelC = 2;
300 } else if (SourcePixelFormat == dm_420_12) {
301 *BytePerPixelDETY = 2;
302 *BytePerPixelDETC = 4;
303 *BytePerPixelY = 2;
304 *BytePerPixelC = 4;
305 } else {
306 *BytePerPixelDETY = 4.0 / 3;
307 *BytePerPixelDETC = 8.0 / 3;
308 *BytePerPixelY = 2;
309 *BytePerPixelC = 4;
310 }
311#ifdef __DML_VBA_DEBUG__
312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat){do { } while(0); };
313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY){do { } while(0); };
314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC){do { } while(0); };
315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY){do { } while(0); };
316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC){do { } while(0); };
317#endif
318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 || SourcePixelFormat == dm_444_16
320 || SourcePixelFormat == dm_444_8
321 || SourcePixelFormat == dm_mono_16
322 || SourcePixelFormat == dm_mono_8
323 || SourcePixelFormat == dm_rgbe)) {
324 if (SurfaceTiling == dm_sw_linear)
325 *BlockHeight256BytesY = 1;
326 else if (SourcePixelFormat == dm_444_64)
327 *BlockHeight256BytesY = 4;
328 else if (SourcePixelFormat == dm_444_8)
329 *BlockHeight256BytesY = 16;
330 else
331 *BlockHeight256BytesY = 8;
332
333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 *BlockHeight256BytesC = 0;
335 *BlockWidth256BytesC = 0;
336 } else {
337 if (SurfaceTiling == dm_sw_linear) {
338 *BlockHeight256BytesY = 1;
339 *BlockHeight256BytesC = 1;
340 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341 *BlockHeight256BytesY = 8;
342 *BlockHeight256BytesC = 16;
343 } else if (SourcePixelFormat == dm_420_8) {
344 *BlockHeight256BytesY = 16;
345 *BlockHeight256BytesC = 8;
346 } else {
347 *BlockHeight256BytesY = 8;
348 *BlockHeight256BytesC = 8;
349 }
350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 }
353#ifdef __DML_VBA_DEBUG__
354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY){do { } while(0); };
355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY){do { } while(0); };
356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC){do { } while(0); };
357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC){do { } while(0); };
358#endif
359
360 if (SurfaceTiling == dm_sw_linear) {
361 *MacroTileHeightY = *BlockHeight256BytesY;
362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 *MacroTileHeightC = *BlockHeight256BytesC;
364 if (*MacroTileHeightC == 0)
365 *MacroTileWidthC = 0;
366 else
367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 if (*MacroTileHeightC == 0)
374 *MacroTileWidthC = 0;
375 else
376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 } else {
378 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 if (*MacroTileHeightC == 0)
382 *MacroTileWidthC = 0;
383 else
384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 }
386
387#ifdef __DML_VBA_DEBUG__
388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY){do { } while(0); };
389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY){do { } while(0); };
390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC){do { } while(0); };
391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC){do { } while(0); };
392#endif
393} // CalculateBytePerPixelAndBlockSizes
394
395void dml32_CalculateSwathAndDETConfiguration(
396 unsigned int DETSizeOverride[],
397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 unsigned int ConfigReturnBufferSizeInKByte,
399 unsigned int MaxTotalDETInKByte,
400 unsigned int MinCompressedBufferSizeInKByte,
401 double ForceSingleDPP,
402 unsigned int NumberOfActiveSurfaces,
403 unsigned int nomDETInKByte,
404 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 bool_Bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 unsigned int PixelChunkSizeKBytes,
407 unsigned int ROBSizeKBytes,
408 unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 enum output_encoder_class Output[],
410 double ReadBandwidthLuma[],
411 double ReadBandwidthChroma[],
412 double MaximumSwathWidthLuma[],
413 double MaximumSwathWidthChroma[],
414 enum dm_rotation_angle SourceRotation[],
415 bool_Bool ViewportStationary[],
416 enum source_format_class SourcePixelFormat[],
417 enum dm_swizzle_mode SurfaceTiling[],
418 unsigned int ViewportWidth[],
419 unsigned int ViewportHeight[],
420 unsigned int ViewportXStart[],
421 unsigned int ViewportYStart[],
422 unsigned int ViewportXStartC[],
423 unsigned int ViewportYStartC[],
424 unsigned int SurfaceWidthY[],
425 unsigned int SurfaceWidthC[],
426 unsigned int SurfaceHeightY[],
427 unsigned int SurfaceHeightC[],
428 unsigned int Read256BytesBlockHeightY[],
429 unsigned int Read256BytesBlockHeightC[],
430 unsigned int Read256BytesBlockWidthY[],
431 unsigned int Read256BytesBlockWidthC[],
432 enum odm_combine_mode ODMMode[],
433 unsigned int BlendingAndTiming[],
434 unsigned int BytePerPixY[],
435 unsigned int BytePerPixC[],
436 double BytePerPixDETY[],
437 double BytePerPixDETC[],
438 unsigned int HActive[],
439 double HRatio[],
440 double HRatioChroma[],
441 unsigned int DPPPerSurface[],
442
443 /* Output */
444 unsigned int swath_width_luma_ub[],
445 unsigned int swath_width_chroma_ub[],
446 double SwathWidth[],
447 double SwathWidthChroma[],
448 unsigned int SwathHeightY[],
449 unsigned int SwathHeightC[],
450 unsigned int DETBufferSizeInKByte[],
451 unsigned int DETBufferSizeY[],
452 unsigned int DETBufferSizeC[],
453 bool_Bool *UnboundedRequestEnabled,
454 unsigned int *CompressedBufferSizeInkByte,
455 unsigned int *CompBufReservedSpaceKBytes,
456 bool_Bool *CompBufReservedSpaceNeedAdjustment,
457 bool_Bool ViewportSizeSupportPerSurface[],
458 bool_Bool *ViewportSizeSupport)
459{
460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX8];
461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX8];
462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX8];
463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX8];
464 unsigned int RoundedUpSwathSizeBytesY;
465 unsigned int RoundedUpSwathSizeBytesC;
466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX8];
467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX8];
468 unsigned int k;
469 unsigned int TotalActiveDPP = 0;
470 bool_Bool NoChromaSurfaces = true1;
471 unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473#ifdef __DML_VBA_DEBUG__
474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP){do { } while(0); };
475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes){do { } while(0); };
476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes){do { } while(0); };
477#endif
478 dml32_CalculateSwathWidth(ForceSingleDPP,
479 NumberOfActiveSurfaces,
480 SourcePixelFormat,
481 SourceRotation,
482 ViewportStationary,
483 ViewportWidth,
484 ViewportHeight,
485 ViewportXStart,
486 ViewportYStart,
487 ViewportXStartC,
488 ViewportYStartC,
489 SurfaceWidthY,
490 SurfaceWidthC,
491 SurfaceHeightY,
492 SurfaceHeightC,
493 ODMMode,
494 BytePerPixY,
495 BytePerPixC,
496 Read256BytesBlockHeightY,
497 Read256BytesBlockHeightC,
498 Read256BytesBlockWidthY,
499 Read256BytesBlockWidthC,
500 BlendingAndTiming,
501 HActive,
502 HRatio,
503 DPPPerSurface,
504
505 /* Output */
506 SwathWidthdoubleDPP,
507 SwathWidthdoubleDPPChroma,
508 SwathWidth,
509 SwathWidthChroma,
510 MaximumSwathHeightY,
511 MaximumSwathHeightC,
512 swath_width_luma_ub,
513 swath_width_chroma_ub);
514
515 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518#ifdef __DML_VBA_DEBUG__
519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]){do { } while(0); };
520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]){do { } while(0); };
521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]){do { } while(0); };
522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]){do { } while(0); };
523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,{do { } while(0); }
524 RoundedUpMaxSwathSizeBytesY[k]){do { } while(0); };
525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]){do { } while(0); };
526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]){do { } while(0); };
527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]){do { } while(0); };
528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,{do { } while(0); }
529 RoundedUpMaxSwathSizeBytesC[k]){do { } while(0); };
530#endif
531
532 if (SourcePixelFormat[k] == dm_420_10) {
533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 }
536 }
537
538 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 NoChromaSurfaces = false0;
543 }
544 }
545
546 // By default, just set the reserved space to 2 pixel chunks size
547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554 if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 }
557
558 #ifdef __DML_VBA_DEBUG__
559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes){do { } while(0); };
560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment){do { } while(0); };
561 #endif
562
563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565 dml32_CalculateDETBufferSize(DETSizeOverride,
566 UseMALLForPStateChange,
567 ForceSingleDPP,
568 NumberOfActiveSurfaces,
569 *UnboundedRequestEnabled,
570 nomDETInKByte,
571 MaxTotalDETInKByte,
572 ConfigReturnBufferSizeInKByte,
573 MinCompressedBufferSizeInKByte,
574 CompressedBufferSegmentSizeInkByteFinal,
575 SourcePixelFormat,
576 ReadBandwidthLuma,
577 ReadBandwidthChroma,
578 RoundedUpMaxSwathSizeBytesY,
579 RoundedUpMaxSwathSizeBytesC,
580 DPPPerSurface,
581
582 /* Output */
583 DETBufferSizeInKByte, // per hubp pipe
584 CompressedBufferSizeInkByte);
585
586#ifdef __DML_VBA_DEBUG__
587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP){do { } while(0); };
588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte){do { } while(0); };
589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte){do { } while(0); };
590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal){do { } while(0); };
591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled){do { } while(0); };
592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte){do { } while(0); };
593#endif
594
595 *ViewportSizeSupport = true1;
596 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600#ifdef __DML_VBA_DEBUG__
601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,{do { } while(0); }
602 DETBufferSizeInKByteForSwathCalculation){do { } while(0); };
603#endif
604
605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 SwathHeightY[k] = MaximumSwathHeightY[k];
608 SwathHeightC[k] = MaximumSwathHeightC[k];
609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 SwathHeightC[k] = MaximumSwathHeightC[k];
616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 SwathHeightY[k] = MaximumSwathHeightY[k];
622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 } else {
626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 }
631
632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 *ViewportSizeSupport = false0;
637 ViewportSizeSupportPerSurface[k] = false0;
638 } else {
639 ViewportSizeSupportPerSurface[k] = true1;
640 }
641
642 if (SwathHeightC[k] == 0) {
643#ifdef __DML_VBA_DEBUG__
644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k){do { } while(0); };
645#endif
646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 DETBufferSizeC[k] = 0;
648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649#ifdef __DML_VBA_DEBUG__
650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k){do { } while(0); };
651#endif
652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 } else {
655#ifdef __DML_VBA_DEBUG__
656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k){do { } while(0); };
657#endif
658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 }
661
662#ifdef __DML_VBA_DEBUG__
663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]){do { } while(0); };
664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]){do { } while(0); };
665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,{do { } while(0); }
666 k, RoundedUpMaxSwathSizeBytesY[k]){do { } while(0); };
667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,{do { } while(0); }
668 k, RoundedUpMaxSwathSizeBytesC[k]){do { } while(0); };
669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY){do { } while(0); };
670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC){do { } while(0); };
671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]){do { } while(0); };
672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]){do { } while(0); };
673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]){do { } while(0); };
674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,{do { } while(0); }
675 ViewportSizeSupportPerSurface[k]){do { } while(0); };
676#endif
677
678 }
679} // CalculateSwathAndDETConfiguration
680
681void dml32_CalculateSwathWidth(
682 bool_Bool ForceSingleDPP,
683 unsigned int NumberOfActiveSurfaces,
684 enum source_format_class SourcePixelFormat[],
685 enum dm_rotation_angle SourceRotation[],
686 bool_Bool ViewportStationary[],
687 unsigned int ViewportWidth[],
688 unsigned int ViewportHeight[],
689 unsigned int ViewportXStart[],
690 unsigned int ViewportYStart[],
691 unsigned int ViewportXStartC[],
692 unsigned int ViewportYStartC[],
693 unsigned int SurfaceWidthY[],
694 unsigned int SurfaceWidthC[],
695 unsigned int SurfaceHeightY[],
696 unsigned int SurfaceHeightC[],
697 enum odm_combine_mode ODMMode[],
698 unsigned int BytePerPixY[],
699 unsigned int BytePerPixC[],
700 unsigned int Read256BytesBlockHeightY[],
701 unsigned int Read256BytesBlockHeightC[],
702 unsigned int Read256BytesBlockWidthY[],
703 unsigned int Read256BytesBlockWidthC[],
704 unsigned int BlendingAndTiming[],
705 unsigned int HActive[],
706 double HRatio[],
707 unsigned int DPPPerSurface[],
708
709 /* Output */
710 double SwathWidthdoubleDPPY[],
711 double SwathWidthdoubleDPPC[],
712 double SwathWidthY[], // per-pipe
713 double SwathWidthC[], // per-pipe
714 unsigned int MaximumSwathHeightY[],
715 unsigned int MaximumSwathHeightC[],
716 unsigned int swath_width_luma_ub[], // per-pipe
717 unsigned int swath_width_chroma_ub[]) // per-pipe
718{
719 unsigned int k, j;
720 enum odm_combine_mode MainSurfaceODMMode;
721
722 unsigned int surface_width_ub_l;
723 unsigned int surface_height_ub_l;
724 unsigned int surface_width_ub_c = 0;
725 unsigned int surface_height_ub_c = 0;
726
727#ifdef __DML_VBA_DEBUG__
728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP){do { } while(0); };
729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces){do { } while(0); };
730#endif
731
732 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 if (!IsVertical(SourceRotation[k]))
734 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 else
736 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738#ifdef __DML_VBA_DEBUG__
739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]){do { } while(0); };
740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]){do { } while(0); };
741#endif
742
743 MainSurfaceODMMode = ODMMode[k];
744 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 if (BlendingAndTiming[k] == j)
746 MainSurfaceODMMode = ODMMode[j];
747 }
748
749 if (ForceSingleDPP) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 } else {
752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 dml_round(HActive[k] / 4.0 * HRatio[k]));
755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 dml_round(HActive[k] / 2.0 * HRatio[k]));
758 } else if (DPPPerSurface[k] == 2) {
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 } else {
761 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 }
763 }
764
765#ifdef __DML_VBA_DEBUG__
766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]){do { } while(0); };
767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]){do { } while(0); };
768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode){do { } while(0); };
769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]){do { } while(0); };
770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]){do { } while(0); };
771#endif
772
773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 SourcePixelFormat[k] == dm_420_12) {
775 SwathWidthC[k] = SwathWidthY[k] / 2;
776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 } else {
778 SwathWidthC[k] = SwathWidthY[k];
779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 }
781
782 if (ForceSingleDPP == true1) {
783 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 }
786
787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790 if (!IsVertical(SourceRotation[k])) {
791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 dml_floor(ViewportXStart[k] +
796 SwathWidthY[k] +
797 Read256BytesBlockWidthY[k] - 1,
798 Read256BytesBlockWidthY[k]) -
799 dml_floor(ViewportXStart[k],
800 Read256BytesBlockWidthY[k]));
801 } else {
802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 dml_ceil(SwathWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) +
805 Read256BytesBlockWidthY[k]);
806 }
807 if (BytePerPixC[k] > 0) {
808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 Read256BytesBlockWidthC[k] - 1,
813 Read256BytesBlockWidthC[k]) -
814 dml_floor(ViewportXStartC[k],
815 Read256BytesBlockWidthC[k]));
816 } else {
817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 dml_ceil(SwathWidthC[k] - 1,
819 Read256BytesBlockWidthC[k]) +
820 Read256BytesBlockWidthC[k]);
821 }
822 } else {
823 swath_width_chroma_ub[k] = 0;
824 }
825 } else {
826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 Read256BytesBlockHeightY[k]) -
833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 } else {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 }
838 if (BytePerPixC[k] > 0) {
839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 Read256BytesBlockHeightC[k] - 1,
844 Read256BytesBlockHeightC[k]) -
845 dml_floor(ViewportYStartC[k],
846 Read256BytesBlockHeightC[k]));
847 } else {
848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 Read256BytesBlockHeightC[k]);
851 }
852 } else {
853 swath_width_chroma_ub[k] = 0;
854 }
855 }
856
857#ifdef __DML_VBA_DEBUG__
858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l){do { } while(0); };
859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l){do { } while(0); };
860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c){do { } while(0); };
861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c){do { } while(0); };
862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]){do { } while(0); };
863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]){do { } while(0); };
864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]){do { } while(0); };
865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]){do { } while(0); };
866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]){do { } while(0); };
867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]){do { } while(0); };
868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]){do { } while(0); };
869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]){do { } while(0); };
870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]){do { } while(0); };
871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]){do { } while(0); };
872#endif
873
874 }
875} // CalculateSwathWidth
876
877bool_Bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 unsigned int TotalNumberOfActiveDPP,
879 bool_Bool NoChroma,
880 enum output_encoder_class Output,
881 enum dm_swizzle_mode SurfaceTiling,
882 bool_Bool CompBufReservedSpaceNeedAdjustment,
883 bool_Bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884{
885 bool_Bool ret_val = false0;
886
887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 TotalNumberOfActiveDPP == 1 && NoChroma);
889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 ret_val = false0;
891
892 if (SurfaceTiling == dm_sw_linear)
893 ret_val = false0;
894
895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 ret_val = false0;
897
898#ifdef __DML_VBA_DEBUG__
899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment){do { } while(0); };
900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment){do { } while(0); };
901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val){do { } while(0); };
902#endif
903
904 return (ret_val);
905}
906
907void dml32_CalculateDETBufferSize(
908 unsigned int DETSizeOverride[],
909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 bool_Bool ForceSingleDPP,
911 unsigned int NumberOfActiveSurfaces,
912 bool_Bool UnboundedRequestEnabled,
913 unsigned int nomDETInKByte,
914 unsigned int MaxTotalDETInKByte,
915 unsigned int ConfigReturnBufferSizeInKByte,
916 unsigned int MinCompressedBufferSizeInKByte,
917 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 enum source_format_class SourcePixelFormat[],
919 double ReadBandwidthLuma[],
920 double ReadBandwidthChroma[],
921 unsigned int RoundedUpMaxSwathSizeBytesY[],
922 unsigned int RoundedUpMaxSwathSizeBytesC[],
923 unsigned int DPPPerSurface[],
924 /* Output */
925 unsigned int DETBufferSizeInKByte[],
926 unsigned int *CompressedBufferSizeInkByte)
927{
928 unsigned int DETBufferSizePoolInKByte;
929 unsigned int NextDETBufferPieceInKByte;
930 bool_Bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX8];
931 bool_Bool NextPotentialSurfaceToAssignDETPieceFound;
932 unsigned int NextSurfaceToAssignDETPiece;
933 double TotalBandwidth;
934 double BandwidthOfSurfacesNotAssignedDETPiece;
935 unsigned int max_minDET;
936 unsigned int minDET;
937 unsigned int minDET_pipe;
938 unsigned int j, k;
939
940#ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP){do { } while(0); };
942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte){do { } while(0); };
943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces){do { } while(0); };
944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled){do { } while(0); };
945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte){do { } while(0); };
946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte){do { } while(0); };
947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte){do { } while(0); };
948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,{do { } while(0); }
949 CompressedBufferSegmentSizeInkByteFinal){do { } while(0); };
950#endif
951
952 // Note: Will use default det size if that fits 2 swaths
953 if (UnboundedRequestEnabled) {
954 if (DETSizeOverride[0] > 0) {
955 DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 } else {
957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 ((double) RoundedUpMaxSwathSizeBytesY[0] +
959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 }
961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 } else {
963 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 DETBufferSizeInKByte[k] = nomDETInKByte;
966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 SourcePixelFormat[k] == dm_420_12) {
968 max_minDET = nomDETInKByte - 64;
969 } else {
970 max_minDET = nomDETInKByte;
971 }
972 minDET = 128;
973 minDET_pipe = 0;
974
975 // add DET resource until can hold 2 full swaths
976 while (minDET <= max_minDET && minDET_pipe == 0) {
977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 minDET_pipe = minDET;
980 minDET = minDET + 64;
981 }
982
983#ifdef __DML_VBA_DEBUG__
984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET){do { } while(0); };
985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET){do { } while(0); };
986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe){do { } while(0); };
987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,{do { } while(0); }
988 RoundedUpMaxSwathSizeBytesY[k]){do { } while(0); };
989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,{do { } while(0); }
990 RoundedUpMaxSwathSizeBytesC[k]){do { } while(0); };
991#endif
992
993 if (minDET_pipe == 0) {
994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996#ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",{do { } while(0); }
998 __func__, k, minDET_pipe){do { } while(0); };
999#endif
1000 }
1001
1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 DETBufferSizeInKByte[k] = 0;
1004 } else if (DETSizeOverride[k] > 0) {
1005 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 DETBufferSizeInKByte[k] = minDET_pipe;
1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 }
1013
1014#ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]){do { } while(0); };
1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]){do { } while(0); };
1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]){do { } while(0); };
1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte){do { } while(0); };
1019#endif
1020 }
1021
1022 TotalBandwidth = 0;
1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 }
1027#ifdef __DML_VBA_DEBUG__
1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__){do { } while(0); };
1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]){do { } while(0); };
1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__){do { } while(0); };
1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth){do { } while(0); };
1033#endif
1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 DETPieceAssignedToThisSurfaceAlready[k] = true1;
1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 DETPieceAssignedToThisSurfaceAlready[k] = true1;
1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 } else {
1046 DETPieceAssignedToThisSurfaceAlready[k] = false0;
1047 }
1048#ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,{do { } while(0); }
1050 DETPieceAssignedToThisSurfaceAlready[k]){do { } while(0); };
1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,{do { } while(0); }
1052 BandwidthOfSurfacesNotAssignedDETPiece){do { } while(0); };
1053#endif
1054 }
1055
1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 NextPotentialSurfaceToAssignDETPieceFound = false0;
1058 NextSurfaceToAssignDETPiece = 0;
1059
1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061#ifdef __DML_VBA_DEBUG__
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,{do { } while(0); }
1063 ReadBandwidthLuma[k]){do { } while(0); };
1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,{do { } while(0); }
1065 ReadBandwidthChroma[k]){do { } while(0); };
1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,{do { } while(0); }
1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]){do { } while(0); };
1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,{do { } while(0); }
1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]){do { } while(0); };
1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,{do { } while(0); }
1071 NextSurfaceToAssignDETPiece){do { } while(0); };
1072#endif
1073 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 NextSurfaceToAssignDETPiece = k;
1079 NextPotentialSurfaceToAssignDETPieceFound = true1;
1080 }
1081#ifdef __DML_VBA_DEBUG__
1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",{do { } while(0); }
1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]){do { } while(0); };
1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",{do { } while(0); }
1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound){do { } while(0); };
1086#endif
1087 }
1088
1089 if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 // Note: To show the banker's rounding behavior in VBA and also the fact
1091 // that the DET buffer size varies due to precision issue
1092 //
1093 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 // BandwidthOfSurfacesNotAssignedDETPiece /
1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 //
1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107 NextDETBufferPieceInKByte = dml_min(
1108 dml_round((double) DETBufferSizePoolInKByte *
1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 BandwidthOfSurfacesNotAssignedDETPiece /
1112 ((ForceSingleDPP ? 1 :
1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 (ForceSingleDPP ? 1 :
1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 dml_floor((double) DETBufferSizePoolInKByte,
1117 (ForceSingleDPP ? 1 :
1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 } else {
1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 // already has the max per-pipe value
1130 NextDETBufferPieceInKByte = 0;
1131 }
1132 }
1133
1134#ifdef __DML_VBA_DEBUG__
1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,{do { } while(0); }
1136 DETBufferSizePoolInKByte){do { } while(0); };
1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,{do { } while(0); }
1138 NextSurfaceToAssignDETPiece){do { } while(0); };
1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,{do { } while(0); }
1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]){do { } while(0); };
1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,{do { } while(0); }
1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]){do { } while(0); };
1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",{do { } while(0); }
1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece){do { } while(0); };
1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,{do { } while(0); }
1146 NextDETBufferPieceInKByte){do { } while(0); };
1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",{do { } while(0); }
1148 __func__, j, NextSurfaceToAssignDETPiece,{do { } while(0); }
1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]){do { } while(0); };
1150#endif
1151
1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 + NextDETBufferPieceInKByte
1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156#ifdef __DML_VBA_DEBUG__
1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]){do { } while(0); };
1158#endif
1159
1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true1;
1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 }
1166 }
1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 }
1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171#ifdef __DML_VBA_DEBUG__
1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__){do { } while(0); };
1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte){do { } while(0); };
1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",{do { } while(0); }
1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]){do { } while(0); };
1177 }
1178#endif
1179} // CalculateDETBufferSize
1180
1181void dml32_CalculateODMMode(
1182 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 unsigned int HActive,
1184 enum output_format_class OutFormat,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1188 double MaxDispclk,
1189 bool_Bool DSCEnable,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1192 double PixelClock,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1197
1198 /* Output */
1199 bool_Bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1203{
1204
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 MaxDispclk);
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 MaxDispclk);
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 MaxDispclk);
1218 *TotalAvailablePipesSupport = true1;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 *NumberOfDPP = 0;
1226
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 *NumberOfDPP = 4;
1238 } else {
1239 *TotalAvailablePipesSupport = false0;
1240 }
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 *NumberOfDPP = 2;
1250 } else {
1251 *TotalAvailablePipesSupport = false0;
1252 }
1253 } else {
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 *NumberOfDPP = 1;
1256 else
1257 *TotalAvailablePipesSupport = false0;
1258 }
1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH4096 &&
1260 ODMUse != dm_odm_combine_policy_4to1) {
1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH4096 * 4) {
1262 *ODMMode = dm_odm_combine_mode_disabled;
1263 *NumberOfDPP = 0;
1264 *TotalAvailablePipesSupport = false0;
1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH4096 * 2 ||
1266 *ODMMode == dm_odm_combine_mode_4to1) {
1267 *ODMMode = dm_odm_combine_mode_4to1;
1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 *NumberOfDPP = 4;
1270 } else {
1271 *ODMMode = dm_odm_combine_mode_2to1;
1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 *NumberOfDPP = 2;
1274 }
1275 }
1276 if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH4096) {
1278 *ODMMode = dm_odm_combine_mode_disabled;
1279 *NumberOfDPP = 0;
1280 *TotalAvailablePipesSupport = false0;
1281 }
1282}
1283
1284double dml32_CalculateRequiredDispclk(
1285 enum odm_combine_mode ODMMode,
1286 double PixelClock,
1287 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 double DISPCLKRampingMargin,
1289 double DISPCLKDPPCLKVCOSpeed,
1290 double MaxDispclk)
1291{
1292 double RequiredDispclk = 0.;
1293 double PixelClockAfterODM;
1294 double DISPCLKWithRampingRoundedToDFSGranularity;
1295 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 double MaxDispclkRoundedDownToDFSGranularity;
1297
1298 if (ODMMode == dm_odm_combine_mode_4to1)
1299 PixelClockAfterODM = PixelClock / 4;
1300 else if (ODMMode == dm_odm_combine_mode_2to1)
1301 PixelClockAfterODM = PixelClock / 2;
1302 else
1303 PixelClockAfterODM = PixelClock;
1304
1305
1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 else
1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322 return RequiredDispclk;
1323}
1324
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (VCOSpeed * 4) / N with N a whole number.
 *
 * round_up selects the nearest such value at or above Clock (divider rounded
 * down); otherwise the nearest value at or below Clock is returned (divider
 * rounded up). A Clock of zero or less yields 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	/* A smaller divider gives a higher clock, hence floor for round-up. */
	Divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1335
1336void dml32_CalculateOutputLink(
1337 double PHYCLKPerState,
1338 double PHYCLKD18PerState,
1339 double PHYCLKD32PerState,
1340 double Downspreading,
1341 bool_Bool IsMainSurfaceUsingTheIndicatedTiming,
1342 enum output_encoder_class Output,
1343 enum output_format_class OutputFormat,
1344 unsigned int HTotal,
1345 unsigned int HActive,
1346 double PixelClockBackEnd,
1347 double ForcedOutputLinkBPP,
1348 unsigned int DSCInputBitPerComponent,
1349 unsigned int NumberOfDSCSlices,
1350 double AudioSampleRate,
1351 unsigned int AudioSampleLayout,
1352 enum odm_combine_mode ODMModeNoDSC,
1353 enum odm_combine_mode ODMModeDSC,
1354 bool_Bool DSCEnable,
1355 unsigned int OutputLinkDPLanes,
1356 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358 /* Output */
1359 bool_Bool *RequiresDSC,
1360 double *RequiresFEC,
1361 double *OutBpp,
1362 enum dm_output_type *OutputType,
1363 enum dm_output_rate *OutputRate,
1364 unsigned int *RequiredSlots)
1365{
1366 bool_Bool LinkDSCEnable;
1367 unsigned int dummy;
1368 *RequiresDSC = false0;
1369 *RequiresFEC = false0;
1370 *OutBpp = 0;
1371 *OutputType = dm_output_type_unknown;
1372 *OutputRate = dm_output_rate_unknown;
1373
1374 if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 if (Output == dm_hdmi) {
1376 *RequiresDSC = false0;
1377 *RequiresFEC = false0;
1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 PixelClockBackEnd, ForcedOutputLinkBPP, false0, Output, OutputFormat,
1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 ODMModeNoDSC, ODMModeDSC, &dummy);
1382 //OutputTypeAndRate = "HDMI";
1383 *OutputType = dm_output_type_hdmi;
1384
1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 if (DSCEnable == true1) {
1387 *RequiresDSC = true1;
1388 LinkDSCEnable = true1;
1389 if (Output == dm_dp || Output == dm_dp2p0)
1390 *RequiresFEC = true1;
1391 else
1392 *RequiresFEC = false0;
1393 } else {
1394 *RequiresDSC = false0;
1395 LinkDSCEnable = false0;
1396 if (Output == dm_dp2p0)
1397 *RequiresFEC = true1;
1398 else
1399 *RequiresFEC = false0;
1400 }
1401 if (Output == dm_dp2p0) {
1402 *OutBpp = 0;
1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 PHYCLKD32PerState >= 10000 / 32) {
1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true1 &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true1;
1413 LinkDSCEnable = true1;
1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 OutputFormat, DSCInputBitPerComponent,
1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 }
1421 //OutputTypeAndRate = Output & " UHBR10";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 }
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true1 &&
1434 ForcedOutputLinkBPP == 0) {
1435 *RequiresDSC = true1;
1436 LinkDSCEnable = true1;
1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 OutputFormat, DSCInputBitPerComponent,
1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 }
1444 //OutputTypeAndRate = Output & " UHBR13p5";
1445 *OutputType = dm_output_type_dp2p0;
1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 }
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 if (*OutBpp == 0 && DSCEnable == true1 && ForcedOutputLinkBPP == 0) {
1456 *RequiresDSC = true1;
1457 LinkDSCEnable = true1;
1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 OutputFormat, DSCInputBitPerComponent,
1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 }
1465 //OutputTypeAndRate = Output & " UHBR20";
1466 *OutputType = dm_output_type_dp2p0;
1467 *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 }
1469 } else {
1470 *OutBpp = 0;
1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 PHYCLKPerState >= 270) {
1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true1 &&
1479 ForcedOutputLinkBPP == 0) {
1480 *RequiresDSC = true1;
1481 LinkDSCEnable = true1;
1482 if (Output == dm_dp)
1483 *RequiresFEC = true1;
1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 OutputFormat, DSCInputBitPerComponent,
1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 }
1491 //OutputTypeAndRate = Output & " HBR";
1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 *OutputRate = dm_output_rate_dp_rate_hbr;
1494 }
1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true1 &&
1504 ForcedOutputLinkBPP == 0) {
1505 *RequiresDSC = true1;
1506 LinkDSCEnable = true1;
1507 if (Output == dm_dp)
1508 *RequiresFEC = true1;
1509
1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 OutputFormat, DSCInputBitPerComponent,
1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 }
1517 //OutputTypeAndRate = Output & " HBR2";
1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 *OutputRate = dm_output_rate_dp_rate_hbr2;
1520 }
1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 RequiredSlots);
1528
1529 if (*OutBpp == 0 && DSCEnable == true1 && ForcedOutputLinkBPP == 0) {
1530 *RequiresDSC = true1;
1531 LinkDSCEnable = true1;
1532 if (Output == dm_dp)
1533 *RequiresFEC = true1;
1534
1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 OutputFormat, DSCInputBitPerComponent,
1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 }
1542 //OutputTypeAndRate = Output & " HBR3";
1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 *OutputRate = dm_output_rate_dp_rate_hbr3;
1545 }
1546 }
1547 }
1548 }
1549}
1550
/*
 * dml32_CalculateDPPCLK - derive the per-surface and global DPP clocks.
 *
 * Pass 1: each surface's clock is its single-DPP clock divided by the number
 * of DPPs on that surface, scaled up by the downspread percentage;
 * *GlobalDPPCLK tracks the maximum.
 * The maximum is then passed through dml32_RoundToDFSGranularity().
 * Pass 2: each surface's clock is rounded up to a multiple of
 * *GlobalDPPCLK / 255.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	unsigned int Surface;

	*GlobalDPPCLK = 0;

	for (Surface = 0; Surface < NumberOfActiveSurfaces; Surface++) {
		double PerDPPClock = DPPCLKUsingSingleDPP[Surface] / DPPPerSurface[Surface];

		Dppclk[Surface] = PerDPPClock * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[Surface]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	for (Surface = 0; Surface < NumberOfActiveSurfaces; Surface++) {
		/* Number of 1/255 steps of the global clock needed to cover this surface. */
		double Steps = dml_ceil(Dppclk[Surface] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[Surface] = *GlobalDPPCLK / 255 * Steps;
	}
}
1572
1573double dml32_TruncToValidBPP(
1574 double LinkBitRate,
1575 unsigned int Lanes,
1576 unsigned int HTotal,
1577 unsigned int HActive,
1578 double PixelClock,
1579 double DesiredBPP,
1580 bool_Bool DSCEnable,
1581 enum output_encoder_class Output,
1582 enum output_format_class Format,
1583 unsigned int DSCInputBitPerComponent,
1584 unsigned int DSCSlices,
1585 unsigned int AudioRate,
1586 unsigned int AudioLayout,
1587 enum odm_combine_mode ODMModeNoDSC,
1588 enum odm_combine_mode ODMModeDSC,
1589 /* Output */
1590 unsigned int *RequiredSlots)
1591{
1592 double MaxLinkBPP;
1593 unsigned int MinDSCBPP;
1594 double MaxDSCBPP;
1595 unsigned int NonDSCBPP0;
1596 unsigned int NonDSCBPP1;
1597 unsigned int NonDSCBPP2;
1598 unsigned int NonDSCBPP3;
1599
1600 if (Format == dm_420) {
1601 NonDSCBPP0 = 12;
1602 NonDSCBPP1 = 15;
1603 NonDSCBPP2 = 18;
1604 MinDSCBPP = 6;
1605 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1606 } else if (Format == dm_444) {
1607 NonDSCBPP0 = 18;
1608 NonDSCBPP1 = 24;
1609 NonDSCBPP2 = 30;
1610 NonDSCBPP3 = 36;
1611 MinDSCBPP = 8;
1612 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 } else {
1614 if (Output == dm_hdmi) {
1615 NonDSCBPP0 = 24;
1616 NonDSCBPP1 = 24;
1617 NonDSCBPP2 = 24;
1618 } else {
1619 NonDSCBPP0 = 16;
1620 NonDSCBPP1 = 20;
1621 NonDSCBPP2 = 24;
1622 }
1623 if (Format == dm_n422) {
1624 MinDSCBPP = 7;
1625 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 } else {
1627 MinDSCBPP = 8;
1628 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 }
1630 }
1631 if (Output == dm_dp2p0) {
1632 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 } else if (DSCEnable && Output == dm_dp) {
1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 } else {
1636 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 }
1638
1639 if (DSCEnable) {
1640 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 MaxLinkBPP = 2 * MaxLinkBPP;
1646 } else {
1647 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 MaxLinkBPP = 2 * MaxLinkBPP;
1653 }
1654
1655 if (DesiredBPP == 0) {
1656 if (DSCEnable) {
1657 if (MaxLinkBPP < MinDSCBPP)
1658 return BPP_INVALID0;
1659 else if (MaxLinkBPP >= MaxDSCBPP)
1660 return MaxDSCBPP;
1661 else
1662 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 } else {
1664 if (MaxLinkBPP >= NonDSCBPP3)
1665 return NonDSCBPP3;
1666 else if (MaxLinkBPP >= NonDSCBPP2)
1667 return NonDSCBPP2;
1668 else if (MaxLinkBPP >= NonDSCBPP1)
1669 return NonDSCBPP1;
1670 else if (MaxLinkBPP >= NonDSCBPP0)
1671 return 16.0;
1672 else
1673 return BPP_INVALID0;
1674 }
1675 } else {
1676 if (!((DSCEnable == false0 && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1677 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1678 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1679 return BPP_INVALID0;
1680 else
1681 return DesiredBPP;
1682 }
1683
1684 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1685
1686 return BPP_INVALID0;
1687} // TruncToValidBPP
1688
1689double dml32_RequiredDTBCLK(
1690 bool_Bool DSCEnable,
1691 double PixelClock,
1692 enum output_format_class OutputFormat,
1693 double OutputBpp,
1694 unsigned int DSCSlices,
1695 unsigned int HTotal,
1696 unsigned int HActive,
1697 unsigned int AudioRate,
1698 unsigned int AudioLayout)
1699{
1700 double PixelWordRate;
1701 double HCActive;
1702 double HCBlank;
1703 double AverageTribyteRate;
1704 double HActiveTribyteRate;
1705
1706 if (DSCEnable != true1)
1707 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1708
1709 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
1710 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1711 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1712 HCBlank = 64 + 32 *
1713 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1714 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1715 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1716 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1717}
1718
1719unsigned int dml32_DSCDelayRequirement(bool_Bool DSCEnabled,
1720 enum odm_combine_mode ODMMode,
1721 unsigned int DSCInputBitPerComponent,
1722 double OutputBpp,
1723 unsigned int HActive,
1724 unsigned int HTotal,
1725 unsigned int NumberOfDSCSlices,
1726 enum output_format_class OutputFormat,
1727 enum output_encoder_class Output,
1728 double PixelClock,
1729 double PixelClockBackEnd,
1730 double dsc_delay_factor_wa)
1731{
1732 unsigned int DSCDelayRequirement_val;
1733
1734 if (DSCEnabled == true1 && OutputBpp != 0) {
1735 if (ODMMode == dm_odm_combine_mode_4to1) {
1736 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1740 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1743 } else {
1744 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1745 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1746 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1747 }
1748
1749 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1750 dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1751
1752 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1753
1754 } else {
1755 DSCDelayRequirement_val = 0;
1756 }
1757
1758#ifdef __DML_VBA_DEBUG__
1759 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled){do { } while(0); };
1760 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp){do { } while(0); };
1761 dml_print("DML::%s: HActive = %d\n", __func__, HActive){do { } while(0); };
1762 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat){do { } while(0); };
1763 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent){do { } while(0); };
1764 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices){do { } while(0); };
1765 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val){do { } while(0); };
1766#endif
1767
1768 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1769}
1770
1771void dml32_CalculateSurfaceSizeInMall(
1772 unsigned int NumberOfActiveSurfaces,
1773 unsigned int MALLAllocatedForDCN,
1774 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1775 bool_Bool DCCEnable[],
1776 bool_Bool ViewportStationary[],
1777 unsigned int ViewportXStartY[],
1778 unsigned int ViewportYStartY[],
1779 unsigned int ViewportXStartC[],
1780 unsigned int ViewportYStartC[],
1781 unsigned int ViewportWidthY[],
1782 unsigned int ViewportHeightY[],
1783 unsigned int BytesPerPixelY[],
1784 unsigned int ViewportWidthC[],
1785 unsigned int ViewportHeightC[],
1786 unsigned int BytesPerPixelC[],
1787 unsigned int SurfaceWidthY[],
1788 unsigned int SurfaceWidthC[],
1789 unsigned int SurfaceHeightY[],
1790 unsigned int SurfaceHeightC[],
1791 unsigned int Read256BytesBlockWidthY[],
1792 unsigned int Read256BytesBlockWidthC[],
1793 unsigned int Read256BytesBlockHeightY[],
1794 unsigned int Read256BytesBlockHeightC[],
1795 unsigned int ReadBlockWidthY[],
1796 unsigned int ReadBlockWidthC[],
1797 unsigned int ReadBlockHeightY[],
1798 unsigned int ReadBlockHeightC[],
1799
1800 /* Output */
1801 unsigned int SurfaceSizeInMALL[],
1802 bool_Bool *ExceededMALLSize)
1803{
1804 unsigned int TotalSurfaceSizeInMALL = 0;
1805 unsigned int k;
1806
1807 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1808 if (ViewportStationary[k]) {
1809 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1810 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1811 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1812 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1813 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1814 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1815 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1816
1817 if (ReadBlockWidthC[k] > 0) {
1818 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1819 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1820 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1821 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1822 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1823 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1824 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1825 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1826 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1827 BytesPerPixelC[k];
1828 }
1829 if (DCCEnable[k] == true1) {
1830 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1831 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1832 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1833 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1834 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1835 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1836 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1837 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1838 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1839 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1840 if (Read256BytesBlockWidthC[k] > 0) {
1841 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1842 dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1843 Read256BytesBlockWidthC[k]),
1844 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1845 * Read256BytesBlockWidthC[k] - 1, 8 *
1846 Read256BytesBlockWidthC[k]) -
1847 dml_floor(ViewportXStartC[k], 8 *
1848 Read256BytesBlockWidthC[k])) *
1849 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1850 Read256BytesBlockHeightC[k]),
1851 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1852 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1853 Read256BytesBlockHeightC[k]) -
1854 dml_floor(ViewportYStartC[k], 8 *
1855 Read256BytesBlockHeightC[k])) *
1856 BytesPerPixelC[k] / 256;
1857 }
1858 }
1859 } else {
1860 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1861 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1862 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1863 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1864 BytesPerPixelY[k];
1865 if (ReadBlockWidthC[k] > 0) {
1866 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1867 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1868 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1869 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1870 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1871 BytesPerPixelC[k];
1872 }
1873 if (DCCEnable[k] == true1) {
1874 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1875 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1876 Read256BytesBlockWidthY[k] - 1), 8 *
1877 Read256BytesBlockWidthY[k]) *
1878 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1879 Read256BytesBlockHeightY[k] - 1), 8 *
1880 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1881
1882 if (Read256BytesBlockWidthC[k] > 0) {
1883 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1884 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1885 Read256BytesBlockWidthC[k] - 1), 8 *
1886 Read256BytesBlockWidthC[k]) *
1887 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1888 Read256BytesBlockHeightC[k] - 1), 8 *
1889 Read256BytesBlockHeightC[k]) *
1890 BytesPerPixelC[k] / 256;
1891 }
1892 }
1893 }
1894 }
1895
1896 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1897 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1898 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1899 }
1900 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
1901} // CalculateSurfaceSizeInMall
1902
1903void dml32_CalculateVMRowAndSwath(
1904 unsigned int NumberOfActiveSurfaces,
1905 DmlPipe myPipe[],
1906 unsigned int SurfaceSizeInMALL[],
1907 unsigned int PTEBufferSizeInRequestsLuma,
1908 unsigned int PTEBufferSizeInRequestsChroma,
1909 unsigned int DCCMetaBufferSizeBytes,
1910 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1911 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1912 unsigned int MALLAllocatedForDCN,
1913 double SwathWidthY[],
1914 double SwathWidthC[],
1915 bool_Bool GPUVMEnable,
1916 bool_Bool HostVMEnable,
1917 unsigned int HostVMMaxNonCachedPageTableLevels,
1918 unsigned int GPUVMMaxPageTableLevels,
1919 unsigned int GPUVMMinPageSizeKBytes[],
1920 unsigned int HostVMMinPageSize,
1921
1922 /* Output */
1923 bool_Bool PTEBufferSizeNotExceeded[],
1924 bool_Bool DCCMetaBufferSizeNotExceeded[],
1925 unsigned int dpte_row_width_luma_ub[],
1926 unsigned int dpte_row_width_chroma_ub[],
1927 unsigned int dpte_row_height_luma[],
1928 unsigned int dpte_row_height_chroma[],
1929 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1930 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1931 unsigned int meta_req_width[],
1932 unsigned int meta_req_width_chroma[],
1933 unsigned int meta_req_height[],
1934 unsigned int meta_req_height_chroma[],
1935 unsigned int meta_row_width[],
1936 unsigned int meta_row_width_chroma[],
1937 unsigned int meta_row_height[],
1938 unsigned int meta_row_height_chroma[],
1939 unsigned int vm_group_bytes[],
1940 unsigned int dpte_group_bytes[],
1941 unsigned int PixelPTEReqWidthY[],
1942 unsigned int PixelPTEReqHeightY[],
1943 unsigned int PTERequestSizeY[],
1944 unsigned int PixelPTEReqWidthC[],
1945 unsigned int PixelPTEReqHeightC[],
1946 unsigned int PTERequestSizeC[],
1947 unsigned int dpde0_bytes_per_frame_ub_l[],
1948 unsigned int meta_pte_bytes_per_frame_ub_l[],
1949 unsigned int dpde0_bytes_per_frame_ub_c[],
1950 unsigned int meta_pte_bytes_per_frame_ub_c[],
1951 double PrefetchSourceLinesY[],
1952 double PrefetchSourceLinesC[],
1953 double VInitPreFillY[],
1954 double VInitPreFillC[],
1955 unsigned int MaxNumSwathY[],
1956 unsigned int MaxNumSwathC[],
1957 double meta_row_bw[],
1958 double dpte_row_bw[],
1959 double PixelPTEBytesPerRow[],
1960 double PDEAndMetaPTEBytesFrame[],
1961 double MetaRowByte[],
1962 bool_Bool use_one_row_for_frame[],
1963 bool_Bool use_one_row_for_frame_flip[],
1964 bool_Bool UsesMALLForStaticScreen[],
1965 bool_Bool PTE_BUFFER_MODE[],
1966 unsigned int BIGK_FRAGMENT_SIZE[])
1967{
1968 unsigned int k;
1969 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX8];
1970 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX8];
1971 unsigned int PDEAndMetaPTEBytesFrameY;
1972 unsigned int PDEAndMetaPTEBytesFrameC;
1973 unsigned int MetaRowByteY[DC__NUM_DPP__MAX8];
1974 unsigned int MetaRowByteC[DC__NUM_DPP__MAX8];
1975 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX8];
1976 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX8];
1977 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX8];
1978 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX8];
1979 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX8];
1980 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX8];
1981 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX8];
1982 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX8];
1983 bool_Bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX8];
1984
1985 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1986 if (HostVMEnable == true1) {
1987 vm_group_bytes[k] = 512;
1988 dpte_group_bytes[k] = 512;
1989 } else if (GPUVMEnable == true1) {
1990 vm_group_bytes[k] = 2048;
1991 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1992 dpte_group_bytes[k] = 512;
1993 else
1994 dpte_group_bytes[k] = 2048;
1995 } else {
1996 vm_group_bytes[k] = 0;
1997 dpte_group_bytes[k] = 0;
1998 }
1999
2000 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2001 myPipe[k].SourcePixelFormat == dm_420_12 ||
2002 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2003 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2004 !IsVertical(myPipe[k].SourceRotation)) {
2005 PTEBufferSizeInRequestsForLuma[k] =
2006 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2007 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2008 } else {
2009 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2010 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2011 }
2012
2013 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2014 myPipe[k].ViewportStationary,
2015 myPipe[k].DCCEnable,
2016 myPipe[k].DPPPerSurface,
2017 myPipe[k].BlockHeight256BytesC,
2018 myPipe[k].BlockWidth256BytesC,
2019 myPipe[k].SourcePixelFormat,
2020 myPipe[k].SurfaceTiling,
2021 myPipe[k].BytePerPixelC,
2022 myPipe[k].SourceRotation,
2023 SwathWidthC[k],
2024 myPipe[k].ViewportHeightChroma,
2025 myPipe[k].ViewportXStartC,
2026 myPipe[k].ViewportYStartC,
2027 GPUVMEnable,
2028 HostVMEnable,
2029 HostVMMaxNonCachedPageTableLevels,
2030 GPUVMMaxPageTableLevels,
2031 GPUVMMinPageSizeKBytes[k],
2032 HostVMMinPageSize,
2033 PTEBufferSizeInRequestsForChroma[k],
2034 myPipe[k].PitchC,
2035 myPipe[k].DCCMetaPitchC,
2036 myPipe[k].BlockWidthC,
2037 myPipe[k].BlockHeightC,
2038
2039 /* Output */
2040 &MetaRowByteC[k],
2041 &PixelPTEBytesPerRowC[k],
2042 &dpte_row_width_chroma_ub[k],
2043 &dpte_row_height_chroma[k],
2044 &dpte_row_height_linear_chroma[k],
2045 &PixelPTEBytesPerRowC_one_row_per_frame[k],
2046 &dpte_row_width_chroma_ub_one_row_per_frame[k],
2047 &dpte_row_height_chroma_one_row_per_frame[k],
2048 &meta_req_width_chroma[k],
2049 &meta_req_height_chroma[k],
2050 &meta_row_width_chroma[k],
2051 &meta_row_height_chroma[k],
2052 &PixelPTEReqWidthC[k],
2053 &PixelPTEReqHeightC[k],
2054 &PTERequestSizeC[k],
2055 &dpde0_bytes_per_frame_ub_c[k],
2056 &meta_pte_bytes_per_frame_ub_c[k]);
2057
2058 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2059 myPipe[k].VRatioChroma,
2060 myPipe[k].VTapsChroma,
2061 myPipe[k].InterlaceEnable,
2062 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2063 myPipe[k].SwathHeightC,
2064 myPipe[k].SourceRotation,
2065 myPipe[k].ViewportStationary,
2066 SwathWidthC[k],
2067 myPipe[k].ViewportHeightChroma,
2068 myPipe[k].ViewportXStartC,
2069 myPipe[k].ViewportYStartC,
2070
2071 /* Output */
2072 &VInitPreFillC[k],
2073 &MaxNumSwathC[k]);
2074 } else {
2075 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2076 PTEBufferSizeInRequestsForChroma[k] = 0;
2077 PixelPTEBytesPerRowC[k] = 0;
2078 PDEAndMetaPTEBytesFrameC = 0;
2079 MetaRowByteC[k] = 0;
2080 MaxNumSwathC[k] = 0;
2081 PrefetchSourceLinesC[k] = 0;
2082 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2083 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2084 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2085 }
2086
2087 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2088 myPipe[k].ViewportStationary,
2089 myPipe[k].DCCEnable,
2090 myPipe[k].DPPPerSurface,
2091 myPipe[k].BlockHeight256BytesY,
2092 myPipe[k].BlockWidth256BytesY,
2093 myPipe[k].SourcePixelFormat,
2094 myPipe[k].SurfaceTiling,
2095 myPipe[k].BytePerPixelY,
2096 myPipe[k].SourceRotation,
2097 SwathWidthY[k],
2098 myPipe[k].ViewportHeight,
2099 myPipe[k].ViewportXStart,
2100 myPipe[k].ViewportYStart,
2101 GPUVMEnable,
2102 HostVMEnable,
2103 HostVMMaxNonCachedPageTableLevels,
2104 GPUVMMaxPageTableLevels,
2105 GPUVMMinPageSizeKBytes[k],
2106 HostVMMinPageSize,
2107 PTEBufferSizeInRequestsForLuma[k],
2108 myPipe[k].PitchY,
2109 myPipe[k].DCCMetaPitchY,
2110 myPipe[k].BlockWidthY,
2111 myPipe[k].BlockHeightY,
2112
2113 /* Output */
2114 &MetaRowByteY[k],
2115 &PixelPTEBytesPerRowY[k],
2116 &dpte_row_width_luma_ub[k],
2117 &dpte_row_height_luma[k],
2118 &dpte_row_height_linear_luma[k],
2119 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2120 &dpte_row_width_luma_ub_one_row_per_frame[k],
2121 &dpte_row_height_luma_one_row_per_frame[k],
2122 &meta_req_width[k],
2123 &meta_req_height[k],
2124 &meta_row_width[k],
2125 &meta_row_height[k],
2126 &PixelPTEReqWidthY[k],
2127 &PixelPTEReqHeightY[k],
2128 &PTERequestSizeY[k],
2129 &dpde0_bytes_per_frame_ub_l[k],
2130 &meta_pte_bytes_per_frame_ub_l[k]);
2131
2132 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2133 myPipe[k].VRatio,
2134 myPipe[k].VTaps,
2135 myPipe[k].InterlaceEnable,
2136 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2137 myPipe[k].SwathHeightY,
2138 myPipe[k].SourceRotation,
2139 myPipe[k].ViewportStationary,
2140 SwathWidthY[k],
2141 myPipe[k].ViewportHeight,
2142 myPipe[k].ViewportXStart,
2143 myPipe[k].ViewportYStart,
2144
2145 /* Output */
2146 &VInitPreFillY[k],
2147 &MaxNumSwathY[k]);
2148
2149 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2150 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2151
2152 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2153 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2154 PTEBufferSizeNotExceeded[k] = true1;
2155 } else {
2156 PTEBufferSizeNotExceeded[k] = false0;
2157 }
2158
2159 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2160 PTEBufferSizeInRequestsForLuma[k] &&
2161 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2162 }
2163
2164 dml32_CalculateMALLUseForStaticScreen(
2165 NumberOfActiveSurfaces,
2166 MALLAllocatedForDCN,
2167 UseMALLForStaticScreen, // mode
2168 SurfaceSizeInMALL,
2169 one_row_per_frame_fits_in_buffer,
2170 /* Output */
2171 UsesMALLForStaticScreen); // boolen
2172
2173 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2174 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2175 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2176 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2177 (GPUVMMinPageSizeKBytes[k] > 64);
2178 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2179 }
2180
2181 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2182#ifdef __DML_VBA_DEBUG__
2183 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]){do { } while(0); };
2184 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]){do { } while(0); };
2185#endif
2186 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2187 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2188 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2189 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2190
2191 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2192 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2193
2194 if (use_one_row_for_frame[k]) {
2195 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2196 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2197 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2198 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2199 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2200 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2201 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2202 }
2203
2204 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2205 DCCMetaBufferSizeNotExceeded[k] = true1;
2206 else
2207 DCCMetaBufferSizeNotExceeded[k] = false0;
2208
2209 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2210 if (use_one_row_for_frame[k])
2211 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2212
2213 dml32_CalculateRowBandwidth(
2214 GPUVMEnable,
2215 myPipe[k].SourcePixelFormat,
2216 myPipe[k].VRatio,
2217 myPipe[k].VRatioChroma,
2218 myPipe[k].DCCEnable,
2219 myPipe[k].HTotal / myPipe[k].PixelClock,
2220 MetaRowByteY[k], MetaRowByteC[k],
2221 meta_row_height[k],
2222 meta_row_height_chroma[k],
2223 PixelPTEBytesPerRowY[k],
2224 PixelPTEBytesPerRowC[k],
2225 dpte_row_height_luma[k],
2226 dpte_row_height_chroma[k],
2227
2228 /* Output */
2229 &meta_row_bw[k],
2230 &dpte_row_bw[k]);
2231#ifdef __DML_VBA_DEBUG__
2232 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]){do { } while(0); };
2233 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",{do { } while(0); }
2234 __func__, k, use_one_row_for_frame_flip[k]){do { } while(0); };
2235 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",{do { } while(0); }
2236 __func__, k, UseMALLForPStateChange[k]){do { } while(0); };
2237 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]){do { } while(0); };
2238 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",{do { } while(0); }
2239 __func__, k, dpte_row_width_luma_ub[k]){do { } while(0); };
2240 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]){do { } while(0); };
2241 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",{do { } while(0); }
2242 __func__, k, dpte_row_height_chroma[k]){do { } while(0); };
2243 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",{do { } while(0); }
2244 __func__, k, dpte_row_width_chroma_ub[k]){do { } while(0); };
2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]){do { } while(0); };
2246 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]){do { } while(0); };
2247 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",{do { } while(0); }
2248 __func__, k, PTEBufferSizeNotExceeded[k]){do { } while(0); };
2249 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]){do { } while(0); };
2250 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]){do { } while(0); };
2251#endif
2252 }
2253} // CalculateVMRowAndSwath
2254
2255unsigned int dml32_CalculateVMAndRowBytes(
2256 bool_Bool ViewportStationary,
2257 bool_Bool DCCEnable,
2258 unsigned int NumberOfDPPs,
2259 unsigned int BlockHeight256Bytes,
2260 unsigned int BlockWidth256Bytes,
2261 enum source_format_class SourcePixelFormat,
2262 unsigned int SurfaceTiling,
2263 unsigned int BytePerPixel,
2264 enum dm_rotation_angle SourceRotation,
2265 double SwathWidth,
2266 unsigned int ViewportHeight,
2267 unsigned int ViewportXStart,
2268 unsigned int ViewportYStart,
2269 bool_Bool GPUVMEnable,
2270 bool_Bool HostVMEnable,
2271 unsigned int HostVMMaxNonCachedPageTableLevels,
2272 unsigned int GPUVMMaxPageTableLevels,
2273 unsigned int GPUVMMinPageSizeKBytes,
2274 unsigned int HostVMMinPageSize,
2275 unsigned int PTEBufferSizeInRequests,
2276 unsigned int Pitch,
2277 unsigned int DCCMetaPitch,
2278 unsigned int MacroTileWidth,
2279 unsigned int MacroTileHeight,
2280
2281 /* Output */
2282 unsigned int *MetaRowByte,
2283 unsigned int *PixelPTEBytesPerRow,
2284 unsigned int *dpte_row_width_ub,
2285 unsigned int *dpte_row_height,
2286 unsigned int *dpte_row_height_linear,
2287 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2288 unsigned int *dpte_row_width_ub_one_row_per_frame,
2289 unsigned int *dpte_row_height_one_row_per_frame,
2290 unsigned int *MetaRequestWidth,
2291 unsigned int *MetaRequestHeight,
2292 unsigned int *meta_row_width,
2293 unsigned int *meta_row_height,
2294 unsigned int *PixelPTEReqWidth,
2295 unsigned int *PixelPTEReqHeight,
2296 unsigned int *PTERequestSize,
2297 unsigned int *DPDE0BytesFrame,
2298 unsigned int *MetaPTEBytesFrame)
2299{
2300 unsigned int MPDEBytesFrame;
2301 unsigned int DCCMetaSurfaceBytes;
2302 unsigned int ExtraDPDEBytesFrame;
2303 unsigned int PDEAndMetaPTEBytesFrame;
2304 unsigned int HostVMDynamicLevels = 0;
2305 unsigned int MacroTileSizeBytes;
2306 unsigned int vp_height_meta_ub;
2307 unsigned int vp_height_dpte_ub;
2308 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2309
2310 if (GPUVMEnable == true1 && HostVMEnable == true1) {
2311 if (HostVMMinPageSize < 2048)
2312 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2313 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2314 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2315 else
2316 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2317 }
2318
2319 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2320 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2321 if (SurfaceTiling == dm_sw_linear) {
2322 *meta_row_height = 32;
2323 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2324 - dml_floor(ViewportXStart, *MetaRequestWidth);
2325 } else if (!IsVertical(SourceRotation)) {
2326 *meta_row_height = *MetaRequestHeight;
2327 if (ViewportStationary && NumberOfDPPs == 1) {
2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2329 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2330 } else {
2331 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2332 }
2333 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2334 } else {
2335 *meta_row_height = *MetaRequestWidth;
2336 if (ViewportStationary && NumberOfDPPs == 1) {
2337 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2338 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2339 } else {
2340 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2341 }
2342 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2343 }
2344
2345 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2346 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2347 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2348 } else if (!IsVertical(SourceRotation)) {
2349 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2350 } else {
2351 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2352 }
2353
2354 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2355
2356 if (GPUVMEnable == true1) {
2357 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2358 (8 * 4.0 * 1024), 1) + 1) * 64;
2359 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2360 } else {
2361 *MetaPTEBytesFrame = 0;
2362 MPDEBytesFrame = 0;
2363 }
2364
2365 if (DCCEnable != true1) {
2366 *MetaPTEBytesFrame = 0;
2367 MPDEBytesFrame = 0;
2368 *MetaRowByte = 0;
2369 }
2370
2371 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2372
2373 if (GPUVMEnable == true1 && GPUVMMaxPageTableLevels > 1) {
2374 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2375 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2376 MacroTileHeight - 1, MacroTileHeight) -
2377 dml_floor(ViewportYStart, MacroTileHeight);
2378 } else if (!IsVertical(SourceRotation)) {
2379 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2380 } else {
2381 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2382 }
2383 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2384 (8 * 2097152), 1) + 1);
2385 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2386 } else {
2387 *DPDE0BytesFrame = 0;
2388 ExtraDPDEBytesFrame = 0;
2389 vp_height_dpte_ub = 0;
2390 }
2391
2392 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2393
2394#ifdef __DML_VBA_DEBUG__
2395 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable){do { } while(0); };
2396 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable){do { } while(0); };
2397 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear){do { } while(0); };
2398 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel){do { } while(0); };
2399 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels){do { } while(0); };
2400 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes){do { } while(0); };
2401 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes){do { } while(0); };
2402 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight){do { } while(0); };
2403 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth){do { } while(0); };
2404 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame){do { } while(0); };
2405 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame){do { } while(0); };
2406 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame){do { } while(0); };
2407 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame){do { } while(0); };
2408 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); };
2409 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight){do { } while(0); };
2410 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth){do { } while(0); };
2411 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub){do { } while(0); };
2412#endif
2413
2414 if (HostVMEnable == true1)
2415 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2416
2417 if (SurfaceTiling == dm_sw_linear) {
2418 *PixelPTEReqHeight = 1;
2419 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2420 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2421 *PTERequestSize = 64;
2422 } else if (GPUVMMinPageSizeKBytes == 4) {
2423 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2424 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2425 *PTERequestSize = 128;
2426 } else {
2427 *PixelPTEReqHeight = MacroTileHeight;
2428 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2429 *PTERequestSize = 64;
2430 }
2431#ifdef __DML_VBA_DEBUG__
2432 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes){do { } while(0); };
2433 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); };
2434 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight){do { } while(0); };
2435 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth){do { } while(0); };
2436 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear){do { } while(0); };
2437 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize){do { } while(0); };
2438 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch){do { } while(0); };
2439#endif
2440
2441 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2442 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2443 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2444 (double) *PixelPTEReqWidth;
2445 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2446 *PTERequestSize;
2447
2448 if (SurfaceTiling == dm_sw_linear) {
2449 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2450 *PixelPTEReqWidth / Pitch), 1));
2451#ifdef __DML_VBA_DEBUG__
2452 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,{do { } while(0); }
2453 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch){do { } while(0); };
2454 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,{do { } while(0); }
2455 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)){do { } while(0); };
2456 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,{do { } while(0); }
2457 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)){do { } while(0); };
2458 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,{do { } while(0); }
2459 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *{do { } while(0); }
2460 *PixelPTEReqWidth / Pitch), 1)){do { } while(0); };
2461 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height){do { } while(0); };
2462#endif
2463 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2464 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2465 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2466
2467 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2468 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2469 PixelPTEReqWidth_linear / Pitch), 1);
2470 if (*dpte_row_height_linear > 128)
2471 *dpte_row_height_linear = 128;
2472
2473 } else if (!IsVertical(SourceRotation)) {
2474 *dpte_row_height = *PixelPTEReqHeight;
2475
2476 if (GPUVMMinPageSizeKBytes > 64) {
2477 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2478 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2479 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2480 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2481 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2482 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2483 } else {
2484 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2485 *PixelPTEReqWidth;
2486 }
2487
2488 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2489 } else {
2490 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2491
2492 if (ViewportStationary && (NumberOfDPPs == 1)) {
2493 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2494 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2495 } else {
2496 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2497 * *PixelPTEReqHeight;
2498 }
2499
2500 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2501 }
2502
2503 if (GPUVMEnable != true1)
2504 *PixelPTEBytesPerRow = 0;
2505 if (HostVMEnable == true1)
2506 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2507
2508#ifdef __DML_VBA_DEBUG__
2509 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes){do { } while(0); };
2510 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height){do { } while(0); };
2511 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear){do { } while(0); };
2512 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub){do { } while(0); };
2513 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow){do { } while(0); };
2514 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests){do { } while(0); };
2515 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame){do { } while(0); };
2516 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",{do { } while(0); }
2517 __func__, *dpte_row_width_ub_one_row_per_frame){do { } while(0); };
2518 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",{do { } while(0); }
2519 __func__, *PixelPTEBytesPerRow_one_row_per_frame){do { } while(0); };
2520 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",{do { } while(0); }
2521 *MetaPTEBytesFrame){do { } while(0); };
2522#endif
2523
2524 return PDEAndMetaPTEBytesFrame;
2525} // CalculateVMAndRowBytes
2526
2527double dml32_CalculatePrefetchSourceLines(
2528 double VRatio,
2529 unsigned int VTaps,
2530 bool_Bool Interlace,
2531 bool_Bool ProgressiveToInterlaceUnitInOPP,
2532 unsigned int SwathHeight,
2533 enum dm_rotation_angle SourceRotation,
2534 bool_Bool ViewportStationary,
2535 double SwathWidth,
2536 unsigned int ViewportHeight,
2537 unsigned int ViewportXStart,
2538 unsigned int ViewportYStart,
2539
2540 /* Output */
2541 double *VInitPreFill,
2542 unsigned int *MaxNumSwath)
2543{
2544
2545 unsigned int vp_start_rot;
2546 unsigned int sw0_tmp;
2547 unsigned int MaxPartialSwath;
2548 double numLines;
2549
2550#ifdef __DML_VBA_DEBUG__
2551 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio){do { } while(0); };
2552 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps){do { } while(0); };
2553 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart){do { } while(0); };
2554 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart){do { } while(0); };
2555 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary){do { } while(0); };
2556 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight){do { } while(0); };
2557#endif
2558 if (ProgressiveToInterlaceUnitInOPP)
2559 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2560 else
2561 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2562
2563 if (ViewportStationary) {
2564 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2565 vp_start_rot = SwathHeight -
2566 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2567 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2568 vp_start_rot = ViewportXStart;
2569 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2570 vp_start_rot = SwathHeight -
2571 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2572 } else {
2573 vp_start_rot = ViewportYStart;
2574 }
2575 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2576 if (sw0_tmp < *VInitPreFill)
2577 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2578 else
2579 *MaxNumSwath = 1;
2580 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2581 } else {
2582 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2583 if (*VInitPreFill > 1)
2584 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2585 else
2586 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2587 }
2588 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2589
2590#ifdef __DML_VBA_DEBUG__
2591 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot){do { } while(0); };
2592 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill){do { } while(0); };
2593 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath){do { } while(0); };
2594 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath){do { } while(0); };
2595 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines){do { } while(0); };
2596#endif
2597 return numLines;
2598
2599} // CalculatePrefetchSourceLines
2600
2601void dml32_CalculateMALLUseForStaticScreen(
2602 unsigned int NumberOfActiveSurfaces,
2603 unsigned int MALLAllocatedForDCNFinal,
2604 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2605 unsigned int SurfaceSizeInMALL[],
2606 bool_Bool one_row_per_frame_fits_in_buffer[],
2607
2608 /* output */
2609 bool_Bool UsesMALLForStaticScreen[])
2610{
2611 unsigned int k;
2612 unsigned int SurfaceToAddToMALL;
2613 bool_Bool CanAddAnotherSurfaceToMALL;
2614 unsigned int TotalSurfaceSizeInMALL;
2615
2616 TotalSurfaceSizeInMALL = 0;
2617 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2618 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2619 if (UsesMALLForStaticScreen[k])
2620 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2621#ifdef __DML_VBA_DEBUG__
2622 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]){do { } while(0); };
2623 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL){do { } while(0); };
2624#endif
2625 }
2626
2627 SurfaceToAddToMALL = 0;
2628 CanAddAnotherSurfaceToMALL = true1;
2629 while (CanAddAnotherSurfaceToMALL) {
2630 CanAddAnotherSurfaceToMALL = false0;
2631 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2632 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2633 !UsesMALLForStaticScreen[k] &&
2634 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2635 one_row_per_frame_fits_in_buffer[k] &&
2636 (!CanAddAnotherSurfaceToMALL ||
2637 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2638 CanAddAnotherSurfaceToMALL = true1;
2639 SurfaceToAddToMALL = k;
2640#ifdef __DML_VBA_DEBUG__
2641 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",{do { } while(0); }
2642 __func__, k, UseMALLForStaticScreen[k]){do { } while(0); };
2643#endif
2644 }
2645 }
2646 if (CanAddAnotherSurfaceToMALL) {
2647 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true1;
2648 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2649
2650#ifdef __DML_VBA_DEBUG__
2651 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL){do { } while(0); };
2652 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL){do { } while(0); };
2653#endif
2654
2655 }
2656 }
2657}
2658
2659void dml32_CalculateRowBandwidth(
2660 bool_Bool GPUVMEnable,
2661 enum source_format_class SourcePixelFormat,
2662 double VRatio,
2663 double VRatioChroma,
2664 bool_Bool DCCEnable,
2665 double LineTime,
2666 unsigned int MetaRowByteLuma,
2667 unsigned int MetaRowByteChroma,
2668 unsigned int meta_row_height_luma,
2669 unsigned int meta_row_height_chroma,
2670 unsigned int PixelPTEBytesPerRowLuma,
2671 unsigned int PixelPTEBytesPerRowChroma,
2672 unsigned int dpte_row_height_luma,
2673 unsigned int dpte_row_height_chroma,
2674 /* Output */
2675 double *meta_row_bw,
2676 double *dpte_row_bw)
2677{
2678 if (DCCEnable != true1) {
2679 *meta_row_bw = 0;
2680 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2681 SourcePixelFormat == dm_rgbe_alpha) {
2682 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2683 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2684 } else {
2685 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2686 }
2687
2688 if (GPUVMEnable != true1) {
2689 *dpte_row_bw = 0;
2690 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2691 SourcePixelFormat == dm_rgbe_alpha) {
2692 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2693 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2694 } else {
2695 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2696 }
2697}
2698
/*
 * dml32_CalculateUrgentLatency - worst-case urgent latency.
 *
 * Returns the largest of the three supplied latencies. When
 * DoUrgentLatencyAdjustment is set, adds
 *     Component * (Reference / FabricClock - 1)
 * on top of that maximum.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment != true)
		return worst_latency;

	/* Fabric-clock dependent correction relative to the reference clock. */
	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2717
2718void dml32_CalculateUrgentBurstFactor(
2719 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2720 unsigned int swath_width_luma_ub,
2721 unsigned int swath_width_chroma_ub,
2722 unsigned int SwathHeightY,
2723 unsigned int SwathHeightC,
2724 double LineTime,
2725 double UrgentLatency,
2726 double CursorBufferSize,
2727 unsigned int CursorWidth,
2728 unsigned int CursorBPP,
2729 double VRatio,
2730 double VRatioC,
2731 double BytePerPixelInDETY,
2732 double BytePerPixelInDETC,
2733 unsigned int DETBufferSizeY,
2734 unsigned int DETBufferSizeC,
2735 /* Output */
2736 double *UrgentBurstFactorCursor,
2737 double *UrgentBurstFactorLuma,
2738 double *UrgentBurstFactorChroma,
2739 bool_Bool *NotEnoughUrgentLatencyHiding)
2740{
2741 double LinesInDETLuma;
2742 double LinesInDETChroma;
2743 unsigned int LinesInCursorBuffer;
2744 double CursorBufferSizeInTime;
2745 double DETBufferSizeInTimeLuma;
2746 double DETBufferSizeInTimeChroma;
2747
2748 *NotEnoughUrgentLatencyHiding = 0;
2749
2750 if (CursorWidth > 0) {
2751 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2752 (CursorWidth * CursorBPP / 8.0)), 1.0);
2753 if (VRatio > 0) {
2754 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2755 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2756 *NotEnoughUrgentLatencyHiding = 1;
2757 *UrgentBurstFactorCursor = 0;
2758 } else {
2759 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2760 (CursorBufferSizeInTime - UrgentLatency);
2761 }
2762 } else {
2763 *UrgentBurstFactorCursor = 1;
2764 }
2765 }
2766
2767 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2768 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2769
2770 if (VRatio > 0) {
2771 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2772 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2773 *NotEnoughUrgentLatencyHiding = 1;
2774 *UrgentBurstFactorLuma = 0;
2775 } else {
2776 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2777 }
2778 } else {
2779 *UrgentBurstFactorLuma = 1;
2780 }
2781
2782 if (BytePerPixelInDETC > 0) {
2783 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2784 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2785 / swath_width_chroma_ub;
2786
2787 if (VRatio > 0) {
2788 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2789 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2790 *NotEnoughUrgentLatencyHiding = 1;
2791 *UrgentBurstFactorChroma = 0;
2792 } else {
2793 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2794 / (DETBufferSizeInTimeChroma - UrgentLatency);
2795 }
2796 } else {
2797 *UrgentBurstFactorChroma = 1;
2798 }
2799 }
2800} // CalculateUrgentBurstFactor
2801
2802void dml32_CalculateDCFCLKDeepSleep(
2803 unsigned int NumberOfActiveSurfaces,
2804 unsigned int BytePerPixelY[],
2805 unsigned int BytePerPixelC[],
2806 double VRatio[],
2807 double VRatioChroma[],
2808 double SwathWidthY[],
2809 double SwathWidthC[],
2810 unsigned int DPPPerSurface[],
2811 double HRatio[],
2812 double HRatioChroma[],
2813 double PixelClock[],
2814 double PSCL_THROUGHPUT[],
2815 double PSCL_THROUGHPUT_CHROMA[],
2816 double Dppclk[],
2817 double ReadBandwidthLuma[],
2818 double ReadBandwidthChroma[],
2819 unsigned int ReturnBusWidth,
2820
2821 /* Output */
2822 double *DCFClkDeepSleep)
2823{
2824 unsigned int k;
2825 double DisplayPipeLineDeliveryTimeLuma;
2826 double DisplayPipeLineDeliveryTimeChroma;
2827 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX8];
2828 double ReadBandwidth = 0.0;
2829
2830 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2831
2832 if (VRatio[k] <= 1) {
2833 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2834 / PixelClock[k];
2835 } else {
2836 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2837 }
2838 if (BytePerPixelC[k] == 0) {
2839 DisplayPipeLineDeliveryTimeChroma = 0;
2840 } else {
2841 if (VRatioChroma[k] <= 1) {
2842 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2843 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2844 } else {
2845 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2846 / Dppclk[k];
2847 }
2848 }
2849
2850 if (BytePerPixelC[k] > 0) {
2851 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__1.15 * SwathWidthY[k] *
2852 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2853 __DML_MIN_DCFCLK_FACTOR__1.15 * SwathWidthC[k] * BytePerPixelC[k] /
2854 32.0 / DisplayPipeLineDeliveryTimeChroma);
2855 } else {
2856 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__1.15 * SwathWidthY[k] * BytePerPixelY[k] /
2857 64.0 / DisplayPipeLineDeliveryTimeLuma;
2858 }
2859 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2860
2861#ifdef __DML_VBA_DEBUG__
2862 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]){do { } while(0); };
2863 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]){do { } while(0); };
2864#endif
2865 }
2866
2867 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2868 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2869
2870 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__1.15 * ReadBandwidth / (double) ReturnBusWidth);
2871
2872#ifdef __DML_VBA_DEBUG__
2873 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__){do { } while(0); };
2874 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth){do { } while(0); };
2875 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth){do { } while(0); };
2876 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep){do { } while(0); };
2877#endif
2878
2879 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2880 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2881#ifdef __DML_VBA_DEBUG__
2882 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep){do { } while(0); };
2883#endif
2884} // CalculateDCFCLKDeepSleep
2885
2886double dml32_CalculateWriteBackDelay(
2887 enum source_format_class WritebackPixelFormat,
2888 double WritebackHRatio,
2889 double WritebackVRatio,
2890 unsigned int WritebackVTaps,
2891 unsigned int WritebackDestinationWidth,
2892 unsigned int WritebackDestinationHeight,
2893 unsigned int WritebackSourceHeight,
2894 unsigned int HTotal)
2895{
2896 double CalculateWriteBackDelay;
2897 double Line_length;
2898 double Output_lines_last_notclamped;
2899 double WritebackVInit;
2900
2901 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2902 Line_length = dml_max((double) WritebackDestinationWidth,
2903 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2904 Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2905 dml_ceil(((double)WritebackSourceHeight -
2906 (double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2907 if (Output_lines_last_notclamped < 0) {
2908 CalculateWriteBackDelay = 0;
2909 } else {
2910 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2911 (HTotal - WritebackDestinationWidth) + 80;
2912 }
2913 return CalculateWriteBackDelay;
2914}
2915
2916void dml32_UseMinimumDCFCLK(
2917 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2918 bool_Bool DRRDisplay[],
2919 bool_Bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2920 unsigned int MaxInterDCNTileRepeaters,
2921 unsigned int MaxPrefetchMode,
2922 double DRAMClockChangeLatencyFinal,
2923 double FCLKChangeLatency,
2924 double SREnterPlusExitTime,
2925 unsigned int ReturnBusWidth,
2926 unsigned int RoundTripPingLatencyCycles,
2927 unsigned int ReorderingBytes,
2928 unsigned int PixelChunkSizeInKByte,
2929 unsigned int MetaChunkSize,
2930 bool_Bool GPUVMEnable,
2931 unsigned int GPUVMMaxPageTableLevels,
2932 bool_Bool HostVMEnable,
2933 unsigned int NumberOfActiveSurfaces,
2934 double HostVMMinPageSize,
2935 unsigned int HostVMMaxNonCachedPageTableLevels,
2936 bool_Bool DynamicMetadataVMEnabled,
2937 bool_Bool ImmediateFlipRequirement,
2938 bool_Bool ProgressiveToInterlaceUnitInOPP,
2939 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2940 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2941 unsigned int VTotal[],
2942 unsigned int VActive[],
2943 unsigned int DynamicMetadataTransmittedBytes[],
2944 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2945 bool_Bool Interlace[],
2946 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX8],
2947 double RequiredDISPCLK[][2],
2948 double UrgLatency[],
2949 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX8],
2950 double ProjectedDCFClkDeepSleep[][2],
2951 double MaximumVStartup[][2][DC__NUM_DPP__MAX8],
2952 unsigned int TotalNumberOfActiveDPP[][2],
2953 unsigned int TotalNumberOfDCCActiveDPP[][2],
2954 unsigned int dpte_group_bytes[],
2955 double PrefetchLinesY[][2][DC__NUM_DPP__MAX8],
2956 double PrefetchLinesC[][2][DC__NUM_DPP__MAX8],
2957 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX8],
2958 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX8],
2959 unsigned int BytePerPixelY[],
2960 unsigned int BytePerPixelC[],
2961 unsigned int HTotal[],
2962 double PixelClock[],
2963 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX8],
2964 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX8],
2965 double MetaRowBytes[][2][DC__NUM_DPP__MAX8],
2966 bool_Bool DynamicMetadataEnable[],
2967 double ReadBandwidthLuma[],
2968 double ReadBandwidthChroma[],
2969 double DCFCLKPerState[],
2970 /* Output */
2971 double DCFCLKState[][2])
2972{
2973 unsigned int i, j, k;
2974 unsigned int dummy1;
2975 double dummy2, dummy3;
2976 double NormalEfficiency;
2977 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES20][2];
2978
2979 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2980 for (i = 0; i < DC__VOLTAGE_STATES20; ++i) {
2981 for (j = 0; j <= 1; ++j) {
2982 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX8];
2983 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX8];
2984 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX8];
2985 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX8];
2986 double MinimumTWait = 0.0;
2987 double DPTEBandwidth;
2988 double DCFCLKRequiredForAverageBandwidth;
2989 unsigned int ExtraLatencyBytes;
2990 double ExtraLatencyCycles;
2991 double DCFCLKRequiredForPeakBandwidth;
2992 unsigned int NoOfDPPState[DC__NUM_DPP__MAX8];
2993 double MinimumTvmPlus2Tr0;
2994
2995 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2996 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2997 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2998 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2999 / (15.75 * HTotal[k] / PixelClock[k]);
3000 }
3001
3002 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3003 NoOfDPPState[k] = NoOfDPP[i][j][k];
3004
3005 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3006 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3007
3008 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3009 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3010 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3011 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3012 HostVMMaxNonCachedPageTableLevels);
3013 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__7 + 95
3014 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3015 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3016 double DCFCLKCyclesRequiredInPrefetch;
3017 double PrefetchTime;
3018
3019 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3020 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3021 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3022 * BytePerPixelC[k]) / NormalEfficiency
3023 / ReturnBusWidth;
3024 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3025 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3026 / NormalEfficiency / ReturnBusWidth
3027 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3028 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3029 / ReturnBusWidth
3030 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3031 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3032 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3033 * HTotal[k] / PixelClock[k];
3034 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true1 &&
3035 DynamicMetadataEnable[k] == true1 && DynamicMetadataVMEnabled == true1) ?
3036 UrgLatency[i] * GPUVMMaxPageTableLevels *
3037 (HostVMEnable == true1 ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3038
3039 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3040 UseMALLForPStateChange[k],
3041 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3042 DRRDisplay[k],
3043 DRAMClockChangeLatencyFinal,
3044 FCLKChangeLatency,
3045 UrgLatency[i],
3046 SREnterPlusExitTime);
3047
3048 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3049 MinimumTWait - UrgLatency[i] *
3050 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3051 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true1 ?
3052 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3053 DynamicMetadataVMExtraLatency[k];
3054
3055 if (PrefetchTime > 0) {
3056 double ExpectedVRatioPrefetch;
3057
3058 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3059 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3060 DCFCLKCyclesRequiredInPrefetch);
3061 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3062 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3063 PrefetchPixelLinesTime[k] *
3064 dml_max(1.0, ExpectedVRatioPrefetch) *
3065 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3066 if (HostVMEnable == true1 || ImmediateFlipRequirement == true1) {
3067 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3068 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3069 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3070 NormalEfficiency / ReturnBusWidth;
3071 }
3072 } else {
3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3074 }
3075 if (DynamicMetadataEnable[k] == true1) {
3076 double TSetupPipe;
3077 double TdmbfPipe;
3078 double TdmsksPipe;
3079 double TdmecPipe;
3080 double AllowedTimeForUrgentExtraLatency;
3081
3082 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3083 MaxInterDCNTileRepeaters,
3084 RequiredDPPCLKPerSurface[i][j][k],
3085 RequiredDISPCLK[i][j],
3086 ProjectedDCFClkDeepSleep[i][j],
3087 PixelClock[k],
3088 HTotal[k],
3089 VTotal[k] - VActive[k],
3090 DynamicMetadataTransmittedBytes[k],
3091 DynamicMetadataLinesBeforeActiveRequired[k],
3092 Interlace[k],
3093 ProgressiveToInterlaceUnitInOPP,
3094
3095 /* output */
3096 &TSetupPipe,
3097 &TdmbfPipe,
3098 &TdmecPipe,
3099 &TdmsksPipe,
3100 &dummy1,
3101 &dummy2,
3102 &dummy3);
3103 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3104 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3105 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3106 if (AllowedTimeForUrgentExtraLatency > 0)
3107 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3108 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3109 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3110 else
3111 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3112 }
3113 }
3114 DCFCLKRequiredForPeakBandwidth = 0;
3115 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3116 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3117 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3118 }
3119 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true1 ?
3120 (HostVMEnable == true1 ? (GPUVMMaxPageTableLevels + 2) *
3121 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3122 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3123 double MaximumTvmPlus2Tr0PlusTsw;
3124
3125 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3126 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3127 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3128 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3129 } else {
3130 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3131 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3132 MinimumTvmPlus2Tr0 -
3133 PrefetchPixelLinesTime[k] / 4),
3134 (2 * ExtraLatencyCycles +
3135 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3136 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3137 }
3138 }
3139 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3140 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3141 }
3142 }
3143}
3144
/*
 * dml32_CalculateExtraLatencyBytes - byte count used for the extra urgent
 * latency estimate.
 *
 * The base amount is
 *   ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *                      TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.
 * When GPUVMEnable is set, every surface additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 *   HostVMInefficiencyFactor,
 * where "levels" is 0 unless both GPUVM and HostVM are enabled, in which
 * case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (floored at 0) depending on whether HostVMMinPageSize is below 2048,
 * below 1048576, or larger.
 *
 * The sum is accumulated as a double and converted to unsigned int on
 * return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int surf;
	unsigned int dynamic_levels = 0;
	double total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			dynamic_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			dynamic_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table traffic only counts when GPUVM is in use. */
	if (GPUVMEnable != true)
		return total_bytes;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		total_bytes += NumberOfDPP[surf] * dpte_group_bytes[surf] *
				(1 + 8 * dynamic_levels) * HostVMInefficiencyFactor;

	return total_bytes;
}
3185
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - VUpdate pixel offsets
 * and dynamic-metadata timing terms for one pipe.
 *
 * Outputs:
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater
 *                        delay) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk, repeater delay +
 *                        20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (sum of the three values above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half the vblank time when
 *                        DynamicMetadataLinesBeforeActiveRequired is 0,
 *                        otherwise that many line times; halved again when
 *                        interlace is enabled and the progressive-to-interlace
 *                        unit is not in the OPP.
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 * Dppclk, Dispclk, DCFClkDeepSleep and PixelClock are used as divisors and
 * are not checked for zero here.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles: print them with %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3240
3241double dml32_CalculateTWait(
3242 unsigned int PrefetchMode,
3243 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3244 bool_Bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3245 bool_Bool DRRDisplay,
3246 double DRAMClockChangeLatency,
3247 double FCLKChangeLatency,
3248 double UrgentLatency,
3249 double SREnterPlusExitTime)
3250{
3251 double TWait = 0.0;
3252
3253 if (PrefetchMode == 0 &&
3254 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3255 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3256 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3257 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3258 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3259 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3260 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3261 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3262 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3263 } else {
3264 TWait = UrgentLatency;
3265 }
3266
3267#ifdef __DML_VBA_DEBUG__
3268 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode){do { } while(0); };
3269 dml_print("DML::%s: TWait = %f\n", __func__, TWait){do { } while(0); };
3270#endif
3271 return TWait;
3272} // CalculateTWait
3273
3274// Function: get_return_bw_mbps
3275// Megabyte per second
3276double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3277 const int VoltageLevel,
3278 const bool_Bool HostVMEnable,
3279 const double DCFCLK,
3280 const double FabricClock,
3281 const double DRAMSpeed)
3282{
3283 double ReturnBW = 0.;
3284 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3285 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3286 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3287 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3288 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3289 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3290 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3291 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3292 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3293 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3294 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3295
3296 if (HostVMEnable != true1)
3297 ReturnBW = PixelDataOnlyReturnBW;
3298 else
3299 ReturnBW = PixelMixedWithVMDataReturnBW;
3300
3301#ifdef __DML_VBA_DEBUG__
3302 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel){do { } while(0); };
3303 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable){do { } while(0); };
3304 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK){do { } while(0); };
3305 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock){do { } while(0); };
3306 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed){do { } while(0); };
3307 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth){do { } while(0); };
3308 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth){do { } while(0); };
3309 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth){do { } while(0); };
3310 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW){do { } while(0); };
3311 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW){do { } while(0); };
3312 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW){do { } while(0); };
3313#endif
3314 return ReturnBW;
3315}
3316
3317// Function: get_return_bw_mbps_vm_only
3318// Megabyte per second
3319double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3320 const int VoltageLevel,
3321 const double DCFCLK,
3322 const double FabricClock,
3323 const double DRAMSpeed)
3324{
3325 double VMDataOnlyReturnBW = dml_min3(
3326 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3327 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3328 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3329 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3330 * (VoltageLevel < 2 ?
3331 soc->pct_ideal_dram_bw_after_urgent_strobe :
3332 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3333#ifdef __DML_VBA_DEBUG__
3334 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel){do { } while(0); };
3335 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK){do { } while(0); };
3336 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock){do { } while(0); };
3337 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed){do { } while(0); };
3338 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW){do { } while(0); };
3339#endif
3340 return VMDataOnlyReturnBW;
3341}
3342
3343double dml32_CalculateExtraLatency(
3344 unsigned int RoundTripPingLatencyCycles,
3345 unsigned int ReorderingBytes,
3346 double DCFCLK,
3347 unsigned int TotalNumberOfActiveDPP,
3348 unsigned int PixelChunkSizeInKByte,
3349 unsigned int TotalNumberOfDCCActiveDPP,
3350 unsigned int MetaChunkSize,
3351 double ReturnBW,
3352 bool_Bool GPUVMEnable,
3353 bool_Bool HostVMEnable,
3354 unsigned int NumberOfActiveSurfaces,
3355 unsigned int NumberOfDPP[],
3356 unsigned int dpte_group_bytes[],
3357 double HostVMInefficiencyFactor,
3358 double HostVMMinPageSize,
3359 unsigned int HostVMMaxNonCachedPageTableLevels)
3360{
3361 double ExtraLatencyBytes;
3362 double ExtraLatency;
3363
3364 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3365 ReorderingBytes,
3366 TotalNumberOfActiveDPP,
3367 PixelChunkSizeInKByte,
3368 TotalNumberOfDCCActiveDPP,
3369 MetaChunkSize,
3370 GPUVMEnable,
3371 HostVMEnable,
3372 NumberOfActiveSurfaces,
3373 NumberOfDPP,
3374 dpte_group_bytes,
3375 HostVMInefficiencyFactor,
3376 HostVMMinPageSize,
3377 HostVMMaxNonCachedPageTableLevels);
3378
3379 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__7 + 95) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3380
3381#ifdef __DML_VBA_DEBUG__
3382 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles){do { } while(0); };
3383 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK){do { } while(0); };
3384 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes){do { } while(0); };
3385 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW){do { } while(0); };
3386 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency){do { } while(0); };
3387#endif
3388
3389 return ExtraLatency;
3390} // CalculateExtraLatency
3391
3392bool_Bool dml32_CalculatePrefetchSchedule(
3393 struct vba_vars_st *v,
3394 unsigned int k,
3395 double HostVMInefficiencyFactor,
3396 DmlPipe *myPipe,
3397 unsigned int DSCDelay,
3398 unsigned int DPP_RECOUT_WIDTH,
3399 unsigned int VStartup,
3400 unsigned int MaxVStartup,
3401 double UrgentLatency,
3402 double UrgentExtraLatency,
3403 double TCalc,
3404 unsigned int PDEAndMetaPTEBytesFrame,
3405 unsigned int MetaRowByte,
3406 unsigned int PixelPTEBytesPerRow,
3407 double PrefetchSourceLinesY,
3408 unsigned int SwathWidthY,
3409 unsigned int VInitPreFillY,
3410 unsigned int MaxNumSwathY,
3411 double PrefetchSourceLinesC,
3412 unsigned int SwathWidthC,
3413 unsigned int VInitPreFillC,
3414 unsigned int MaxNumSwathC,
3415 unsigned int swath_width_luma_ub,
3416 unsigned int swath_width_chroma_ub,
3417 unsigned int SwathHeightY,
3418 unsigned int SwathHeightC,
3419 double TWait,
3420 double TPreReq,
3421 /* Output */
3422 double *DSTXAfterScaler,
3423 double *DSTYAfterScaler,
3424 double *DestinationLinesForPrefetch,
3425 double *PrefetchBandwidth,
3426 double *DestinationLinesToRequestVMInVBlank,
3427 double *DestinationLinesToRequestRowInVBlank,
3428 double *VRatioPrefetchY,
3429 double *VRatioPrefetchC,
3430 double *RequiredPrefetchPixDataBWLuma,
3431 double *RequiredPrefetchPixDataBWChroma,
3432 bool_Bool *NotEnoughTimeForDynamicMetadata,
3433 double *Tno_bw,
3434 double *prefetch_vmrow_bw,
3435 double *Tdmdl_vm,
3436 double *Tdmdl,
3437 double *TSetup,
3438 unsigned int *VUpdateOffsetPix,
3439 double *VUpdateWidthPix,
3440 double *VReadyOffsetPix)
3441{
3442 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3443 bool_Bool MyError = false0;
3444 unsigned int DPPCycles, DISPCLKCycles;
3445 double DSTTotalPixelsAfterScaler;
3446 double LineTime;
3447 double dst_y_prefetch_equ;
3448 double prefetch_bw_oto;
3449 double Tvm_oto;
3450 double Tr0_oto;
3451 double Tvm_oto_lines;
3452 double Tr0_oto_lines;
3453 double dst_y_prefetch_oto;
3454 double TimeForFetchingMetaPTE = 0;
3455 double TimeForFetchingRowInVBlank = 0;
3456 double LinesToRequestPrefetchPixelData = 0;
3457 double LinesForPrefetchBandwidth = 0;
3458 unsigned int HostVMDynamicLevelsTrips;
3459 double trip_to_mem;
3460 double Tvm_trips;
3461 double Tr0_trips;
3462 double Tvm_trips_rounded;
3463 double Tr0_trips_rounded;
3464 double Lsw_oto;
3465 double Tpre_rounded;
3466 double prefetch_bw_equ;
3467 double Tvm_equ;
3468 double Tr0_equ;
3469 double Tdmbf;
3470 double Tdmec;
3471 double Tdmsks;
3472 double prefetch_sw_bytes;
3473 double bytes_pp;
3474 double dep_bytes;
3475 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__4.0;
3476 double min_Lsw;
3477 double Tsw_est1 = 0;
3478 double Tsw_est3 = 0;
3479
3480 if (v->GPUVMEnable == true1 && v->HostVMEnable == true1)
3481 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3482 else
3483 HostVMDynamicLevelsTrips = 0;
3484#ifdef __DML_VBA_DEBUG__
3485 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable){do { } while(0); };
3486 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels){do { } while(0); };
3487 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable){do { } while(0); };
3488 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",{do { } while(0); }
3489 __func__, v->HostVMEnable, HostVMInefficiencyFactor){do { } while(0); };
3490#endif
3491 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3492 v->MaxInterDCNTileRepeaters,
3493 myPipe->Dppclk,
3494 myPipe->Dispclk,
3495 myPipe->DCFClkDeepSleep,
3496 myPipe->PixelClock,
3497 myPipe->HTotal,
3498 myPipe->VBlank,
3499 v->DynamicMetadataTransmittedBytes[k],
3500 v->DynamicMetadataLinesBeforeActiveRequired[k],
3501 myPipe->InterlaceEnable,
3502 myPipe->ProgressiveToInterlaceUnitInOPP,
3503 TSetup,
3504
3505 /* output */
3506 &Tdmbf,
3507 &Tdmec,
3508 &Tdmsks,
3509 VUpdateOffsetPix,
3510 VUpdateWidthPix,
3511 VReadyOffsetPix);
3512
3513 LineTime = myPipe->HTotal / myPipe->PixelClock;
3514 trip_to_mem = UrgentLatency;
3515 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3516
3517 if (v->DynamicMetadataVMEnabled == true1)
3518 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
3519 else
3520 *Tdmdl = TWait + UrgentExtraLatency;
3521
3522#ifdef __DML_VBA_ALLOW_DELTA__
3523 if (v->DynamicMetadataEnable[k] == false0)
3524 *Tdmdl = 0.0;
3525#endif
3526
3527 if (v->DynamicMetadataEnable[k] == true1) {
3528 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3529 *NotEnoughTimeForDynamicMetadata = true1;
3530#ifdef __DML_VBA_DEBUG__
3531 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__){do { } while(0); };
3532 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",{do { } while(0); }
3533 __func__, Tdmbf){do { } while(0); };
3534 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec){do { } while(0); };
3535 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",{do { } while(0); }
3536 __func__, Tdmsks){do { } while(0); };
3537 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",{do { } while(0); }
3538 __func__, *Tdmdl){do { } while(0); };
3539#endif
3540 } else {
3541 *NotEnoughTimeForDynamicMetadata = false0;
3542 }
3543 } else {
3544 *NotEnoughTimeForDynamicMetadata = false0;
3545 }
3546
3547 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true1 && v->DynamicMetadataVMEnabled == true1 &&
3548 v->GPUVMEnable == true1 ? TWait + Tvm_trips : 0);
3549
3550 if (myPipe->ScalerEnabled)
3551 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3552 else
3553 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3554
3555 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3556
3557 DISPCLKCycles = v->DISPCLKDelaySubtotal;
3558
3559 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3560 return true1;
3561
3562 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3563 myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3564
3565 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3566 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3567 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3568 myPipe->HActive / 2 : 0)
3569 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3570
3571#ifdef __DML_VBA_DEBUG__
3572 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles){do { } while(0); };
3573 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock){do { } while(0); };
3574 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk){do { } while(0); };
3575 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles){do { } while(0); };
3576 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk){do { } while(0); };
3577 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay){do { } while(0); };
3578 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode){do { } while(0); };
3579 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH){do { } while(0); };
3580 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler){do { } while(0); };
3581#endif
3582
3583 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3584 *DSTYAfterScaler = 1;
3585 else
3586 *DSTYAfterScaler = 0;
3587
3588 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3589 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3590 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3591#ifdef __DML_VBA_DEBUG__
3592 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler){do { } while(0); };
3593 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler){do { } while(0); };
3594#endif
3595
3596 MyError = false0;
3597
3598 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3599
3600 if (v->GPUVMEnable == true1) {
3601 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3602 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3603 if (v->GPUVMMaxPageTableLevels >= 3) {
3604 *Tno_bw = UrgentExtraLatency + trip_to_mem *
3605 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3606 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true1) {
3607 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3608 4.0 * LineTime; // VBA_ERROR
3609 *Tno_bw = UrgentExtraLatency;
3610 } else {
3611 *Tno_bw = 0;
3612 }
3613 } else if (myPipe->DCCEnable == true1) {
3614 Tvm_trips_rounded = LineTime / 4.0;
3615 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3616 *Tno_bw = 0;
3617 } else {
3618 Tvm_trips_rounded = LineTime / 4.0;
3619 Tr0_trips_rounded = LineTime / 2.0;
3620 *Tno_bw = 0;
3621 }
3622 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3623 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3624
3625 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3626 || myPipe->SourcePixelFormat == dm_420_12) {
3627 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3628 } else {
3629 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3630 }
3631
3632 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3633 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3634 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3635 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3636
3637 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3638 min_Lsw = dml_max(min_Lsw, 1.0);
3639 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3640
3641 if (v->GPUVMEnable == true1) {
3642 Tvm_oto = dml_max3(
3643 Tvm_trips,
3644 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3645 LineTime / 4.0);
3646 } else
3647 Tvm_oto = LineTime / 4.0;
3648
3649 if ((v->GPUVMEnable == true1 || myPipe->DCCEnable == true1)) {
3650 Tr0_oto = dml_max4(
3651 Tr0_trips,
3652 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3653 (LineTime - Tvm_oto)/2.0,
3654 LineTime / 4.0);
3655#ifdef __DML_VBA_DEBUG__
3656 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,{do { } while(0); }
3657 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto){do { } while(0); };
3658 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips){do { } while(0); };
3659 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto){do { } while(0); };
3660 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4){do { } while(0); };
3661#endif
3662 } else
3663 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3664
3665 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3666 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3667 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3668
3669 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3670 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3671
3672 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__63.75);
3673#ifdef __DML_VBA_DEBUG__
3674 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal){do { } while(0); };
3675 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw){do { } while(0); };
3676 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw){do { } while(0); };
3677 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency){do { } while(0); };
3678 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem){do { } while(0); };
3679 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY){do { } while(0); };
3680 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY){do { } while(0); };
3681 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub){do { } while(0); };
3682 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC){do { } while(0); };
3683 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC){do { } while(0); };
3684 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub){do { } while(0); };
3685 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes){do { } while(0); };
3686 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp){do { } while(0); };
3687 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); };
3688 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte){do { } while(0); };
3689 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow){do { } while(0); };
3690 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor){do { } while(0); };
3691 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips){do { } while(0); };
3692 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips){do { } while(0); };
3693 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto){do { } while(0); };
3694 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto){do { } while(0); };
3695 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto){do { } while(0); };
3696 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines){do { } while(0); };
3697 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines){do { } while(0); };
3698 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto){do { } while(0); };
3699 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto){do { } while(0); };
3700 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ){do { } while(0); };
3701#endif
3702
3703 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3704 Tpre_rounded = dst_y_prefetch_equ * LineTime;
3705#ifdef __DML_VBA_DEBUG__
3706 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ){do { } while(0); };
3707 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime){do { } while(0); };
3708 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup){do { } while(0); };
3709 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",{do { } while(0); }
3710 __func__, VStartup * LineTime){do { } while(0); };
3711 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup){do { } while(0); };
3712 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc){do { } while(0); };
3713 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf){do { } while(0); };
3714 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec){do { } while(0); };
3715 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm){do { } while(0); };
3716 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl){do { } while(0); };
3717 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",{do { } while(0); }
3718 __func__, *DSTYAfterScaler){do { } while(0); };
3719#endif
3720 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3721 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3722
3723 if (prefetch_sw_bytes < dep_bytes)
3724 prefetch_sw_bytes = 2 * dep_bytes;
3725
3726 *PrefetchBandwidth = 0;
3727 *DestinationLinesToRequestVMInVBlank = 0;
3728 *DestinationLinesToRequestRowInVBlank = 0;
3729 *VRatioPrefetchY = 0;
3730 *VRatioPrefetchC = 0;
3731 *RequiredPrefetchPixDataBWLuma = 0;
3732 if (dst_y_prefetch_equ > 1 &&
3733 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__63.75)) {
3734 double PrefetchBandwidth1;
3735 double PrefetchBandwidth2;
3736 double PrefetchBandwidth3;
3737 double PrefetchBandwidth4;
3738
3739 if (Tpre_rounded - *Tno_bw > 0) {
3740 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3741 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3742 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3743 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3744 } else
3745 PrefetchBandwidth1 = 0;
3746
3747 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3748 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3749 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3750 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3751 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3752 }
3753
3754 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3755 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3756 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3757 else
3758 PrefetchBandwidth2 = 0;
3759
3760 if (Tpre_rounded - Tvm_trips_rounded > 0) {
3761 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3762 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3763 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3764 } else
3765 PrefetchBandwidth3 = 0;
3766
3767
3768 if (VStartup == MaxVStartup &&
3769 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3770 LineTime - Tvm_trips_rounded > 0) {
3771 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3772 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3773 }
3774
3775 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3776 PrefetchBandwidth4 = prefetch_sw_bytes /
3777 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3778 } else {
3779 PrefetchBandwidth4 = 0;
3780 }
3781
3782#ifdef __DML_VBA_DEBUG__
3783 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded){do { } while(0); };
3784 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw){do { } while(0); };
3785 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded){do { } while(0); };
3786 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1){do { } while(0); };
3787 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3){do { } while(0); };
3788 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1){do { } while(0); };
3789 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2){do { } while(0); };
3790 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3){do { } while(0); };
3791 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4){do { } while(0); };
3792#endif
3793 {
3794 bool_Bool Case1OK;
3795 bool_Bool Case2OK;
3796 bool_Bool Case3OK;
3797
3798 if (PrefetchBandwidth1 > 0) {
3799 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3800 >= Tvm_trips_rounded
3801 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3802 / PrefetchBandwidth1 >= Tr0_trips_rounded) {
3803 Case1OK = true1;
3804 } else {
3805 Case1OK = false0;
3806 }
3807 } else {
3808 Case1OK = false0;
3809 }
3810
3811 if (PrefetchBandwidth2 > 0) {
3812 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3813 >= Tvm_trips_rounded
3814 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3815 / PrefetchBandwidth2 < Tr0_trips_rounded) {
3816 Case2OK = true1;
3817 } else {
3818 Case2OK = false0;
3819 }
3820 } else {
3821 Case2OK = false0;
3822 }
3823
3824 if (PrefetchBandwidth3 > 0) {
3825 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3826 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3827 HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3828 Tr0_trips_rounded) {
3829 Case3OK = true1;
3830 } else {
3831 Case3OK = false0;
3832 }
3833 } else {
3834 Case3OK = false0;
3835 }
3836
3837 if (Case1OK)
3838 prefetch_bw_equ = PrefetchBandwidth1;
3839 else if (Case2OK)
3840 prefetch_bw_equ = PrefetchBandwidth2;
3841 else if (Case3OK)
3842 prefetch_bw_equ = PrefetchBandwidth3;
3843 else
3844 prefetch_bw_equ = PrefetchBandwidth4;
3845
3846#ifdef __DML_VBA_DEBUG__
3847 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK){do { } while(0); };
3848 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK){do { } while(0); };
3849 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK){do { } while(0); };
3850 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ){do { } while(0); };
3851#endif
3852
3853 if (prefetch_bw_equ > 0) {
3854 if (v->GPUVMEnable == true1) {
3855 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3856 HostVMInefficiencyFactor / prefetch_bw_equ,
3857 Tvm_trips, LineTime / 4);
3858 } else {
3859 Tvm_equ = LineTime / 4;
3860 }
3861
3862 if ((v->GPUVMEnable == true1 || myPipe->DCCEnable == true1)) {
3863 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3864 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3865 (LineTime - Tvm_equ) / 2, LineTime / 4);
3866 } else {
3867 Tr0_equ = (LineTime - Tvm_equ) / 2;
3868 }
3869 } else {
3870 Tvm_equ = 0;
3871 Tr0_equ = 0;
3872#ifdef __DML_VBA_DEBUG__
3873 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__){do { } while(0); };
3874#endif
3875 }
3876 }
3877
3878 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3879 if (dst_y_prefetch_oto * LineTime < TPreReq) {
3880 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3881 } else {
3882 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
3883 }
3884 TimeForFetchingMetaPTE = Tvm_oto;
3885 TimeForFetchingRowInVBlank = Tr0_oto;
3886 *PrefetchBandwidth = prefetch_bw_oto;
3887 /* Clamp to oto for bandwidth calculation */
3888 LinesForPrefetchBandwidth = dst_y_prefetch_oto;
3889 } else {
3890 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
3891 TimeForFetchingMetaPTE = Tvm_equ;
3892 TimeForFetchingRowInVBlank = Tr0_equ;
3893 *PrefetchBandwidth = prefetch_bw_equ;
3894 /* Clamp to equ for bandwidth calculation */
3895 LinesForPrefetchBandwidth = dst_y_prefetch_equ;
3896 }
3897
3898 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3899
3900 *DestinationLinesToRequestRowInVBlank =
3901 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3902
3903 LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
3904 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3905
3906#ifdef __DML_VBA_DEBUG__
3907 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch){do { } while(0); };
3908 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",{do { } while(0); }
3909 __func__, *DestinationLinesToRequestVMInVBlank){do { } while(0); };
3910 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank){do { } while(0); };
3911 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime){do { } while(0); };
3912 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",{do { } while(0); }
3913 __func__, *DestinationLinesToRequestRowInVBlank){do { } while(0); };
3914 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY){do { } while(0); };
3915 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData){do { } while(0); };
3916#endif
3917
3918 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3919 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3920 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3921#ifdef __DML_VBA_DEBUG__
3922 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY){do { } while(0); };
3923 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY){do { } while(0); };
3924 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY){do { } while(0); };
3925#endif
3926 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3927 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3928 *VRatioPrefetchY =
3929 dml_max((double) PrefetchSourceLinesY /
3930 LinesToRequestPrefetchPixelData,
3931 (double) MaxNumSwathY * SwathHeightY /
3932 (LinesToRequestPrefetchPixelData -
3933 (VInitPreFillY - 3.0) / 2.0));
3934 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3935 } else {
3936 MyError = true1;
3937 *VRatioPrefetchY = 0;
3938 }
3939#ifdef __DML_VBA_DEBUG__
3940 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY){do { } while(0); };
3941 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY){do { } while(0); };
3942 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY){do { } while(0); };
3943#endif
3944 }
3945
3946 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3947 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3948
3949#ifdef __DML_VBA_DEBUG__
3950 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC){do { } while(0); };
3951 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC){do { } while(0); };
3952 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC){do { } while(0); };
3953#endif
3954 if ((SwathHeightC > 4)) {
3955 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3956 *VRatioPrefetchC =
3957 dml_max(*VRatioPrefetchC,
3958 (double) MaxNumSwathC * SwathHeightC /
3959 (LinesToRequestPrefetchPixelData -
3960 (VInitPreFillC - 3.0) / 2.0));
3961 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3962 } else {
3963 MyError = true1;
3964 *VRatioPrefetchC = 0;
3965 }
3966#ifdef __DML_VBA_DEBUG__
3967 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC){do { } while(0); };
3968 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC){do { } while(0); };
3969 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC){do { } while(0); };
3970#endif
3971 }
3972
3973 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3974 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3975 / LineTime;
3976
3977#ifdef __DML_VBA_DEBUG__
3978 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY){do { } while(0); };
3979 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub){do { } while(0); };
3980 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime){do { } while(0); };
3981 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",{do { } while(0); }
3982 __func__, *RequiredPrefetchPixDataBWLuma){do { } while(0); };
3983#endif
3984 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3985 LinesToRequestPrefetchPixelData
3986 * myPipe->BytePerPixelC
3987 * swath_width_chroma_ub / LineTime;
3988 } else {
3989 MyError = true1;
3990#ifdef __DML_VBA_DEBUG__
3991 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",{do { } while(0); }
3992 __func__, LinesToRequestPrefetchPixelData){do { } while(0); };
3993#endif
3994 *VRatioPrefetchY = 0;
3995 *VRatioPrefetchC = 0;
3996 *RequiredPrefetchPixDataBWLuma = 0;
3997 *RequiredPrefetchPixDataBWChroma = 0;
3998 }
3999#ifdef __DML_VBA_DEBUG__
4000 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",{do { } while(0); }
4001 (double)LinesToRequestPrefetchPixelData * LineTime +{do { } while(0); }
4002 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE){do { } while(0); };
4003 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE){do { } while(0); };
4004 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",{do { } while(0); }
4005 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime){do { } while(0); };
4006 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"){do { } while(0); };
4007 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -{do { } while(0); }
4008 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +{do { } while(0); }
4009 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup){do { } while(0); };
4010 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",{do { } while(0); }
4011 PixelPTEBytesPerRow){do { } while(0); };
4012#endif
4013 } else {
4014 MyError = true1;
4015#ifdef __DML_VBA_DEBUG__
4016 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",{do { } while(0); }
4017 __func__, dst_y_prefetch_equ){do { } while(0); };
4018#endif
4019 }
4020
4021 {
4022 double prefetch_vm_bw;
4023 double prefetch_row_bw;
4024
4025 if (PDEAndMetaPTEBytesFrame == 0) {
4026 prefetch_vm_bw = 0;
4027 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
4028#ifdef __DML_VBA_DEBUG__
4029 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); };
4030 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor){do { } while(0); };
4031 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",{do { } while(0); }
4032 __func__, *DestinationLinesToRequestVMInVBlank){do { } while(0); };
4033 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime){do { } while(0); };
4034#endif
4035 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4036 (*DestinationLinesToRequestVMInVBlank * LineTime);
4037#ifdef __DML_VBA_DEBUG__
4038 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw){do { } while(0); };
4039#endif
4040 } else {
4041 prefetch_vm_bw = 0;
4042 MyError = true1;
4043#ifdef __DML_VBA_DEBUG__
4044 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",{do { } while(0); }
4045 __func__, *DestinationLinesToRequestVMInVBlank){do { } while(0); };
4046#endif
4047 }
4048
4049 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4050 prefetch_row_bw = 0;
4051 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
4052 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4053 (*DestinationLinesToRequestRowInVBlank * LineTime);
4054
4055#ifdef __DML_VBA_DEBUG__
4056 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte){do { } while(0); };
4057 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow){do { } while(0); };
4058 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",{do { } while(0); }
4059 __func__, *DestinationLinesToRequestRowInVBlank){do { } while(0); };
4060 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw){do { } while(0); };
4061#endif
4062 } else {
4063 prefetch_row_bw = 0;
4064 MyError = true1;
4065#ifdef __DML_VBA_DEBUG__
4066 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",{do { } while(0); }
4067 __func__, *DestinationLinesToRequestRowInVBlank){do { } while(0); };
4068#endif
4069 }
4070
4071 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4072 }
4073
4074 if (MyError) {
4075 *PrefetchBandwidth = 0;
4076 TimeForFetchingMetaPTE = 0;
Value stored to 'TimeForFetchingMetaPTE' is never read
4077 TimeForFetchingRowInVBlank = 0;
4078 *DestinationLinesToRequestVMInVBlank = 0;
4079 *DestinationLinesToRequestRowInVBlank = 0;
4080 *DestinationLinesForPrefetch = 0;
4081 LinesToRequestPrefetchPixelData = 0;
4082 *VRatioPrefetchY = 0;
4083 *VRatioPrefetchC = 0;
4084 *RequiredPrefetchPixDataBWLuma = 0;
4085 *RequiredPrefetchPixDataBWChroma = 0;
4086 }
4087
4088 return MyError;
4089} // CalculatePrefetchSchedule
4090
4091void dml32_CalculateFlipSchedule(
4092 double HostVMInefficiencyFactor,
4093 double UrgentExtraLatency,
4094 double UrgentLatency,
4095 unsigned int GPUVMMaxPageTableLevels,
4096 bool_Bool HostVMEnable,
4097 unsigned int HostVMMaxNonCachedPageTableLevels,
4098 bool_Bool GPUVMEnable,
4099 double HostVMMinPageSize,
4100 double PDEAndMetaPTEBytesPerFrame,
4101 double MetaRowBytes,
4102 double DPTEBytesPerRow,
4103 double BandwidthAvailableForImmediateFlip,
4104 unsigned int TotImmediateFlipBytes,
4105 enum source_format_class SourcePixelFormat,
4106 double LineTime,
4107 double VRatio,
4108 double VRatioChroma,
4109 double Tno_bw,
4110 bool_Bool DCCEnable,
4111 unsigned int dpte_row_height,
4112 unsigned int meta_row_height,
4113 unsigned int dpte_row_height_chroma,
4114 unsigned int meta_row_height_chroma,
4115 bool_Bool use_one_row_for_frame_flip,
4116
4117 /* Output */
4118 double *DestinationLinesToRequestVMInImmediateFlip,
4119 double *DestinationLinesToRequestRowInImmediateFlip,
4120 double *final_flip_bw,
4121 bool_Bool *ImmediateFlipSupportedForPipe)
4122{
4123 double min_row_time = 0.0;
4124 unsigned int HostVMDynamicLevelsTrips;
4125 double TimeForFetchingMetaPTEImmediateFlip;
4126 double TimeForFetchingRowInVBlankImmediateFlip;
4127 double ImmediateFlipBW;
4128
4129 if (GPUVMEnable == true1 && HostVMEnable == true1)
4130 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4131 else
4132 HostVMDynamicLevelsTrips = 0;
4133
4134#ifdef __DML_VBA_DEBUG__
4135 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes){do { } while(0); };
4136 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip){do { } while(0); };
4137#endif
4138
4139 if (TotImmediateFlipBytes > 0) {
4140 if (use_one_row_for_frame_flip) {
4141 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4142 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4143 } else {
4144 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4145 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4146 }
4147 if (GPUVMEnable == true1) {
4148 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4149 HostVMInefficiencyFactor / ImmediateFlipBW,
4150 UrgentExtraLatency + UrgentLatency *
4151 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4152 LineTime / 4.0);
4153 } else {
4154 TimeForFetchingMetaPTEImmediateFlip = 0;
4155 }
4156 if ((GPUVMEnable == true1 || DCCEnable == true1)) {
4157 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4158 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4159 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4160 } else {
4161 TimeForFetchingRowInVBlankImmediateFlip = 0;
4162 }
4163
4164 *DestinationLinesToRequestVMInImmediateFlip =
4165 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4166 *DestinationLinesToRequestRowInImmediateFlip =
4167 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4168
4169 if (GPUVMEnable == true1) {
4170 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4171 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4172 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4173 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4174 } else if ((GPUVMEnable == true1 || DCCEnable == true1)) {
4175 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4176 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4177 } else {
4178 *final_flip_bw = 0;
4179 }
4180 } else {
4181 TimeForFetchingMetaPTEImmediateFlip = 0;
4182 TimeForFetchingRowInVBlankImmediateFlip = 0;
4183 *DestinationLinesToRequestVMInImmediateFlip = 0;
4184 *DestinationLinesToRequestRowInImmediateFlip = 0;
4185 *final_flip_bw = 0;
4186 }
4187
4188 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4189 if (GPUVMEnable == true1 && DCCEnable != true1) {
4190 min_row_time = dml_min(dpte_row_height *
4191 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4192 } else if (GPUVMEnable != true1 && DCCEnable == true1) {
4193 min_row_time = dml_min(meta_row_height *
4194 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4195 } else {
4196 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4197 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4198 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4199 }
4200 } else {
4201 if (GPUVMEnable == true1 && DCCEnable != true1) {
4202 min_row_time = dpte_row_height * LineTime / VRatio;
4203 } else if (GPUVMEnable != true1 && DCCEnable == true1) {
4204 min_row_time = meta_row_height * LineTime / VRatio;
4205 } else {
4206 min_row_time =
4207 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4208 }
4209 }
4210
4211 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4212 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4213 > min_row_time) {
4214 *ImmediateFlipSupportedForPipe = false0;
4215 } else {
4216 *ImmediateFlipSupportedForPipe = true1;
4217 }
4218
4219#ifdef __DML_VBA_DEBUG__
4220 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable){do { } while(0); };
4221 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable){do { } while(0); };
4222 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",{do { } while(0); }
4223 __func__, *DestinationLinesToRequestVMInImmediateFlip){do { } while(0); };
4224 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",{do { } while(0); }
4225 __func__, *DestinationLinesToRequestRowInImmediateFlip){do { } while(0); };
4226 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip){do { } while(0); };
4227 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",{do { } while(0); }
4228 __func__, TimeForFetchingRowInVBlankImmediateFlip){do { } while(0); };
4229 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time){do { } while(0); };
4230 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe){do { } while(0); };
4231#endif
4232} // CalculateFlipSchedule
4233
4234void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4235 struct vba_vars_st *v,
4236 unsigned int PrefetchMode,
4237 double DCFCLK,
4238 double ReturnBW,
4239 SOCParametersList mmSOCParameters,
4240 double SOCCLK,
4241 double DCFClkDeepSleep,
4242 unsigned int DETBufferSizeY[],
4243 unsigned int DETBufferSizeC[],
4244 unsigned int SwathHeightY[],
4245 unsigned int SwathHeightC[],
4246 double SwathWidthY[],
4247 double SwathWidthC[],
4248 unsigned int DPPPerSurface[],
4249 double BytePerPixelDETY[],
4250 double BytePerPixelDETC[],
4251 double DSTXAfterScaler[],
4252 double DSTYAfterScaler[],
4253 bool_Bool UnboundedRequestEnabled,
4254 unsigned int CompressedBufferSizeInkByte,
4255
4256 /* Output */
4257 enum clock_change_support *DRAMClockChangeSupport,
4258 double MaxActiveDRAMClockChangeLatencySupported[],
4259 unsigned int SubViewportLinesNeededInMALL[],
4260 enum dm_fclock_change_support *FCLKChangeSupport,
4261 double *MinActiveFCLKChangeLatencySupported,
4262 bool_Bool *USRRetrainingSupport,
4263 double ActiveDRAMClockChangeLatencyMargin[])
4264{
4265 unsigned int i, j, k;
4266 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4267 unsigned int DRAMClockChangeSupportNumber = 0;
4268 unsigned int LastSurfaceWithoutMargin;
4269 unsigned int DRAMClockChangeMethod = 0;
4270 bool_Bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false0;
4271 double MinActiveFCLKChangeMargin = 0.;
4272 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4273 double ActiveClockChangeLatencyHidingY;
4274 double ActiveClockChangeLatencyHidingC;
4275 double ActiveClockChangeLatencyHiding;
4276 double EffectiveDETBufferSizeY;
4277 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX8];
4278 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX8];
4279 double TotalPixelBW = 0.0;
4280 bool_Bool SynchronizedSurfaces[DC__NUM_DPP__MAX8][DC__NUM_DPP__MAX8];
4281 double EffectiveLBLatencyHidingY;
4282 double EffectiveLBLatencyHidingC;
4283 double LinesInDETY[DC__NUM_DPP__MAX8];
4284 double LinesInDETC[DC__NUM_DPP__MAX8];
4285 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX8];
4286 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX8];
4287 double FullDETBufferingTimeY;
4288 double FullDETBufferingTimeC;
4289 double WritebackDRAMClockChangeLatencyMargin;
4290 double WritebackFCLKChangeLatencyMargin;
4291 double WritebackLatencyHiding;
4292 bool_Bool SameTimingForFCLKChange;
4293
4294 unsigned int TotalActiveWriteback = 0;
4295 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX8];
4296 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX8];
4297
4298 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4299 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4300 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4301 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4302 v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4303 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4304 + 10 / DCFClkDeepSleep;
4305 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4306 + 10 / DCFClkDeepSleep;
4307 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4308 + 10 / DCFClkDeepSleep;
4309 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4310 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4311
4312#ifdef __DML_VBA_DEBUG__
4313 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency){do { } while(0); };
4314 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency){do { } while(0); };
4315 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency){do { } while(0); };
4316 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark){do { } while(0); };
4317 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark){do { } while(0); };
4318 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark){do { } while(0); };
4319 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark){do { } while(0); };
4320 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark){do { } while(0); };
4321 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark){do { } while(0); };
4322 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark){do { } while(0); };
4323 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",{do { } while(0); }
4324 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark){do { } while(0); };
4325#endif
4326
4327
4328 TotalActiveWriteback = 0;
4329 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4330 if (v->WritebackEnable[k] == true1)
4331 TotalActiveWriteback = TotalActiveWriteback + 1;
4332 }
4333
4334 if (TotalActiveWriteback <= 1) {
4335 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4336 } else {
4337 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4338 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4339 }
4340 if (v->USRRetrainingRequiredFinal)
4341 v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
4342 + mmSOCParameters.USRRetrainingLatency;
4343
4344 if (TotalActiveWriteback <= 1) {
4345 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4346 + mmSOCParameters.WritebackLatency;
4347 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4348 + mmSOCParameters.WritebackLatency;
4349 } else {
4350 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4351 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4352 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4353 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4354 }
4355
4356 if (v->USRRetrainingRequiredFinal)
4357 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4358 + mmSOCParameters.USRRetrainingLatency;
4359
4360 if (v->USRRetrainingRequiredFinal)
4361 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4362 + mmSOCParameters.USRRetrainingLatency;
4363
4364#ifdef __DML_VBA_DEBUG__
4365 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",{do { } while(0); }
4366 __func__, v->Watermark.WritebackDRAMClockChangeWatermark){do { } while(0); };
4367 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark){do { } while(0); };
4368 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark){do { } while(0); };
4369 dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal){do { } while(0); };
4370 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency){do { } while(0); };
4371#endif
4372
4373 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4374 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4375 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4376 }
4377
4378 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4379
4380 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4381 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4382
4383
4384#ifdef __DML_VBA_DEBUG__
4385 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines){do { } while(0); };
4386 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal){do { } while(0); };
4387 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]){do { } while(0); };
4388 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]){do { } while(0); };
4389 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]){do { } while(0); };
4390#endif
4391
4392 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4393 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4394 EffectiveDETBufferSizeY = DETBufferSizeY[k];
4395
4396 if (UnboundedRequestEnabled) {
4397 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4398 + CompressedBufferSizeInkByte * 1024
4399 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4400 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4401 }
4402
4403 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4404 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4405 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4406
4407 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4408 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4409
4410 if (v->NumberOfActiveSurfaces > 1) {
4411 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4412 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4413 / v->PixelClock[k] / v->VRatio[k];
4414 }
4415
4416 if (BytePerPixelDETC[k] > 0) {
4417 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4418 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4419 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4420 / v->VRatioChroma[k];
4421 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4422 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4423 / v->PixelClock[k];
4424 if (v->NumberOfActiveSurfaces > 1) {
4425 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4426 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4427 / v->PixelClock[k] / v->VRatioChroma[k];
4428 }
4429 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4430 ActiveClockChangeLatencyHidingC);
4431 } else {
4432 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4433 }
4434
4435 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4436 - v->Watermark.DRAMClockChangeWatermark;
4437 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4438 - v->Watermark.FCLKChangeWatermark;
4439 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4440
4441 if (v->WritebackEnable[k]) {
4442 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4443 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4444 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4445 if (v->WritebackPixelFormat[k] == dm_444_64)
4446 WritebackLatencyHiding = WritebackLatencyHiding / 2;
4447
4448 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4449 - v->Watermark.WritebackDRAMClockChangeWatermark;
4450
4451 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4452 - v->Watermark.WritebackFCLKChangeWatermark;
4453
4454 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4455 WritebackFCLKChangeLatencyMargin);
4456 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4457 WritebackDRAMClockChangeLatencyMargin);
4458 }
4459 MaxActiveDRAMClockChangeLatencySupported[k] =
4460 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4461 0 :
4462 (ActiveDRAMClockChangeLatencyMargin[k]
4463 + mmSOCParameters.DRAMClockChangeLatency);
4464 }
4465
4466 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4467 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4468 if (i == j ||
4469 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4470 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4471 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4472 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4473 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4474 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4475 (v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4476 SynchronizedSurfaces[i][j] = true1;
4477 } else {
4478 SynchronizedSurfaces[i][j] = false0;
4479 }
4480 }
4481 }
4482
4483 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4484 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4485 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4486 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4487 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true1;
4488 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4489 SurfaceWithMinActiveFCLKChangeMargin = k;
4490 }
4491 }
4492
4493 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4494
4495 SameTimingForFCLKChange = true1;
4496 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4497 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4498 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4499 (SameTimingForFCLKChange ||
4500 ActiveFCLKChangeLatencyMargin[k] <
4501 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4502 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4503 }
4504 SameTimingForFCLKChange = false0;
4505 }
4506 }
4507
4508 if (MinActiveFCLKChangeMargin > 0) {
4509 *FCLKChangeSupport = dm_fclock_change_vactive;
4510 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4511 (PrefetchMode <= 1)) {
4512 *FCLKChangeSupport = dm_fclock_change_vblank;
4513 } else {
4514 *FCLKChangeSupport = dm_fclock_change_unsupported;
4515 }
4516
4517 *USRRetrainingSupport = true1;
4518 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4519 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4520 (USRRetrainingLatencyMargin[k] < 0)) {
4521 *USRRetrainingSupport = false0;
4522 }
4523 }
4524
4525 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4526 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4527 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4528 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4529 ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4530 if (PrefetchMode > 0) {
4531 DRAMClockChangeSupportNumber = 2;
4532 } else if (DRAMClockChangeSupportNumber == 0) {
4533 DRAMClockChangeSupportNumber = 1;
4534 LastSurfaceWithoutMargin = k;
4535 } else if (DRAMClockChangeSupportNumber == 1 &&
4536 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4537 DRAMClockChangeSupportNumber = 2;
4538 }
4539 }
4540 }
4541
4542 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4543 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4544 DRAMClockChangeMethod = 1;
4545 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4546 DRAMClockChangeMethod = 2;
4547 }
4548
4549 if (DRAMClockChangeMethod == 0) {
4550 if (DRAMClockChangeSupportNumber == 0)
4551 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4552 else if (DRAMClockChangeSupportNumber == 1)
4553 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4554 else
4555 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4556 } else if (DRAMClockChangeMethod == 1) {
4557 if (DRAMClockChangeSupportNumber == 0)
4558 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4559 else if (DRAMClockChangeSupportNumber == 1)
4560 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4561 else
4562 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4563 } else {
4564 if (DRAMClockChangeSupportNumber == 0)
4565 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4566 else if (DRAMClockChangeSupportNumber == 1)
4567 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4568 else
4569 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4570 }
4571
4572 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4573 unsigned int dst_y_pstate;
4574 unsigned int src_y_pstate_l;
4575 unsigned int src_y_pstate_c;
4576 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4577
4578 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4579 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4580 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4581 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4582
4583#ifdef __DML_VBA_DEBUG__
4584dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]){do { } while(0); };
4585dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]){do { } while(0); };
4586dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]){do { } while(0); };
4587dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]){do { } while(0); };
4588dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]){do { } while(0); };
4589dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate){do { } while(0); };
4590dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l){do { } while(0); };
4591dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l){do { } while(0); };
4592dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]){do { } while(0); };
4593dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l){do { } while(0); };
4594#endif
4595 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4596
4597 if (BytePerPixelDETC[k] > 0) {
4598 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4599 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4600 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4601 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4602
4603#ifdef __DML_VBA_DEBUG__
4604dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c){do { } while(0); };
4605dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c){do { } while(0); };
4606dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]){do { } while(0); };
4607dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c){do { } while(0); };
4608#endif
4609 }
4610 }
4611#ifdef __DML_VBA_DEBUG__
4612 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport){do { } while(0); };
4613 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport){do { } while(0); };
4614 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",{do { } while(0); }
4615 __func__, *MinActiveFCLKChangeLatencySupported){do { } while(0); };
4616 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport){do { } while(0); };
4617#endif
4618} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4619
4620double dml32_CalculateWriteBackDISPCLK(
4621 enum source_format_class WritebackPixelFormat,
4622 double PixelClock,
4623 double WritebackHRatio,
4624 double WritebackVRatio,
4625 unsigned int WritebackHTaps,
4626 unsigned int WritebackVTaps,
4627 unsigned int WritebackSourceWidth,
4628 unsigned int WritebackDestinationWidth,
4629 unsigned int HTotal,
4630 unsigned int WritebackLineBufferSize,
4631 double DISPCLKDPPCLKVCOSpeed)
4632{
4633 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4634
4635 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4636 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4637 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4638 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4639 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4640}
4641
4642void dml32_CalculateMinAndMaxPrefetchMode(
4643 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4644 unsigned int *MinPrefetchMode,
4645 unsigned int *MaxPrefetchMode)
4646{
4647 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4648 *MinPrefetchMode = 3;
4649 *MaxPrefetchMode = 3;
4650 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4651 *MinPrefetchMode = 2;
4652 *MaxPrefetchMode = 2;
4653 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4654 *MinPrefetchMode = 1;
4655 *MaxPrefetchMode = 1;
4656 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4657 *MinPrefetchMode = 0;
4658 *MaxPrefetchMode = 0;
4659 } else {
4660 *MinPrefetchMode = 0;
4661 *MaxPrefetchMode = 3;
4662 }
4663} // CalculateMinAndMaxPrefetchMode
4664
4665void dml32_CalculatePixelDeliveryTimes(
4666 unsigned int NumberOfActiveSurfaces,
4667 double VRatio[],
4668 double VRatioChroma[],
4669 double VRatioPrefetchY[],
4670 double VRatioPrefetchC[],
4671 unsigned int swath_width_luma_ub[],
4672 unsigned int swath_width_chroma_ub[],
4673 unsigned int DPPPerSurface[],
4674 double HRatio[],
4675 double HRatioChroma[],
4676 double PixelClock[],
4677 double PSCL_THROUGHPUT[],
4678 double PSCL_THROUGHPUT_CHROMA[],
4679 double Dppclk[],
4680 unsigned int BytePerPixelC[],
4681 enum dm_rotation_angle SourceRotation[],
4682 unsigned int NumberOfCursors[],
4683 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX2],
4684 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX2],
4685 unsigned int BlockWidth256BytesY[],
4686 unsigned int BlockHeight256BytesY[],
4687 unsigned int BlockWidth256BytesC[],
4688 unsigned int BlockHeight256BytesC[],
4689
4690 /* Output */
4691 double DisplayPipeLineDeliveryTimeLuma[],
4692 double DisplayPipeLineDeliveryTimeChroma[],
4693 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
4694 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
4695 double DisplayPipeRequestDeliveryTimeLuma[],
4696 double DisplayPipeRequestDeliveryTimeChroma[],
4697 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4698 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4699 double CursorRequestDeliveryTime[],
4700 double CursorRequestDeliveryTimePrefetch[])
4701{
4702 double req_per_swath_ub;
4703 unsigned int k;
4704
4705 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4706
4707#ifdef __DML_VBA_DEBUG__
4708 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]){do { } while(0); };
4709 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]){do { } while(0); };
4710 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]){do { } while(0); };
4711 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]){do { } while(0); };
4712 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]){do { } while(0); };
4713 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]){do { } while(0); };
4714 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]){do { } while(0); };
4715 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]){do { } while(0); };
4716 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]){do { } while(0); };
4717 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]){do { } while(0); };
4718 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]){do { } while(0); };
4719#endif
4720
4721 if (VRatio[k] <= 1) {
4722 DisplayPipeLineDeliveryTimeLuma[k] =
4723 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4724 } else {
4725 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4726 }
4727
4728 if (BytePerPixelC[k] == 0) {
4729 DisplayPipeLineDeliveryTimeChroma[k] = 0;
4730 } else {
4731 if (VRatioChroma[k] <= 1) {
4732 DisplayPipeLineDeliveryTimeChroma[k] =
4733 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4734 } else {
4735 DisplayPipeLineDeliveryTimeChroma[k] =
4736 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4737 }
4738 }
4739
4740 if (VRatioPrefetchY[k] <= 1) {
4741 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4742 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4743 } else {
4744 DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4745 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4746 }
4747
4748 if (BytePerPixelC[k] == 0) {
4749 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4750 } else {
4751 if (VRatioPrefetchC[k] <= 1) {
4752 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4753 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4754 } else {
4755 DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4756 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4757 }
4758 }
4759#ifdef __DML_VBA_DEBUG__
4760 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",{do { } while(0); }
4761 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]){do { } while(0); };
4762 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",{do { } while(0); }
4763 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]){do { } while(0); };
4764 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",{do { } while(0); }
4765 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]){do { } while(0); };
4766 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",{do { } while(0); }
4767 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]){do { } while(0); };
4768#endif
4769 }
4770
4771 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4772 if (!IsVertical(SourceRotation[k]))
4773 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4774 else
4775 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4776#ifdef __DML_VBA_DEBUG__
4777 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub){do { } while(0); };
4778#endif
4779
4780 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4781 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4782 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4783 if (BytePerPixelC[k] == 0) {
4784 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4785 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4786 } else {
4787 if (!IsVertical(SourceRotation[k]))
4788 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4789 else
4790 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4791#ifdef __DML_VBA_DEBUG__
4792 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub){do { } while(0); };
4793#endif
4794 DisplayPipeRequestDeliveryTimeChroma[k] =
4795 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4796 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4797 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4798 }
4799#ifdef __DML_VBA_DEBUG__
4800 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",{do { } while(0); }
4801 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]){do { } while(0); };
4802 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",{do { } while(0); }
4803 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]){do { } while(0); };
4804 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",{do { } while(0); }
4805 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]){do { } while(0); };
4806 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",{do { } while(0); }
4807 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]){do { } while(0); };
4808#endif
4809 }
4810
4811 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4812 unsigned int cursor_req_per_width;
4813
4814 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4815 256.0 / 8.0, 1.0);
4816 if (NumberOfCursors[k] > 0) {
4817 if (VRatio[k] <= 1) {
4818 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4819 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4820 } else {
4821 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4822 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4823 }
4824 if (VRatioPrefetchY[k] <= 1) {
4825 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4826 HRatio[k] / PixelClock[k] / cursor_req_per_width;
4827 } else {
4828 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4829 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4830 }
4831 } else {
4832 CursorRequestDeliveryTime[k] = 0;
4833 CursorRequestDeliveryTimePrefetch[k] = 0;
4834 }
4835#ifdef __DML_VBA_DEBUG__
4836 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",{do { } while(0); }
4837 __func__, k, NumberOfCursors[k]){do { } while(0); };
4838 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",{do { } while(0); }
4839 __func__, k, CursorRequestDeliveryTime[k]){do { } while(0); };
4840 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",{do { } while(0); }
4841 __func__, k, CursorRequestDeliveryTimePrefetch[k]){do { } while(0); };
4842#endif
4843 }
4844} // CalculatePixelDeliveryTimes
4845
4846void dml32_CalculateMetaAndPTETimes(
4847 bool_Bool use_one_row_for_frame[],
4848 unsigned int NumberOfActiveSurfaces,
4849 bool_Bool GPUVMEnable,
4850 unsigned int MetaChunkSize,
4851 unsigned int MinMetaChunkSizeBytes,
4852 unsigned int HTotal[],
4853 double VRatio[],
4854 double VRatioChroma[],
4855 double DestinationLinesToRequestRowInVBlank[],
4856 double DestinationLinesToRequestRowInImmediateFlip[],
4857 bool_Bool DCCEnable[],
4858 double PixelClock[],
4859 unsigned int BytePerPixelY[],
4860 unsigned int BytePerPixelC[],
4861 enum dm_rotation_angle SourceRotation[],
4862 unsigned int dpte_row_height[],
4863 unsigned int dpte_row_height_chroma[],
4864 unsigned int meta_row_width[],
4865 unsigned int meta_row_width_chroma[],
4866 unsigned int meta_row_height[],
4867 unsigned int meta_row_height_chroma[],
4868 unsigned int meta_req_width[],
4869 unsigned int meta_req_width_chroma[],
4870 unsigned int meta_req_height[],
4871 unsigned int meta_req_height_chroma[],
4872 unsigned int dpte_group_bytes[],
4873 unsigned int PTERequestSizeY[],
4874 unsigned int PTERequestSizeC[],
4875 unsigned int PixelPTEReqWidthY[],
4876 unsigned int PixelPTEReqHeightY[],
4877 unsigned int PixelPTEReqWidthC[],
4878 unsigned int PixelPTEReqHeightC[],
4879 unsigned int dpte_row_width_luma_ub[],
4880 unsigned int dpte_row_width_chroma_ub[],
4881
4882 /* Output */
4883 double DST_Y_PER_PTE_ROW_NOM_L[],
4884 double DST_Y_PER_PTE_ROW_NOM_C[],
4885 double DST_Y_PER_META_ROW_NOM_L[],
4886 double DST_Y_PER_META_ROW_NOM_C[],
4887 double TimePerMetaChunkNominal[],
4888 double TimePerChromaMetaChunkNominal[],
4889 double TimePerMetaChunkVBlank[],
4890 double TimePerChromaMetaChunkVBlank[],
4891 double TimePerMetaChunkFlip[],
4892 double TimePerChromaMetaChunkFlip[],
4893 double time_per_pte_group_nom_luma[],
4894 double time_per_pte_group_vblank_luma[],
4895 double time_per_pte_group_flip_luma[],
4896 double time_per_pte_group_nom_chroma[],
4897 double time_per_pte_group_vblank_chroma[],
4898 double time_per_pte_group_flip_chroma[])
4899{
4900 unsigned int meta_chunk_width;
4901 unsigned int min_meta_chunk_width;
4902 unsigned int meta_chunk_per_row_int;
4903 unsigned int meta_row_remainder;
4904 unsigned int meta_chunk_threshold;
4905 unsigned int meta_chunks_per_row_ub;
4906 unsigned int meta_chunk_width_chroma;
4907 unsigned int min_meta_chunk_width_chroma;
4908 unsigned int meta_chunk_per_row_int_chroma;
4909 unsigned int meta_row_remainder_chroma;
4910 unsigned int meta_chunk_threshold_chroma;
4911 unsigned int meta_chunks_per_row_ub_chroma;
4912 unsigned int dpte_group_width_luma;
4913 unsigned int dpte_groups_per_row_luma_ub;
4914 unsigned int dpte_group_width_chroma;
4915 unsigned int dpte_groups_per_row_chroma_ub;
4916 unsigned int k;
4917
4918 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4919 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4920 if (BytePerPixelC[k] == 0)
4921 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4922 else
4923 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4924 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4925 if (BytePerPixelC[k] == 0)
4926 DST_Y_PER_META_ROW_NOM_C[k] = 0;
4927 else
4928 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4929 }
4930
4931 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4932 if (DCCEnable[k] == true1) {
4933 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4934 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4935 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4936 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4937 if (!IsVertical(SourceRotation[k]))
4938 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4939 else
4940 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4941
4942 if (meta_row_remainder <= meta_chunk_threshold)
4943 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4944 else
4945 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4946
4947 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4948 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4949 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4950 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4951 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4952 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4953 if (BytePerPixelC[k] == 0) {
4954 TimePerChromaMetaChunkNominal[k] = 0;
4955 TimePerChromaMetaChunkVBlank[k] = 0;
4956 TimePerChromaMetaChunkFlip[k] = 0;
4957 } else {
4958 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4959 meta_row_height_chroma[k];
4960 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4961 meta_row_height_chroma[k];
4962 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4963 meta_chunk_width_chroma;
4964 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4965 if (!IsVertical(SourceRotation[k])) {
4966 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4967 meta_req_width_chroma[k];
4968 } else {
4969 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4970 meta_req_height_chroma[k];
4971 }
4972 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4973 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4974 else
4975 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4976
4977 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4978 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4979 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4980 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4981 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4982 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4983 }
4984 } else {
4985 TimePerMetaChunkNominal[k] = 0;
4986 TimePerMetaChunkVBlank[k] = 0;
4987 TimePerMetaChunkFlip[k] = 0;
4988 TimePerChromaMetaChunkNominal[k] = 0;
4989 TimePerChromaMetaChunkVBlank[k] = 0;
4990 TimePerChromaMetaChunkFlip[k] = 0;
4991 }
4992 }
4993
4994 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4995 if (GPUVMEnable == true1) {
4996 if (!IsVertical(SourceRotation[k])) {
4997 dpte_group_width_luma = (double) dpte_group_bytes[k] /
4998 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4999 } else {
5000 dpte_group_width_luma = (double) dpte_group_bytes[k] /
5001 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5002 }
5003
5004 if (use_one_row_for_frame[k]) {
5005 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5006 (double) dpte_group_width_luma / 2.0, 1.0);
5007 } else {
5008 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5009 (double) dpte_group_width_luma, 1.0);
5010 }
5011#ifdef __DML_VBA_DEBUG__
5012 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",{do { } while(0); }
5013 __func__, k, use_one_row_for_frame[k]){do { } while(0); };
5014 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",{do { } while(0); }
5015 __func__, k, dpte_group_bytes[k]){do { } while(0); };
5016 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",{do { } while(0); }
5017 __func__, k, PTERequestSizeY[k]){do { } while(0); };
5018 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",{do { } while(0); }
5019 __func__, k, PixelPTEReqWidthY[k]){do { } while(0); };
5020 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",{do { } while(0); }
5021 __func__, k, PixelPTEReqHeightY[k]){do { } while(0); };
5022 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",{do { } while(0); }
5023 __func__, k, dpte_row_width_luma_ub[k]){do { } while(0); };
5024 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",{do { } while(0); }
5025 __func__, k, dpte_group_width_luma){do { } while(0); };
5026 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",{do { } while(0); }
5027 __func__, k, dpte_groups_per_row_luma_ub){do { } while(0); };
5028#endif
5029
5030 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5031 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5032 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5033 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5034 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5035 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5036 if (BytePerPixelC[k] == 0) {
5037 time_per_pte_group_nom_chroma[k] = 0;
5038 time_per_pte_group_vblank_chroma[k] = 0;
5039 time_per_pte_group_flip_chroma[k] = 0;
5040 } else {
5041 if (!IsVertical(SourceRotation[k])) {
5042 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5043 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5044 } else {
5045 dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5046 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5047 }
5048
5049 if (use_one_row_for_frame[k]) {
5050 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5051 (double) dpte_group_width_chroma / 2.0, 1.0);
5052 } else {
5053 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5054 (double) dpte_group_width_chroma, 1.0);
5055 }
5056#ifdef __DML_VBA_DEBUG__
5057 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",{do { } while(0); }
5058 __func__, k, dpte_row_width_chroma_ub[k]){do { } while(0); };
5059 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",{do { } while(0); }
5060 __func__, k, dpte_group_width_chroma){do { } while(0); };
5061 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",{do { } while(0); }
5062 __func__, k, dpte_groups_per_row_chroma_ub){do { } while(0); };
5063#endif
5064 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5065 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5066 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5067 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5068 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5069 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5070 }
5071 } else {
5072 time_per_pte_group_nom_luma[k] = 0;
5073 time_per_pte_group_vblank_luma[k] = 0;
5074 time_per_pte_group_flip_luma[k] = 0;
5075 time_per_pte_group_nom_chroma[k] = 0;
5076 time_per_pte_group_vblank_chroma[k] = 0;
5077 time_per_pte_group_flip_chroma[k] = 0;
5078 }
5079#ifdef __DML_VBA_DEBUG__
5080 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",{do { } while(0); }
5081 __func__, k, DestinationLinesToRequestRowInVBlank[k]){do { } while(0); };
5082 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",{do { } while(0); }
5083 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]){do { } while(0); };
5084 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",{do { } while(0); }
5085 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]){do { } while(0); };
5086 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",{do { } while(0); }
5087 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]){do { } while(0); };
5088 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",{do { } while(0); }
5089 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]){do { } while(0); };
5090 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",{do { } while(0); }
5091 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]){do { } while(0); };
5092 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",{do { } while(0); }
5093 __func__, k, TimePerMetaChunkNominal[k]){do { } while(0); };
5094 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",{do { } while(0); }
5095 __func__, k, TimePerMetaChunkVBlank[k]){do { } while(0); };
5096 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",{do { } while(0); }
5097 __func__, k, TimePerMetaChunkFlip[k]){do { } while(0); };
5098 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",{do { } while(0); }
5099 __func__, k, TimePerChromaMetaChunkNominal[k]){do { } while(0); };
5100 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",{do { } while(0); }
5101 __func__, k, TimePerChromaMetaChunkVBlank[k]){do { } while(0); };
5102 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",{do { } while(0); }
5103 __func__, k, TimePerChromaMetaChunkFlip[k]){do { } while(0); };
5104 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",{do { } while(0); }
5105 __func__, k, time_per_pte_group_nom_luma[k]){do { } while(0); };
5106 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",{do { } while(0); }
5107 __func__, k, time_per_pte_group_vblank_luma[k]){do { } while(0); };
5108 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",{do { } while(0); }
5109 __func__, k, time_per_pte_group_flip_luma[k]){do { } while(0); };
5110 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",{do { } while(0); }
5111 __func__, k, time_per_pte_group_nom_chroma[k]){do { } while(0); };
5112 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",{do { } while(0); }
5113 __func__, k, time_per_pte_group_vblank_chroma[k]){do { } while(0); };
5114 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",{do { } while(0); }
5115 __func__, k, time_per_pte_group_flip_chroma[k]){do { } while(0); };
5116#endif
5117 }
5118} // CalculateMetaAndPTETimes
5119
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface VM group/request times.
 *
 * For every surface k in [0, NumberOfActiveSurfaces) the function fills the
 * four output arrays:
 *
 *   - When GPUVMEnable is set and either DCCEnable[k] is set or
 *     GPUVMMaxPageTableLevels > 1, it derives two counts from the per-frame
 *     byte totals:
 *       num_group_per_lower_vm_stage: sum of dml_ceil(bytes / vm_group_bytes[k], 1)
 *       num_req_per_lower_vm_stage:   sum of (bytes / 64), integer division
 *     The byte totals that take part are:
 *       DCC off                   -> dpde0 bytes only
 *       DCC on, exactly one level -> meta PTE bytes only
 *       DCC on, any other level   -> dpde0 and meta PTE bytes
 *     The chroma (_c) totals are added only when BytePerPixelC[k] > 0.
 *     Each output is then
 *       DestinationLinesToRequestVMIn{VBlank,ImmediateFlip}[k] *
 *           HTotal[k] / PixelClock[k] / count
 *     and all four outputs are halved when GPUVMMaxPageTableLevels > 2.
 *
 *   - Otherwise all four outputs for k are set to 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but are not
 * read anywhere in this function.
 */
5120void dml32_CalculateVMGroupAndRequestTimes(
5121 unsigned int NumberOfActiveSurfaces,
5122 bool_Bool GPUVMEnable,
5123 unsigned int GPUVMMaxPageTableLevels,
5124 unsigned int HTotal[],
5125 unsigned int BytePerPixelC[],
5126 double DestinationLinesToRequestVMInVBlank[],
5127 double DestinationLinesToRequestVMInImmediateFlip[],
5128 bool_Bool DCCEnable[],
5129 double PixelClock[],
5130 unsigned int dpte_row_width_luma_ub[],
5131 unsigned int dpte_row_width_chroma_ub[],
5132 unsigned int vm_group_bytes[],
5133 unsigned int dpde0_bytes_per_frame_ub_l[],
5134 unsigned int dpde0_bytes_per_frame_ub_c[],
5135 unsigned int meta_pte_bytes_per_frame_ub_l[],
5136 unsigned int meta_pte_bytes_per_frame_ub_c[],
5137
5138 /* Output */
5139 double TimePerVMGroupVBlank[],
5140 double TimePerVMGroupFlip[],
5141 double TimePerVMRequestVBlank[],
5142 double TimePerVMRequestFlip[])
5143{
5144 unsigned int k;
 /* Both counters are assigned on every path of the enabled branch below before they are used. */
5145 unsigned int num_group_per_lower_vm_stage;
5146 unsigned int num_req_per_lower_vm_stage;
5147
5148#ifdef __DML_VBA_DEBUG__
5149 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces){do { } while(0); };
5150 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable){do { } while(0); };
5151#endif
5152 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5153
5154#ifdef __DML_VBA_DEBUG__
5155 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]){do { } while(0); };
5156 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]){do { } while(0); };
5157 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",{do { } while(0); }
5158 __func__, k, dpde0_bytes_per_frame_ub_l[k]){do { } while(0); };
5159 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",{do { } while(0); }
5160 __func__, k, dpde0_bytes_per_frame_ub_c[k]){do { } while(0); };
5161 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",{do { } while(0); }
5162 __func__, k, meta_pte_bytes_per_frame_ub_l[k]){do { } while(0); };
5163 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",{do { } while(0); }
5164 __func__, k, meta_pte_bytes_per_frame_ub_c[k]){do { } while(0); };
5165#endif
5166
 /* Times are only computed when GPUVM is on and there is DCC or more than one page table level. */
5167 if (GPUVMEnable == true1 && (DCCEnable[k] == true1 || GPUVMMaxPageTableLevels > 1)) {
 /* Step 1: group count, one dml_ceil(bytes / vm_group_bytes[k]) term per participating byte total. */
5168 if (DCCEnable[k] == false0) {
5169 if (BytePerPixelC[k] > 0) {
5170 num_group_per_lower_vm_stage = dml_ceil(
5171 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5172 (double) (vm_group_bytes[k]), 1.0) +
5173 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5174 (double) (vm_group_bytes[k]), 1.0);
5175 } else {
5176 num_group_per_lower_vm_stage = dml_ceil(
5177 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5178 (double) (vm_group_bytes[k]), 1.0);
5179 }
5180 } else {
5181 if (GPUVMMaxPageTableLevels == 1) {
5182 if (BytePerPixelC[k] > 0) {
5183 num_group_per_lower_vm_stage = dml_ceil(
5184 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5185 (double) (vm_group_bytes[k]), 1.0) +
5186 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5187 (double) (vm_group_bytes[k]), 1.0);
5188 } else {
5189 num_group_per_lower_vm_stage = dml_ceil(
5190 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5191 (double) (vm_group_bytes[k]), 1.0);
5192 }
5193 } else {
 /*
  * DCC on with a level count other than 1: dpde0 and meta PTE groups are summed,
  * plus a constant 2 (luma + chroma) or 1 (luma only).
  * NOTE(review): the reason for the extra constant is not visible here - confirm.
  */
5194 if (BytePerPixelC[k] > 0) {
5195 num_group_per_lower_vm_stage = 2 + dml_ceil(
5196 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5197 (double) (vm_group_bytes[k]), 1) +
5198 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
5199 (double) (vm_group_bytes[k]), 1) +
5200 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
5201 (double) (vm_group_bytes[k]), 1) +
5202 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
5203 (double) (vm_group_bytes[k]), 1);
5204 } else {
5205 num_group_per_lower_vm_stage = 1 + dml_ceil(
5206 (double) (dpde0_bytes_per_frame_ub_l[k]) /
5207 (double) (vm_group_bytes[k]), 1) + dml_ceil(
5208 (double) (meta_pte_bytes_per_frame_ub_l[k]) /
5209 (double) (vm_group_bytes[k]), 1);
5210 }
5211 }
5212 }
5213
 /*
  * Step 2: request count, using the same byte totals as step 1 but divided by 64
  * with integer (truncating) division and without the extra constant.
  * NOTE(review): the count is 0 when every participating total is below 64; the
  * divisions further down are then floating-point divisions by zero - confirm
  * callers never produce that case.
  */
5214 if (DCCEnable[k] == false0) {
5215 if (BytePerPixelC[k] > 0) {
5216 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
5217 dpde0_bytes_per_frame_ub_c[k] / 64;
5218 } else {
5219 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
5220 }
5221 } else {
5222 if (GPUVMMaxPageTableLevels == 1) {
5223 if (BytePerPixelC[k] > 0) {
5224 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
5225 meta_pte_bytes_per_frame_ub_c[k] / 64;
5226 } else {
5227 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
5228 }
5229 } else {
5230 if (BytePerPixelC[k] > 0) {
5231 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5232 64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
5233 meta_pte_bytes_per_frame_ub_l[k] / 64 +
5234 meta_pte_bytes_per_frame_ub_c[k] / 64;
5235 } else {
5236 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
5237 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
5238 }
5239 }
5240 }
5241
 /* Step 3: spread the request window (lines * HTotal / PixelClock) evenly over the groups / requests. */
5242 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5243 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5244 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5245 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
5246 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
5247 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5248 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
5249 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
5250
 /* With more than two page table levels every computed time is halved. */
5251 if (GPUVMMaxPageTableLevels > 2) {
5252 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
5253 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
5254 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
5255 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
5256 }
5257
5258 } else {
 /* Condition not met for this surface: report zero for all four times. */
5259 TimePerVMGroupVBlank[k] = 0;
5260 TimePerVMGroupFlip[k] = 0;
5261 TimePerVMRequestVBlank[k] = 0;
5262 TimePerVMRequestFlip[k] = 0;
5263 }
5264
5265#ifdef __DML_VBA_DEBUG__
5266 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]){do { } while(0); };
5267 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]){do { } while(0); };
5268 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]){do { } while(0); };
5269 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]){do { } while(0); };
5270#endif
5271 }
5272} // CalculateVMGroupAndRequestTimes
5273
/*
 * dml32_CalculateDCCConfiguration - pick the DCC request size for luma and
 * chroma and report the matching block sizes through the six output pointers.
 *
 * Outline of what the body does:
 *   1. Derive viewport limits from half of the DET buffer
 *      (nomDETInKByte * 1024 bytes) and clamp SurfaceWidthLuma /
 *      SurfaceHeightLuma to them.
 *   2. Compute the bytes of one full swath per plane for the horizontal and
 *      vertical scan cases.
 *   3. Decide per plane and per scan case whether two full swaths fit in the
 *      DET buffer (req128_* = 0) or not (req128_* = 1).
 *   4. Combine those flags with the segment-order flags, selected by
 *      DCCProgrammingAssumesScanDirectionUnknown and IsVertical(SourceRotation),
 *      into a RequestType per plane.
 *   5. Map the RequestType to (MaxUncompressedBlock, MaxCompressedBlock,
 *      IndependentBlock): 256/256/0, 256/128/128 or 256/64/64.
 *   6. Force the chroma outputs to 0 when DCCEnabled is not set or
 *      BytePerPixelC == 0, and the luma outputs to 0 when DCCEnabled is not set.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, TilingFormat, BytePerPixelDETY and
 * BytePerPixelDETC are accepted but are not read anywhere in this function.
 */
5274void dml32_CalculateDCCConfiguration(
5275 bool_Bool DCCEnabled,
5276 bool_Bool DCCProgrammingAssumesScanDirectionUnknown,
5277 enum source_format_class SourcePixelFormat,
5278 unsigned int SurfaceWidthLuma,
5279 unsigned int SurfaceWidthChroma,
5280 unsigned int SurfaceHeightLuma,
5281 unsigned int SurfaceHeightChroma,
5282 unsigned int nomDETInKByte,
5283 unsigned int RequestHeight256ByteLuma,
5284 unsigned int RequestHeight256ByteChroma,
5285 enum dm_swizzle_mode TilingFormat,
5286 unsigned int BytePerPixelY,
5287 unsigned int BytePerPixelC,
5288 double BytePerPixelDETY,
5289 double BytePerPixelDETC,
5290 enum dm_rotation_angle SourceRotation,
5291 /* Output */
5292 unsigned int *MaxUncompressedBlockLuma,
5293 unsigned int *MaxUncompressedBlockChroma,
5294 unsigned int *MaxCompressedBlockLuma,
5295 unsigned int *MaxCompressedBlockChroma,
5296 unsigned int *IndependentBlockLuma,
5297 unsigned int *IndependentBlockChroma)
5298{
 /* Request size chosen per plane; REQ_NA is declared but never assigned in this function. */
5299 typedef enum {
5300 REQ_256Bytes,
5301 REQ_128BytesNonContiguous,
5302 REQ_128BytesContiguous,
5303 REQ_NA
5304 } RequestType;
5305
5306 RequestType RequestLuma;
5307 RequestType RequestChroma;
5308
5309 unsigned int segment_order_horz_contiguous_luma;
5310 unsigned int segment_order_horz_contiguous_chroma;
5311 unsigned int segment_order_vert_contiguous_luma;
5312 unsigned int segment_order_vert_contiguous_chroma;
5313 unsigned int req128_horz_wc_l;
5314 unsigned int req128_horz_wc_c;
5315 unsigned int req128_vert_wc_l;
5316 unsigned int req128_vert_wc_c;
5317 unsigned int MAS_vp_horz_limit;
5318 unsigned int MAS_vp_vert_limit;
5319 unsigned int max_vp_horz_width;
5320 unsigned int max_vp_vert_height;
5321 unsigned int eff_surf_width_l;
5322 unsigned int eff_surf_width_c;
5323 unsigned int eff_surf_height_l;
5324 unsigned int eff_surf_height_c;
5325 unsigned int full_swath_bytes_horz_wc_l;
5326 unsigned int full_swath_bytes_horz_wc_c;
5327 unsigned int full_swath_bytes_vert_wc_l;
5328 unsigned int full_swath_bytes_vert_wc_c;
 /* DET size in bytes: the KByte input multiplied by 1024. */
5329 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5330
5331 unsigned int yuv420;
5332 unsigned int horz_div_l;
5333 unsigned int horz_div_c;
5334 unsigned int vert_div_l;
5335 unsigned int vert_div_c;
5336
5337 unsigned int swath_buf_size;
5338 double detile_buf_vp_horz_limit;
5339 double detile_buf_vp_vert_limit;
5340
 /* yuv420 is 1 for the three dm_420_* formats, 0 otherwise; it halves the chroma dimensions below. */
5341 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5342 SourcePixelFormat == dm_420_12) ? 1 : 0);
 /* The horizontal divisors stay 1; the vertical divisors drop to 0 for 1-byte-per-pixel planes. */
5343 horz_div_l = 1;
5344 horz_div_c = 1;
5345 vert_div_l = 1;
5346 vert_div_c = 1;
5347
5348 if (BytePerPixelY == 1)
5349 vert_div_l = 0;
5350 if (BytePerPixelC == 1)
5351 vert_div_c = 0;
5352
 /*
  * Viewport limits from the detile buffer: half of the DET minus 2 * 256 bytes
  * (single plane) or 2 * 2 * 256 bytes (luma + chroma).
  * NOTE(review): swath_buf_size is unsigned, so the subtraction wraps when
  * DETBufferSizeForDCC / 2 is smaller than the subtracted constant - confirm
  * nomDETInKByte is always large enough.
  */
5353 if (BytePerPixelC == 0) {
5354 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5355 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5356 BytePerPixelY / (1 + horz_div_l));
5357 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5358 (1 + vert_div_l));
5359 } else {
5360 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5361 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5362 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5363 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5364 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5365 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5366 (1 + vert_div_c) / (1 + yuv420));
5367 }
5368
 /* dm_420_10 gets 1.5x larger limits here; the matching 2/3 scaling of the swath bytes is further down. */
5369 if (SourcePixelFormat == dm_420_10) {
5370 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5371 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5372 }
5373
 /* presumably dml_floor(x, 16) rounds down to a multiple of 16 - helper not visible here, TODO confirm */
5374 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5375 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5376
 /* Fixed per-format caps, then the effective viewport cap is the smaller of the fixed cap and the detile-buffer limit. */
5377 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5378 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5379 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5380 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
 /* Clamp the luma surface to the caps; chroma dimensions are the luma ones divided by (1 + yuv420). */
5381 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5382 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5383 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5384 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5385
 /*
  * Bytes of one full swath per plane, for horizontal and vertical scan.
  * NOTE(review): the vertical luma term is an integer division by
  * RequestHeight256ByteLuma - confirm callers never pass 0.
  */
5386 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5387 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5388 if (BytePerPixelC > 0) {
5389 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5390 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5391 } else {
5392 full_swath_bytes_horz_wc_c = 0;
5393 full_swath_bytes_vert_wc_c = 0;
5394 }
5395
 /* dm_420_10: scale every swath size by 2/3 and pass it through dml_ceil with a granularity argument of 256. */
5396 if (SourcePixelFormat == dm_420_10) {
5397 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5398 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5399 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5400 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5401 }
5402
 /*
  * Horizontal scan: if two swaths of both planes fit in the DET, neither plane
  * sets its req128 flag. Otherwise only one plane sets it when that is enough
  * to fit (chroma when luma < 1.5 * chroma, luma otherwise); if that still
  * does not fit, both planes set it.
  */
5403 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5404 req128_horz_wc_l = 0;
5405 req128_horz_wc_c = 0;
5406 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5407 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5408 req128_horz_wc_l = 0;
5409 req128_horz_wc_c = 1;
5410 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5411 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5412 req128_horz_wc_l = 1;
5413 req128_horz_wc_c = 0;
5414 } else {
5415 req128_horz_wc_l = 1;
5416 req128_horz_wc_c = 1;
5417 }
5418
 /* Vertical scan: same decision ladder using the vertical swath sizes. */
5419 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5420 req128_vert_wc_l = 0;
5421 req128_vert_wc_c = 0;
5422 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5423 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5424 req128_vert_wc_l = 0;
5425 req128_vert_wc_c = 1;
5426 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5427 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5428 req128_vert_wc_l = 1;
5429 req128_vert_wc_c = 0;
5430 } else {
5431 req128_vert_wc_l = 1;
5432 req128_vert_wc_c = 1;
5433 }
5434
 /* Segment-order flags: 2-byte-per-pixel planes are flagged contiguous vertically, all others horizontally. */
5435 if (BytePerPixelY == 2) {
5436 segment_order_horz_contiguous_luma = 0;
5437 segment_order_vert_contiguous_luma = 1;
5438 } else {
5439 segment_order_horz_contiguous_luma = 1;
5440 segment_order_vert_contiguous_luma = 0;
5441 }
5442
5443 if (BytePerPixelC == 2) {
5444 segment_order_horz_contiguous_chroma = 0;
5445 segment_order_vert_contiguous_chroma = 1;
5446 } else {
5447 segment_order_horz_contiguous_chroma = 1;
5448 segment_order_vert_contiguous_chroma = 0;
5449 }
5450#ifdef __DML_VBA_DEBUG__
5451 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled){do { } while(0); };
5452 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte){do { } while(0); };
5453 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC){do { } while(0); };
5454 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l){do { } while(0); };
5455 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c){do { } while(0); };
5456 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l){do { } while(0); };
5457 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c){do { } while(0); };
5458 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma){do { } while(0); };
5459 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",{do { } while(0); }
5460 __func__, segment_order_horz_contiguous_chroma){do { } while(0); };
5461#endif
5462
 /*
  * Request type per plane. With the scan direction unknown both the horizontal
  * and the vertical flags are consulted; otherwise only the flags for the
  * direction given by IsVertical(SourceRotation).
  */
5463 if (DCCProgrammingAssumesScanDirectionUnknown == true1) {
5464 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5465 RequestLuma = REQ_256Bytes;
5466 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5467 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5468 RequestLuma = REQ_128BytesNonContiguous;
5469 else
5470 RequestLuma = REQ_128BytesContiguous;
5471
5472 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5473 RequestChroma = REQ_256Bytes;
5474 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5475 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5476 RequestChroma = REQ_128BytesNonContiguous;
5477 else
5478 RequestChroma = REQ_128BytesContiguous;
5479
5480 } else if (!IsVertical(SourceRotation)) {
5481 if (req128_horz_wc_l == 0)
5482 RequestLuma = REQ_256Bytes;
5483 else if (segment_order_horz_contiguous_luma == 0)
5484 RequestLuma = REQ_128BytesNonContiguous;
5485 else
5486 RequestLuma = REQ_128BytesContiguous;
5487
5488 if (req128_horz_wc_c == 0)
5489 RequestChroma = REQ_256Bytes;
5490 else if (segment_order_horz_contiguous_chroma == 0)
5491 RequestChroma = REQ_128BytesNonContiguous;
5492 else
5493 RequestChroma = REQ_128BytesContiguous;
5494
5495 } else {
5496 if (req128_vert_wc_l == 0)
5497 RequestLuma = REQ_256Bytes;
5498 else if (segment_order_vert_contiguous_luma == 0)
5499 RequestLuma = REQ_128BytesNonContiguous;
5500 else
5501 RequestLuma = REQ_128BytesContiguous;
5502
5503 if (req128_vert_wc_c == 0)
5504 RequestChroma = REQ_256Bytes;
5505 else if (segment_order_vert_contiguous_chroma == 0)
5506 RequestChroma = REQ_128BytesNonContiguous;
5507 else
5508 RequestChroma = REQ_128BytesContiguous;
5509 }
5510
 /* Map the luma request type to its outputs; the final else covers REQ_128BytesNonContiguous. */
5511 if (RequestLuma == REQ_256Bytes) {
5512 *MaxUncompressedBlockLuma = 256;
5513 *MaxCompressedBlockLuma = 256;
5514 *IndependentBlockLuma = 0;
5515 } else if (RequestLuma == REQ_128BytesContiguous) {
5516 *MaxUncompressedBlockLuma = 256;
5517 *MaxCompressedBlockLuma = 128;
5518 *IndependentBlockLuma = 128;
5519 } else {
5520 *MaxUncompressedBlockLuma = 256;
5521 *MaxCompressedBlockLuma = 64;
5522 *IndependentBlockLuma = 64;
5523 }
5524
 /* Same mapping for chroma. */
5525 if (RequestChroma == REQ_256Bytes) {
5526 *MaxUncompressedBlockChroma = 256;
5527 *MaxCompressedBlockChroma = 256;
5528 *IndependentBlockChroma = 0;
5529 } else if (RequestChroma == REQ_128BytesContiguous) {
5530 *MaxUncompressedBlockChroma = 256;
5531 *MaxCompressedBlockChroma = 128;
5532 *IndependentBlockChroma = 128;
5533 } else {
5534 *MaxUncompressedBlockChroma = 256;
5535 *MaxCompressedBlockChroma = 64;
5536 *IndependentBlockChroma = 64;
5537 }
5538
 /* Overrides: no chroma plane or DCC disabled zeroes the chroma outputs; DCC disabled also zeroes the luma outputs. */
5539 if (DCCEnabled != true1 || BytePerPixelC == 0) {
5540 *MaxUncompressedBlockChroma = 0;
5541 *MaxCompressedBlockChroma = 0;
5542 *IndependentBlockChroma = 0;
5543 }
5544
5545 if (DCCEnabled != true1) {
5546 *MaxUncompressedBlockLuma = 0;
5547 *MaxCompressedBlockLuma = 0;
5548 *IndependentBlockLuma = 0;
5549 }
5550
5551#ifdef __DML_VBA_DEBUG__
5552 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma){do { } while(0); };
5553 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma){do { } while(0); };
5554 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma){do { } while(0); };
5555 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma){do { } while(0); };
5556 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma){do { } while(0); };
5557 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma){do { } while(0); };
5558#endif
5559
5560} // CalculateDCCConfiguration
5561
5562void dml32_CalculateStutterEfficiency(
5563 unsigned int CompressedBufferSizeInkByte,
5564 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5565 bool_Bool UnboundedRequestEnabled,
5566 unsigned int MetaFIFOSizeInKEntries,
5567 unsigned int ZeroSizeBufferEntries,
5568 unsigned int PixelChunkSizeInKByte,
5569 unsigned int NumberOfActiveSurfaces,
5570 unsigned int ROBBufferSizeInKByte,
5571 double TotalDataReadBandwidth,
5572 double DCFCLK,
5573 double ReturnBW,
5574 unsigned int CompbufReservedSpace64B,
5575 unsigned int CompbufReservedSpaceZs,
5576 double SRExitTime,
5577 double SRExitZ8Time,
5578 bool_Bool SynchronizeTimingsFinal,
5579 unsigned int BlendingAndTiming[],
5580 double StutterEnterPlusExitWatermark,
5581 double Z8StutterEnterPlusExitWatermark,
5582 bool_Bool ProgressiveToInterlaceUnitInOPP,
5583 bool_Bool Interlace[],
5584 double MinTTUVBlank[],
5585 unsigned int DPPPerSurface[],
5586 unsigned int DETBufferSizeY[],
5587 unsigned int BytePerPixelY[],
5588 double BytePerPixelDETY[],
5589 double SwathWidthY[],
5590 unsigned int SwathHeightY[],
5591 unsigned int SwathHeightC[],
5592 double NetDCCRateLuma[],
5593 double NetDCCRateChroma[],
5594 double DCCFractionOfZeroSizeRequestsLuma[],
5595 double DCCFractionOfZeroSizeRequestsChroma[],
5596 unsigned int HTotal[],
5597 unsigned int VTotal[],
5598 double PixelClock[],
5599 double VRatio[],
5600 enum dm_rotation_angle SourceRotation[],
5601 unsigned int BlockHeight256BytesY[],
5602 unsigned int BlockWidth256BytesY[],
5603 unsigned int BlockHeight256BytesC[],
5604 unsigned int BlockWidth256BytesC[],
5605 unsigned int DCCYMaxUncompressedBlock[],
5606 unsigned int DCCCMaxUncompressedBlock[],
5607 unsigned int VActive[],
5608 bool_Bool DCCEnable[],
5609 bool_Bool WritebackEnable[],
5610 double ReadBandwidthSurfaceLuma[],
5611 double ReadBandwidthSurfaceChroma[],
5612 double meta_row_bw[],
5613 double dpte_row_bw[],
5614
5615 /* Output */
5616 double *StutterEfficiencyNotIncludingVBlank,
5617 double *StutterEfficiency,
5618 unsigned int *NumberOfStutterBurstsPerFrame,
5619 double *Z8StutterEfficiencyNotIncludingVBlank,
5620 double *Z8StutterEfficiency,
5621 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5622 double *StutterPeriod,
5623 bool_Bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5624{
5625
5626 bool_Bool FoundCriticalSurface = false0;
5627 unsigned int SwathSizeCriticalSurface = 0;
5628 unsigned int LastChunkOfSwathSize;
5629 unsigned int MissingPartOfLastSwathOfDETSize;
5630 double LastZ8StutterPeriod = 0.0;
5631 double LastStutterPeriod = 0.0;
5632 unsigned int TotalNumberOfActiveOTG = 0;
5633 double doublePixelClock;
5634 unsigned int doubleHTotal;
5635 unsigned int doubleVTotal;
5636 bool_Bool SameTiming = t