File: | dev/pci/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c |
Warning: | line 5613, column 4 Value stored to 'LinesInDETC' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* |
2 | * Copyright 2017 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: AMD |
23 | * |
24 | */ |
25 | |
26 | #include "dc.h" |
27 | #include "dc_link.h" |
28 | #include "../display_mode_lib.h" |
29 | #include "../dcn30/display_mode_vba_30.h" |
30 | #include "display_mode_vba_31.h" |
31 | #include "../dml_inline_defs.h" |
32 | |
33 | /* |
34 | * NOTE: |
35 | * This file is gcc-parsable HW gospel, coming straight from HW engineers. |
36 | * |
37 | * It doesn't adhere to Linux kernel style and sometimes will do things in odd |
38 | * ways. Unless there is something clearly wrong with it the code should |
39 | * remain as-is as it provides us with a guarantee from HW that it is correct. |
40 | */ |
41 | |
/* Sentinel / limit constants used by the DCN 3.1 mode calculations below. */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_15_MAX_DET_SIZE 384

// For DML-C changes that haven't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__

// Move these to ip parameters/constant

// The vstartup value at which DML starts trying to see if the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__    9

// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__    (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__   1.15
62 | |
63 | typedef struct { |
64 | double DPPCLK; |
65 | double DISPCLK; |
66 | double PixelClock; |
67 | double DCFCLKDeepSleep; |
68 | unsigned int DPPPerPlane; |
69 | bool_Bool ScalerEnabled; |
70 | double VRatio; |
71 | double VRatioChroma; |
72 | enum scan_direction_class SourceScan; |
73 | unsigned int BlockWidth256BytesY; |
74 | unsigned int BlockHeight256BytesY; |
75 | unsigned int BlockWidth256BytesC; |
76 | unsigned int BlockHeight256BytesC; |
77 | unsigned int InterlaceEnable; |
78 | unsigned int NumberOfCursors; |
79 | unsigned int VBlank; |
80 | unsigned int HTotal; |
81 | unsigned int DCCEnable; |
82 | bool_Bool ODMCombineIsEnabled; |
83 | enum source_format_class SourcePixelFormat; |
84 | int BytePerPixelY; |
85 | int BytePerPixelC; |
86 | bool_Bool ProgressiveToInterlaceUnitInOPP; |
87 | } Pipe; |
88 | |
/*
 * Repeated definitions of the BPP sentinels. A token-identical redefinition
 * of an object-like macro is permitted by the C standard, so this is harmless
 * if the same macros were already defined earlier in the file.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
91 | |
92 | static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); |
93 | static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); |
94 | static unsigned int dscceComputeDelay( |
95 | unsigned int bpc, |
96 | double BPP, |
97 | unsigned int sliceWidth, |
98 | unsigned int numSlices, |
99 | enum output_format_class pixelFormat, |
100 | enum output_encoder_class Output); |
101 | static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); |
102 | static bool_Bool CalculatePrefetchSchedule( |
103 | struct display_mode_lib *mode_lib, |
104 | double HostVMInefficiencyFactor, |
105 | Pipe *myPipe, |
106 | unsigned int DSCDelay, |
107 | double DPPCLKDelaySubtotalPlusCNVCFormater, |
108 | double DPPCLKDelaySCL, |
109 | double DPPCLKDelaySCLLBOnly, |
110 | double DPPCLKDelayCNVCCursor, |
111 | double DISPCLKDelaySubtotal, |
112 | unsigned int DPP_RECOUT_WIDTH, |
113 | enum output_format_class OutputFormat, |
114 | unsigned int MaxInterDCNTileRepeaters, |
115 | unsigned int VStartup, |
116 | unsigned int MaxVStartup, |
117 | unsigned int GPUVMPageTableLevels, |
118 | bool_Bool GPUVMEnable, |
119 | bool_Bool HostVMEnable, |
120 | unsigned int HostVMMaxNonCachedPageTableLevels, |
121 | double HostVMMinPageSize, |
122 | bool_Bool DynamicMetadataEnable, |
123 | bool_Bool DynamicMetadataVMEnabled, |
124 | int DynamicMetadataLinesBeforeActiveRequired, |
125 | unsigned int DynamicMetadataTransmittedBytes, |
126 | double UrgentLatency, |
127 | double UrgentExtraLatency, |
128 | double TCalc, |
129 | unsigned int PDEAndMetaPTEBytesFrame, |
130 | unsigned int MetaRowByte, |
131 | unsigned int PixelPTEBytesPerRow, |
132 | double PrefetchSourceLinesY, |
133 | unsigned int SwathWidthY, |
134 | double VInitPreFillY, |
135 | unsigned int MaxNumSwathY, |
136 | double PrefetchSourceLinesC, |
137 | unsigned int SwathWidthC, |
138 | double VInitPreFillC, |
139 | unsigned int MaxNumSwathC, |
140 | int swath_width_luma_ub, |
141 | int swath_width_chroma_ub, |
142 | unsigned int SwathHeightY, |
143 | unsigned int SwathHeightC, |
144 | double TWait, |
145 | double *DSTXAfterScaler, |
146 | double *DSTYAfterScaler, |
147 | double *DestinationLinesForPrefetch, |
148 | double *PrefetchBandwidth, |
149 | double *DestinationLinesToRequestVMInVBlank, |
150 | double *DestinationLinesToRequestRowInVBlank, |
151 | double *VRatioPrefetchY, |
152 | double *VRatioPrefetchC, |
153 | double *RequiredPrefetchPixDataBWLuma, |
154 | double *RequiredPrefetchPixDataBWChroma, |
155 | bool_Bool *NotEnoughTimeForDynamicMetadata, |
156 | double *Tno_bw, |
157 | double *prefetch_vmrow_bw, |
158 | double *Tdmdl_vm, |
159 | double *Tdmdl, |
160 | double *TSetup, |
161 | int *VUpdateOffsetPix, |
162 | double *VUpdateWidthPix, |
163 | double *VReadyOffsetPix); |
164 | static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); |
165 | static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); |
166 | static void CalculateDCCConfiguration( |
167 | bool_Bool DCCEnabled, |
168 | bool_Bool DCCProgrammingAssumesScanDirectionUnknown, |
169 | enum source_format_class SourcePixelFormat, |
170 | unsigned int SurfaceWidthLuma, |
171 | unsigned int SurfaceWidthChroma, |
172 | unsigned int SurfaceHeightLuma, |
173 | unsigned int SurfaceHeightChroma, |
174 | double DETBufferSize, |
175 | unsigned int RequestHeight256ByteLuma, |
176 | unsigned int RequestHeight256ByteChroma, |
177 | enum dm_swizzle_mode TilingFormat, |
178 | unsigned int BytePerPixelY, |
179 | unsigned int BytePerPixelC, |
180 | double BytePerPixelDETY, |
181 | double BytePerPixelDETC, |
182 | enum scan_direction_class ScanOrientation, |
183 | unsigned int *MaxUncompressedBlockLuma, |
184 | unsigned int *MaxUncompressedBlockChroma, |
185 | unsigned int *MaxCompressedBlockLuma, |
186 | unsigned int *MaxCompressedBlockChroma, |
187 | unsigned int *IndependentBlockLuma, |
188 | unsigned int *IndependentBlockChroma); |
189 | static double CalculatePrefetchSourceLines( |
190 | struct display_mode_lib *mode_lib, |
191 | double VRatio, |
192 | double vtaps, |
193 | bool_Bool Interlace, |
194 | bool_Bool ProgressiveToInterlaceUnitInOPP, |
195 | unsigned int SwathHeight, |
196 | unsigned int ViewportYStart, |
197 | double *VInitPreFill, |
198 | unsigned int *MaxNumSwath); |
199 | static unsigned int CalculateVMAndRowBytes( |
200 | struct display_mode_lib *mode_lib, |
201 | bool_Bool DCCEnable, |
202 | unsigned int BlockHeight256Bytes, |
203 | unsigned int BlockWidth256Bytes, |
204 | enum source_format_class SourcePixelFormat, |
205 | unsigned int SurfaceTiling, |
206 | unsigned int BytePerPixel, |
207 | enum scan_direction_class ScanDirection, |
208 | unsigned int SwathWidth, |
209 | unsigned int ViewportHeight, |
210 | bool_Bool GPUVMEnable, |
211 | bool_Bool HostVMEnable, |
212 | unsigned int HostVMMaxNonCachedPageTableLevels, |
213 | unsigned int GPUVMMinPageSize, |
214 | unsigned int HostVMMinPageSize, |
215 | unsigned int PTEBufferSizeInRequests, |
216 | unsigned int Pitch, |
217 | unsigned int DCCMetaPitch, |
218 | unsigned int *MacroTileWidth, |
219 | unsigned int *MetaRowByte, |
220 | unsigned int *PixelPTEBytesPerRow, |
221 | bool_Bool *PTEBufferSizeNotExceeded, |
222 | int *dpte_row_width_ub, |
223 | unsigned int *dpte_row_height, |
224 | unsigned int *MetaRequestWidth, |
225 | unsigned int *MetaRequestHeight, |
226 | unsigned int *meta_row_width, |
227 | unsigned int *meta_row_height, |
228 | int *vm_group_bytes, |
229 | unsigned int *dpte_group_bytes, |
230 | unsigned int *PixelPTEReqWidth, |
231 | unsigned int *PixelPTEReqHeight, |
232 | unsigned int *PTERequestSize, |
233 | int *DPDE0BytesFrame, |
234 | int *MetaPTEBytesFrame); |
235 | static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); |
236 | static void CalculateRowBandwidth( |
237 | bool_Bool GPUVMEnable, |
238 | enum source_format_class SourcePixelFormat, |
239 | double VRatio, |
240 | double VRatioChroma, |
241 | bool_Bool DCCEnable, |
242 | double LineTime, |
243 | unsigned int MetaRowByteLuma, |
244 | unsigned int MetaRowByteChroma, |
245 | unsigned int meta_row_height_luma, |
246 | unsigned int meta_row_height_chroma, |
247 | unsigned int PixelPTEBytesPerRowLuma, |
248 | unsigned int PixelPTEBytesPerRowChroma, |
249 | unsigned int dpte_row_height_luma, |
250 | unsigned int dpte_row_height_chroma, |
251 | double *meta_row_bw, |
252 | double *dpte_row_bw); |
253 | |
254 | static void CalculateFlipSchedule( |
255 | struct display_mode_lib *mode_lib, |
256 | unsigned int k, |
257 | double HostVMInefficiencyFactor, |
258 | double UrgentExtraLatency, |
259 | double UrgentLatency, |
260 | double PDEAndMetaPTEBytesPerFrame, |
261 | double MetaRowBytes, |
262 | double DPTEBytesPerRow); |
263 | static double CalculateWriteBackDelay( |
264 | enum source_format_class WritebackPixelFormat, |
265 | double WritebackHRatio, |
266 | double WritebackVRatio, |
267 | unsigned int WritebackVTaps, |
268 | int WritebackDestinationWidth, |
269 | int WritebackDestinationHeight, |
270 | int WritebackSourceHeight, |
271 | unsigned int HTotal); |
272 | |
273 | static void CalculateVupdateAndDynamicMetadataParameters( |
274 | int MaxInterDCNTileRepeaters, |
275 | double DPPCLK, |
276 | double DISPCLK, |
277 | double DCFClkDeepSleep, |
278 | double PixelClock, |
279 | int HTotal, |
280 | int VBlank, |
281 | int DynamicMetadataTransmittedBytes, |
282 | int DynamicMetadataLinesBeforeActiveRequired, |
283 | int InterlaceEnable, |
284 | bool_Bool ProgressiveToInterlaceUnitInOPP, |
285 | double *TSetup, |
286 | double *Tdmbf, |
287 | double *Tdmec, |
288 | double *Tdmsks, |
289 | int *VUpdateOffsetPix, |
290 | double *VUpdateWidthPix, |
291 | double *VReadyOffsetPix); |
292 | |
293 | static void CalculateWatermarksAndDRAMSpeedChangeSupport( |
294 | struct display_mode_lib *mode_lib, |
295 | unsigned int PrefetchMode, |
296 | double DCFCLK, |
297 | double ReturnBW, |
298 | double UrgentLatency, |
299 | double ExtraLatency, |
300 | double SOCCLK, |
301 | double DCFCLKDeepSleep, |
302 | unsigned int DETBufferSizeY[], |
303 | unsigned int DETBufferSizeC[], |
304 | unsigned int SwathHeightY[], |
305 | unsigned int SwathHeightC[], |
306 | double SwathWidthY[], |
307 | double SwathWidthC[], |
308 | unsigned int DPPPerPlane[], |
309 | double BytePerPixelDETY[], |
310 | double BytePerPixelDETC[], |
311 | bool_Bool UnboundedRequestEnabled, |
312 | int unsigned CompressedBufferSizeInkByte, |
313 | enum clock_change_support *DRAMClockChangeSupport, |
314 | double *StutterExitWatermark, |
315 | double *StutterEnterPlusExitWatermark, |
316 | double *Z8StutterExitWatermark, |
317 | double *Z8StutterEnterPlusExitWatermark); |
318 | |
319 | static void CalculateDCFCLKDeepSleep( |
320 | struct display_mode_lib *mode_lib, |
321 | unsigned int NumberOfActivePlanes, |
322 | int BytePerPixelY[], |
323 | int BytePerPixelC[], |
324 | double VRatio[], |
325 | double VRatioChroma[], |
326 | double SwathWidthY[], |
327 | double SwathWidthC[], |
328 | unsigned int DPPPerPlane[], |
329 | double HRatio[], |
330 | double HRatioChroma[], |
331 | double PixelClock[], |
332 | double PSCL_THROUGHPUT[], |
333 | double PSCL_THROUGHPUT_CHROMA[], |
334 | double DPPCLK[], |
335 | double ReadBandwidthLuma[], |
336 | double ReadBandwidthChroma[], |
337 | int ReturnBusWidth, |
338 | double *DCFCLKDeepSleep); |
339 | |
340 | static void CalculateUrgentBurstFactor( |
341 | int swath_width_luma_ub, |
342 | int swath_width_chroma_ub, |
343 | unsigned int SwathHeightY, |
344 | unsigned int SwathHeightC, |
345 | double LineTime, |
346 | double UrgentLatency, |
347 | double CursorBufferSize, |
348 | unsigned int CursorWidth, |
349 | unsigned int CursorBPP, |
350 | double VRatio, |
351 | double VRatioC, |
352 | double BytePerPixelInDETY, |
353 | double BytePerPixelInDETC, |
354 | double DETBufferSizeY, |
355 | double DETBufferSizeC, |
356 | double *UrgentBurstFactorCursor, |
357 | double *UrgentBurstFactorLuma, |
358 | double *UrgentBurstFactorChroma, |
359 | bool_Bool *NotEnoughUrgentLatencyHiding); |
360 | |
361 | static void UseMinimumDCFCLK( |
362 | struct display_mode_lib *mode_lib, |
363 | int MaxPrefetchMode, |
364 | int ReorderingBytes); |
365 | |
366 | static void CalculatePixelDeliveryTimes( |
367 | unsigned int NumberOfActivePlanes, |
368 | double VRatio[], |
369 | double VRatioChroma[], |
370 | double VRatioPrefetchY[], |
371 | double VRatioPrefetchC[], |
372 | unsigned int swath_width_luma_ub[], |
373 | unsigned int swath_width_chroma_ub[], |
374 | unsigned int DPPPerPlane[], |
375 | double HRatio[], |
376 | double HRatioChroma[], |
377 | double PixelClock[], |
378 | double PSCL_THROUGHPUT[], |
379 | double PSCL_THROUGHPUT_CHROMA[], |
380 | double DPPCLK[], |
381 | int BytePerPixelC[], |
382 | enum scan_direction_class SourceScan[], |
383 | unsigned int NumberOfCursors[], |
384 | unsigned int CursorWidth[][DC__NUM_CURSOR__MAX2], |
385 | unsigned int CursorBPP[][DC__NUM_CURSOR__MAX2], |
386 | unsigned int BlockWidth256BytesY[], |
387 | unsigned int BlockHeight256BytesY[], |
388 | unsigned int BlockWidth256BytesC[], |
389 | unsigned int BlockHeight256BytesC[], |
390 | double DisplayPipeLineDeliveryTimeLuma[], |
391 | double DisplayPipeLineDeliveryTimeChroma[], |
392 | double DisplayPipeLineDeliveryTimeLumaPrefetch[], |
393 | double DisplayPipeLineDeliveryTimeChromaPrefetch[], |
394 | double DisplayPipeRequestDeliveryTimeLuma[], |
395 | double DisplayPipeRequestDeliveryTimeChroma[], |
396 | double DisplayPipeRequestDeliveryTimeLumaPrefetch[], |
397 | double DisplayPipeRequestDeliveryTimeChromaPrefetch[], |
398 | double CursorRequestDeliveryTime[], |
399 | double CursorRequestDeliveryTimePrefetch[]); |
400 | |
401 | static void CalculateMetaAndPTETimes( |
402 | int NumberOfActivePlanes, |
403 | bool_Bool GPUVMEnable, |
404 | int MetaChunkSize, |
405 | int MinMetaChunkSizeBytes, |
406 | int HTotal[], |
407 | double VRatio[], |
408 | double VRatioChroma[], |
409 | double DestinationLinesToRequestRowInVBlank[], |
410 | double DestinationLinesToRequestRowInImmediateFlip[], |
411 | bool_Bool DCCEnable[], |
412 | double PixelClock[], |
413 | int BytePerPixelY[], |
414 | int BytePerPixelC[], |
415 | enum scan_direction_class SourceScan[], |
416 | int dpte_row_height[], |
417 | int dpte_row_height_chroma[], |
418 | int meta_row_width[], |
419 | int meta_row_width_chroma[], |
420 | int meta_row_height[], |
421 | int meta_row_height_chroma[], |
422 | int meta_req_width[], |
423 | int meta_req_width_chroma[], |
424 | int meta_req_height[], |
425 | int meta_req_height_chroma[], |
426 | int dpte_group_bytes[], |
427 | int PTERequestSizeY[], |
428 | int PTERequestSizeC[], |
429 | int PixelPTEReqWidthY[], |
430 | int PixelPTEReqHeightY[], |
431 | int PixelPTEReqWidthC[], |
432 | int PixelPTEReqHeightC[], |
433 | int dpte_row_width_luma_ub[], |
434 | int dpte_row_width_chroma_ub[], |
435 | double DST_Y_PER_PTE_ROW_NOM_L[], |
436 | double DST_Y_PER_PTE_ROW_NOM_C[], |
437 | double DST_Y_PER_META_ROW_NOM_L[], |
438 | double DST_Y_PER_META_ROW_NOM_C[], |
439 | double TimePerMetaChunkNominal[], |
440 | double TimePerChromaMetaChunkNominal[], |
441 | double TimePerMetaChunkVBlank[], |
442 | double TimePerChromaMetaChunkVBlank[], |
443 | double TimePerMetaChunkFlip[], |
444 | double TimePerChromaMetaChunkFlip[], |
445 | double time_per_pte_group_nom_luma[], |
446 | double time_per_pte_group_vblank_luma[], |
447 | double time_per_pte_group_flip_luma[], |
448 | double time_per_pte_group_nom_chroma[], |
449 | double time_per_pte_group_vblank_chroma[], |
450 | double time_per_pte_group_flip_chroma[]); |
451 | |
452 | static void CalculateVMGroupAndRequestTimes( |
453 | unsigned int NumberOfActivePlanes, |
454 | bool_Bool GPUVMEnable, |
455 | unsigned int GPUVMMaxPageTableLevels, |
456 | unsigned int HTotal[], |
457 | int BytePerPixelC[], |
458 | double DestinationLinesToRequestVMInVBlank[], |
459 | double DestinationLinesToRequestVMInImmediateFlip[], |
460 | bool_Bool DCCEnable[], |
461 | double PixelClock[], |
462 | int dpte_row_width_luma_ub[], |
463 | int dpte_row_width_chroma_ub[], |
464 | int vm_group_bytes[], |
465 | unsigned int dpde0_bytes_per_frame_ub_l[], |
466 | unsigned int dpde0_bytes_per_frame_ub_c[], |
467 | int meta_pte_bytes_per_frame_ub_l[], |
468 | int meta_pte_bytes_per_frame_ub_c[], |
469 | double TimePerVMGroupVBlank[], |
470 | double TimePerVMGroupFlip[], |
471 | double TimePerVMRequestVBlank[], |
472 | double TimePerVMRequestFlip[]); |
473 | |
474 | static void CalculateStutterEfficiency( |
475 | struct display_mode_lib *mode_lib, |
476 | int CompressedBufferSizeInkByte, |
477 | bool_Bool UnboundedRequestEnabled, |
478 | int ConfigReturnBufferSizeInKByte, |
479 | int MetaFIFOSizeInKEntries, |
480 | int ZeroSizeBufferEntries, |
481 | int NumberOfActivePlanes, |
482 | int ROBBufferSizeInKByte, |
483 | double TotalDataReadBandwidth, |
484 | double DCFCLK, |
485 | double ReturnBW, |
486 | double COMPBUF_RESERVED_SPACE_64B, |
487 | double COMPBUF_RESERVED_SPACE_ZS, |
488 | double SRExitTime, |
489 | double SRExitZ8Time, |
490 | bool_Bool SynchronizedVBlank, |
491 | double Z8StutterEnterPlusExitWatermark, |
492 | double StutterEnterPlusExitWatermark, |
493 | bool_Bool ProgressiveToInterlaceUnitInOPP, |
494 | bool_Bool Interlace[], |
495 | double MinTTUVBlank[], |
496 | int DPPPerPlane[], |
497 | unsigned int DETBufferSizeY[], |
498 | int BytePerPixelY[], |
499 | double BytePerPixelDETY[], |
500 | double SwathWidthY[], |
501 | int SwathHeightY[], |
502 | int SwathHeightC[], |
503 | double NetDCCRateLuma[], |
504 | double NetDCCRateChroma[], |
505 | double DCCFractionOfZeroSizeRequestsLuma[], |
506 | double DCCFractionOfZeroSizeRequestsChroma[], |
507 | int HTotal[], |
508 | int VTotal[], |
509 | double PixelClock[], |
510 | double VRatio[], |
511 | enum scan_direction_class SourceScan[], |
512 | int BlockHeight256BytesY[], |
513 | int BlockWidth256BytesY[], |
514 | int BlockHeight256BytesC[], |
515 | int BlockWidth256BytesC[], |
516 | int DCCYMaxUncompressedBlock[], |
517 | int DCCCMaxUncompressedBlock[], |
518 | int VActive[], |
519 | bool_Bool DCCEnable[], |
520 | bool_Bool WritebackEnable[], |
521 | double ReadBandwidthPlaneLuma[], |
522 | double ReadBandwidthPlaneChroma[], |
523 | double meta_row_bw[], |
524 | double dpte_row_bw[], |
525 | double *StutterEfficiencyNotIncludingVBlank, |
526 | double *StutterEfficiency, |
527 | int *NumberOfStutterBurstsPerFrame, |
528 | double *Z8StutterEfficiencyNotIncludingVBlank, |
529 | double *Z8StutterEfficiency, |
530 | int *Z8NumberOfStutterBurstsPerFrame, |
531 | double *StutterPeriod); |
532 | |
533 | static void CalculateSwathAndDETConfiguration( |
534 | bool_Bool ForceSingleDPP, |
535 | int NumberOfActivePlanes, |
536 | bool_Bool DETSharedByAllDPP, |
537 | unsigned int DETBufferSizeInKByte[], |
538 | double MaximumSwathWidthLuma[], |
539 | double MaximumSwathWidthChroma[], |
540 | enum scan_direction_class SourceScan[], |
541 | enum source_format_class SourcePixelFormat[], |
542 | enum dm_swizzle_mode SurfaceTiling[], |
543 | int ViewportWidth[], |
544 | int ViewportHeight[], |
545 | int SurfaceWidthY[], |
546 | int SurfaceWidthC[], |
547 | int SurfaceHeightY[], |
548 | int SurfaceHeightC[], |
549 | int Read256BytesBlockHeightY[], |
550 | int Read256BytesBlockHeightC[], |
551 | int Read256BytesBlockWidthY[], |
552 | int Read256BytesBlockWidthC[], |
553 | enum odm_combine_mode ODMCombineEnabled[], |
554 | int BlendingAndTiming[], |
555 | int BytePerPixY[], |
556 | int BytePerPixC[], |
557 | double BytePerPixDETY[], |
558 | double BytePerPixDETC[], |
559 | int HActive[], |
560 | double HRatio[], |
561 | double HRatioChroma[], |
562 | int DPPPerPlane[], |
563 | int swath_width_luma_ub[], |
564 | int swath_width_chroma_ub[], |
565 | double SwathWidth[], |
566 | double SwathWidthChroma[], |
567 | int SwathHeightY[], |
568 | int SwathHeightC[], |
569 | unsigned int DETBufferSizeY[], |
570 | unsigned int DETBufferSizeC[], |
571 | bool_Bool ViewportSizeSupportPerPlane[], |
572 | bool_Bool *ViewportSizeSupport); |
573 | static void CalculateSwathWidth( |
574 | bool_Bool ForceSingleDPP, |
575 | int NumberOfActivePlanes, |
576 | enum source_format_class SourcePixelFormat[], |
577 | enum scan_direction_class SourceScan[], |
578 | int ViewportWidth[], |
579 | int ViewportHeight[], |
580 | int SurfaceWidthY[], |
581 | int SurfaceWidthC[], |
582 | int SurfaceHeightY[], |
583 | int SurfaceHeightC[], |
584 | enum odm_combine_mode ODMCombineEnabled[], |
585 | int BytePerPixY[], |
586 | int BytePerPixC[], |
587 | int Read256BytesBlockHeightY[], |
588 | int Read256BytesBlockHeightC[], |
589 | int Read256BytesBlockWidthY[], |
590 | int Read256BytesBlockWidthC[], |
591 | int BlendingAndTiming[], |
592 | int HActive[], |
593 | double HRatio[], |
594 | int DPPPerPlane[], |
595 | double SwathWidthSingleDPPY[], |
596 | double SwathWidthSingleDPPC[], |
597 | double SwathWidthY[], |
598 | double SwathWidthC[], |
599 | int MaximumSwathHeightY[], |
600 | int MaximumSwathHeightC[], |
601 | int swath_width_luma_ub[], |
602 | int swath_width_chroma_ub[]); |
603 | |
604 | static double CalculateExtraLatency( |
605 | int RoundTripPingLatencyCycles, |
606 | int ReorderingBytes, |
607 | double DCFCLK, |
608 | int TotalNumberOfActiveDPP, |
609 | int PixelChunkSizeInKByte, |
610 | int TotalNumberOfDCCActiveDPP, |
611 | int MetaChunkSize, |
612 | double ReturnBW, |
613 | bool_Bool GPUVMEnable, |
614 | bool_Bool HostVMEnable, |
615 | int NumberOfActivePlanes, |
616 | int NumberOfDPP[], |
617 | int dpte_group_bytes[], |
618 | double HostVMInefficiencyFactor, |
619 | double HostVMMinPageSize, |
620 | int HostVMMaxNonCachedPageTableLevels); |
621 | |
622 | static double CalculateExtraLatencyBytes( |
623 | int ReorderingBytes, |
624 | int TotalNumberOfActiveDPP, |
625 | int PixelChunkSizeInKByte, |
626 | int TotalNumberOfDCCActiveDPP, |
627 | int MetaChunkSize, |
628 | bool_Bool GPUVMEnable, |
629 | bool_Bool HostVMEnable, |
630 | int NumberOfActivePlanes, |
631 | int NumberOfDPP[], |
632 | int dpte_group_bytes[], |
633 | double HostVMInefficiencyFactor, |
634 | double HostVMMinPageSize, |
635 | int HostVMMaxNonCachedPageTableLevels); |
636 | |
637 | static double CalculateUrgentLatency( |
638 | double UrgentLatencyPixelDataOnly, |
639 | double UrgentLatencyPixelMixedWithVMData, |
640 | double UrgentLatencyVMDataOnly, |
641 | bool_Bool DoUrgentLatencyAdjustment, |
642 | double UrgentLatencyAdjustmentFabricClockComponent, |
643 | double UrgentLatencyAdjustmentFabricClockReference, |
644 | double FabricClockSingle); |
645 | |
646 | static void CalculateUnboundedRequestAndCompressedBufferSize( |
647 | unsigned int DETBufferSizeInKByte, |
648 | int ConfigReturnBufferSizeInKByte, |
649 | enum unbounded_requesting_policy UseUnboundedRequestingFinal, |
650 | int TotalActiveDPP, |
651 | bool_Bool NoChromaPlanes, |
652 | int MaxNumDPP, |
653 | int CompressedBufferSegmentSizeInkByteFinal, |
654 | enum output_encoder_class *Output, |
655 | bool_Bool *UnboundedRequestEnabled, |
656 | int *CompressedBufferSizeInkByte); |
657 | |
658 | static bool_Bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool_Bool NoChroma, enum output_encoder_class Output); |
659 | |
/*
 * Recalculate the DCN 3.1 display mode for @mode_lib.
 *
 * Runs the calculation stages in a fixed order: system configuration,
 * pixel-clock adjustment for the progressive-to-interlace unit, display pipe
 * configuration, and finally the combined clock / prefetch / watermark /
 * performance calculation. A trace line is printed before the last stage
 * when __DML_VBA_DEBUG__ is defined.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
670 | |
671 | static unsigned int dscceComputeDelay( |
672 | unsigned int bpc, |
673 | double BPP, |
674 | unsigned int sliceWidth, |
675 | unsigned int numSlices, |
676 | enum output_format_class pixelFormat, |
677 | enum output_encoder_class Output) |
678 | { |
679 | // valid bpc = source bits per component in the set of {8, 10, 12} |
680 | // valid bpp = increments of 1/16 of a bit |
681 | // min = 6/7/8 in N420/N422/444, respectively |
682 | // max = such that compression is 1:1 |
683 | //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) |
684 | //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} |
685 | //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} |
686 | |
687 | // fixed value |
688 | unsigned int rcModelSize = 8192; |
689 | |
690 | // N422/N420 operate at 2 pixels per clock |
691 | unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; |
692 | |
693 | if (pixelFormat == dm_420) |
694 | pixelsPerClock = 2; |
695 | else if (pixelFormat == dm_444) |
696 | pixelsPerClock = 1; |
697 | else if (pixelFormat == dm_n422) |
698 | pixelsPerClock = 2; |
699 | // #all other modes operate at 1 pixel per clock |
700 | else |
701 | pixelsPerClock = 1; |
702 | |
703 | //initial transmit delay as per PPS |
704 | initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); |
705 | |
706 | //compute ssm delay |
707 | if (bpc == 8) |
708 | D = 81; |
709 | else if (bpc == 10) |
710 | D = 89; |
711 | else |
712 | D = 113; |
713 | |
714 | //divide by pixel per cycle to compute slice width as seen by DSC |
715 | w = sliceWidth / pixelsPerClock; |
716 | |
717 | //422 mode has an additional cycle of delay |
718 | if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) |
719 | s = 0; |
720 | else |
721 | s = 1; |
722 | |
723 | //main calculation for the dscce |
724 | ix = initalXmitDelay + 45; |
725 | wx = (w + 2) / 3; |
726 | P = 3 * wx - w; |
727 | l0 = ix / w; |
728 | a = ix + P * l0; |
729 | ax = (a + 2) / 3 + D + 6 + 1; |
730 | L = (ax + wx - 1) / wx; |
731 | if ((ix % w) == 0 && P != 0) |
732 | lstall = 1; |
733 | else |
734 | lstall = 0; |
735 | Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; |
736 | |
737 | //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels |
738 | pixels = Delay * 3 * pixelsPerClock; |
739 | return pixels; |
740 | } |
741 | |
742 | static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) |
743 | { |
744 | unsigned int Delay = 0; |
745 | |
746 | if (pixelFormat == dm_420) { |
747 | // sfr |
748 | Delay = Delay + 2; |
749 | // dsccif |
750 | Delay = Delay + 0; |
751 | // dscc - input deserializer |
752 | Delay = Delay + 3; |
753 | // dscc gets pixels every other cycle |
754 | Delay = Delay + 2; |
755 | // dscc - input cdc fifo |
756 | Delay = Delay + 12; |
757 | // dscc gets pixels every other cycle |
758 | Delay = Delay + 13; |
759 | // dscc - cdc uncertainty |
760 | Delay = Delay + 2; |
761 | // dscc - output cdc fifo |
762 | Delay = Delay + 7; |
763 | // dscc gets pixels every other cycle |
764 | Delay = Delay + 3; |
765 | // dscc - cdc uncertainty |
766 | Delay = Delay + 2; |
767 | // dscc - output serializer |
768 | Delay = Delay + 1; |
769 | // sft |
770 | Delay = Delay + 1; |
771 | } else if (pixelFormat == dm_n422) { |
772 | // sfr |
773 | Delay = Delay + 2; |
774 | // dsccif |
775 | Delay = Delay + 1; |
776 | // dscc - input deserializer |
777 | Delay = Delay + 5; |
778 | // dscc - input cdc fifo |
779 | Delay = Delay + 25; |
780 | // dscc - cdc uncertainty |
781 | Delay = Delay + 2; |
782 | // dscc - output cdc fifo |
783 | Delay = Delay + 10; |
784 | // dscc - cdc uncertainty |
785 | Delay = Delay + 2; |
786 | // dscc - output serializer |
787 | Delay = Delay + 1; |
788 | // sft |
789 | Delay = Delay + 1; |
790 | } else { |
791 | // sfr |
792 | Delay = Delay + 2; |
793 | // dsccif |
794 | Delay = Delay + 0; |
795 | // dscc - input deserializer |
796 | Delay = Delay + 3; |
797 | // dscc - input cdc fifo |
798 | Delay = Delay + 12; |
799 | // dscc - cdc uncertainty |
800 | Delay = Delay + 2; |
801 | // dscc - output cdc fifo |
802 | Delay = Delay + 7; |
803 | // dscc - output serializer |
804 | Delay = Delay + 1; |
805 | // dscc - cdc uncertainty |
806 | Delay = Delay + 2; |
807 | // sft |
808 | Delay = Delay + 1; |
809 | } |
810 | |
811 | return Delay; |
812 | } |
813 | |
/*
 * CalculatePrefetchSchedule() - work out the prefetch timing, bandwidth and
 * vertical ratios for a single pipe.
 *
 * NOTE(review): the rows below are kept exactly as they appear in this
 * listing. The leading line-number column, the trailing bar, and tokens such
 * as bool_Bool, true1, false0 and the do/while(0) block glued onto each
 * dml_print() call look like analyzer-report rendering of expanded macros
 * rather than real source text - confirm against the original .c file.
 *
 * Inputs: every parameter up to and including TWait is only read. Of these,
 * mode_lib, SwathWidthY and SwathWidthC are not referenced anywhere in the
 * body shown here.
 *
 * Outputs: the pointer parameters from DSTXAfterScaler through Tdmdl are
 * assigned in this body. TSetup, VUpdateOffsetPix, VUpdateWidthPix and
 * VReadyOffsetPix are passed to CalculateVupdateAndDynamicMetadataParameters()
 * and are presumably filled in there (TSetup is read back afterwards).
 *
 * Return: true (MyError) when no usable schedule was found; in that case the
 * prefetch bandwidth, the destination line counts, both vertical prefetch
 * ratios and both required pixel-data bandwidths are reset to 0 before
 * returning. Also returns true immediately, before DSTXAfterScaler and the
 * later outputs are touched, when DPPCLK or DISPCLK is 0.
 */
814 | static bool_Bool CalculatePrefetchSchedule( |
815 | struct display_mode_lib *mode_lib, |
816 | double HostVMInefficiencyFactor, |
817 | Pipe *myPipe, |
818 | unsigned int DSCDelay, |
819 | double DPPCLKDelaySubtotalPlusCNVCFormater, |
820 | double DPPCLKDelaySCL, |
821 | double DPPCLKDelaySCLLBOnly, |
822 | double DPPCLKDelayCNVCCursor, |
823 | double DISPCLKDelaySubtotal, |
824 | unsigned int DPP_RECOUT_WIDTH, |
825 | enum output_format_class OutputFormat, |
826 | unsigned int MaxInterDCNTileRepeaters, |
827 | unsigned int VStartup, |
828 | unsigned int MaxVStartup, |
829 | unsigned int GPUVMPageTableLevels, |
830 | bool_Bool GPUVMEnable, |
831 | bool_Bool HostVMEnable, |
832 | unsigned int HostVMMaxNonCachedPageTableLevels, |
833 | double HostVMMinPageSize, |
834 | bool_Bool DynamicMetadataEnable, |
835 | bool_Bool DynamicMetadataVMEnabled, |
836 | int DynamicMetadataLinesBeforeActiveRequired, |
837 | unsigned int DynamicMetadataTransmittedBytes, |
838 | double UrgentLatency, |
839 | double UrgentExtraLatency, |
840 | double TCalc, |
841 | unsigned int PDEAndMetaPTEBytesFrame, |
842 | unsigned int MetaRowByte, |
843 | unsigned int PixelPTEBytesPerRow, |
844 | double PrefetchSourceLinesY, |
845 | unsigned int SwathWidthY, |
846 | double VInitPreFillY, |
847 | unsigned int MaxNumSwathY, |
848 | double PrefetchSourceLinesC, |
849 | unsigned int SwathWidthC, |
850 | double VInitPreFillC, |
851 | unsigned int MaxNumSwathC, |
852 | int swath_width_luma_ub, |
853 | int swath_width_chroma_ub, |
854 | unsigned int SwathHeightY, |
855 | unsigned int SwathHeightC, |
856 | double TWait, |
857 | double *DSTXAfterScaler, |
858 | double *DSTYAfterScaler, |
859 | double *DestinationLinesForPrefetch, |
860 | double *PrefetchBandwidth, |
861 | double *DestinationLinesToRequestVMInVBlank, |
862 | double *DestinationLinesToRequestRowInVBlank, |
863 | double *VRatioPrefetchY, |
864 | double *VRatioPrefetchC, |
865 | double *RequiredPrefetchPixDataBWLuma, |
866 | double *RequiredPrefetchPixDataBWChroma, |
867 | bool_Bool *NotEnoughTimeForDynamicMetadata, |
868 | double *Tno_bw, |
869 | double *prefetch_vmrow_bw, |
870 | double *Tdmdl_vm, |
871 | double *Tdmdl, |
872 | double *TSetup, |
873 | int *VUpdateOffsetPix, |
874 | double *VUpdateWidthPix, |
875 | double *VReadyOffsetPix) |
876 | { |
877 | bool_Bool MyError = false0; |
878 | unsigned int DPPCycles, DISPCLKCycles; |
879 | double DSTTotalPixelsAfterScaler; |
880 | double LineTime; |
881 | double dst_y_prefetch_equ; |
882 | double Tsw_oto; |
883 | double prefetch_bw_oto; |
884 | double prefetch_bw_pr; |
885 | double Tvm_oto; |
886 | double Tr0_oto; |
887 | double Tvm_oto_lines; |
888 | double Tr0_oto_lines; |
889 | double dst_y_prefetch_oto; |
890 | double TimeForFetchingMetaPTE = 0; |
891 | double TimeForFetchingRowInVBlank = 0; |
892 | double LinesToRequestPrefetchPixelData = 0; |
893 | unsigned int HostVMDynamicLevelsTrips; |
894 | double trip_to_mem; |
895 | double Tvm_trips; |
896 | double Tr0_trips; |
897 | double Tvm_trips_rounded; |
898 | double Tr0_trips_rounded; |
899 | double Lsw_oto; |
900 | double Tpre_rounded; |
901 | double prefetch_bw_equ; |
902 | double Tvm_equ; |
903 | double Tr0_equ; |
904 | double Tdmbf; |
905 | double Tdmec; |
906 | double Tdmsks; |
907 | double prefetch_sw_bytes; |
908 | double bytes_pp; |
909 | double dep_bytes; |
910 | int max_vratio_pre = 4; |
911 | double min_Lsw; |
912 | double Tsw_est1 = 0; |
913 | double Tsw_est3 = 0; |
914 | double max_Tsw = 0; |
915 | |
/* Extra host-VM page table trips are counted only when both GPUVM and HostVM are enabled. */
916 | if (GPUVMEnable == true1 && HostVMEnable == true1) { |
917 | HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; |
918 | } else { |
919 | HostVMDynamicLevelsTrips = 0; |
920 | } |
921 | #ifdef __DML_VBA_DEBUG__ |
922 | dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor){do { } while(0); }; |
923 | #endif |
/* Receives TSetup, the three dynamic-metadata times and the VUpdate/VReady outputs by pointer. */
924 | CalculateVupdateAndDynamicMetadataParameters( |
925 | MaxInterDCNTileRepeaters, |
926 | myPipe->DPPCLK, |
927 | myPipe->DISPCLK, |
928 | myPipe->DCFCLKDeepSleep, |
929 | myPipe->PixelClock, |
930 | myPipe->HTotal, |
931 | myPipe->VBlank, |
932 | DynamicMetadataTransmittedBytes, |
933 | DynamicMetadataLinesBeforeActiveRequired, |
934 | myPipe->InterlaceEnable, |
935 | myPipe->ProgressiveToInterlaceUnitInOPP, |
936 | TSetup, |
937 | &Tdmbf, |
938 | &Tdmec, |
939 | &Tdmsks, |
940 | VUpdateOffsetPix, |
941 | VUpdateWidthPix, |
942 | VReadyOffsetPix); |
943 | |
/* Duration of one line; the debug strings further down label these times as us. */
944 | LineTime = myPipe->HTotal / myPipe->PixelClock; |
945 | trip_to_mem = UrgentLatency; |
946 | Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); |
947 | |
/* Tdmdl: the debug text describes it as the time for the fabric to become ready and fetch dynamic metadata. */
948 | #ifdef __DML_VBA_ALLOW_DELTA__ |
949 | if (DynamicMetadataVMEnabled == true1 && GPUVMEnable == true1) { |
950 | #else |
951 | if (DynamicMetadataVMEnabled == true1) { |
952 | #endif |
953 | *Tdmdl = TWait + Tvm_trips + trip_to_mem; |
954 | } else { |
955 | *Tdmdl = TWait + UrgentExtraLatency; |
956 | } |
957 | |
958 | #ifdef __DML_VBA_ALLOW_DELTA__ |
959 | if (DynamicMetadataEnable == false0) { |
960 | *Tdmdl = 0.0; |
961 | } |
962 | #endif |
963 | |
/* Dynamic metadata fits only if the VStartup window covers setup plus all metadata stages. */
964 | if (DynamicMetadataEnable == true1) { |
965 | if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { |
966 | *NotEnoughTimeForDynamicMetadata = true1; |
967 | dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__){do { } while(0); }; |
968 | dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf){do { } while(0); }; |
969 | dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec){do { } while(0); }; |
970 | dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks){do { } while(0); }; |
971 | dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl){do { } while(0); }; |
972 | } else { |
973 | *NotEnoughTimeForDynamicMetadata = false0; |
974 | } |
975 | } else { |
976 | *NotEnoughTimeForDynamicMetadata = false0; |
977 | } |
978 | |
979 | *Tdmdl_vm = (DynamicMetadataEnable == true1 && DynamicMetadataVMEnabled == true1 && GPUVMEnable == true1 ? TWait + Tvm_trips : 0); |
980 | |
/* Pipeline delay in DPP clock cycles: scaler path or line-buffer-only path, plus a per-cursor term. */
981 | if (myPipe->ScalerEnabled) |
982 | DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; |
983 | else |
984 | DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; |
985 | |
986 | DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; |
987 | |
988 | DISPCLKCycles = DISPCLKDelaySubtotal; |
989 | |
/* Both clocks are divisors in the next expression; bail out with an error instead of dividing by zero. */
990 | if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) |
991 | return true1; |
992 | |
/* Convert the cycle counts to pixel-clock units and add the DSC delay. */
993 | *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; |
994 | |
995 | #ifdef __DML_VBA_DEBUG__ |
996 | dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles){do { } while(0); }; |
997 | dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock){do { } while(0); }; |
998 | dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK){do { } while(0); }; |
999 | dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles){do { } while(0); }; |
1000 | dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK){do { } while(0); }; |
1001 | dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay){do { } while(0); }; |
/* NOTE(review): *DSTXAfterScaler is a double but the format below uses %d - confirm and switch to %f. */
1002 | dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler){do { } while(0); }; |
1003 | dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled){do { } while(0); }; |
1004 | #endif |
1005 | |
1006 | *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; |
1007 | |
1008 | if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) |
1009 | *DSTYAfterScaler = 1; |
1010 | else |
1011 | *DSTYAfterScaler = 0; |
1012 | |
/* Re-split the total pixel delay into whole lines (Y) and the remaining pixels within a line (X). */
1013 | DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; |
1014 | *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); |
1015 | *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); |
1016 | |
1017 | #ifdef __DML_VBA_DEBUG__ |
/* NOTE(review): same %d versus double mismatch as the earlier DSTXAfterScaler print. */
1018 | dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler){do { } while(0); }; |
1019 | #endif |
1020 | |
1021 | MyError = false0; |
1022 | |
/* The rounded variants are the trip times expressed as a multiple of a quarter line (ceil of 4 * t / LineTime). */
1023 | Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); |
1024 | Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; |
1025 | Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; |
1026 | |
1027 | #ifdef __DML_VBA_ALLOW_DELTA__ |
1028 | if (!myPipe->DCCEnable) { |
1029 | Tr0_trips = 0.0; |
1030 | Tr0_trips_rounded = 0.0; |
1031 | } |
1032 | #endif |
1033 | |
1034 | if (!GPUVMEnable) { |
1035 | Tvm_trips = 0.0; |
1036 | Tvm_trips_rounded = 0.0; |
1037 | } |
1038 | |
/* Tno_bw is subtracted from the available prefetch time in the bandwidth formulas below. */
1039 | if (GPUVMEnable) { |
1040 | if (GPUVMPageTableLevels >= 3) { |
1041 | *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); |
1042 | } else { |
1043 | *Tno_bw = 0; |
1044 | } |
1045 | } else if (!myPipe->DCCEnable) { |
1046 | *Tno_bw = LineTime; |
1047 | } else { |
1048 | *Tno_bw = LineTime / 4; |
1049 | } |
1050 | |
/* For the 4:2:0 source formats only a quarter of the chroma bytes count towards bytes per pixel. */
1051 | if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) |
1052 | bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; |
1053 | else |
1054 | bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; |
1055 | /*rev 99*/ |
/*
 * NOTE(review): the first prefetch_bw_oto assignment below is overwritten by
 * the second one before it is ever read, and dml_min(1, ...) presumably caps
 * prefetch_bw_pr at 1 - confirm both are intended by the HW formula.
 */
1056 | prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); |
1057 | max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; |
1058 | prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; |
1059 | prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); |
1060 | prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); |
1061 | |
/* Lsw_oto: swath fetch time in lines, at least min_Lsw, rounded up to a quarter line. */
1062 | min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); |
1063 | Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; |
1064 | Tsw_oto = Lsw_oto * LineTime; |
1065 | |
/* Recompute the oto bandwidth from the rounded swath time. */
1066 | prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; |
1067 | |
1068 | #ifdef __DML_VBA_DEBUG__ |
1069 | dml_print("DML: HTotal: %d\n", myPipe->HTotal){do { } while(0); }; |
1070 | dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto){do { } while(0); }; |
1071 | dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY){do { } while(0); }; |
1072 | dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub){do { } while(0); }; |
1073 | dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY){do { } while(0); }; |
1074 | dml_print("DML: Tsw_oto: %f\n", Tsw_oto){do { } while(0); }; |
1075 | #endif |
1076 | |
/* oto schedule: time for the VM stage and for one row stage, each floored at a quarter line. */
1077 | if (GPUVMEnable == true1) |
1078 | Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); |
1079 | else |
1080 | Tvm_oto = LineTime / 4.0; |
1081 | |
1082 | if ((GPUVMEnable == true1 || myPipe->DCCEnable == true1)) { |
1083 | Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) |
1084 | LineTime - Tvm_oto, |
1085 | LineTime / 4); |
1086 | } else { |
1087 | Tr0_oto = (LineTime - Tvm_oto) / 2.0; |
1088 | } |
1089 | |
1090 | #ifdef __DML_VBA_DEBUG__ |
1091 | dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips){do { } while(0); }; |
1092 | dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips){do { } while(0); }; |
/* NOTE(review): the label says PDEAndMetaPTEBytesFrame but the value passed is MetaRowByte - likely a copy-paste slip. */
1093 | dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte){do { } while(0); }; |
1094 | dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte){do { } while(0); }; |
1095 | dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow){do { } while(0); }; |
1096 | dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor){do { } while(0); }; |
1097 | dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto){do { } while(0); }; |
1098 | dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto){do { } while(0); }; |
1099 | dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto){do { } while(0); }; |
1100 | #endif |
1101 | |
/*
 * dst_y_prefetch_oto: lines needed by the oto schedule (VM + two row stages + swath).
 * dst_y_prefetch_equ: lines left in the VStartup window after setup, wait/calc
 * (or Tdmdl if larger) and the after-scaler delay, rounded to a quarter line.
 */
1102 | Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; |
1103 | Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; |
1104 | dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; |
1105 | dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); |
1106 | dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; |
1107 | Tpre_rounded = dst_y_prefetch_equ * LineTime; |
1108 | |
1109 | dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); |
1110 | |
/* From here on prefetch_sw_bytes is raised to twice dep_bytes when the swath bytes are smaller than dep_bytes. */
1111 | if (prefetch_sw_bytes < dep_bytes) |
1112 | prefetch_sw_bytes = 2 * dep_bytes; |
1113 | |
1114 | dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto){do { } while(0); }; |
1115 | dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines){do { } while(0); }; |
1116 | dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines){do { } while(0); }; |
1117 | dml_print("DML: Lsw_oto: %f\n", Lsw_oto){do { } while(0); }; |
1118 | dml_print("DML: LineTime: %f\n", LineTime){do { } while(0); }; |
1119 | dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ){do { } while(0); }; |
1120 | |
1121 | dml_print("DML: LineTime: %f\n", LineTime){do { } while(0); }; |
1122 | dml_print("DML: VStartup: %d\n", VStartup){do { } while(0); }; |
1123 | dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime){do { } while(0); }; |
1124 | dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup){do { } while(0); }; |
1125 | dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc){do { } while(0); }; |
1126 | dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait){do { } while(0); }; |
1127 | dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf){do { } while(0); }; |
1128 | dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec){do { } while(0); }; |
1129 | dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks){do { } while(0); }; |
1130 | dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm){do { } while(0); }; |
1131 | dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl){do { } while(0); }; |
1132 | dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler){do { } while(0); }; |
1133 | dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler){do { } while(0); }; |
1134 | |
/* Default the outputs to 0; RequiredPrefetchPixDataBWChroma is not cleared here but is set on every later path. */
1135 | *PrefetchBandwidth = 0; |
1136 | *DestinationLinesToRequestVMInVBlank = 0; |
1137 | *DestinationLinesToRequestRowInVBlank = 0; |
1138 | *VRatioPrefetchY = 0; |
1139 | *VRatioPrefetchC = 0; |
1140 | *RequiredPrefetchPixDataBWLuma = 0; |
/* A schedule is attempted only when more than one line is available; otherwise MyError is set in the else arm. */
1141 | if (dst_y_prefetch_equ > 1) { |
1142 | double PrefetchBandwidth1; |
1143 | double PrefetchBandwidth2; |
1144 | double PrefetchBandwidth3; |
1145 | double PrefetchBandwidth4; |
1146 | |
/*
 * Four candidate bandwidths, each bytes / available time, 0 when the time is not positive:
 *   1: VM bytes + 2 x row bytes + swath bytes over (Tpre - Tno_bw)
 *   2: VM bytes + swath bytes, with two rounded row trips removed from the time
 *   3: 2 x row bytes + swath bytes, with the rounded VM trip removed from the time
 *   4: swath bytes only, with both the VM trip and two row trips removed
 */
1147 | if (Tpre_rounded - *Tno_bw > 0) { |
1148 | PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor |
1149 | + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); |
1150 | Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; |
1151 | } else { |
1152 | PrefetchBandwidth1 = 0; |
1153 | } |
1154 | |
/* At the maximum VStartup, if the estimated swath time is below min_Lsw lines, recompute without the swath bytes over a shortened window. */
1155 | if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { |
1156 | PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) |
1157 | / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); |
1158 | } |
1159 | |
1160 | if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) |
1161 | PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); |
1162 | else |
1163 | PrefetchBandwidth2 = 0; |
1164 | |
1165 | if (Tpre_rounded - Tvm_trips_rounded > 0) { |
1166 | PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor |
1167 | + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); |
1168 | Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; |
1169 | } else { |
1170 | PrefetchBandwidth3 = 0; |
1171 | } |
1172 | |
1173 | #ifdef __DML_VBA_DEBUG__ |
1174 | dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded){do { } while(0); }; |
1175 | dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded){do { } while(0); }; |
1176 | dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3){do { } while(0); }; |
1177 | #endif |
1178 | if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { |
1179 | PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) |
1180 | / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); |
1181 | } |
1182 | |
1183 | if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) |
1184 | PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); |
1185 | else |
1186 | PrefetchBandwidth4 = 0; |
1187 | |
1188 | { |
1189 | bool_Bool Case1OK; |
1190 | bool_Bool Case2OK; |
1191 | bool_Bool Case3OK; |
1192 | |
/*
 * A candidate is accepted when its VM and row transfer times fall on the
 * side of the rounded trip times that matches how it was derived:
 * case 1 needs both >= the trips, case 2 needs VM >= and row <,
 * case 3 needs VM < and row >=. Candidate 4 is the fallback.
 */
1193 | if (PrefetchBandwidth1 > 0) { |
1194 | if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded |
1195 | && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { |
1196 | Case1OK = true1; |
1197 | } else { |
1198 | Case1OK = false0; |
1199 | } |
1200 | } else { |
1201 | Case1OK = false0; |
1202 | } |
1203 | |
1204 | if (PrefetchBandwidth2 > 0) { |
1205 | if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded |
1206 | && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { |
1207 | Case2OK = true1; |
1208 | } else { |
1209 | Case2OK = false0; |
1210 | } |
1211 | } else { |
1212 | Case2OK = false0; |
1213 | } |
1214 | |
1215 | if (PrefetchBandwidth3 > 0) { |
1216 | if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded |
1217 | && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { |
1218 | Case3OK = true1; |
1219 | } else { |
1220 | Case3OK = false0; |
1221 | } |
1222 | } else { |
1223 | Case3OK = false0; |
1224 | } |
1225 | |
1226 | if (Case1OK) { |
1227 | prefetch_bw_equ = PrefetchBandwidth1; |
1228 | } else if (Case2OK) { |
1229 | prefetch_bw_equ = PrefetchBandwidth2; |
1230 | } else if (Case3OK) { |
1231 | prefetch_bw_equ = PrefetchBandwidth3; |
1232 | } else { |
1233 | prefetch_bw_equ = PrefetchBandwidth4; |
1234 | } |
1235 | |
1236 | #ifdef __DML_VBA_DEBUG__ |
1237 | dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK){do { } while(0); }; |
1238 | dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK){do { } while(0); }; |
1239 | dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK){do { } while(0); }; |
1240 | dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ){do { } while(0); }; |
1241 | #endif |
1242 | |
/* equ schedule stage times; note the third dml_max4 term is halved here, unlike in the oto version above. */
1243 | if (prefetch_bw_equ > 0) { |
1244 | if (GPUVMEnable == true1) { |
1245 | Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); |
1246 | } else { |
1247 | Tvm_equ = LineTime / 4; |
1248 | } |
1249 | |
1250 | if ((GPUVMEnable == true1 || myPipe->DCCEnable == true1)) { |
1251 | Tr0_equ = dml_max4( |
1252 | (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, |
1253 | Tr0_trips, |
1254 | (LineTime - Tvm_equ) / 2, |
1255 | LineTime / 4); |
1256 | } else { |
1257 | Tr0_equ = (LineTime - Tvm_equ) / 2; |
1258 | } |
1259 | } else { |
1260 | Tvm_equ = 0; |
1261 | Tr0_equ = 0; |
1262 | dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1263 | } |
1264 | } |
1265 | |
/* Use the oto schedule when it needs fewer lines than are available, otherwise the equ schedule. */
1266 | if (dst_y_prefetch_oto < dst_y_prefetch_equ) { |
1267 | *DestinationLinesForPrefetch = dst_y_prefetch_oto; |
1268 | TimeForFetchingMetaPTE = Tvm_oto; |
1269 | TimeForFetchingRowInVBlank = Tr0_oto; |
1270 | *PrefetchBandwidth = prefetch_bw_oto; |
1271 | } else { |
1272 | *DestinationLinesForPrefetch = dst_y_prefetch_equ; |
1273 | TimeForFetchingMetaPTE = Tvm_equ; |
1274 | TimeForFetchingRowInVBlank = Tr0_equ; |
1275 | *PrefetchBandwidth = prefetch_bw_equ; |
1276 | } |
1277 | |
/* Stage times converted to lines, rounded up to a quarter line. */
1278 | *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; |
1279 | |
1280 | *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; |
1281 | |
/* Lines left for pixel data once the VM stage and two row stages are taken out. */
1282 | #ifdef __DML_VBA_ALLOW_DELTA__ |
1283 | LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch |
1284 | // See note above dated 5/30/2018 |
1285 | // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? |
1286 | - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? |
1287 | #else |
1288 | LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; |
1289 | #endif |
1290 | |
1291 | #ifdef __DML_VBA_DEBUG__ |
1292 | dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch){do { } while(0); }; |
1293 | dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank){do { } while(0); }; |
1294 | dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank){do { } while(0); }; |
1295 | dml_print("DML::%s: LineTime = %f\n", __func__, LineTime){do { } while(0); }; |
1296 | dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank){do { } while(0); }; |
1297 | dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY){do { } while(0); }; |
1298 | dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData){do { } while(0); }; |
1299 | #endif |
1300 | |
/* prefetch_bw_equ is required to be positive here even when the oto schedule was the one selected. */
1301 | if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { |
1302 | |
/* Vertical prefetch ratio: source lines per available destination line, never below 1. */
1303 | *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; |
1304 | *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); |
1305 | #ifdef __DML_VBA_DEBUG__ |
1306 | dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY){do { } while(0); }; |
1307 | dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY){do { } while(0); }; |
1308 | dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY){do { } while(0); }; |
1309 | #endif |
/* NOTE(review): the luma test below combines its two conditions with && while the chroma test further down uses || - confirm which is intended. */
1310 | if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { |
1311 | if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { |
1312 | *VRatioPrefetchY = dml_max( |
1313 | (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, |
1314 | (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); |
1315 | *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); |
1316 | } else { |
1317 | MyError = true1; |
1318 | dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1319 | *VRatioPrefetchY = 0; |
1320 | } |
1321 | #ifdef __DML_VBA_DEBUG__ |
1322 | dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY){do { } while(0); }; |
1323 | dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY){do { } while(0); }; |
1324 | dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY){do { } while(0); }; |
1325 | #endif |
1326 | } |
1327 | |
1328 | *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; |
1329 | *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); |
1330 | |
1331 | #ifdef __DML_VBA_DEBUG__ |
1332 | dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC){do { } while(0); }; |
1333 | dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC){do { } while(0); }; |
1334 | dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC){do { } while(0); }; |
1335 | #endif |
1336 | if ((SwathHeightC > 4) || VInitPreFillC > 3) { |
1337 | if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { |
1338 | *VRatioPrefetchC = dml_max( |
1339 | *VRatioPrefetchC, |
1340 | (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); |
1341 | *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); |
1342 | } else { |
1343 | MyError = true1; |
1344 | dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1345 | *VRatioPrefetchC = 0; |
1346 | } |
1347 | #ifdef __DML_VBA_DEBUG__ |
1348 | dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC){do { } while(0); }; |
1349 | dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC){do { } while(0); }; |
1350 | dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC){do { } while(0); }; |
1351 | #endif |
1352 | } |
1353 | |
1354 | #ifdef __DML_VBA_DEBUG__ |
1355 | dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY){do { } while(0); }; |
1356 | dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub){do { } while(0); }; |
1357 | dml_print("DML::%s: LineTime = %f\n", __func__, LineTime){do { } while(0); }; |
1358 | #endif |
1359 | |
/* Required pixel-data bandwidth uses the unclamped source/destination line ratio, not VRatioPrefetchY/C. */
1360 | *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; |
1361 | |
1362 | #ifdef __DML_VBA_DEBUG__ |
1363 | dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma){do { } while(0); }; |
1364 | #endif |
1365 | |
1366 | *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub |
1367 | / LineTime; |
1368 | } else { |
1369 | MyError = true1; |
1370 | dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1371 | dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData){do { } while(0); }; |
1372 | *VRatioPrefetchY = 0; |
1373 | *VRatioPrefetchC = 0; |
1374 | *RequiredPrefetchPixDataBWLuma = 0; |
1375 | *RequiredPrefetchPixDataBWChroma = 0; |
1376 | } |
1377 | |
1378 | dml_print({do { } while(0); } |
1379 | "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",{do { } while(0); } |
1380 | (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE){do { } while(0); }; |
1381 | dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE){do { } while(0); }; |
1382 | dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank){do { } while(0); }; |
1383 | dml_print({do { } while(0); } |
1384 | "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",{do { } while(0); } |
1385 | (double) LinesToRequestPrefetchPixelData * LineTime){do { } while(0); }; |
1386 | dml_print("DML: To: %fus - time for propagation from scaler to optc\n",{do { } while(0); } |
1387 | (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /{do { } while(0); } |
1388 | (double) myPipe->HTotal)) * LineTime){do { } while(0); }; |
1389 | dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"){do { } while(0); }; |
1390 | dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",{do { } while(0); } |
1391 | VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank{do { } while(0); } |
1392 | - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup){do { } while(0); }; |
1393 | dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow){do { } while(0); }; |
1394 | |
1395 | } else { |
1396 | MyError = true1; |
1397 | dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1398 | } |
1399 | |
/* Bandwidth for the VM and row stages; a stage with bytes to move but zero lines to move them in is an error. */
1400 | { |
1401 | double prefetch_vm_bw; |
1402 | double prefetch_row_bw; |
1403 | |
1404 | if (PDEAndMetaPTEBytesFrame == 0) { |
1405 | prefetch_vm_bw = 0; |
1406 | } else if (*DestinationLinesToRequestVMInVBlank > 0) { |
1407 | #ifdef __DML_VBA_DEBUG__ |
1408 | dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); }; |
1409 | dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor){do { } while(0); }; |
1410 | dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank){do { } while(0); }; |
1411 | dml_print("DML::%s: LineTime = %f\n", __func__, LineTime){do { } while(0); }; |
1412 | #endif |
1413 | prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); |
1414 | #ifdef __DML_VBA_DEBUG__ |
1415 | dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw){do { } while(0); }; |
1416 | #endif |
1417 | } else { |
1418 | prefetch_vm_bw = 0; |
1419 | MyError = true1; |
1420 | dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1421 | } |
1422 | |
1423 | if (MetaRowByte + PixelPTEBytesPerRow == 0) { |
1424 | prefetch_row_bw = 0; |
1425 | } else if (*DestinationLinesToRequestRowInVBlank > 0) { |
1426 | prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); |
1427 | |
1428 | #ifdef __DML_VBA_DEBUG__ |
1429 | dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte){do { } while(0); }; |
1430 | dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow){do { } while(0); }; |
1431 | dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank){do { } while(0); }; |
1432 | dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw){do { } while(0); }; |
1433 | #endif |
1434 | } else { |
1435 | prefetch_row_bw = 0; |
1436 | MyError = true1; |
1437 | dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__){do { } while(0); }; |
1438 | } |
1439 | |
1440 | *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); |
1441 | } |
1442 | |
/* On any error, clear the schedule outputs so callers do not consume partial results; prefetch_vmrow_bw and Tno_bw are left as computed. */
1443 | if (MyError) { |
1444 | *PrefetchBandwidth = 0; |
1445 | TimeForFetchingMetaPTE = 0; |
1446 | TimeForFetchingRowInVBlank = 0; |
1447 | *DestinationLinesToRequestVMInVBlank = 0; |
1448 | *DestinationLinesToRequestRowInVBlank = 0; |
1449 | *DestinationLinesForPrefetch = 0; |
1450 | LinesToRequestPrefetchPixelData = 0; |
1451 | *VRatioPrefetchY = 0; |
1452 | *VRatioPrefetchC = 0; |
1453 | *RequiredPrefetchPixDataBWLuma = 0; |
1454 | *RequiredPrefetchPixDataBWChroma = 0; |
1455 | } |
1456 | |
1457 | return MyError; |
1458 | } |
1459 | |
/*
 * RoundToDFSGranularityUp() - snap Clock onto the grid of clocks reachable by
 * dividing 4 * VCOSpeed by a whole-number divider.
 *
 * The divider is the ratio 4 * VCOSpeed / Clock passed through
 * dml_floor(..., 1); the return value is 4 * VCOSpeed over that divider.
 * Assuming dml_floor() rounds down to a multiple of 1, the result is not
 * below Clock for positive inputs - TODO confirm against dml_inline_defs.h.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_times_four = VCOSpeed * 4;
	const double divider = dml_floor(vco_times_four / Clock, 1);

	return vco_times_four / divider;
}
1464 | |
/*
 * RoundToDFSGranularityDown() - snap Clock onto the grid of clocks reachable
 * by dividing 4 * VCOSpeed by a whole-number divider.
 *
 * The divider is the ratio 4 * VCOSpeed / Clock passed through
 * dml_ceil(..., 1); the return value is 4 * VCOSpeed over that divider.
 * Assuming dml_ceil() rounds up to a multiple of 1, the result is not above
 * Clock for positive inputs - TODO confirm against dml_inline_defs.h.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / divider;
}
1469 | |
1470 | static void CalculateDCCConfiguration( |
1471 | bool_Bool DCCEnabled, |
1472 | bool_Bool DCCProgrammingAssumesScanDirectionUnknown, |
1473 | enum source_format_class SourcePixelFormat, |
1474 | unsigned int SurfaceWidthLuma, |
1475 | unsigned int SurfaceWidthChroma, |
1476 | unsigned int SurfaceHeightLuma, |
1477 | unsigned int SurfaceHeightChroma, |
1478 | double DETBufferSize, |
1479 | unsigned int RequestHeight256ByteLuma, |
1480 | unsigned int RequestHeight256ByteChroma, |
1481 | enum dm_swizzle_mode TilingFormat, |
1482 | unsigned int BytePerPixelY, |
1483 | unsigned int BytePerPixelC, |
1484 | double BytePerPixelDETY, |
1485 | double BytePerPixelDETC, |
1486 | enum scan_direction_class ScanOrientation, |
1487 | unsigned int *MaxUncompressedBlockLuma, |
1488 | unsigned int *MaxUncompressedBlockChroma, |
1489 | unsigned int *MaxCompressedBlockLuma, |
1490 | unsigned int *MaxCompressedBlockChroma, |
1491 | unsigned int *IndependentBlockLuma, |
1492 | unsigned int *IndependentBlockChroma) |
1493 | { |
1494 | int yuv420; |
1495 | int horz_div_l; |
1496 | int horz_div_c; |
1497 | int vert_div_l; |
1498 | int vert_div_c; |
1499 | |
1500 | int swath_buf_size; |
1501 | double detile_buf_vp_horz_limit; |
1502 | double detile_buf_vp_vert_limit; |
1503 | |
1504 | int MAS_vp_horz_limit; |
1505 | int MAS_vp_vert_limit; |
1506 | int max_vp_horz_width; |
1507 | int max_vp_vert_height; |
1508 | int eff_surf_width_l; |
1509 | int eff_surf_width_c; |
1510 | int eff_surf_height_l; |
1511 | int eff_surf_height_c; |
1512 | |
1513 | int full_swath_bytes_horz_wc_l; |
1514 | int full_swath_bytes_horz_wc_c; |
1515 | int full_swath_bytes_vert_wc_l; |
1516 | int full_swath_bytes_vert_wc_c; |
1517 | int req128_horz_wc_l; |
1518 | int req128_horz_wc_c; |
1519 | int req128_vert_wc_l; |
1520 | int req128_vert_wc_c; |
1521 | int segment_order_horz_contiguous_luma; |
1522 | int segment_order_horz_contiguous_chroma; |
1523 | int segment_order_vert_contiguous_luma; |
1524 | int segment_order_vert_contiguous_chroma; |
1525 | |
1526 | typedef enum { |
1527 | REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA |
1528 | } RequestType; |
1529 | RequestType RequestLuma; |
1530 | RequestType RequestChroma; |
1531 | |
1532 | yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); |
1533 | horz_div_l = 1; |
1534 | horz_div_c = 1; |
1535 | vert_div_l = 1; |
1536 | vert_div_c = 1; |
1537 | |
1538 | if (BytePerPixelY == 1) |
1539 | vert_div_l = 0; |
1540 | if (BytePerPixelC == 1) |
1541 | vert_div_c = 0; |
1542 | if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) |
1543 | horz_div_l = 0; |
1544 | if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) |
1545 | horz_div_c = 0; |
1546 | |
1547 | if (BytePerPixelC == 0) { |
1548 | swath_buf_size = DETBufferSize / 2 - 2 * 256; |
1549 | detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); |
1550 | detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); |
1551 | } else { |
1552 | swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; |
1553 | detile_buf_vp_horz_limit = (double) swath_buf_size |
1554 | / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) |
1555 | + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); |
1556 | detile_buf_vp_vert_limit = (double) swath_buf_size |
1557 | / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); |
1558 | } |
1559 | |
1560 | if (SourcePixelFormat == dm_420_10) { |
1561 | detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; |
1562 | detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; |
1563 | } |
1564 | |
1565 | detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); |
1566 | detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); |
1567 | |
1568 | MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; |
1569 | MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); |
1570 | max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); |
1571 | max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); |
1572 | eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); |
1573 | eff_surf_width_c = eff_surf_width_l / (1 + yuv420); |
1574 | eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); |
1575 | eff_surf_height_c = eff_surf_height_l / (1 + yuv420); |
1576 | |
1577 | full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; |
1578 | full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; |
1579 | if (BytePerPixelC > 0) { |
1580 | full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; |
1581 | full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; |
1582 | } else { |
1583 | full_swath_bytes_horz_wc_c = 0; |
1584 | full_swath_bytes_vert_wc_c = 0; |
1585 | } |
1586 | |
1587 | if (SourcePixelFormat == dm_420_10) { |
1588 | full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); |
1589 | full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); |
1590 | full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); |
1591 | full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); |
1592 | } |
1593 | |
1594 | if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { |
1595 | req128_horz_wc_l = 0; |
1596 | req128_horz_wc_c = 0; |
1597 | } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { |
1598 | req128_horz_wc_l = 0; |
1599 | req128_horz_wc_c = 1; |
1600 | } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { |
1601 | req128_horz_wc_l = 1; |
1602 | req128_horz_wc_c = 0; |
1603 | } else { |
1604 | req128_horz_wc_l = 1; |
1605 | req128_horz_wc_c = 1; |
1606 | } |
1607 | |
1608 | if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { |
1609 | req128_vert_wc_l = 0; |
1610 | req128_vert_wc_c = 0; |
1611 | } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { |
1612 | req128_vert_wc_l = 0; |
1613 | req128_vert_wc_c = 1; |
1614 | } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { |
1615 | req128_vert_wc_l = 1; |
1616 | req128_vert_wc_c = 0; |
1617 | } else { |
1618 | req128_vert_wc_l = 1; |
1619 | req128_vert_wc_c = 1; |
1620 | } |
1621 | |
1622 | if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { |
1623 | segment_order_horz_contiguous_luma = 0; |
1624 | } else { |
1625 | segment_order_horz_contiguous_luma = 1; |
1626 | } |
1627 | if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) |
1628 | || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { |
1629 | segment_order_vert_contiguous_luma = 0; |
1630 | } else { |
1631 | segment_order_vert_contiguous_luma = 1; |
1632 | } |
1633 | if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { |
1634 | segment_order_horz_contiguous_chroma = 0; |
1635 | } else { |
1636 | segment_order_horz_contiguous_chroma = 1; |
1637 | } |
1638 | if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) |
1639 | || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { |
1640 | segment_order_vert_contiguous_chroma = 0; |
1641 | } else { |
1642 | segment_order_vert_contiguous_chroma = 1; |
1643 | } |
1644 | |
1645 | if (DCCProgrammingAssumesScanDirectionUnknown == true1) { |
1646 | if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { |
1647 | RequestLuma = REQ_256Bytes; |
1648 | } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { |
1649 | RequestLuma = REQ_128BytesNonContiguous; |
1650 | } else { |
1651 | RequestLuma = REQ_128BytesContiguous; |
1652 | } |
1653 | if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { |
1654 | RequestChroma = REQ_256Bytes; |
1655 | } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { |
1656 | RequestChroma = REQ_128BytesNonContiguous; |
1657 | } else { |
1658 | RequestChroma = REQ_128BytesContiguous; |
1659 | } |
1660 | } else if (ScanOrientation != dm_vert) { |
1661 | if (req128_horz_wc_l == 0) { |
1662 | RequestLuma = REQ_256Bytes; |
1663 | } else if (segment_order_horz_contiguous_luma == 0) { |
1664 | RequestLuma = REQ_128BytesNonContiguous; |
1665 | } else { |
1666 | RequestLuma = REQ_128BytesContiguous; |
1667 | } |
1668 | if (req128_horz_wc_c == 0) { |
1669 | RequestChroma = REQ_256Bytes; |
1670 | } else if (segment_order_horz_contiguous_chroma == 0) { |
1671 | RequestChroma = REQ_128BytesNonContiguous; |
1672 | } else { |
1673 | RequestChroma = REQ_128BytesContiguous; |
1674 | } |
1675 | } else { |
1676 | if (req128_vert_wc_l == 0) { |
1677 | RequestLuma = REQ_256Bytes; |
1678 | } else if (segment_order_vert_contiguous_luma == 0) { |
1679 | RequestLuma = REQ_128BytesNonContiguous; |
1680 | } else { |
1681 | RequestLuma = REQ_128BytesContiguous; |
1682 | } |
1683 | if (req128_vert_wc_c == 0) { |
1684 | RequestChroma = REQ_256Bytes; |
1685 | } else if (segment_order_vert_contiguous_chroma == 0) { |
1686 | RequestChroma = REQ_128BytesNonContiguous; |
1687 | } else { |
1688 | RequestChroma = REQ_128BytesContiguous; |
1689 | } |
1690 | } |
1691 | |
1692 | if (RequestLuma == REQ_256Bytes) { |
1693 | *MaxUncompressedBlockLuma = 256; |
1694 | *MaxCompressedBlockLuma = 256; |
1695 | *IndependentBlockLuma = 0; |
1696 | } else if (RequestLuma == REQ_128BytesContiguous) { |
1697 | *MaxUncompressedBlockLuma = 256; |
1698 | *MaxCompressedBlockLuma = 128; |
1699 | *IndependentBlockLuma = 128; |
1700 | } else { |
1701 | *MaxUncompressedBlockLuma = 256; |
1702 | *MaxCompressedBlockLuma = 64; |
1703 | *IndependentBlockLuma = 64; |
1704 | } |
1705 | |
1706 | if (RequestChroma == REQ_256Bytes) { |
1707 | *MaxUncompressedBlockChroma = 256; |
1708 | *MaxCompressedBlockChroma = 256; |
1709 | *IndependentBlockChroma = 0; |
1710 | } else if (RequestChroma == REQ_128BytesContiguous) { |
1711 | *MaxUncompressedBlockChroma = 256; |
1712 | *MaxCompressedBlockChroma = 128; |
1713 | *IndependentBlockChroma = 128; |
1714 | } else { |
1715 | *MaxUncompressedBlockChroma = 256; |
1716 | *MaxCompressedBlockChroma = 64; |
1717 | *IndependentBlockChroma = 64; |
1718 | } |
1719 | |
1720 | if (DCCEnabled != true1 || BytePerPixelC == 0) { |
1721 | *MaxUncompressedBlockChroma = 0; |
1722 | *MaxCompressedBlockChroma = 0; |
1723 | *IndependentBlockChroma = 0; |
1724 | } |
1725 | |
1726 | if (DCCEnabled != true1) { |
1727 | *MaxUncompressedBlockLuma = 0; |
1728 | *MaxCompressedBlockLuma = 0; |
1729 | *IndependentBlockLuma = 0; |
1730 | } |
1731 | } |
1732 | |
1733 | static double CalculatePrefetchSourceLines( |
1734 | struct display_mode_lib *mode_lib, |
1735 | double VRatio, |
1736 | double vtaps, |
1737 | bool_Bool Interlace, |
1738 | bool_Bool ProgressiveToInterlaceUnitInOPP, |
1739 | unsigned int SwathHeight, |
1740 | unsigned int ViewportYStart, |
1741 | double *VInitPreFill, |
1742 | unsigned int *MaxNumSwath) |
1743 | { |
1744 | struct vba_vars_st *v = &mode_lib->vba; |
1745 | unsigned int MaxPartialSwath; |
1746 | |
1747 | if (ProgressiveToInterlaceUnitInOPP) |
1748 | *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); |
1749 | else |
1750 | *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); |
1751 | |
1752 | if (!v->IgnoreViewportPositioning) { |
1753 | |
1754 | *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; |
1755 | |
1756 | if (*VInitPreFill > 1.0) |
1757 | MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; |
1758 | else |
1759 | MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; |
1760 | MaxPartialSwath = dml_max(1U, MaxPartialSwath); |
1761 | |
1762 | } else { |
1763 | |
1764 | if (ViewportYStart != 0) |
1765 | dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"){do { } while(0); }; |
1766 | |
1767 | *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); |
1768 | |
1769 | if (*VInitPreFill > 1.0) |
1770 | MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; |
1771 | else |
1772 | MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; |
1773 | } |
1774 | |
1775 | #ifdef __DML_VBA_DEBUG__ |
1776 | dml_print("DML::%s: VRatio = %f\n", __func__, VRatio){do { } while(0); }; |
1777 | dml_print("DML::%s: vtaps = %f\n", __func__, vtaps){do { } while(0); }; |
1778 | dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill){do { } while(0); }; |
1779 | dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP){do { } while(0); }; |
1780 | dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning){do { } while(0); }; |
1781 | dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight){do { } while(0); }; |
1782 | dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath){do { } while(0); }; |
1783 | dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath){do { } while(0); }; |
1784 | dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath){do { } while(0); }; |
1785 | #endif |
1786 | return *MaxNumSwath * SwathHeight + MaxPartialSwath; |
1787 | } |
1788 | |
1789 | static unsigned int CalculateVMAndRowBytes( |
1790 | struct display_mode_lib *mode_lib, |
1791 | bool_Bool DCCEnable, |
1792 | unsigned int BlockHeight256Bytes, |
1793 | unsigned int BlockWidth256Bytes, |
1794 | enum source_format_class SourcePixelFormat, |
1795 | unsigned int SurfaceTiling, |
1796 | unsigned int BytePerPixel, |
1797 | enum scan_direction_class ScanDirection, |
1798 | unsigned int SwathWidth, |
1799 | unsigned int ViewportHeight, |
1800 | bool_Bool GPUVMEnable, |
1801 | bool_Bool HostVMEnable, |
1802 | unsigned int HostVMMaxNonCachedPageTableLevels, |
1803 | unsigned int GPUVMMinPageSize, |
1804 | unsigned int HostVMMinPageSize, |
1805 | unsigned int PTEBufferSizeInRequests, |
1806 | unsigned int Pitch, |
1807 | unsigned int DCCMetaPitch, |
1808 | unsigned int *MacroTileWidth, |
1809 | unsigned int *MetaRowByte, |
1810 | unsigned int *PixelPTEBytesPerRow, |
1811 | bool_Bool *PTEBufferSizeNotExceeded, |
1812 | int *dpte_row_width_ub, |
1813 | unsigned int *dpte_row_height, |
1814 | unsigned int *MetaRequestWidth, |
1815 | unsigned int *MetaRequestHeight, |
1816 | unsigned int *meta_row_width, |
1817 | unsigned int *meta_row_height, |
1818 | int *vm_group_bytes, |
1819 | unsigned int *dpte_group_bytes, |
1820 | unsigned int *PixelPTEReqWidth, |
1821 | unsigned int *PixelPTEReqHeight, |
1822 | unsigned int *PTERequestSize, |
1823 | int *DPDE0BytesFrame, |
1824 | int *MetaPTEBytesFrame) |
1825 | { |
1826 | struct vba_vars_st *v = &mode_lib->vba; |
1827 | unsigned int MPDEBytesFrame; |
1828 | unsigned int DCCMetaSurfaceBytes; |
1829 | unsigned int MacroTileSizeBytes; |
1830 | unsigned int MacroTileHeight; |
1831 | unsigned int ExtraDPDEBytesFrame; |
1832 | unsigned int PDEAndMetaPTEBytesFrame; |
1833 | unsigned int PixelPTEReqHeightPTEs = 0; |
1834 | unsigned int HostVMDynamicLevels = 0; |
1835 | double FractionOfPTEReturnDrop; |
1836 | |
1837 | if (GPUVMEnable == true1 && HostVMEnable == true1) { |
1838 | if (HostVMMinPageSize < 2048) { |
1839 | HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; |
1840 | } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { |
1841 | HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); |
1842 | } else { |
1843 | HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); |
1844 | } |
1845 | } |
1846 | |
1847 | *MetaRequestHeight = 8 * BlockHeight256Bytes; |
1848 | *MetaRequestWidth = 8 * BlockWidth256Bytes; |
1849 | if (ScanDirection != dm_vert) { |
1850 | *meta_row_height = *MetaRequestHeight; |
1851 | *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; |
1852 | *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; |
1853 | } else { |
1854 | *meta_row_height = *MetaRequestWidth; |
1855 | *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; |
1856 | *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; |
1857 | } |
1858 | DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; |
1859 | if (GPUVMEnable == true1) { |
1860 | *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; |
1861 | MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); |
1862 | } else { |
1863 | *MetaPTEBytesFrame = 0; |
1864 | MPDEBytesFrame = 0; |
1865 | } |
1866 | |
1867 | if (DCCEnable != true1) { |
1868 | *MetaPTEBytesFrame = 0; |
1869 | MPDEBytesFrame = 0; |
1870 | *MetaRowByte = 0; |
1871 | } |
1872 | |
1873 | if (SurfaceTiling == dm_sw_linear) { |
1874 | MacroTileSizeBytes = 256; |
1875 | MacroTileHeight = BlockHeight256Bytes; |
1876 | } else { |
1877 | MacroTileSizeBytes = 65536; |
1878 | MacroTileHeight = 16 * BlockHeight256Bytes; |
1879 | } |
1880 | *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; |
1881 | |
1882 | if (GPUVMEnable == true1 && v->GPUVMMaxPageTableLevels > 1) { |
1883 | if (ScanDirection != dm_vert) { |
1884 | *DPDE0BytesFrame = 64 |
1885 | * (dml_ceil( |
1886 | ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) |
1887 | / (8 * 2097152), |
1888 | 1) + 1); |
1889 | } else { |
1890 | *DPDE0BytesFrame = 64 |
1891 | * (dml_ceil( |
1892 | ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) |
1893 | / (8 * 2097152), |
1894 | 1) + 1); |
1895 | } |
1896 | ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); |
1897 | } else { |
1898 | *DPDE0BytesFrame = 0; |
1899 | ExtraDPDEBytesFrame = 0; |
1900 | } |
1901 | |
1902 | PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; |
1903 | |
1904 | #ifdef __DML_VBA_DEBUG__ |
1905 | dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame){do { } while(0); }; |
1906 | dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame){do { } while(0); }; |
1907 | dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame){do { } while(0); }; |
1908 | dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame){do { } while(0); }; |
1909 | dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); }; |
1910 | #endif |
1911 | |
1912 | if (HostVMEnable == true1) { |
1913 | PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); |
1914 | } |
1915 | #ifdef __DML_VBA_DEBUG__ |
1916 | dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame){do { } while(0); }; |
1917 | #endif |
1918 | |
1919 | if (SurfaceTiling == dm_sw_linear) { |
1920 | PixelPTEReqHeightPTEs = 1; |
1921 | *PixelPTEReqHeight = 1; |
1922 | *PixelPTEReqWidth = 32768.0 / BytePerPixel; |
1923 | *PTERequestSize = 64; |
1924 | FractionOfPTEReturnDrop = 0; |
1925 | } else if (MacroTileSizeBytes == 4096) { |
1926 | PixelPTEReqHeightPTEs = 1; |
1927 | *PixelPTEReqHeight = MacroTileHeight; |
1928 | *PixelPTEReqWidth = 8 * *MacroTileWidth; |
1929 | *PTERequestSize = 64; |
1930 | if (ScanDirection != dm_vert) |
1931 | FractionOfPTEReturnDrop = 0; |
1932 | else |
1933 | FractionOfPTEReturnDrop = 7 / 8; |
1934 | } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { |
1935 | PixelPTEReqHeightPTEs = 16; |
1936 | *PixelPTEReqHeight = 16 * BlockHeight256Bytes; |
1937 | *PixelPTEReqWidth = 16 * BlockWidth256Bytes; |
1938 | *PTERequestSize = 128; |
1939 | FractionOfPTEReturnDrop = 0; |
1940 | } else { |
1941 | PixelPTEReqHeightPTEs = 1; |
1942 | *PixelPTEReqHeight = MacroTileHeight; |
1943 | *PixelPTEReqWidth = 8 * *MacroTileWidth; |
1944 | *PTERequestSize = 64; |
1945 | FractionOfPTEReturnDrop = 0; |
1946 | } |
1947 | |
1948 | if (SurfaceTiling == dm_sw_linear) { |
1949 | *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); |
1950 | *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; |
1951 | *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; |
1952 | } else if (ScanDirection != dm_vert) { |
1953 | *dpte_row_height = *PixelPTEReqHeight; |
1954 | *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; |
1955 | *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; |
1956 | } else { |
1957 | *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); |
1958 | *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; |
1959 | *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; |
1960 | } |
1961 | |
1962 | if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) { |
1963 | *PTEBufferSizeNotExceeded = true1; |
1964 | } else { |
1965 | *PTEBufferSizeNotExceeded = false0; |
1966 | } |
1967 | |
1968 | if (GPUVMEnable != true1) { |
1969 | *PixelPTEBytesPerRow = 0; |
1970 | *PTEBufferSizeNotExceeded = true1; |
1971 | } |
1972 | |
1973 | dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame){do { } while(0); }; |
1974 | |
1975 | if (HostVMEnable == true1) { |
1976 | *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); |
1977 | } |
1978 | |
1979 | if (HostVMEnable == true1) { |
1980 | *vm_group_bytes = 512; |
1981 | *dpte_group_bytes = 512; |
1982 | } else if (GPUVMEnable == true1) { |
1983 | *vm_group_bytes = 2048; |
1984 | if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { |
1985 | *dpte_group_bytes = 512; |
1986 | } else { |
1987 | *dpte_group_bytes = 2048; |
1988 | } |
1989 | } else { |
1990 | *vm_group_bytes = 0; |
1991 | *dpte_group_bytes = 0; |
1992 | } |
1993 | return PDEAndMetaPTEBytesFrame; |
1994 | } |
1995 | |
1996 | static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) |
1997 | { |
1998 | struct vba_vars_st *v = &mode_lib->vba; |
1999 | unsigned int j, k; |
2000 | double HostVMInefficiencyFactor = 1.0; |
2001 | bool_Bool NoChromaPlanes = true1; |
2002 | int ReorderBytes; |
2003 | double VMDataOnlyReturnBW; |
2004 | double MaxTotalRDBandwidth = 0; |
2005 | int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; |
2006 | |
2007 | v->WritebackDISPCLK = 0.0; |
2008 | v->DISPCLKWithRamping = 0; |
2009 | v->DISPCLKWithoutRamping = 0; |
2010 | v->GlobalDPPCLK = 0.0; |
2011 | /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */ |
2012 | { |
2013 | double IdealFabricAndSDPPortBandwidthPerState = dml_min( |
2014 | v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], |
2015 | v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); |
2016 | double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; |
2017 | if (v->HostVMEnable != true1) { |
2018 | v->ReturnBW = dml_min( |
2019 | IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, |
2020 | IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); |
2021 | } else { |
2022 | v->ReturnBW = dml_min( |
2023 | IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, |
2024 | IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); |
2025 | } |
2026 | } |
2027 | /* End DAL custom code */ |
2028 | |
2029 | // DISPCLK and DPPCLK Calculation |
2030 | // |
2031 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2032 | if (v->WritebackEnable[k]) { |
2033 | v->WritebackDISPCLK = dml_max( |
2034 | v->WritebackDISPCLK, |
2035 | dml31_CalculateWriteBackDISPCLK( |
2036 | v->WritebackPixelFormat[k], |
2037 | v->PixelClock[k], |
2038 | v->WritebackHRatio[k], |
2039 | v->WritebackVRatio[k], |
2040 | v->WritebackHTaps[k], |
2041 | v->WritebackVTaps[k], |
2042 | v->WritebackSourceWidth[k], |
2043 | v->WritebackDestinationWidth[k], |
2044 | v->HTotal[k], |
2045 | v->WritebackLineBufferSize)); |
2046 | } |
2047 | } |
2048 | |
2049 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2050 | if (v->HRatio[k] > 1) { |
2051 | v->PSCL_THROUGHPUT_LUMA[k] = dml_min( |
2052 | v->MaxDCHUBToPSCLThroughput, |
2053 | v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); |
2054 | } else { |
2055 | v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); |
2056 | } |
2057 | |
2058 | v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] |
2059 | * dml_max( |
2060 | v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), |
2061 | dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); |
2062 | |
2063 | if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { |
2064 | v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; |
2065 | } |
2066 | |
2067 | if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 |
2068 | && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { |
2069 | v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; |
2070 | v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; |
2071 | } else { |
2072 | if (v->HRatioChroma[k] > 1) { |
2073 | v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( |
2074 | v->MaxDCHUBToPSCLThroughput, |
2075 | v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); |
2076 | } else { |
2077 | v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); |
2078 | } |
2079 | v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] |
2080 | * dml_max3( |
2081 | v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), |
2082 | v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], |
2083 | 1.0); |
2084 | |
2085 | if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { |
2086 | v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; |
2087 | } |
2088 | |
2089 | v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); |
2090 | } |
2091 | } |
2092 | |
2093 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2094 | if (v->BlendingAndTiming[k] != k) |
2095 | continue; |
2096 | if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { |
2097 | v->DISPCLKWithRamping = dml_max( |
2098 | v->DISPCLKWithRamping, |
2099 | v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) |
2100 | * (1 + v->DISPCLKRampingMargin / 100)); |
2101 | v->DISPCLKWithoutRamping = dml_max( |
2102 | v->DISPCLKWithoutRamping, |
2103 | v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); |
2104 | } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { |
2105 | v->DISPCLKWithRamping = dml_max( |
2106 | v->DISPCLKWithRamping, |
2107 | v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) |
2108 | * (1 + v->DISPCLKRampingMargin / 100)); |
2109 | v->DISPCLKWithoutRamping = dml_max( |
2110 | v->DISPCLKWithoutRamping, |
2111 | v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); |
2112 | } else { |
2113 | v->DISPCLKWithRamping = dml_max( |
2114 | v->DISPCLKWithRamping, |
2115 | v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); |
2116 | v->DISPCLKWithoutRamping = dml_max( |
2117 | v->DISPCLKWithoutRamping, |
2118 | v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); |
2119 | } |
2120 | } |
2121 | |
2122 | v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); |
2123 | v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); |
2124 | |
2125 | ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0)do { if (({ static int __warned; int __ret = !!(!(v->DISPCLKDPPCLKVCOSpeed != 0)); if (__ret && !__warned) { printf("WARNING %s failed at %s:%d\n" , "!(v->DISPCLKDPPCLKVCOSpeed != 0)", "/usr/src/sys/dev/pci/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c" , 2125); __warned = 1; } __builtin_expect(!!(__ret), 0); })) do {} while (0); } while (0); |
2126 | v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); |
2127 | v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); |
2128 | v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( |
2129 | v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, |
2130 | v->DISPCLKDPPCLKVCOSpeed); |
2131 | if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { |
2132 | v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; |
2133 | } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { |
2134 | v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; |
2135 | } else { |
2136 | v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; |
2137 | } |
2138 | v->DISPCLK = v->DISPCLK_calculated; |
2139 | DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated){do { } while(0); }; |
2140 | |
2141 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2142 | v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); |
2143 | v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); |
2144 | } |
2145 | v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); |
2146 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2147 | v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); |
2148 | DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]){do { } while(0); }; |
2149 | } |
2150 | |
2151 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2152 | v->DPPCLK[k] = v->DPPCLK_calculated[k]; |
2153 | } |
2154 | |
2155 | // Urgent and B P-State/DRAM Clock Change Watermark |
2156 | DTRACE(" dcfclk_mhz = %f", v->DCFCLK){do { } while(0); }; |
2157 | DTRACE(" return_bus_bw = %f", v->ReturnBW){do { } while(0); }; |
2158 | |
2159 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2160 | dml30_CalculateBytePerPixelAnd256BBlockSizes( |
2161 | v->SourcePixelFormat[k], |
2162 | v->SurfaceTiling[k], |
2163 | &v->BytePerPixelY[k], |
2164 | &v->BytePerPixelC[k], |
2165 | &v->BytePerPixelDETY[k], |
2166 | &v->BytePerPixelDETC[k], |
2167 | &v->BlockHeight256BytesY[k], |
2168 | &v->BlockHeight256BytesC[k], |
2169 | &v->BlockWidth256BytesY[k], |
2170 | &v->BlockWidth256BytesC[k]); |
2171 | } |
2172 | |
2173 | CalculateSwathWidth( |
2174 | false0, |
2175 | v->NumberOfActivePlanes, |
2176 | v->SourcePixelFormat, |
2177 | v->SourceScan, |
2178 | v->ViewportWidth, |
2179 | v->ViewportHeight, |
2180 | v->SurfaceWidthY, |
2181 | v->SurfaceWidthC, |
2182 | v->SurfaceHeightY, |
2183 | v->SurfaceHeightC, |
2184 | v->ODMCombineEnabled, |
2185 | v->BytePerPixelY, |
2186 | v->BytePerPixelC, |
2187 | v->BlockHeight256BytesY, |
2188 | v->BlockHeight256BytesC, |
2189 | v->BlockWidth256BytesY, |
2190 | v->BlockWidth256BytesC, |
2191 | v->BlendingAndTiming, |
2192 | v->HActive, |
2193 | v->HRatio, |
2194 | v->DPPPerPlane, |
2195 | v->SwathWidthSingleDPPY, |
2196 | v->SwathWidthSingleDPPC, |
2197 | v->SwathWidthY, |
2198 | v->SwathWidthC, |
2199 | v->dummyinteger3, |
2200 | v->dummyinteger4, |
2201 | v->swath_width_luma_ub, |
2202 | v->swath_width_chroma_ub); |
2203 | |
2204 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2205 | v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) |
2206 | * v->VRatio[k]; |
2207 | v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) |
2208 | * v->VRatioChroma[k]; |
2209 | DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]){do { } while(0); }; |
2210 | } |
2211 | |
2212 | // DCFCLK Deep Sleep |
2213 | CalculateDCFCLKDeepSleep( |
2214 | mode_lib, |
2215 | v->NumberOfActivePlanes, |
2216 | v->BytePerPixelY, |
2217 | v->BytePerPixelC, |
2218 | v->VRatio, |
2219 | v->VRatioChroma, |
2220 | v->SwathWidthY, |
2221 | v->SwathWidthC, |
2222 | v->DPPPerPlane, |
2223 | v->HRatio, |
2224 | v->HRatioChroma, |
2225 | v->PixelClock, |
2226 | v->PSCL_THROUGHPUT_LUMA, |
2227 | v->PSCL_THROUGHPUT_CHROMA, |
2228 | v->DPPCLK, |
2229 | v->ReadBandwidthPlaneLuma, |
2230 | v->ReadBandwidthPlaneChroma, |
2231 | v->ReturnBusWidth, |
2232 | &v->DCFCLKDeepSleep); |
2233 | |
2234 | // DSCCLK |
2235 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2236 | if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { |
2237 | v->DSCCLK_calculated[k] = 0.0; |
2238 | } else { |
2239 | if (v->OutputFormat[k] == dm_420) |
2240 | v->DSCFormatFactor = 2; |
2241 | else if (v->OutputFormat[k] == dm_444) |
2242 | v->DSCFormatFactor = 1; |
2243 | else if (v->OutputFormat[k] == dm_n422) |
2244 | v->DSCFormatFactor = 2; |
2245 | else |
2246 | v->DSCFormatFactor = 1; |
2247 | if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) |
2248 | v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor |
2249 | / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); |
2250 | else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) |
2251 | v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor |
2252 | / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); |
2253 | else |
2254 | v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor |
2255 | / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); |
2256 | } |
2257 | } |
2258 | |
2259 | // DSC Delay |
2260 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2261 | double BPP = v->OutputBpp[k]; |
2262 | |
2263 | if (v->DSCEnabled[k] && BPP != 0) { |
2264 | if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { |
2265 | v->DSCDelay[k] = dscceComputeDelay( |
2266 | v->DSCInputBitPerComponent[k], |
2267 | BPP, |
2268 | dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), |
2269 | v->NumberOfDSCSlices[k], |
2270 | v->OutputFormat[k], |
2271 | v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); |
2272 | } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { |
2273 | v->DSCDelay[k] = 2 |
2274 | * (dscceComputeDelay( |
2275 | v->DSCInputBitPerComponent[k], |
2276 | BPP, |
2277 | dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), |
2278 | v->NumberOfDSCSlices[k] / 2.0, |
2279 | v->OutputFormat[k], |
2280 | v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); |
2281 | } else { |
2282 | v->DSCDelay[k] = 4 |
2283 | * (dscceComputeDelay( |
2284 | v->DSCInputBitPerComponent[k], |
2285 | BPP, |
2286 | dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), |
2287 | v->NumberOfDSCSlices[k] / 4.0, |
2288 | v->OutputFormat[k], |
2289 | v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); |
2290 | } |
2291 | v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; |
2292 | } else { |
2293 | v->DSCDelay[k] = 0; |
2294 | } |
2295 | } |
2296 | |
2297 | for (k = 0; k < v->NumberOfActivePlanes; ++k) |
2298 | for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes |
2299 | if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) |
2300 | v->DSCDelay[k] = v->DSCDelay[j]; |
2301 | |
2302 | // Prefetch |
2303 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2304 | unsigned int PDEAndMetaPTEBytesFrameY; |
2305 | unsigned int PixelPTEBytesPerRowY; |
2306 | unsigned int MetaRowByteY; |
2307 | unsigned int MetaRowByteC; |
2308 | unsigned int PDEAndMetaPTEBytesFrameC; |
2309 | unsigned int PixelPTEBytesPerRowC; |
2310 | bool_Bool PTEBufferSizeNotExceededY; |
2311 | bool_Bool PTEBufferSizeNotExceededC; |
2312 | |
2313 | if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 |
2314 | || v->SourcePixelFormat[k] == dm_rgbe_alpha) { |
2315 | if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { |
2316 | v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; |
2317 | v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; |
2318 | } else { |
2319 | v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; |
2320 | v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; |
2321 | } |
2322 | |
2323 | PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( |
2324 | mode_lib, |
2325 | v->DCCEnable[k], |
2326 | v->BlockHeight256BytesC[k], |
2327 | v->BlockWidth256BytesC[k], |
2328 | v->SourcePixelFormat[k], |
2329 | v->SurfaceTiling[k], |
2330 | v->BytePerPixelC[k], |
2331 | v->SourceScan[k], |
2332 | v->SwathWidthC[k], |
2333 | v->ViewportHeightChroma[k], |
2334 | v->GPUVMEnable, |
2335 | v->HostVMEnable, |
2336 | v->HostVMMaxNonCachedPageTableLevels, |
2337 | v->GPUVMMinPageSize, |
2338 | v->HostVMMinPageSize, |
2339 | v->PTEBufferSizeInRequestsForChroma, |
2340 | v->PitchC[k], |
2341 | v->DCCMetaPitchC[k], |
2342 | &v->MacroTileWidthC[k], |
2343 | &MetaRowByteC, |
2344 | &PixelPTEBytesPerRowC, |
2345 | &PTEBufferSizeNotExceededC, |
2346 | &v->dpte_row_width_chroma_ub[k], |
2347 | &v->dpte_row_height_chroma[k], |
2348 | &v->meta_req_width_chroma[k], |
2349 | &v->meta_req_height_chroma[k], |
2350 | &v->meta_row_width_chroma[k], |
2351 | &v->meta_row_height_chroma[k], |
2352 | &v->dummyinteger1, |
2353 | &v->dummyinteger2, |
2354 | &v->PixelPTEReqWidthC[k], |
2355 | &v->PixelPTEReqHeightC[k], |
2356 | &v->PTERequestSizeC[k], |
2357 | &v->dpde0_bytes_per_frame_ub_c[k], |
2358 | &v->meta_pte_bytes_per_frame_ub_c[k]); |
2359 | |
2360 | v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( |
2361 | mode_lib, |
2362 | v->VRatioChroma[k], |
2363 | v->VTAPsChroma[k], |
2364 | v->Interlace[k], |
2365 | v->ProgressiveToInterlaceUnitInOPP, |
2366 | v->SwathHeightC[k], |
2367 | v->ViewportYStartC[k], |
2368 | &v->VInitPreFillC[k], |
2369 | &v->MaxNumSwathC[k]); |
2370 | } else { |
2371 | v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; |
2372 | v->PTEBufferSizeInRequestsForChroma = 0; |
2373 | PixelPTEBytesPerRowC = 0; |
2374 | PDEAndMetaPTEBytesFrameC = 0; |
2375 | MetaRowByteC = 0; |
2376 | v->MaxNumSwathC[k] = 0; |
2377 | v->PrefetchSourceLinesC[k] = 0; |
2378 | } |
2379 | |
2380 | PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( |
2381 | mode_lib, |
2382 | v->DCCEnable[k], |
2383 | v->BlockHeight256BytesY[k], |
2384 | v->BlockWidth256BytesY[k], |
2385 | v->SourcePixelFormat[k], |
2386 | v->SurfaceTiling[k], |
2387 | v->BytePerPixelY[k], |
2388 | v->SourceScan[k], |
2389 | v->SwathWidthY[k], |
2390 | v->ViewportHeight[k], |
2391 | v->GPUVMEnable, |
2392 | v->HostVMEnable, |
2393 | v->HostVMMaxNonCachedPageTableLevels, |
2394 | v->GPUVMMinPageSize, |
2395 | v->HostVMMinPageSize, |
2396 | v->PTEBufferSizeInRequestsForLuma, |
2397 | v->PitchY[k], |
2398 | v->DCCMetaPitchY[k], |
2399 | &v->MacroTileWidthY[k], |
2400 | &MetaRowByteY, |
2401 | &PixelPTEBytesPerRowY, |
2402 | &PTEBufferSizeNotExceededY, |
2403 | &v->dpte_row_width_luma_ub[k], |
2404 | &v->dpte_row_height[k], |
2405 | &v->meta_req_width[k], |
2406 | &v->meta_req_height[k], |
2407 | &v->meta_row_width[k], |
2408 | &v->meta_row_height[k], |
2409 | &v->vm_group_bytes[k], |
2410 | &v->dpte_group_bytes[k], |
2411 | &v->PixelPTEReqWidthY[k], |
2412 | &v->PixelPTEReqHeightY[k], |
2413 | &v->PTERequestSizeY[k], |
2414 | &v->dpde0_bytes_per_frame_ub_l[k], |
2415 | &v->meta_pte_bytes_per_frame_ub_l[k]); |
2416 | |
2417 | v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( |
2418 | mode_lib, |
2419 | v->VRatio[k], |
2420 | v->vtaps[k], |
2421 | v->Interlace[k], |
2422 | v->ProgressiveToInterlaceUnitInOPP, |
2423 | v->SwathHeightY[k], |
2424 | v->ViewportYStartY[k], |
2425 | &v->VInitPreFillY[k], |
2426 | &v->MaxNumSwathY[k]); |
2427 | v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; |
2428 | v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; |
2429 | v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; |
2430 | |
2431 | CalculateRowBandwidth( |
2432 | v->GPUVMEnable, |
2433 | v->SourcePixelFormat[k], |
2434 | v->VRatio[k], |
2435 | v->VRatioChroma[k], |
2436 | v->DCCEnable[k], |
2437 | v->HTotal[k] / v->PixelClock[k], |
2438 | MetaRowByteY, |
2439 | MetaRowByteC, |
2440 | v->meta_row_height[k], |
2441 | v->meta_row_height_chroma[k], |
2442 | PixelPTEBytesPerRowY, |
2443 | PixelPTEBytesPerRowC, |
2444 | v->dpte_row_height[k], |
2445 | v->dpte_row_height_chroma[k], |
2446 | &v->meta_row_bw[k], |
2447 | &v->dpte_row_bw[k]); |
2448 | } |
2449 | |
2450 | v->TotalDCCActiveDPP = 0; |
2451 | v->TotalActiveDPP = 0; |
2452 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2453 | v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; |
2454 | if (v->DCCEnable[k]) |
2455 | v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; |
2456 | if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 |
2457 | || v->SourcePixelFormat[k] == dm_rgbe_alpha) |
2458 | NoChromaPlanes = false0; |
2459 | } |
2460 | |
2461 | ReorderBytes = v->NumberOfChannels |
2462 | * dml_max3( |
2463 | v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, |
2464 | v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, |
2465 | v->UrgentOutOfOrderReturnPerChannelVMDataOnly); |
2466 | |
2467 | VMDataOnlyReturnBW = dml_min( |
2468 | dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) |
2469 | * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, |
2470 | v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth |
2471 | * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); |
2472 | |
2473 | #ifdef __DML_VBA_DEBUG__ |
2474 | dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth){do { } while(0); }; |
2475 | dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK){do { } while(0); }; |
2476 | dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock){do { } while(0); }; |
2477 | dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn){do { } while(0); }; |
2478 | dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency){do { } while(0); }; |
2479 | dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed){do { } while(0); }; |
2480 | dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels){do { } while(0); }; |
2481 | dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth){do { } while(0); }; |
2482 | dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly){do { } while(0); }; |
2483 | dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW){do { } while(0); }; |
2484 | dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW){do { } while(0); }; |
2485 | #endif |
2486 | |
2487 | if (v->GPUVMEnable && v->HostVMEnable) |
2488 | HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; |
2489 | |
2490 | v->UrgentExtraLatency = CalculateExtraLatency( |
2491 | v->RoundTripPingLatencyCycles, |
2492 | ReorderBytes, |
2493 | v->DCFCLK, |
2494 | v->TotalActiveDPP, |
2495 | v->PixelChunkSizeInKByte, |
2496 | v->TotalDCCActiveDPP, |
2497 | v->MetaChunkSize, |
2498 | v->ReturnBW, |
2499 | v->GPUVMEnable, |
2500 | v->HostVMEnable, |
2501 | v->NumberOfActivePlanes, |
2502 | v->DPPPerPlane, |
2503 | v->dpte_group_bytes, |
2504 | HostVMInefficiencyFactor, |
2505 | v->HostVMMinPageSize, |
2506 | v->HostVMMaxNonCachedPageTableLevels); |
2507 | |
2508 | v->TCalc = 24.0 / v->DCFCLKDeepSleep; |
2509 | |
2510 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2511 | if (v->BlendingAndTiming[k] == k) { |
2512 | if (v->WritebackEnable[k] == true1) { |
2513 | v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency |
2514 | + CalculateWriteBackDelay( |
2515 | v->WritebackPixelFormat[k], |
2516 | v->WritebackHRatio[k], |
2517 | v->WritebackVRatio[k], |
2518 | v->WritebackVTaps[k], |
2519 | v->WritebackDestinationWidth[k], |
2520 | v->WritebackDestinationHeight[k], |
2521 | v->WritebackSourceHeight[k], |
2522 | v->HTotal[k]) / v->DISPCLK; |
2523 | } else |
2524 | v->WritebackDelay[v->VoltageLevel][k] = 0; |
2525 | for (j = 0; j < v->NumberOfActivePlanes; ++j) { |
2526 | if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true1) { |
2527 | v->WritebackDelay[v->VoltageLevel][k] = dml_max( |
2528 | v->WritebackDelay[v->VoltageLevel][k], |
2529 | v->WritebackLatency |
2530 | + CalculateWriteBackDelay( |
2531 | v->WritebackPixelFormat[j], |
2532 | v->WritebackHRatio[j], |
2533 | v->WritebackVRatio[j], |
2534 | v->WritebackVTaps[j], |
2535 | v->WritebackDestinationWidth[j], |
2536 | v->WritebackDestinationHeight[j], |
2537 | v->WritebackSourceHeight[j], |
2538 | v->HTotal[k]) / v->DISPCLK); |
2539 | } |
2540 | } |
2541 | } |
2542 | } |
2543 | |
2544 | for (k = 0; k < v->NumberOfActivePlanes; ++k) |
2545 | for (j = 0; j < v->NumberOfActivePlanes; ++j) |
2546 | if (v->BlendingAndTiming[k] == j) |
2547 | v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; |
2548 | |
2549 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2550 | v->MaxVStartupLines[k] = |
2551 | (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? |
2552 | dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : |
2553 | v->VTotal[k] - v->VActive[k] |
2554 | - dml_max( |
2555 | 1.0, |
2556 | dml_ceil( |
2557 | (double) v->WritebackDelay[v->VoltageLevel][k] |
2558 | / (v->HTotal[k] / v->PixelClock[k]), |
2559 | 1)); |
2560 | if (v->MaxVStartupLines[k] > 1023) |
2561 | v->MaxVStartupLines[k] = 1023; |
2562 | |
2563 | #ifdef __DML_VBA_DEBUG__ |
2564 | dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]){do { } while(0); }; |
2565 | dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel){do { } while(0); }; |
2566 | dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]){do { } while(0); }; |
2567 | #endif |
2568 | } |
2569 | |
2570 | v->MaximumMaxVStartupLines = 0; |
2571 | for (k = 0; k < v->NumberOfActivePlanes; ++k) |
2572 | v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); |
2573 | |
2574 | // VBA_DELTA |
2575 | // We don't really care to iterate between the various prefetch modes |
2576 | //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); |
2577 | |
2578 | v->UrgentLatency = CalculateUrgentLatency( |
2579 | v->UrgentLatencyPixelDataOnly, |
2580 | v->UrgentLatencyPixelMixedWithVMData, |
2581 | v->UrgentLatencyVMDataOnly, |
2582 | v->DoUrgentLatencyAdjustment, |
2583 | v->UrgentLatencyAdjustmentFabricClockComponent, |
2584 | v->UrgentLatencyAdjustmentFabricClockReference, |
2585 | v->FabricClock); |
2586 | |
2587 | v->FractionOfUrgentBandwidth = 0.0; |
2588 | v->FractionOfUrgentBandwidthImmediateFlip = 0.0; |
2589 | |
2590 | v->VStartupLines = __DML_VBA_MIN_VSTARTUP__9; |
2591 | |
2592 | do { |
2593 | double MaxTotalRDBandwidthNoUrgentBurst = 0.0; |
2594 | bool_Bool DestinationLineTimesForPrefetchLessThan2 = false0; |
2595 | bool_Bool VRatioPrefetchMoreThan4 = false0; |
2596 | double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); |
2597 | MaxTotalRDBandwidth = 0; |
2598 | |
2599 | dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines){do { } while(0); }; |
2600 | |
2601 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2602 | Pipe myPipe; |
2603 | |
2604 | myPipe.DPPCLK = v->DPPCLK[k]; |
2605 | myPipe.DISPCLK = v->DISPCLK; |
2606 | myPipe.PixelClock = v->PixelClock[k]; |
2607 | myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; |
2608 | myPipe.DPPPerPlane = v->DPPPerPlane[k]; |
2609 | myPipe.ScalerEnabled = v->ScalerEnabled[k]; |
2610 | myPipe.VRatio = v->VRatio[k]; |
2611 | myPipe.VRatioChroma = v->VRatioChroma[k]; |
2612 | myPipe.SourceScan = v->SourceScan[k]; |
2613 | myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; |
2614 | myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; |
2615 | myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; |
2616 | myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; |
2617 | myPipe.InterlaceEnable = v->Interlace[k]; |
2618 | myPipe.NumberOfCursors = v->NumberOfCursors[k]; |
2619 | myPipe.VBlank = v->VTotal[k] - v->VActive[k]; |
2620 | myPipe.HTotal = v->HTotal[k]; |
2621 | myPipe.DCCEnable = v->DCCEnable[k]; |
2622 | myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 |
2623 | || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; |
2624 | myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; |
2625 | myPipe.BytePerPixelY = v->BytePerPixelY[k]; |
2626 | myPipe.BytePerPixelC = v->BytePerPixelC[k]; |
2627 | myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; |
2628 | v->ErrorResult[k] = CalculatePrefetchSchedule( |
2629 | mode_lib, |
2630 | HostVMInefficiencyFactor, |
2631 | &myPipe, |
2632 | v->DSCDelay[k], |
2633 | v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, |
2634 | v->DPPCLKDelaySCL, |
2635 | v->DPPCLKDelaySCLLBOnly, |
2636 | v->DPPCLKDelayCNVCCursor, |
2637 | v->DISPCLKDelaySubtotal, |
2638 | (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), |
2639 | v->OutputFormat[k], |
2640 | v->MaxInterDCNTileRepeaters, |
2641 | dml_min(v->VStartupLines, v->MaxVStartupLines[k]), |
2642 | v->MaxVStartupLines[k], |
2643 | v->GPUVMMaxPageTableLevels, |
2644 | v->GPUVMEnable, |
2645 | v->HostVMEnable, |
2646 | v->HostVMMaxNonCachedPageTableLevels, |
2647 | v->HostVMMinPageSize, |
2648 | v->DynamicMetadataEnable[k], |
2649 | v->DynamicMetadataVMEnabled, |
2650 | v->DynamicMetadataLinesBeforeActiveRequired[k], |
2651 | v->DynamicMetadataTransmittedBytes[k], |
2652 | v->UrgentLatency, |
2653 | v->UrgentExtraLatency, |
2654 | v->TCalc, |
2655 | v->PDEAndMetaPTEBytesFrame[k], |
2656 | v->MetaRowByte[k], |
2657 | v->PixelPTEBytesPerRow[k], |
2658 | v->PrefetchSourceLinesY[k], |
2659 | v->SwathWidthY[k], |
2660 | v->VInitPreFillY[k], |
2661 | v->MaxNumSwathY[k], |
2662 | v->PrefetchSourceLinesC[k], |
2663 | v->SwathWidthC[k], |
2664 | v->VInitPreFillC[k], |
2665 | v->MaxNumSwathC[k], |
2666 | v->swath_width_luma_ub[k], |
2667 | v->swath_width_chroma_ub[k], |
2668 | v->SwathHeightY[k], |
2669 | v->SwathHeightC[k], |
2670 | TWait, |
2671 | &v->DSTXAfterScaler[k], |
2672 | &v->DSTYAfterScaler[k], |
2673 | &v->DestinationLinesForPrefetch[k], |
2674 | &v->PrefetchBandwidth[k], |
2675 | &v->DestinationLinesToRequestVMInVBlank[k], |
2676 | &v->DestinationLinesToRequestRowInVBlank[k], |
2677 | &v->VRatioPrefetchY[k], |
2678 | &v->VRatioPrefetchC[k], |
2679 | &v->RequiredPrefetchPixDataBWLuma[k], |
2680 | &v->RequiredPrefetchPixDataBWChroma[k], |
2681 | &v->NotEnoughTimeForDynamicMetadata[k], |
2682 | &v->Tno_bw[k], |
2683 | &v->prefetch_vmrow_bw[k], |
2684 | &v->Tdmdl_vm[k], |
2685 | &v->Tdmdl[k], |
2686 | &v->TSetup[k], |
2687 | &v->VUpdateOffsetPix[k], |
2688 | &v->VUpdateWidthPix[k], |
2689 | &v->VReadyOffsetPix[k]); |
2690 | |
2691 | #ifdef __DML_VBA_DEBUG__ |
2692 | dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]){do { } while(0); }; |
2693 | #endif |
2694 | v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); |
2695 | } |
2696 | |
2697 | v->NoEnoughUrgentLatencyHiding = false0; |
2698 | v->NoEnoughUrgentLatencyHidingPre = false0; |
2699 | |
2700 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2701 | v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 |
2702 | / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; |
2703 | v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 |
2704 | / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; |
2705 | |
2706 | CalculateUrgentBurstFactor( |
2707 | v->swath_width_luma_ub[k], |
2708 | v->swath_width_chroma_ub[k], |
2709 | v->SwathHeightY[k], |
2710 | v->SwathHeightC[k], |
2711 | v->HTotal[k] / v->PixelClock[k], |
2712 | v->UrgentLatency, |
2713 | v->CursorBufferSize, |
2714 | v->CursorWidth[k][0], |
2715 | v->CursorBPP[k][0], |
2716 | v->VRatio[k], |
2717 | v->VRatioChroma[k], |
2718 | v->BytePerPixelDETY[k], |
2719 | v->BytePerPixelDETC[k], |
2720 | v->DETBufferSizeY[k], |
2721 | v->DETBufferSizeC[k], |
2722 | &v->UrgBurstFactorCursor[k], |
2723 | &v->UrgBurstFactorLuma[k], |
2724 | &v->UrgBurstFactorChroma[k], |
2725 | &v->NoUrgentLatencyHiding[k]); |
2726 | |
2727 | CalculateUrgentBurstFactor( |
2728 | v->swath_width_luma_ub[k], |
2729 | v->swath_width_chroma_ub[k], |
2730 | v->SwathHeightY[k], |
2731 | v->SwathHeightC[k], |
2732 | v->HTotal[k] / v->PixelClock[k], |
2733 | v->UrgentLatency, |
2734 | v->CursorBufferSize, |
2735 | v->CursorWidth[k][0], |
2736 | v->CursorBPP[k][0], |
2737 | v->VRatioPrefetchY[k], |
2738 | v->VRatioPrefetchC[k], |
2739 | v->BytePerPixelDETY[k], |
2740 | v->BytePerPixelDETC[k], |
2741 | v->DETBufferSizeY[k], |
2742 | v->DETBufferSizeC[k], |
2743 | &v->UrgBurstFactorCursorPre[k], |
2744 | &v->UrgBurstFactorLumaPre[k], |
2745 | &v->UrgBurstFactorChromaPre[k], |
2746 | &v->NoUrgentLatencyHidingPre[k]); |
2747 | |
2748 | MaxTotalRDBandwidth = MaxTotalRDBandwidth |
2749 | + dml_max3( |
2750 | v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], |
2751 | v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] |
2752 | + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] |
2753 | + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] |
2754 | + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), |
2755 | v->DPPPerPlane[k] |
2756 | * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] |
2757 | + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) |
2758 | + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); |
2759 | |
2760 | MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst |
2761 | + dml_max3( |
2762 | v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], |
2763 | v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] |
2764 | + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), |
2765 | v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) |
2766 | + v->cursor_bw_pre[k]); |
2767 | |
2768 | #ifdef __DML_VBA_DEBUG__ |
2769 | dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]){do { } while(0); }; |
2770 | dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]){do { } while(0); }; |
2771 | dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]){do { } while(0); }; |
2772 | dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]){do { } while(0); }; |
2773 | dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]){do { } while(0); }; |
2774 | |
2775 | dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]){do { } while(0); }; |
2776 | dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]){do { } while(0); }; |
2777 | |
2778 | dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]){do { } while(0); }; |
2779 | dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]){do { } while(0); }; |
2780 | dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]){do { } while(0); }; |
2781 | dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]){do { } while(0); }; |
2782 | dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]){do { } while(0); }; |
2783 | dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]){do { } while(0); }; |
2784 | dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]){do { } while(0); }; |
2785 | dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]){do { } while(0); }; |
2786 | dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]){do { } while(0); }; |
2787 | dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst){do { } while(0); }; |
2788 | #endif |
2789 | |
2790 | if (v->DestinationLinesForPrefetch[k] < 2) |
2791 | DestinationLineTimesForPrefetchLessThan2 = true1; |
2792 | |
2793 | if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) |
2794 | VRatioPrefetchMoreThan4 = true1; |
2795 | |
2796 | if (v->NoUrgentLatencyHiding[k] == true1) |
2797 | v->NoEnoughUrgentLatencyHiding = true1; |
2798 | |
2799 | if (v->NoUrgentLatencyHidingPre[k] == true1) |
2800 | v->NoEnoughUrgentLatencyHidingPre = true1; |
2801 | } |
2802 | |
2803 | v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; |
2804 | |
2805 | #ifdef __DML_VBA_DEBUG__ |
2806 | dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst){do { } while(0); }; |
2807 | dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW){do { } while(0); }; |
2808 | dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth){do { } while(0); }; |
2809 | #endif |
2810 | |
2811 | if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 |
2812 | && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) |
2813 | v->PrefetchModeSupported = true1; |
2814 | else { |
2815 | v->PrefetchModeSupported = false0; |
2816 | dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__){do { } while(0); }; |
2817 | dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW){do { } while(0); }; |
2818 | dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"){do { } while(0); }; |
2819 | dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"){do { } while(0); }; |
2820 | } |
2821 | |
2822 | // PREVIOUS_ERROR |
2823 | // This error result check was done after PrefetchModeSupported was set, so we will |
2824 | // still try to calculate the flip schedule even if the prefetch mode is not supported. |
2825 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2826 | if (v->ErrorResult[k] == true1 || v->NotEnoughTimeForDynamicMetadata[k] == true1) { |
2827 | v->PrefetchModeSupported = false0; |
2828 | dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__){do { } while(0); }; |
2829 | } |
2830 | } |
2831 | |
2832 | if (v->PrefetchModeSupported == true1 && v->ImmediateFlipSupport == true1) { |
2833 | v->BandwidthAvailableForImmediateFlip = v->ReturnBW; |
2834 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2835 | v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip |
2836 | - dml_max( |
2837 | v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] |
2838 | + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] |
2839 | + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], |
2840 | v->DPPPerPlane[k] |
2841 | * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] |
2842 | + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) |
2843 | + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); |
2844 | } |
2845 | |
2846 | v->TotImmediateFlipBytes = 0; |
2847 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2848 | v->TotImmediateFlipBytes = v->TotImmediateFlipBytes |
2849 | + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); |
2850 | } |
2851 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2852 | CalculateFlipSchedule( |
2853 | mode_lib, |
2854 | k, |
2855 | HostVMInefficiencyFactor, |
2856 | v->UrgentExtraLatency, |
2857 | v->UrgentLatency, |
2858 | v->PDEAndMetaPTEBytesFrame[k], |
2859 | v->MetaRowByte[k], |
2860 | v->PixelPTEBytesPerRow[k]); |
2861 | } |
2862 | |
2863 | v->total_dcn_read_bw_with_flip = 0.0; |
2864 | v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; |
2865 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2866 | v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip |
2867 | + dml_max3( |
2868 | v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], |
2869 | v->DPPPerPlane[k] * v->final_flip_bw[k] |
2870 | + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] |
2871 | + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] |
2872 | + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], |
2873 | v->DPPPerPlane[k] |
2874 | * (v->final_flip_bw[k] |
2875 | + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] |
2876 | + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) |
2877 | + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); |
2878 | v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst |
2879 | + dml_max3( |
2880 | v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], |
2881 | v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] |
2882 | + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], |
2883 | v->DPPPerPlane[k] |
2884 | * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] |
2885 | + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); |
2886 | } |
2887 | v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; |
2888 | |
2889 | v->ImmediateFlipSupported = true1; |
2890 | if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { |
2891 | #ifdef __DML_VBA_DEBUG__ |
2892 | dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip){do { } while(0); }; |
2893 | #endif |
2894 | v->ImmediateFlipSupported = false0; |
2895 | v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; |
2896 | } |
2897 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2898 | if (v->ImmediateFlipSupportedForPipe[k] == false0) { |
2899 | #ifdef __DML_VBA_DEBUG__ |
2900 | dml_print("DML::%s: Pipe %0d not supporting iflip\n",{do { } while(0); } |
2901 | __func__, k){do { } while(0); }; |
2902 | #endif |
2903 | v->ImmediateFlipSupported = false0; |
2904 | } |
2905 | } |
2906 | } else { |
2907 | v->ImmediateFlipSupported = false0; |
2908 | } |
2909 | |
2910 | v->PrefetchAndImmediateFlipSupported = |
2911 | (v->PrefetchModeSupported == true1 && ((!v->ImmediateFlipSupport && !v->HostVMEnable |
2912 | && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || |
2913 | v->ImmediateFlipSupported)) ? true1 : false0; |
2914 | #ifdef __DML_VBA_DEBUG__ |
2915 | dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported){do { } while(0); }; |
2916 | dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required){do { } while(0); }; |
2917 | dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported){do { } while(0); }; |
2918 | dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport){do { } while(0); }; |
2919 | dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable){do { } while(0); }; |
2920 | dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported){do { } while(0); }; |
2921 | #endif |
2922 | dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines){do { } while(0); }; |
2923 | |
2924 | v->VStartupLines = v->VStartupLines + 1; |
2925 | } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); |
2926 | ASSERT(v->PrefetchAndImmediateFlipSupported)do { if (({ static int __warned; int __ret = !!(!(v->PrefetchAndImmediateFlipSupported )); if (__ret && !__warned) { printf("WARNING %s failed at %s:%d\n" , "!(v->PrefetchAndImmediateFlipSupported)", "/usr/src/sys/dev/pci/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c" , 2926); __warned = 1; } __builtin_expect(!!(__ret), 0); })) do {} while (0); } while (0); |
2927 | |
2928 | // Unbounded Request Enabled |
2929 | CalculateUnboundedRequestAndCompressedBufferSize( |
2930 | v->DETBufferSizeInKByte[0], |
2931 | v->ConfigReturnBufferSizeInKByte, |
2932 | v->UseUnboundedRequesting, |
2933 | v->TotalActiveDPP, |
2934 | NoChromaPlanes, |
2935 | v->MaxNumDPP, |
2936 | v->CompressedBufferSegmentSizeInkByte, |
2937 | v->Output, |
2938 | &v->UnboundedRequestEnabled, |
2939 | &v->CompressedBufferSizeInkByte); |
2940 | |
2941 | //Watermarks and NB P-State/DRAM Clock Change Support |
2942 | { |
2943 | enum clock_change_support DRAMClockChangeSupport; // dummy |
2944 | CalculateWatermarksAndDRAMSpeedChangeSupport( |
2945 | mode_lib, |
2946 | PrefetchMode, |
2947 | v->DCFCLK, |
2948 | v->ReturnBW, |
2949 | v->UrgentLatency, |
2950 | v->UrgentExtraLatency, |
2951 | v->SOCCLK, |
2952 | v->DCFCLKDeepSleep, |
2953 | v->DETBufferSizeY, |
2954 | v->DETBufferSizeC, |
2955 | v->SwathHeightY, |
2956 | v->SwathHeightC, |
2957 | v->SwathWidthY, |
2958 | v->SwathWidthC, |
2959 | v->DPPPerPlane, |
2960 | v->BytePerPixelDETY, |
2961 | v->BytePerPixelDETC, |
2962 | v->UnboundedRequestEnabled, |
2963 | v->CompressedBufferSizeInkByte, |
2964 | &DRAMClockChangeSupport, |
2965 | &v->StutterExitWatermark, |
2966 | &v->StutterEnterPlusExitWatermark, |
2967 | &v->Z8StutterExitWatermark, |
2968 | &v->Z8StutterEnterPlusExitWatermark); |
2969 | |
2970 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
2971 | if (v->WritebackEnable[k] == true1) { |
2972 | v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( |
2973 | 0, |
2974 | v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); |
2975 | } else { |
2976 | v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; |
2977 | } |
2978 | } |
2979 | } |
2980 | |
2981 | //Display Pipeline Delivery Time in Prefetch, Groups |
2982 | CalculatePixelDeliveryTimes( |
2983 | v->NumberOfActivePlanes, |
2984 | v->VRatio, |
2985 | v->VRatioChroma, |
2986 | v->VRatioPrefetchY, |
2987 | v->VRatioPrefetchC, |
2988 | v->swath_width_luma_ub, |
2989 | v->swath_width_chroma_ub, |
2990 | v->DPPPerPlane, |
2991 | v->HRatio, |
2992 | v->HRatioChroma, |
2993 | v->PixelClock, |
2994 | v->PSCL_THROUGHPUT_LUMA, |
2995 | v->PSCL_THROUGHPUT_CHROMA, |
2996 | v->DPPCLK, |
2997 | v->BytePerPixelC, |
2998 | v->SourceScan, |
2999 | v->NumberOfCursors, |
3000 | v->CursorWidth, |
3001 | v->CursorBPP, |
3002 | v->BlockWidth256BytesY, |
3003 | v->BlockHeight256BytesY, |
3004 | v->BlockWidth256BytesC, |
3005 | v->BlockHeight256BytesC, |
3006 | v->DisplayPipeLineDeliveryTimeLuma, |
3007 | v->DisplayPipeLineDeliveryTimeChroma, |
3008 | v->DisplayPipeLineDeliveryTimeLumaPrefetch, |
3009 | v->DisplayPipeLineDeliveryTimeChromaPrefetch, |
3010 | v->DisplayPipeRequestDeliveryTimeLuma, |
3011 | v->DisplayPipeRequestDeliveryTimeChroma, |
3012 | v->DisplayPipeRequestDeliveryTimeLumaPrefetch, |
3013 | v->DisplayPipeRequestDeliveryTimeChromaPrefetch, |
3014 | v->CursorRequestDeliveryTime, |
3015 | v->CursorRequestDeliveryTimePrefetch); |
3016 | |
3017 | CalculateMetaAndPTETimes( |
3018 | v->NumberOfActivePlanes, |
3019 | v->GPUVMEnable, |
3020 | v->MetaChunkSize, |
3021 | v->MinMetaChunkSizeBytes, |
3022 | v->HTotal, |
3023 | v->VRatio, |
3024 | v->VRatioChroma, |
3025 | v->DestinationLinesToRequestRowInVBlank, |
3026 | v->DestinationLinesToRequestRowInImmediateFlip, |
3027 | v->DCCEnable, |
3028 | v->PixelClock, |
3029 | v->BytePerPixelY, |
3030 | v->BytePerPixelC, |
3031 | v->SourceScan, |
3032 | v->dpte_row_height, |
3033 | v->dpte_row_height_chroma, |
3034 | v->meta_row_width, |
3035 | v->meta_row_width_chroma, |
3036 | v->meta_row_height, |
3037 | v->meta_row_height_chroma, |
3038 | v->meta_req_width, |
3039 | v->meta_req_width_chroma, |
3040 | v->meta_req_height, |
3041 | v->meta_req_height_chroma, |
3042 | v->dpte_group_bytes, |
3043 | v->PTERequestSizeY, |
3044 | v->PTERequestSizeC, |
3045 | v->PixelPTEReqWidthY, |
3046 | v->PixelPTEReqHeightY, |
3047 | v->PixelPTEReqWidthC, |
3048 | v->PixelPTEReqHeightC, |
3049 | v->dpte_row_width_luma_ub, |
3050 | v->dpte_row_width_chroma_ub, |
3051 | v->DST_Y_PER_PTE_ROW_NOM_L, |
3052 | v->DST_Y_PER_PTE_ROW_NOM_C, |
3053 | v->DST_Y_PER_META_ROW_NOM_L, |
3054 | v->DST_Y_PER_META_ROW_NOM_C, |
3055 | v->TimePerMetaChunkNominal, |
3056 | v->TimePerChromaMetaChunkNominal, |
3057 | v->TimePerMetaChunkVBlank, |
3058 | v->TimePerChromaMetaChunkVBlank, |
3059 | v->TimePerMetaChunkFlip, |
3060 | v->TimePerChromaMetaChunkFlip, |
3061 | v->time_per_pte_group_nom_luma, |
3062 | v->time_per_pte_group_vblank_luma, |
3063 | v->time_per_pte_group_flip_luma, |
3064 | v->time_per_pte_group_nom_chroma, |
3065 | v->time_per_pte_group_vblank_chroma, |
3066 | v->time_per_pte_group_flip_chroma); |
3067 | |
3068 | CalculateVMGroupAndRequestTimes( |
3069 | v->NumberOfActivePlanes, |
3070 | v->GPUVMEnable, |
3071 | v->GPUVMMaxPageTableLevels, |
3072 | v->HTotal, |
3073 | v->BytePerPixelC, |
3074 | v->DestinationLinesToRequestVMInVBlank, |
3075 | v->DestinationLinesToRequestVMInImmediateFlip, |
3076 | v->DCCEnable, |
3077 | v->PixelClock, |
3078 | v->dpte_row_width_luma_ub, |
3079 | v->dpte_row_width_chroma_ub, |
3080 | v->vm_group_bytes, |
3081 | v->dpde0_bytes_per_frame_ub_l, |
3082 | v->dpde0_bytes_per_frame_ub_c, |
3083 | v->meta_pte_bytes_per_frame_ub_l, |
3084 | v->meta_pte_bytes_per_frame_ub_c, |
3085 | v->TimePerVMGroupVBlank, |
3086 | v->TimePerVMGroupFlip, |
3087 | v->TimePerVMRequestVBlank, |
3088 | v->TimePerVMRequestFlip); |
3089 | |
3090 | // Min TTUVBlank |
3091 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
3092 | if (PrefetchMode == 0) { |
3093 | v->AllowDRAMClockChangeDuringVBlank[k] = true1; |
3094 | v->AllowDRAMSelfRefreshDuringVBlank[k] = true1; |
3095 | v->MinTTUVBlank[k] = dml_max( |
3096 | v->DRAMClockChangeWatermark, |
3097 | dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); |
3098 | } else if (PrefetchMode == 1) { |
3099 | v->AllowDRAMClockChangeDuringVBlank[k] = false0; |
3100 | v->AllowDRAMSelfRefreshDuringVBlank[k] = true1; |
3101 | v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); |
3102 | } else { |
3103 | v->AllowDRAMClockChangeDuringVBlank[k] = false0; |
3104 | v->AllowDRAMSelfRefreshDuringVBlank[k] = false0; |
3105 | v->MinTTUVBlank[k] = v->UrgentWatermark; |
3106 | } |
3107 | if (!v->DynamicMetadataEnable[k]) |
3108 | v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; |
3109 | } |
3110 | |
3111 | // DCC Configuration |
3112 | v->ActiveDPPs = 0; |
3113 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
3114 | CalculateDCCConfiguration(v->DCCEnable[k], false0, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, |
3115 | v->SourcePixelFormat[k], |
3116 | v->SurfaceWidthY[k], |
3117 | v->SurfaceWidthC[k], |
3118 | v->SurfaceHeightY[k], |
3119 | v->SurfaceHeightC[k], |
3120 | v->DETBufferSizeInKByte[k] * 1024, |
3121 | v->BlockHeight256BytesY[k], |
3122 | v->BlockHeight256BytesC[k], |
3123 | v->SurfaceTiling[k], |
3124 | v->BytePerPixelY[k], |
3125 | v->BytePerPixelC[k], |
3126 | v->BytePerPixelDETY[k], |
3127 | v->BytePerPixelDETC[k], |
3128 | v->SourceScan[k], |
3129 | &v->DCCYMaxUncompressedBlock[k], |
3130 | &v->DCCCMaxUncompressedBlock[k], |
3131 | &v->DCCYMaxCompressedBlock[k], |
3132 | &v->DCCCMaxCompressedBlock[k], |
3133 | &v->DCCYIndependentBlock[k], |
3134 | &v->DCCCIndependentBlock[k]); |
3135 | } |
3136 | |
3137 | // VStartup Adjustment |
3138 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
3139 | bool_Bool isInterlaceTiming; |
3140 | double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; |
3141 | #ifdef __DML_VBA_DEBUG__ |
3142 | dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]){do { } while(0); }; |
3143 | #endif |
3144 | |
3145 | v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; |
3146 | |
3147 | #ifdef __DML_VBA_DEBUG__ |
3148 | dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin){do { } while(0); }; |
3149 | dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]){do { } while(0); }; |
3150 | dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]){do { } while(0); }; |
3151 | dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]){do { } while(0); }; |
3152 | #endif |
3153 | |
3154 | v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; |
3155 | if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { |
3156 | v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; |
3157 | } |
3158 | |
3159 | isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); |
3160 | |
3161 | v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) |
3162 | - v->VFrontPorch[k]) |
3163 | + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) |
3164 | + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; |
3165 | |
3166 | v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); |
3167 | |
3168 | if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) |
3169 | <= (isInterlaceTiming ? |
3170 | dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : |
3171 | (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { |
3172 | v->VREADY_AT_OR_AFTER_VSYNC[k] = true1; |
3173 | } else { |
3174 | v->VREADY_AT_OR_AFTER_VSYNC[k] = false0; |
3175 | } |
3176 | #ifdef __DML_VBA_DEBUG__ |
3177 | dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]){do { } while(0); }; |
3178 | dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]){do { } while(0); }; |
3179 | dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]){do { } while(0); }; |
3180 | dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]){do { } while(0); }; |
3181 | dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]){do { } while(0); }; |
3182 | dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]){do { } while(0); }; |
3183 | dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]){do { } while(0); }; |
3184 | dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]){do { } while(0); }; |
3185 | dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]){do { } while(0); }; |
3186 | dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]){do { } while(0); }; |
3187 | dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]){do { } while(0); }; |
3188 | #endif |
3189 | } |
3190 | |
3191 | { |
3192 | //Maximum Bandwidth Used |
3193 | double TotalWRBandwidth = 0; |
3194 | double MaxPerPlaneVActiveWRBandwidth = 0; |
3195 | double WRBandwidth = 0; |
3196 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
3197 | if (v->WritebackEnable[k] == true1 && v->WritebackPixelFormat[k] == dm_444_32) { |
3198 | WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] |
3199 | / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; |
3200 | } else if (v->WritebackEnable[k] == true1) { |
3201 | WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] |
3202 | / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; |
3203 | } |
3204 | TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; |
3205 | MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); |
3206 | } |
3207 | |
3208 | v->TotalDataReadBandwidth = 0; |
3209 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
3210 | v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; |
3211 | } |
3212 | } |
3213 | // Stutter Efficiency |
3214 | CalculateStutterEfficiency( |
3215 | mode_lib, |
3216 | v->CompressedBufferSizeInkByte, |
3217 | v->UnboundedRequestEnabled, |
3218 | v->ConfigReturnBufferSizeInKByte, |
3219 | v->MetaFIFOSizeInKEntries, |
3220 | v->ZeroSizeBufferEntries, |
3221 | v->NumberOfActivePlanes, |
3222 | v->ROBBufferSizeInKByte, |
3223 | v->TotalDataReadBandwidth, |
3224 | v->DCFCLK, |
3225 | v->ReturnBW, |
3226 | v->COMPBUF_RESERVED_SPACE_64B, |
3227 | v->COMPBUF_RESERVED_SPACE_ZS, |
3228 | v->SRExitTime, |
3229 | v->SRExitZ8Time, |
3230 | v->SynchronizedVBlank, |
3231 | v->StutterEnterPlusExitWatermark, |
3232 | v->Z8StutterEnterPlusExitWatermark, |
3233 | v->ProgressiveToInterlaceUnitInOPP, |
3234 | v->Interlace, |
3235 | v->MinTTUVBlank, |
3236 | v->DPPPerPlane, |
3237 | v->DETBufferSizeY, |
3238 | v->BytePerPixelY, |
3239 | v->BytePerPixelDETY, |
3240 | v->SwathWidthY, |
3241 | v->SwathHeightY, |
3242 | v->SwathHeightC, |
3243 | v->DCCRateLuma, |
3244 | v->DCCRateChroma, |
3245 | v->DCCFractionOfZeroSizeRequestsLuma, |
3246 | v->DCCFractionOfZeroSizeRequestsChroma, |
3247 | v->HTotal, |
3248 | v->VTotal, |
3249 | v->PixelClock, |
3250 | v->VRatio, |
3251 | v->SourceScan, |
3252 | v->BlockHeight256BytesY, |
3253 | v->BlockWidth256BytesY, |
3254 | v->BlockHeight256BytesC, |
3255 | v->BlockWidth256BytesC, |
3256 | v->DCCYMaxUncompressedBlock, |
3257 | v->DCCCMaxUncompressedBlock, |
3258 | v->VActive, |
3259 | v->DCCEnable, |
3260 | v->WritebackEnable, |
3261 | v->ReadBandwidthPlaneLuma, |
3262 | v->ReadBandwidthPlaneChroma, |
3263 | v->meta_row_bw, |
3264 | v->dpte_row_bw, |
3265 | &v->StutterEfficiencyNotIncludingVBlank, |
3266 | &v->StutterEfficiency, |
3267 | &v->NumberOfStutterBurstsPerFrame, |
3268 | &v->Z8StutterEfficiencyNotIncludingVBlank, |
3269 | &v->Z8StutterEfficiency, |
3270 | &v->Z8NumberOfStutterBurstsPerFrame, |
3271 | &v->StutterPeriod); |
3272 | } |
3273 | |
3274 | static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) |
3275 | { |
3276 | struct vba_vars_st *v = &mode_lib->vba; |
3277 | // Display Pipe Configuration |
3278 | double BytePerPixDETY[DC__NUM_DPP__MAX8]; |
3279 | double BytePerPixDETC[DC__NUM_DPP__MAX8]; |
3280 | int BytePerPixY[DC__NUM_DPP__MAX8]; |
3281 | int BytePerPixC[DC__NUM_DPP__MAX8]; |
3282 | int Read256BytesBlockHeightY[DC__NUM_DPP__MAX8]; |
3283 | int Read256BytesBlockHeightC[DC__NUM_DPP__MAX8]; |
3284 | int Read256BytesBlockWidthY[DC__NUM_DPP__MAX8]; |
3285 | int Read256BytesBlockWidthC[DC__NUM_DPP__MAX8]; |
3286 | double dummy1[DC__NUM_DPP__MAX8]; |
3287 | double dummy2[DC__NUM_DPP__MAX8]; |
3288 | double dummy3[DC__NUM_DPP__MAX8]; |
3289 | double dummy4[DC__NUM_DPP__MAX8]; |
3290 | int dummy5[DC__NUM_DPP__MAX8]; |
3291 | int dummy6[DC__NUM_DPP__MAX8]; |
3292 | bool_Bool dummy7[DC__NUM_DPP__MAX8]; |
3293 | bool_Bool dummysinglestring; |
3294 | |
3295 | unsigned int k; |
3296 | |
3297 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
3298 | |
3299 | dml30_CalculateBytePerPixelAnd256BBlockSizes( |
3300 | v->SourcePixelFormat[k], |
3301 | v->SurfaceTiling[k], |
3302 | &BytePerPixY[k], |
3303 | &BytePerPixC[k], |
3304 | &BytePerPixDETY[k], |
3305 | &BytePerPixDETC[k], |
3306 | &Read256BytesBlockHeightY[k], |
3307 | &Read256BytesBlockHeightC[k], |
3308 | &Read256BytesBlockWidthY[k], |
3309 | &Read256BytesBlockWidthC[k]); |
3310 | } |
3311 | |
3312 | CalculateSwathAndDETConfiguration( |
3313 | false0, |
3314 | v->NumberOfActivePlanes, |
3315 | mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], |
3316 | v->DETBufferSizeInKByte, |
3317 | dummy1, |
3318 | dummy2, |
3319 | v->SourceScan, |
3320 | v->SourcePixelFormat, |
3321 | v->SurfaceTiling, |
3322 | v->ViewportWidth, |
3323 | v->ViewportHeight, |
3324 | v->SurfaceWidthY, |
3325 | v->SurfaceWidthC, |
3326 | v->SurfaceHeightY, |
3327 | v->SurfaceHeightC, |
3328 | Read256BytesBlockHeightY, |
3329 | Read256BytesBlockHeightC, |
3330 | Read256BytesBlockWidthY, |
3331 | Read256BytesBlockWidthC, |
3332 | v->ODMCombineEnabled, |
3333 | v->BlendingAndTiming, |
3334 | BytePerPixY, |
3335 | BytePerPixC, |
3336 | BytePerPixDETY, |
3337 | BytePerPixDETC, |
3338 | v->HActive, |
3339 | v->HRatio, |
3340 | v->HRatioChroma, |
3341 | v->DPPPerPlane, |
3342 | dummy5, |
3343 | dummy6, |
3344 | dummy3, |
3345 | dummy4, |
3346 | v->SwathHeightY, |
3347 | v->SwathHeightC, |
3348 | v->DETBufferSizeY, |
3349 | v->DETBufferSizeC, |
3350 | dummy7, |
3351 | &dummysinglestring); |
3352 | } |
3353 | |
/*
 * CalculateTWait - latency to wait for, selected by prefetch mode.
 *
 * PrefetchMode 0: the largest of (DRAMClockChangeLatency + UrgentLatency),
 *                 SREnterPlusExitTime and UrgentLatency.
 * PrefetchMode 1: the larger of SREnterPlusExitTime and UrgentLatency.
 * Any other mode: UrgentLatency.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3364 | |
3365 | double dml31_CalculateWriteBackDISPCLK( |
3366 | enum source_format_class WritebackPixelFormat, |
3367 | double PixelClock, |
3368 | double WritebackHRatio, |
3369 | double WritebackVRatio, |
3370 | unsigned int WritebackHTaps, |
3371 | unsigned int WritebackVTaps, |
3372 | long WritebackSourceWidth, |
3373 | long WritebackDestinationWidth, |
3374 | unsigned int HTotal, |
3375 | unsigned int WritebackLineBufferSize) |
3376 | { |
3377 | double DISPCLK_H, DISPCLK_V, DISPCLK_HB; |
3378 | |
3379 | DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; |
3380 | DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; |
3381 | DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; |
3382 | return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); |
3383 | } |
3384 | |
3385 | static double CalculateWriteBackDelay( |
3386 | enum source_format_class WritebackPixelFormat, |
3387 | double WritebackHRatio, |
3388 | double WritebackVRatio, |
3389 | unsigned int WritebackVTaps, |
3390 | int WritebackDestinationWidth, |
3391 | int WritebackDestinationHeight, |
3392 | int WritebackSourceHeight, |
3393 | unsigned int HTotal) |
3394 | { |
3395 | double CalculateWriteBackDelay; |
3396 | double Line_length; |
3397 | double Output_lines_last_notclamped; |
3398 | double WritebackVInit; |
3399 | |
3400 | WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; |
3401 | Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); |
3402 | Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); |
3403 | if (Output_lines_last_notclamped < 0) { |
3404 | CalculateWriteBackDelay = 0; |
3405 | } else { |
3406 | CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; |
3407 | } |
3408 | return CalculateWriteBackDelay; |
3409 | } |
3410 | |
/*
 * CalculateVupdateAndDynamicMetadataParameters - VUPDATE/VREADY pixel offsets
 * and dynamic-metadata timing terms for one timing.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / DPPCLK + repeater
 *                       delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / DPPCLK, repeater delay +
 *                       20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three values above divided by PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of VBlank lines when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many lines; halved again for interlace
 *                       without the progressive-to-interlace unit in OPP.
 *
 * where "repeater delay" is MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	// VUpdateWidthPix and VReadyOffsetPix point at doubles, so they must be
	// printed with %f; only VUpdateOffsetPix is an int.
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3454 | |
3455 | static void CalculateRowBandwidth( |
3456 | bool_Bool GPUVMEnable, |
3457 | enum source_format_class SourcePixelFormat, |
3458 | double VRatio, |
3459 | double VRatioChroma, |
3460 | bool_Bool DCCEnable, |
3461 | double LineTime, |
3462 | unsigned int MetaRowByteLuma, |
3463 | unsigned int MetaRowByteChroma, |
3464 | unsigned int meta_row_height_luma, |
3465 | unsigned int meta_row_height_chroma, |
3466 | unsigned int PixelPTEBytesPerRowLuma, |
3467 | unsigned int PixelPTEBytesPerRowChroma, |
3468 | unsigned int dpte_row_height_luma, |
3469 | unsigned int dpte_row_height_chroma, |
3470 | double *meta_row_bw, |
3471 | double *dpte_row_bw) |
3472 | { |
3473 | if (DCCEnable != true1) { |
3474 | *meta_row_bw = 0; |
3475 | } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { |
3476 | *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); |
3477 | } else { |
3478 | *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); |
3479 | } |
3480 | |
3481 | if (GPUVMEnable != true1) { |
3482 | *dpte_row_bw = 0; |
3483 | } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { |
3484 | *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) |
3485 | + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); |
3486 | } else { |
3487 | *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); |
3488 | } |
3489 | } |
3490 | |
3491 | static void CalculateFlipSchedule( |
3492 | struct display_mode_lib *mode_lib, |
3493 | unsigned int k, |
3494 | double HostVMInefficiencyFactor, |
3495 | double UrgentExtraLatency, |
3496 | double UrgentLatency, |
3497 | double PDEAndMetaPTEBytesPerFrame, |
3498 | double MetaRowBytes, |
3499 | double DPTEBytesPerRow) |
3500 | { |
3501 | struct vba_vars_st *v = &mode_lib->vba; |
3502 | double min_row_time = 0.0; |
3503 | unsigned int HostVMDynamicLevelsTrips; |
3504 | double TimeForFetchingMetaPTEImmediateFlip; |
3505 | double TimeForFetchingRowInVBlankImmediateFlip; |
3506 | double ImmediateFlipBW; |
3507 | double LineTime = v->HTotal[k] / v->PixelClock[k]; |
3508 | |
3509 | if (v->GPUVMEnable == true1 && v->HostVMEnable == true1) { |
3510 | HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; |
3511 | } else { |
3512 | HostVMDynamicLevelsTrips = 0; |
3513 | } |
3514 | |
3515 | if (v->GPUVMEnable == true1 || v->DCCEnable[k] == true1) { |
3516 | ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; |
3517 | } |
3518 | |
3519 | if (v->GPUVMEnable == true1) { |
3520 | TimeForFetchingMetaPTEImmediateFlip = dml_max3( |
3521 | v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, |
3522 | UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), |
3523 | LineTime / 4.0); |
3524 | } else { |
3525 | TimeForFetchingMetaPTEImmediateFlip = 0; |
3526 | } |
3527 | |
3528 | v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; |
3529 | if ((v->GPUVMEnable == true1 || v->DCCEnable[k] == true1)) { |
3530 | TimeForFetchingRowInVBlankImmediateFlip = dml_max3( |
3531 | (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, |
3532 | UrgentLatency * (HostVMDynamicLevelsTrips + 1), |
3533 | LineTime / 4); |
3534 | } else { |
3535 | TimeForFetchingRowInVBlankImmediateFlip = 0; |
3536 | } |
3537 | |
3538 | v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; |
3539 | |
3540 | if (v->GPUVMEnable == true1) { |
3541 | v->final_flip_bw[k] = dml_max( |
3542 | PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), |
3543 | (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); |
3544 | } else if ((v->GPUVMEnable == true1 || v->DCCEnable[k] == true1)) { |
3545 | v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); |
3546 | } else { |
3547 | v->final_flip_bw[k] = 0; |
3548 | } |
3549 | |
3550 | if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { |
3551 | if (v->GPUVMEnable == true1 && v->DCCEnable[k] != true1) { |
3552 | min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); |
3553 | } else if (v->GPUVMEnable != true1 && v->DCCEnable[k] == true1) { |
3554 | min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); |
3555 | } else { |
3556 | min_row_time = dml_min4( |
3557 | v->dpte_row_height[k] * LineTime / v->VRatio[k], |
3558 | v->meta_row_height[k] * LineTime / v->VRatio[k], |
3559 | v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], |
3560 | v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); |
3561 | } |
3562 | } else { |
3563 | if (v->GPUVMEnable == true1 && v->DCCEnable[k] != true1) { |
3564 | min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; |
3565 | } else if (v->GPUVMEnable != true1 && v->DCCEnable[k] == true1) { |
3566 | min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; |
3567 | } else { |
3568 | min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); |
3569 | } |
3570 | } |
3571 | |
3572 | if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 |
3573 | || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { |
3574 | v->ImmediateFlipSupportedForPipe[k] = false0; |
3575 | } else { |
3576 | v->ImmediateFlipSupportedForPipe[k] = true1; |
3577 | } |
3578 | |
3579 | #ifdef __DML_VBA_DEBUG__ |
3580 | dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]){do { } while(0); }; |
3581 | dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]){do { } while(0); }; |
3582 | dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip){do { } while(0); }; |
3583 | dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip){do { } while(0); }; |
3584 | dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time){do { } while(0); }; |
3585 | dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]){do { } while(0); }; |
3586 | #endif |
3587 | |
3588 | } |
3589 | |
3590 | static double TruncToValidBPP( |
3591 | double LinkBitRate, |
3592 | int Lanes, |
3593 | int HTotal, |
3594 | int HActive, |
3595 | double PixelClock, |
3596 | double DesiredBPP, |
3597 | bool_Bool DSCEnable, |
3598 | enum output_encoder_class Output, |
3599 | enum output_format_class Format, |
3600 | unsigned int DSCInputBitPerComponent, |
3601 | int DSCSlices, |
3602 | int AudioRate, |
3603 | int AudioLayout, |
3604 | enum odm_combine_mode ODMCombine) |
3605 | { |
3606 | double MaxLinkBPP; |
3607 | int MinDSCBPP; |
3608 | double MaxDSCBPP; |
3609 | int NonDSCBPP0; |
3610 | int NonDSCBPP1; |
3611 | int NonDSCBPP2; |
3612 | |
3613 | if (Format == dm_420) { |
3614 | NonDSCBPP0 = 12; |
3615 | NonDSCBPP1 = 15; |
3616 | NonDSCBPP2 = 18; |
3617 | MinDSCBPP = 6; |
3618 | MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; |
3619 | } else if (Format == dm_444) { |
3620 | NonDSCBPP0 = 24; |
3621 | NonDSCBPP1 = 30; |
3622 | NonDSCBPP2 = 36; |
3623 | MinDSCBPP = 8; |
3624 | MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; |
3625 | } else { |
3626 | |
3627 | NonDSCBPP0 = 16; |
3628 | NonDSCBPP1 = 20; |
3629 | NonDSCBPP2 = 24; |
3630 | |
3631 | if (Format == dm_n422) { |
3632 | MinDSCBPP = 7; |
3633 | MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; |
3634 | } else { |
3635 | MinDSCBPP = 8; |
3636 | MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; |
3637 | } |
3638 | } |
3639 | |
3640 | if (DSCEnable && Output == dm_dp) { |
3641 | MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); |
3642 | } else { |
3643 | MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; |
3644 | } |
3645 | |
3646 | if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { |
3647 | MaxLinkBPP = 16; |
3648 | } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { |
3649 | MaxLinkBPP = 32; |
3650 | } |
3651 | |
3652 | if (DesiredBPP == 0) { |
3653 | if (DSCEnable) { |
3654 | if (MaxLinkBPP < MinDSCBPP) { |
3655 | return BPP_INVALID0; |
3656 | } else if (MaxLinkBPP >= MaxDSCBPP) { |
3657 | return MaxDSCBPP; |
3658 | } else { |
3659 | return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; |
3660 | } |
3661 | } else { |
3662 | if (MaxLinkBPP >= NonDSCBPP2) { |
3663 | return NonDSCBPP2; |
3664 | } else if (MaxLinkBPP >= NonDSCBPP1) { |
3665 | return NonDSCBPP1; |
3666 | } else if (MaxLinkBPP >= NonDSCBPP0) { |
3667 | return 16.0; |
3668 | } else { |
3669 | return BPP_INVALID0; |
3670 | } |
3671 | } |
3672 | } else { |
3673 | if (!((DSCEnable == false0 && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) |
3674 | || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { |
3675 | return BPP_INVALID0; |
3676 | } else { |
3677 | return DesiredBPP; |
3678 | } |
3679 | } |
3680 | return BPP_INVALID0; |
3681 | } |
3682 | |
3683 | static noinline__attribute__((__noinline__)) void CalculatePrefetchSchedulePerPlane( |
3684 | struct display_mode_lib *mode_lib, |
3685 | double HostVMInefficiencyFactor, |
3686 | int i, |
3687 | unsigned j, |
3688 | unsigned k) |
3689 | { |
3690 | struct vba_vars_st *v = &mode_lib->vba; |
3691 | Pipe myPipe; |
3692 | |
3693 | myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; |
3694 | myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; |
3695 | myPipe.PixelClock = v->PixelClock[k]; |
3696 | myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; |
3697 | myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; |
3698 | myPipe.ScalerEnabled = v->ScalerEnabled[k]; |
3699 | myPipe.VRatio = mode_lib->vba.VRatio[k]; |
3700 | myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; |
3701 | |
3702 | myPipe.SourceScan = v->SourceScan[k]; |
3703 | myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; |
3704 | myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; |
3705 | myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; |
3706 | myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; |
3707 | myPipe.InterlaceEnable = v->Interlace[k]; |
3708 | myPipe.NumberOfCursors = v->NumberOfCursors[k]; |
3709 | myPipe.VBlank = v->VTotal[k] - v->VActive[k]; |
3710 | myPipe.HTotal = v->HTotal[k]; |
3711 | myPipe.DCCEnable = v->DCCEnable[k]; |
3712 | myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 |
3713 | || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; |
3714 | myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; |
3715 | myPipe.BytePerPixelY = v->BytePerPixelY[k]; |
3716 | myPipe.BytePerPixelC = v->BytePerPixelC[k]; |
3717 | myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; |
3718 | v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( |
3719 | mode_lib, |
3720 | HostVMInefficiencyFactor, |
3721 | &myPipe, |
3722 | v->DSCDelayPerState[i][k], |
3723 | v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, |
3724 | v->DPPCLKDelaySCL, |
3725 | v->DPPCLKDelaySCLLBOnly, |
3726 | v->DPPCLKDelayCNVCCursor, |
3727 | v->DISPCLKDelaySubtotal, |
3728 | v->SwathWidthYThisState[k] / v->HRatio[k], |
3729 | v->OutputFormat[k], |
3730 | v->MaxInterDCNTileRepeaters, |
3731 | dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), |
3732 | v->MaximumVStartup[i][j][k], |
3733 | v->GPUVMMaxPageTableLevels, |
3734 | v->GPUVMEnable, |
3735 | v->HostVMEnable, |
3736 | v->HostVMMaxNonCachedPageTableLevels, |
3737 | v->HostVMMinPageSize, |
3738 | v->DynamicMetadataEnable[k], |
3739 | v->DynamicMetadataVMEnabled, |
3740 | v->DynamicMetadataLinesBeforeActiveRequired[k], |
3741 | v->DynamicMetadataTransmittedBytes[k], |
3742 | v->UrgLatency[i], |
3743 | v->ExtraLatency, |
3744 | v->TimeCalc, |
3745 | v->PDEAndMetaPTEBytesPerFrame[i][j][k], |
3746 | v->MetaRowBytes[i][j][k], |
3747 | v->DPTEBytesPerRow[i][j][k], |
3748 | v->PrefetchLinesY[i][j][k], |
3749 | v->SwathWidthYThisState[k], |
3750 | v->PrefillY[k], |
3751 | v->MaxNumSwY[k], |
3752 | v->PrefetchLinesC[i][j][k], |
3753 | v->SwathWidthCThisState[k], |
3754 | v->PrefillC[k], |
3755 | v->MaxNumSwC[k], |
3756 | v->swath_width_luma_ub_this_state[k], |
3757 | v->swath_width_chroma_ub_this_state[k], |
3758 | v->SwathHeightYThisState[k], |
3759 | v->SwathHeightCThisState[k], |
3760 | v->TWait, |
3761 | &v->DSTXAfterScaler[k], |
3762 | &v->DSTYAfterScaler[k], |
3763 | &v->LineTimesForPrefetch[k], |
3764 | &v->PrefetchBW[k], |
3765 | &v->LinesForMetaPTE[k], |
3766 | &v->LinesForMetaAndDPTERow[k], |
3767 | &v->VRatioPreY[i][j][k], |
3768 | &v->VRatioPreC[i][j][k], |
3769 | &v->RequiredPrefetchPixelDataBWLuma[i][j][k], |
3770 | &v->RequiredPrefetchPixelDataBWChroma[i][j][k], |
3771 | &v->NoTimeForDynamicMetadata[i][j][k], |
3772 | &v->Tno_bw[k], |
3773 | &v->prefetch_vmrow_bw[k], |
3774 | &v->dummy7[k], |
3775 | &v->dummy8[k], |
3776 | &v->dummy13[k], |
3777 | &v->VUpdateOffsetPix[k], |
3778 | &v->VUpdateWidthPix[k], |
3779 | &v->VReadyOffsetPix[k]); |
3780 | } |
3781 | |
3782 | static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[]) |
3783 | { |
3784 | int i, total_pipes = 0; |
3785 | for (i = 0; i < NumberOfActivePlanes; i++) |
3786 | total_pipes += NoOfDPPThisState[i]; |
3787 | DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB128) / 64 / total_pipes) * 64; |
3788 | if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE384) |
3789 | DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE384; |
3790 | for (i = 1; i < NumberOfActivePlanes; i++) |
3791 | DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0]; |
3792 | } |
3793 | |
3794 | |
3795 | void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) |
3796 | { |
3797 | struct vba_vars_st *v = &mode_lib->vba; |
3798 | |
3799 | int i, j; |
3800 | unsigned int k, m; |
3801 | int ReorderingBytes; |
3802 | int MinPrefetchMode = 0, MaxPrefetchMode = 2; |
3803 | bool_Bool NoChroma = true1; |
3804 | bool_Bool EnoughWritebackUnits = true1; |
3805 | bool_Bool P2IWith420 = false0; |
3806 | bool_Bool DSCOnlyIfNecessaryWithBPP = false0; |
3807 | bool_Bool DSC422NativeNotSupported = false0; |
3808 | double MaxTotalVActiveRDBandwidth; |
3809 | bool_Bool ViewportExceedsSurface = false0; |
3810 | bool_Bool FMTBufferExceeded = false0; |
3811 | |
3812 | /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ |
3813 | |
3814 | CalculateMinAndMaxPrefetchMode( |
3815 | mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, |
3816 | &MinPrefetchMode, &MaxPrefetchMode); |
3817 | |
3818 | /*Scale Ratio, taps Support Check*/ |
3819 | |
3820 | v->ScaleRatioAndTapsSupport = true1; |
3821 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3822 | if (v->ScalerEnabled[k] == false0 |
3823 | && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 |
3824 | && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 |
3825 | && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe |
3826 | && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 |
3827 | || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { |
3828 | v->ScaleRatioAndTapsSupport = false0; |
3829 | } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 |
3830 | || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio |
3831 | || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] |
3832 | || v->VRatio[k] > v->vtaps[k] |
3833 | || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 |
3834 | && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 |
3835 | && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe |
3836 | && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 |
3837 | || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) |
3838 | || v->HRatioChroma[k] > v->MaxHSCLRatio |
3839 | || v->VRatioChroma[k] > v->MaxVSCLRatio |
3840 | || v->HRatioChroma[k] > v->HTAPsChroma[k] |
3841 | || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { |
3842 | v->ScaleRatioAndTapsSupport = false0; |
3843 | } |
3844 | } |
3845 | /*Source Format, Pixel Format and Scan Support Check*/ |
3846 | |
3847 | v->SourceFormatPixelAndScanSupport = true1; |
3848 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3849 | if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true1)) |
3850 | || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t |
3851 | || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { |
3852 | v->SourceFormatPixelAndScanSupport = false0; |
3853 | } |
3854 | } |
3855 | /*Bandwidth Support Check*/ |
3856 | |
3857 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3858 | dml30_CalculateBytePerPixelAnd256BBlockSizes( |
3859 | v->SourcePixelFormat[k], |
3860 | v->SurfaceTiling[k], |
3861 | &v->BytePerPixelY[k], |
3862 | &v->BytePerPixelC[k], |
3863 | &v->BytePerPixelInDETY[k], |
3864 | &v->BytePerPixelInDETC[k], |
3865 | &v->Read256BlockHeightY[k], |
3866 | &v->Read256BlockHeightC[k], |
3867 | &v->Read256BlockWidthY[k], |
3868 | &v->Read256BlockWidthC[k]); |
3869 | } |
3870 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3871 | if (v->SourceScan[k] != dm_vert) { |
3872 | v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; |
3873 | v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; |
3874 | } else { |
3875 | v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; |
3876 | v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; |
3877 | } |
3878 | } |
3879 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3880 | v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) |
3881 | / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; |
3882 | v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) |
3883 | / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; |
3884 | } |
3885 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3886 | if (v->WritebackEnable[k] == true1 && v->WritebackPixelFormat[k] == dm_444_64) { |
3887 | v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] |
3888 | / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; |
3889 | } else if (v->WritebackEnable[k] == true1) { |
3890 | v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] |
3891 | / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; |
3892 | } else { |
3893 | v->WriteBandwidth[k] = 0.0; |
3894 | } |
3895 | } |
3896 | |
3897 | /*Writeback Latency support check*/ |
3898 | |
3899 | v->WritebackLatencySupport = true1; |
3900 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3901 | if (v->WritebackEnable[k] == true1 && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { |
3902 | v->WritebackLatencySupport = false0; |
3903 | } |
3904 | } |
3905 | |
3906 | /*Writeback Mode Support Check*/ |
3907 | |
3908 | v->TotalNumberOfActiveWriteback = 0; |
3909 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3910 | if (v->WritebackEnable[k] == true1) { |
3911 | v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; |
3912 | } |
3913 | } |
3914 | |
3915 | if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { |
3916 | EnoughWritebackUnits = false0; |
3917 | } |
3918 | |
3919 | /*Writeback Scale Ratio and Taps Support Check*/ |
3920 | |
3921 | v->WritebackScaleRatioAndTapsSupport = true1; |
3922 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3923 | if (v->WritebackEnable[k] == true1) { |
3924 | if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio |
3925 | || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio |
3926 | || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio |
3927 | || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps |
3928 | || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps |
3929 | || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] |
3930 | || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { |
3931 | v->WritebackScaleRatioAndTapsSupport = false0; |
3932 | } |
3933 | if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { |
3934 | v->WritebackScaleRatioAndTapsSupport = false0; |
3935 | } |
3936 | } |
3937 | } |
3938 | /*Maximum DISPCLK/DPPCLK Support check*/ |
3939 | |
3940 | v->WritebackRequiredDISPCLK = 0.0; |
3941 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3942 | if (v->WritebackEnable[k] == true1) { |
3943 | v->WritebackRequiredDISPCLK = dml_max( |
3944 | v->WritebackRequiredDISPCLK, |
3945 | dml31_CalculateWriteBackDISPCLK( |
3946 | v->WritebackPixelFormat[k], |
3947 | v->PixelClock[k], |
3948 | v->WritebackHRatio[k], |
3949 | v->WritebackVRatio[k], |
3950 | v->WritebackHTaps[k], |
3951 | v->WritebackVTaps[k], |
3952 | v->WritebackSourceWidth[k], |
3953 | v->WritebackDestinationWidth[k], |
3954 | v->HTotal[k], |
3955 | v->WritebackLineBufferSize)); |
3956 | } |
3957 | } |
3958 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3959 | if (v->HRatio[k] > 1.0) { |
3960 | v->PSCL_FACTOR[k] = dml_min( |
3961 | v->MaxDCHUBToPSCLThroughput, |
3962 | v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); |
3963 | } else { |
3964 | v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); |
3965 | } |
3966 | if (v->BytePerPixelC[k] == 0.0) { |
3967 | v->PSCL_FACTOR_CHROMA[k] = 0.0; |
3968 | v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] |
3969 | * dml_max3( |
3970 | v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), |
3971 | v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], |
3972 | 1.0); |
3973 | if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { |
3974 | v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; |
3975 | } |
3976 | } else { |
3977 | if (v->HRatioChroma[k] > 1.0) { |
3978 | v->PSCL_FACTOR_CHROMA[k] = dml_min( |
3979 | v->MaxDCHUBToPSCLThroughput, |
3980 | v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); |
3981 | } else { |
3982 | v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); |
3983 | } |
3984 | v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] |
3985 | * dml_max5( |
3986 | v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), |
3987 | v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], |
3988 | v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), |
3989 | v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], |
3990 | 1.0); |
3991 | if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) |
3992 | && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { |
3993 | v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; |
3994 | } |
3995 | } |
3996 | } |
3997 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
3998 | int MaximumSwathWidthSupportLuma; |
3999 | int MaximumSwathWidthSupportChroma; |
4000 | |
4001 | if (v->SurfaceTiling[k] == dm_sw_linear) { |
4002 | MaximumSwathWidthSupportLuma = 8192.0; |
4003 | } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { |
4004 | MaximumSwathWidthSupportLuma = 2880.0; |
4005 | } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { |
4006 | MaximumSwathWidthSupportLuma = 3840.0; |
4007 | } else { |
4008 | MaximumSwathWidthSupportLuma = 5760.0; |
4009 | } |
4010 | |
4011 | if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { |
4012 | MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; |
4013 | } else { |
4014 | MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; |
4015 | } |
4016 | v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] |
4017 | / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); |
4018 | if (v->BytePerPixelC[k] == 0.0) { |
4019 | v->MaximumSwathWidthInLineBufferChroma = 0; |
4020 | } else { |
4021 | v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] |
4022 | / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); |
4023 | } |
4024 | v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); |
4025 | v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); |
4026 | } |
4027 | |
4028 | CalculateSwathAndDETConfiguration( |
4029 | true1, |
4030 | v->NumberOfActivePlanes, |
4031 | mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], |
4032 | v->DETBufferSizeInKByte, |
4033 | v->MaximumSwathWidthLuma, |
4034 | v->MaximumSwathWidthChroma, |
4035 | v->SourceScan, |
4036 | v->SourcePixelFormat, |
4037 | v->SurfaceTiling, |
4038 | v->ViewportWidth, |
4039 | v->ViewportHeight, |
4040 | v->SurfaceWidthY, |
4041 | v->SurfaceWidthC, |
4042 | v->SurfaceHeightY, |
4043 | v->SurfaceHeightC, |
4044 | v->Read256BlockHeightY, |
4045 | v->Read256BlockHeightC, |
4046 | v->Read256BlockWidthY, |
4047 | v->Read256BlockWidthC, |
4048 | v->odm_combine_dummy, |
4049 | v->BlendingAndTiming, |
4050 | v->BytePerPixelY, |
4051 | v->BytePerPixelC, |
4052 | v->BytePerPixelInDETY, |
4053 | v->BytePerPixelInDETC, |
4054 | v->HActive, |
4055 | v->HRatio, |
4056 | v->HRatioChroma, |
4057 | v->NoOfDPPThisState, |
4058 | v->swath_width_luma_ub_this_state, |
4059 | v->swath_width_chroma_ub_this_state, |
4060 | v->SwathWidthYThisState, |
4061 | v->SwathWidthCThisState, |
4062 | v->SwathHeightYThisState, |
4063 | v->SwathHeightCThisState, |
4064 | v->DETBufferSizeYThisState, |
4065 | v->DETBufferSizeCThisState, |
4066 | v->SingleDPPViewportSizeSupportPerPlane, |
4067 | &v->ViewportSizeSupport[0][0]); |
4068 | |
4069 | for (i = 0; i < v->soc.num_states; i++) { |
4070 | for (j = 0; j < 2; j++) { |
4071 | v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); |
4072 | v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); |
4073 | v->RequiredDISPCLK[i][j] = 0.0; |
4074 | v->DISPCLK_DPPCLK_Support[i][j] = true1; |
4075 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4076 | v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4077 | * (1.0 + v->DISPCLKRampingMargin / 100.0); |
4078 | if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] |
4079 | && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] |
4080 | && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { |
4081 | v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] |
4082 | * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); |
4083 | } |
4084 | v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4085 | * (1 + v->DISPCLKRampingMargin / 100.0); |
4086 | if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] |
4087 | && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] |
4088 | && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { |
4089 | v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 |
4090 | * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); |
4091 | } |
4092 | v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4093 | * (1 + v->DISPCLKRampingMargin / 100.0); |
4094 | if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] |
4095 | && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] |
4096 | && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { |
4097 | v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 |
4098 | * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); |
4099 | } |
4100 | |
4101 | if (v->ODMCombinePolicy == dm_odm_combine_policy_none |
4102 | || !(v->Output[k] == dm_dp || |
4103 | v->Output[k] == dm_dp2p0 || |
4104 | v->Output[k] == dm_edp)) { |
4105 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; |
4106 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; |
4107 | |
4108 | if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH4096) |
4109 | FMTBufferExceeded = true1; |
4110 | } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { |
4111 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; |
4112 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; |
4113 | } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 |
4114 | || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { |
4115 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; |
4116 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; |
4117 | } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { |
4118 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; |
4119 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; |
4120 | } else { |
4121 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; |
4122 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; |
4123 | } |
4124 | if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH5184 |
4125 | && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { |
4126 | if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH5184) { |
4127 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; |
4128 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; |
4129 | } else { |
4130 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; |
4131 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; |
4132 | } |
4133 | } |
4134 | if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH4096 |
4135 | && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { |
4136 | if (v->Output[k] == dm_hdmi) { |
4137 | FMTBufferExceeded = true1; |
4138 | } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH4096) { |
4139 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; |
4140 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; |
4141 | |
4142 | if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH4096) |
4143 | FMTBufferExceeded = true1; |
4144 | } else { |
4145 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; |
4146 | v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; |
4147 | } |
4148 | } |
4149 | if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { |
4150 | v->MPCCombine[i][j][k] = false0; |
4151 | v->NoOfDPP[i][j][k] = 4; |
4152 | v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; |
4153 | } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { |
4154 | v->MPCCombine[i][j][k] = false0; |
4155 | v->NoOfDPP[i][j][k] = 2; |
4156 | v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; |
4157 | } else if ((v->WhenToDoMPCCombine == dm_mpc_never |
4158 | || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4159 | <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true1))) { |
4160 | v->MPCCombine[i][j][k] = false0; |
4161 | v->NoOfDPP[i][j][k] = 1; |
4162 | v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); |
4163 | } else { |
4164 | v->MPCCombine[i][j][k] = true1; |
4165 | v->NoOfDPP[i][j][k] = 2; |
4166 | v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; |
4167 | } |
4168 | v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); |
4169 | if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4170 | > v->MaxDppclkRoundedDownToDFSGranularity) |
4171 | || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { |
4172 | v->DISPCLK_DPPCLK_Support[i][j] = false0; |
4173 | } |
4174 | if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE384 && v->NoOfDPP[i][j][k] < 2) { |
4175 | v->MPCCombine[i][j][k] = true1; |
4176 | v->NoOfDPP[i][j][k] = 2; |
4177 | } |
4178 | } |
4179 | v->TotalNumberOfActiveDPP[i][j] = 0; |
4180 | v->TotalNumberOfSingleDPPPlanes[i][j] = 0; |
4181 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4182 | v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; |
4183 | if (v->NoOfDPP[i][j][k] == 1) |
4184 | v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; |
4185 | if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 |
4186 | || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) |
4187 | NoChroma = false0; |
4188 | } |
4189 | |
4190 | // UPTO |
4191 | if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never |
4192 | && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { |
4193 | while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { |
4194 | double BWOfNonSplitPlaneOfMaximumBandwidth; |
4195 | unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; |
4196 | BWOfNonSplitPlaneOfMaximumBandwidth = 0; |
4197 | NumberOfNonSplitPlaneOfMaximumBandwidth = 0; |
4198 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4199 | if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth |
4200 | && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false0) { |
4201 | BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; |
4202 | NumberOfNonSplitPlaneOfMaximumBandwidth = k; |
4203 | } |
4204 | } |
4205 | v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true1; |
4206 | v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; |
4207 | v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = |
4208 | v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] |
4209 | * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; |
4210 | v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; |
4211 | v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; |
4212 | } |
4213 | } |
4214 | if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { |
4215 | v->RequiredDISPCLK[i][j] = 0.0; |
4216 | v->DISPCLK_DPPCLK_Support[i][j] = true1; |
4217 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4218 | v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; |
4219 | if (v->SingleDPPViewportSizeSupportPerPlane[k] == false0 && v->WhenToDoMPCCombine != dm_mpc_never) { |
4220 | v->MPCCombine[i][j][k] = true1; |
4221 | v->NoOfDPP[i][j][k] = 2; |
4222 | v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] |
4223 | * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; |
4224 | } else { |
4225 | v->MPCCombine[i][j][k] = false0; |
4226 | v->NoOfDPP[i][j][k] = 1; |
4227 | v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] |
4228 | * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); |
4229 | } |
4230 | if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] |
4231 | && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { |
4232 | v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4233 | * (1.0 + v->DISPCLKRampingMargin / 100.0); |
4234 | } else { |
4235 | v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); |
4236 | } |
4237 | v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); |
4238 | if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) |
4239 | > v->MaxDppclkRoundedDownToDFSGranularity) |
4240 | || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { |
4241 | v->DISPCLK_DPPCLK_Support[i][j] = false0; |
4242 | } |
4243 | } |
4244 | v->TotalNumberOfActiveDPP[i][j] = 0.0; |
4245 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4246 | v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; |
4247 | } |
4248 | } |
4249 | v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); |
4250 | if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { |
4251 | v->DISPCLK_DPPCLK_Support[i][j] = false0; |
4252 | } |
4253 | } |
4254 | } |
4255 | |
4256 | /*Total Available Pipes Support Check*/ |
4257 | |
4258 | for (i = 0; i < v->soc.num_states; i++) { |
4259 | for (j = 0; j < 2; j++) { |
4260 | if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { |
4261 | v->TotalAvailablePipesSupport[i][j] = true1; |
4262 | } else { |
4263 | v->TotalAvailablePipesSupport[i][j] = false0; |
4264 | } |
4265 | } |
4266 | } |
4267 | /*Display IO and DSC Support Check*/ |
4268 | |
4269 | v->NonsupportedDSCInputBPC = false0; |
4270 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4271 | if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) |
4272 | || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { |
4273 | v->NonsupportedDSCInputBPC = true1; |
4274 | } |
4275 | } |
4276 | |
4277 | /*Number Of DSC Slices*/ |
4278 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4279 | if (v->BlendingAndTiming[k] == k) { |
4280 | if (v->PixelClockBackEnd[k] > 3200) { |
4281 | v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); |
4282 | } else if (v->PixelClockBackEnd[k] > 1360) { |
4283 | v->NumberOfDSCSlices[k] = 8; |
4284 | } else if (v->PixelClockBackEnd[k] > 680) { |
4285 | v->NumberOfDSCSlices[k] = 4; |
4286 | } else if (v->PixelClockBackEnd[k] > 340) { |
4287 | v->NumberOfDSCSlices[k] = 2; |
4288 | } else { |
4289 | v->NumberOfDSCSlices[k] = 1; |
4290 | } |
4291 | } else { |
4292 | v->NumberOfDSCSlices[k] = 0; |
4293 | } |
4294 | } |
4295 | |
4296 | for (i = 0; i < v->soc.num_states; i++) { |
4297 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4298 | v->RequiresDSC[i][k] = false0; |
4299 | v->RequiresFEC[i][k] = false0; |
4300 | if (v->BlendingAndTiming[k] == k) { |
4301 | if (v->Output[k] == dm_hdmi) { |
4302 | v->RequiresDSC[i][k] = false0; |
4303 | v->RequiresFEC[i][k] = false0; |
4304 | v->OutputBppPerState[i][k] = TruncToValidBPP( |
4305 | dml_min(600.0, v->PHYCLKPerState[i]) * 10, |
4306 | 3, |
4307 | v->HTotal[k], |
4308 | v->HActive[k], |
4309 | v->PixelClockBackEnd[k], |
4310 | v->ForcedOutputLinkBPP[k], |
4311 | false0, |
4312 | v->Output[k], |
4313 | v->OutputFormat[k], |
4314 | v->DSCInputBitPerComponent[k], |
4315 | v->NumberOfDSCSlices[k], |
4316 | v->AudioSampleRate[k], |
4317 | v->AudioSampleLayout[k], |
4318 | v->ODMCombineEnablePerState[i][k]); |
4319 | } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) { |
4320 | if (v->DSCEnable[k] == true1) { |
4321 | v->RequiresDSC[i][k] = true1; |
4322 | v->LinkDSCEnable = true1; |
4323 | if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) { |
4324 | v->RequiresFEC[i][k] = true1; |
4325 | } else { |
4326 | v->RequiresFEC[i][k] = false0; |
4327 | } |
4328 | } else { |
4329 | v->RequiresDSC[i][k] = false0; |
4330 | v->LinkDSCEnable = false0; |
4331 | if (v->Output[k] == dm_dp2p0) { |
4332 | v->RequiresFEC[i][k] = true1; |
4333 | } else { |
4334 | v->RequiresFEC[i][k] = false0; |
4335 | } |
4336 | } |
4337 | if (v->Output[k] == dm_dp2p0) { |
4338 | v->Outbpp = BPP_INVALID0; |
4339 | if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) && |
4340 | v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) { |
4341 | v->Outbpp = TruncToValidBPP( |
4342 | (1.0 - v->Downspreading / 100.0) * 10000, |
4343 | v->OutputLinkDPLanes[k], |
4344 | v->HTotal[k], |
4345 | v->HActive[k], |
4346 | v->PixelClockBackEnd[k], |
4347 | v->ForcedOutputLinkBPP[k], |
4348 | v->LinkDSCEnable, |
4349 | v->Output[k], |
4350 | v->OutputFormat[k], |
4351 | v->DSCInputBitPerComponent[k], |
4352 | v->NumberOfDSCSlices[k], |
4353 | v->AudioSampleRate[k], |
4354 | v->AudioSampleLayout[k], |
4355 | v->ODMCombineEnablePerState[i][k]); |
4356 | if (v->Outbpp == BPP_INVALID0 && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 && |
4357 | v->DSCEnable[k] == true1 && v->ForcedOutputLinkBPP[k] == 0) { |
4358 | v->RequiresDSC[i][k] = true1; |
4359 | v->LinkDSCEnable = true1; |
4360 | v->Outbpp = TruncToValidBPP( |
4361 | (1.0 - v->Downspreading / 100.0) * 10000, |
4362 | v->OutputLinkDPLanes[k], |
4363 | v->HTotal[k], |
4364 | v->HActive[k], |
4365 | v->PixelClockBackEnd[k], |
4366 | v->ForcedOutputLinkBPP[k], |
4367 | v->LinkDSCEnable, |
4368 | v->Output[k], |
4369 | v->OutputFormat[k], |
4370 | v->DSCInputBitPerComponent[k], |
4371 | v->NumberOfDSCSlices[k], |
4372 | v->AudioSampleRate[k], |
4373 | v->AudioSampleLayout[k], |
4374 | v->ODMCombineEnablePerState[i][k]); |
4375 | } |
4376 | v->OutputBppPerState[i][k] = v->Outbpp; |
4377 | // TODO: Need some other way to handle this nonsense |
4378 | // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10" |
4379 | } |
4380 | if (v->Outbpp == BPP_INVALID0 && |
4381 | (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) && |
4382 | v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) { |
4383 | v->Outbpp = TruncToValidBPP( |
4384 | (1.0 - v->Downspreading / 100.0) * 13500, |
4385 | v->OutputLinkDPLanes[k], |
4386 | v->HTotal[k], |
4387 | v->HActive[k], |
4388 | v->PixelClockBackEnd[k], |
4389 | v->ForcedOutputLinkBPP[k], |
4390 | v->LinkDSCEnable, |
4391 | v->Output[k], |
4392 | v->OutputFormat[k], |
4393 | v->DSCInputBitPerComponent[k], |
4394 | v->NumberOfDSCSlices[k], |
4395 | v->AudioSampleRate[k], |
4396 | v->AudioSampleLayout[k], |
4397 | v->ODMCombineEnablePerState[i][k]); |
4398 | if (v->Outbpp == BPP_INVALID0 && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 && |
4399 | v->DSCEnable[k] == true1 && v->ForcedOutputLinkBPP[k] == 0) { |
4400 | v->RequiresDSC[i][k] = true1; |
4401 | v->LinkDSCEnable = true1; |
4402 | v->Outbpp = TruncToValidBPP( |
4403 | (1.0 - v->Downspreading / 100.0) * 13500, |
4404 | v->OutputLinkDPLanes[k], |
4405 | v->HTotal[k], |
4406 | v->HActive[k], |
4407 | v->PixelClockBackEnd[k], |
4408 | v->ForcedOutputLinkBPP[k], |
4409 | v->LinkDSCEnable, |
4410 | v->Output[k], |
4411 | v->OutputFormat[k], |
4412 | v->DSCInputBitPerComponent[k], |
4413 | v->NumberOfDSCSlices[k], |
4414 | v->AudioSampleRate[k], |
4415 | v->AudioSampleLayout[k], |
4416 | v->ODMCombineEnablePerState[i][k]); |
4417 | } |
4418 | v->OutputBppPerState[i][k] = v->Outbpp; |
4419 | // TODO: Need some other way to handle this nonsense |
4420 | // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5" |
4421 | } |
4422 | if (v->Outbpp == BPP_INVALID0 && |
4423 | (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) && |
4424 | v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) { |
4425 | v->Outbpp = TruncToValidBPP( |
4426 | (1.0 - v->Downspreading / 100.0) * 20000, |
4427 | v->OutputLinkDPLanes[k], |
4428 | v->HTotal[k], |
4429 | v->HActive[k], |
4430 | v->PixelClockBackEnd[k], |
4431 | v->ForcedOutputLinkBPP[k], |
4432 | v->LinkDSCEnable, |
4433 | v->Output[k], |
4434 | v->OutputFormat[k], |
4435 | v->DSCInputBitPerComponent[k], |
4436 | v->NumberOfDSCSlices[k], |
4437 | v->AudioSampleRate[k], |
4438 | v->AudioSampleLayout[k], |
4439 | v->ODMCombineEnablePerState[i][k]); |
4440 | if (v->Outbpp == BPP_INVALID0 && v->DSCEnable[k] == true1 && |
4441 | v->ForcedOutputLinkBPP[k] == 0) { |
4442 | v->RequiresDSC[i][k] = true1; |
4443 | v->LinkDSCEnable = true1; |
4444 | v->Outbpp = TruncToValidBPP( |
4445 | (1.0 - v->Downspreading / 100.0) * 20000, |
4446 | v->OutputLinkDPLanes[k], |
4447 | v->HTotal[k], |
4448 | v->HActive[k], |
4449 | v->PixelClockBackEnd[k], |
4450 | v->ForcedOutputLinkBPP[k], |
4451 | v->LinkDSCEnable, |
4452 | v->Output[k], |
4453 | v->OutputFormat[k], |
4454 | v->DSCInputBitPerComponent[k], |
4455 | v->NumberOfDSCSlices[k], |
4456 | v->AudioSampleRate[k], |
4457 | v->AudioSampleLayout[k], |
4458 | v->ODMCombineEnablePerState[i][k]); |
4459 | } |
4460 | v->OutputBppPerState[i][k] = v->Outbpp; |
4461 | // TODO: Need some other way to handle this nonsense |
4462 | // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20" |
4463 | } |
4464 | } else { |
4465 | v->Outbpp = BPP_INVALID0; |
4466 | if (v->PHYCLKPerState[i] >= 270.0) { |
4467 | v->Outbpp = TruncToValidBPP( |
4468 | (1.0 - v->Downspreading / 100.0) * 2700, |
4469 | v->OutputLinkDPLanes[k], |
4470 | v->HTotal[k], |
4471 | v->HActive[k], |
4472 | v->PixelClockBackEnd[k], |
4473 | v->ForcedOutputLinkBPP[k], |
4474 | v->LinkDSCEnable, |
4475 | v->Output[k], |
4476 | v->OutputFormat[k], |
4477 | v->DSCInputBitPerComponent[k], |
4478 | v->NumberOfDSCSlices[k], |
4479 | v->AudioSampleRate[k], |
4480 | v->AudioSampleLayout[k], |
4481 | v->ODMCombineEnablePerState[i][k]); |
4482 | v->OutputBppPerState[i][k] = v->Outbpp; |
4483 | // TODO: Need some other way to handle this nonsense |
4484 | // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" |
4485 | } |
4486 | if (v->Outbpp == BPP_INVALID0 && v->PHYCLKPerState[i] >= 540.0) { |
4487 | v->Outbpp = TruncToValidBPP( |
4488 | (1.0 - v->Downspreading / 100.0) * 5400, |
4489 | v->OutputLinkDPLanes[k], |
4490 | v->HTotal[k], |
4491 | v->HActive[k], |
4492 | v->PixelClockBackEnd[k], |
4493 | v->ForcedOutputLinkBPP[k], |
4494 | v->LinkDSCEnable, |
4495 | v->Output[k], |
4496 | v->OutputFormat[k], |
4497 | v->DSCInputBitPerComponent[k], |
4498 | v->NumberOfDSCSlices[k], |
4499 | v->AudioSampleRate[k], |
4500 | v->AudioSampleLayout[k], |
4501 | v->ODMCombineEnablePerState[i][k]); |
4502 | v->OutputBppPerState[i][k] = v->Outbpp; |
4503 | // TODO: Need some other way to handle this nonsense |
4504 | // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" |
4505 | } |
4506 | if (v->Outbpp == BPP_INVALID0 && v->PHYCLKPerState[i] >= 810.0) { |
4507 | v->Outbpp = TruncToValidBPP( |
4508 | (1.0 - v->Downspreading / 100.0) * 8100, |
4509 | v->OutputLinkDPLanes[k], |
4510 | v->HTotal[k], |
4511 | v->HActive[k], |
4512 | v->PixelClockBackEnd[k], |
4513 | v->ForcedOutputLinkBPP[k], |
4514 | v->LinkDSCEnable, |
4515 | v->Output[k], |
4516 | v->OutputFormat[k], |
4517 | v->DSCInputBitPerComponent[k], |
4518 | v->NumberOfDSCSlices[k], |
4519 | v->AudioSampleRate[k], |
4520 | v->AudioSampleLayout[k], |
4521 | v->ODMCombineEnablePerState[i][k]); |
4522 | v->OutputBppPerState[i][k] = v->Outbpp; |
4523 | // TODO: Need some other way to handle this nonsense |
4524 | // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" |
4525 | } |
4526 | } |
4527 | } |
4528 | } else { |
4529 | v->OutputBppPerState[i][k] = 0; |
4530 | } |
4531 | } |
4532 | } |
4533 | |
4534 | for (i = 0; i < v->soc.num_states; i++) { |
4535 | v->LinkCapacitySupport[i] = true1; |
4536 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4537 | if (v->BlendingAndTiming[k] == k |
4538 | && (v->Output[k] == dm_dp || |
4539 | v->Output[k] == dm_edp || |
4540 | v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { |
4541 | v->LinkCapacitySupport[i] = false0; |
4542 | } |
4543 | } |
4544 | } |
4545 | |
4546 | // UPTO 2172 |
4547 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4548 | if (v->BlendingAndTiming[k] == k |
4549 | && (v->Output[k] == dm_dp || |
4550 | v->Output[k] == dm_edp || |
4551 | v->Output[k] == dm_hdmi)) { |
4552 | if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true1) { |
4553 | P2IWith420 = true1; |
4554 | } |
4555 | if (v->DSCEnable[k] == true1 && v->OutputFormat[k] == dm_n422 |
4556 | && !v->DSC422NativeSupport) { |
4557 | DSC422NativeNotSupported = true1; |
4558 | } |
4559 | } |
4560 | } |
4561 | |
4562 | for (i = 0; i < v->soc.num_states; ++i) { |
4563 | v->ODMCombine4To1SupportCheckOK[i] = true1; |
4564 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4565 | if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 |
4566 | && (v->ODMCombine4To1Supported == false0 || v->Output[k] == dm_dp || v->Output[k] == dm_edp |
4567 | || v->Output[k] == dm_hdmi)) { |
4568 | v->ODMCombine4To1SupportCheckOK[i] = false0; |
4569 | } |
4570 | } |
4571 | } |
4572 | |
4573 | /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ |
4574 | |
4575 | for (i = 0; i < v->soc.num_states; i++) { |
4576 | v->NotEnoughDSCUnits[i] = false0; |
4577 | v->TotalDSCUnitsRequired = 0.0; |
4578 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4579 | if (v->RequiresDSC[i][k] == true1) { |
4580 | if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { |
4581 | v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; |
4582 | } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { |
4583 | v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; |
4584 | } else { |
4585 | v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; |
4586 | } |
4587 | } |
4588 | } |
4589 | if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { |
4590 | v->NotEnoughDSCUnits[i] = true1; |
4591 | } |
4592 | } |
4593 | /*DSC Delay per state*/ |
4594 | |
4595 | for (i = 0; i < v->soc.num_states; i++) { |
4596 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4597 | if (v->OutputBppPerState[i][k] == BPP_INVALID0) { |
4598 | v->BPP = 0.0; |
4599 | } else { |
4600 | v->BPP = v->OutputBppPerState[i][k]; |
4601 | } |
4602 | if (v->RequiresDSC[i][k] == true1 && v->BPP != 0.0) { |
4603 | if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { |
4604 | v->DSCDelayPerState[i][k] = dscceComputeDelay( |
4605 | v->DSCInputBitPerComponent[k], |
4606 | v->BPP, |
4607 | dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), |
4608 | v->NumberOfDSCSlices[k], |
4609 | v->OutputFormat[k], |
4610 | v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); |
4611 | } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { |
4612 | v->DSCDelayPerState[i][k] = 2.0 |
4613 | * (dscceComputeDelay( |
4614 | v->DSCInputBitPerComponent[k], |
4615 | v->BPP, |
4616 | dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), |
4617 | v->NumberOfDSCSlices[k] / 2, |
4618 | v->OutputFormat[k], |
4619 | v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); |
4620 | } else { |
4621 | v->DSCDelayPerState[i][k] = 4.0 |
4622 | * (dscceComputeDelay( |
4623 | v->DSCInputBitPerComponent[k], |
4624 | v->BPP, |
4625 | dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), |
4626 | v->NumberOfDSCSlices[k] / 4, |
4627 | v->OutputFormat[k], |
4628 | v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); |
4629 | } |
4630 | v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; |
4631 | } else { |
4632 | v->DSCDelayPerState[i][k] = 0.0; |
4633 | } |
4634 | } |
4635 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4636 | for (m = 0; m < v->NumberOfActivePlanes; m++) { |
4637 | if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true1) { |
4638 | v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; |
4639 | } |
4640 | } |
4641 | } |
4642 | } |
4643 | |
4644 | //Calculate Swath, DET Configuration, DCFCLKDeepSleep |
4645 | // |
4646 | for (i = 0; i < v->soc.num_states; ++i) { |
4647 | for (j = 0; j <= 1; ++j) { |
4648 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4649 | v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; |
4650 | v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; |
4651 | v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; |
4652 | } |
4653 | |
4654 | if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0]) |
4655 | PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte); |
4656 | CalculateSwathAndDETConfiguration( |
4657 | false0, |
4658 | v->NumberOfActivePlanes, |
4659 | mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], |
4660 | v->DETBufferSizeInKByte, |
4661 | v->MaximumSwathWidthLuma, |
4662 | v->MaximumSwathWidthChroma, |
4663 | v->SourceScan, |
4664 | v->SourcePixelFormat, |
4665 | v->SurfaceTiling, |
4666 | v->ViewportWidth, |
4667 | v->ViewportHeight, |
4668 | v->SurfaceWidthY, |
4669 | v->SurfaceWidthC, |
4670 | v->SurfaceHeightY, |
4671 | v->SurfaceHeightC, |
4672 | v->Read256BlockHeightY, |
4673 | v->Read256BlockHeightC, |
4674 | v->Read256BlockWidthY, |
4675 | v->Read256BlockWidthC, |
4676 | v->ODMCombineEnableThisState, |
4677 | v->BlendingAndTiming, |
4678 | v->BytePerPixelY, |
4679 | v->BytePerPixelC, |
4680 | v->BytePerPixelInDETY, |
4681 | v->BytePerPixelInDETC, |
4682 | v->HActive, |
4683 | v->HRatio, |
4684 | v->HRatioChroma, |
4685 | v->NoOfDPPThisState, |
4686 | v->swath_width_luma_ub_this_state, |
4687 | v->swath_width_chroma_ub_this_state, |
4688 | v->SwathWidthYThisState, |
4689 | v->SwathWidthCThisState, |
4690 | v->SwathHeightYThisState, |
4691 | v->SwathHeightCThisState, |
4692 | v->DETBufferSizeYThisState, |
4693 | v->DETBufferSizeCThisState, |
4694 | v->dummystring, |
4695 | &v->ViewportSizeSupport[i][j]); |
4696 | |
4697 | CalculateDCFCLKDeepSleep( |
4698 | mode_lib, |
4699 | v->NumberOfActivePlanes, |
4700 | v->BytePerPixelY, |
4701 | v->BytePerPixelC, |
4702 | v->VRatio, |
4703 | v->VRatioChroma, |
4704 | v->SwathWidthYThisState, |
4705 | v->SwathWidthCThisState, |
4706 | v->NoOfDPPThisState, |
4707 | v->HRatio, |
4708 | v->HRatioChroma, |
4709 | v->PixelClock, |
4710 | v->PSCL_FACTOR, |
4711 | v->PSCL_FACTOR_CHROMA, |
4712 | v->RequiredDPPCLKThisState, |
4713 | v->ReadBandwidthLuma, |
4714 | v->ReadBandwidthChroma, |
4715 | v->ReturnBusWidth, |
4716 | &v->ProjectedDCFCLKDeepSleep[i][j]); |
4717 | |
4718 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4719 | v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; |
4720 | v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; |
4721 | v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; |
4722 | v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; |
4723 | v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; |
4724 | v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; |
4725 | v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; |
4726 | v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; |
4727 | } |
4728 | } |
4729 | } |
4730 | |
4731 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4732 | v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 |
4733 | / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; |
4734 | } |
4735 | |
4736 | for (i = 0; i < v->soc.num_states; i++) { |
4737 | for (j = 0; j < 2; j++) { |
4738 | bool_Bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX8]; |
4739 | |
4740 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4741 | v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; |
4742 | v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; |
4743 | v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; |
4744 | v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; |
4745 | v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; |
4746 | v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; |
4747 | v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; |
4748 | v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; |
4749 | } |
4750 | |
4751 | v->TotalNumberOfDCCActiveDPP[i][j] = 0; |
4752 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4753 | if (v->DCCEnable[k] == true1) { |
4754 | v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; |
4755 | } |
4756 | } |
4757 | |
4758 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4759 | if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 |
4760 | || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { |
4761 | |
4762 | if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) |
4763 | && v->SourceScan[k] != dm_vert) { |
4764 | v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) |
4765 | / 2; |
4766 | v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; |
4767 | } else { |
4768 | v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; |
4769 | v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; |
4770 | } |
4771 | |
4772 | v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( |
4773 | mode_lib, |
4774 | v->DCCEnable[k], |
4775 | v->Read256BlockHeightC[k], |
4776 | v->Read256BlockWidthC[k], |
4777 | v->SourcePixelFormat[k], |
4778 | v->SurfaceTiling[k], |
4779 | v->BytePerPixelC[k], |
4780 | v->SourceScan[k], |
4781 | v->SwathWidthCThisState[k], |
4782 | v->ViewportHeightChroma[k], |
4783 | v->GPUVMEnable, |
4784 | v->HostVMEnable, |
4785 | v->HostVMMaxNonCachedPageTableLevels, |
4786 | v->GPUVMMinPageSize, |
4787 | v->HostVMMinPageSize, |
4788 | v->PTEBufferSizeInRequestsForChroma, |
4789 | v->PitchC[k], |
4790 | 0.0, |
4791 | &v->MacroTileWidthC[k], |
4792 | &v->MetaRowBytesC, |
4793 | &v->DPTEBytesPerRowC, |
4794 | &v->PTEBufferSizeNotExceededC[i][j][k], |
4795 | &v->dummyinteger7, |
4796 | &v->dpte_row_height_chroma[k], |
4797 | &v->dummyinteger28, |
4798 | &v->dummyinteger26, |
4799 | &v->dummyinteger23, |
4800 | &v->meta_row_height_chroma[k], |
4801 | &v->dummyinteger8, |
4802 | &v->dummyinteger9, |
4803 | &v->dummyinteger19, |
4804 | &v->dummyinteger20, |
4805 | &v->dummyinteger17, |
4806 | &v->dummyinteger10, |
4807 | &v->dummyinteger11); |
4808 | |
4809 | v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( |
4810 | mode_lib, |
4811 | v->VRatioChroma[k], |
4812 | v->VTAPsChroma[k], |
4813 | v->Interlace[k], |
4814 | v->ProgressiveToInterlaceUnitInOPP, |
4815 | v->SwathHeightCThisState[k], |
4816 | v->ViewportYStartC[k], |
4817 | &v->PrefillC[k], |
4818 | &v->MaxNumSwC[k]); |
4819 | } else { |
4820 | v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; |
4821 | v->PTEBufferSizeInRequestsForChroma = 0; |
4822 | v->PDEAndMetaPTEBytesPerFrameC = 0.0; |
4823 | v->MetaRowBytesC = 0.0; |
4824 | v->DPTEBytesPerRowC = 0.0; |
4825 | v->PrefetchLinesC[i][j][k] = 0.0; |
4826 | v->PTEBufferSizeNotExceededC[i][j][k] = true1; |
4827 | } |
4828 | v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( |
4829 | mode_lib, |
4830 | v->DCCEnable[k], |
4831 | v->Read256BlockHeightY[k], |
4832 | v->Read256BlockWidthY[k], |
4833 | v->SourcePixelFormat[k], |
4834 | v->SurfaceTiling[k], |
4835 | v->BytePerPixelY[k], |
4836 | v->SourceScan[k], |
4837 | v->SwathWidthYThisState[k], |
4838 | v->ViewportHeight[k], |
4839 | v->GPUVMEnable, |
4840 | v->HostVMEnable, |
4841 | v->HostVMMaxNonCachedPageTableLevels, |
4842 | v->GPUVMMinPageSize, |
4843 | v->HostVMMinPageSize, |
4844 | v->PTEBufferSizeInRequestsForLuma, |
4845 | v->PitchY[k], |
4846 | v->DCCMetaPitchY[k], |
4847 | &v->MacroTileWidthY[k], |
4848 | &v->MetaRowBytesY, |
4849 | &v->DPTEBytesPerRowY, |
4850 | &v->PTEBufferSizeNotExceededY[i][j][k], |
4851 | &v->dummyinteger7, |
4852 | &v->dpte_row_height[k], |
4853 | &v->dummyinteger29, |
4854 | &v->dummyinteger27, |
4855 | &v->dummyinteger24, |
4856 | &v->meta_row_height[k], |
4857 | &v->dummyinteger25, |
4858 | &v->dpte_group_bytes[k], |
4859 | &v->dummyinteger21, |
4860 | &v->dummyinteger22, |
4861 | &v->dummyinteger18, |
4862 | &v->dummyinteger5, |
4863 | &v->dummyinteger6); |
4864 | v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( |
4865 | mode_lib, |
4866 | v->VRatio[k], |
4867 | v->vtaps[k], |
4868 | v->Interlace[k], |
4869 | v->ProgressiveToInterlaceUnitInOPP, |
4870 | v->SwathHeightYThisState[k], |
4871 | v->ViewportYStartY[k], |
4872 | &v->PrefillY[k], |
4873 | &v->MaxNumSwY[k]); |
4874 | v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; |
4875 | v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; |
4876 | v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; |
4877 | |
4878 | CalculateRowBandwidth( |
4879 | v->GPUVMEnable, |
4880 | v->SourcePixelFormat[k], |
4881 | v->VRatio[k], |
4882 | v->VRatioChroma[k], |
4883 | v->DCCEnable[k], |
4884 | v->HTotal[k] / v->PixelClock[k], |
4885 | v->MetaRowBytesY, |
4886 | v->MetaRowBytesC, |
4887 | v->meta_row_height[k], |
4888 | v->meta_row_height_chroma[k], |
4889 | v->DPTEBytesPerRowY, |
4890 | v->DPTEBytesPerRowC, |
4891 | v->dpte_row_height[k], |
4892 | v->dpte_row_height_chroma[k], |
4893 | &v->meta_row_bandwidth[i][j][k], |
4894 | &v->dpte_row_bandwidth[i][j][k]); |
4895 | } |
4896 | /*DCCMetaBufferSizeSupport(i, j) = True |
4897 | For k = 0 To NumberOfActivePlanes - 1 |
4898 | If MetaRowBytes(i, j, k) > 24064 Then |
4899 | DCCMetaBufferSizeSupport(i, j) = False |
4900 | End If |
4901 | Next k*/ |
4902 | v->DCCMetaBufferSizeSupport[i][j] = true1; |
4903 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4904 | if (v->MetaRowBytes[i][j][k] > 24064) |
4905 | v->DCCMetaBufferSizeSupport[i][j] = false0; |
4906 | } |
4907 | v->UrgLatency[i] = CalculateUrgentLatency( |
4908 | v->UrgentLatencyPixelDataOnly, |
4909 | v->UrgentLatencyPixelMixedWithVMData, |
4910 | v->UrgentLatencyVMDataOnly, |
4911 | v->DoUrgentLatencyAdjustment, |
4912 | v->UrgentLatencyAdjustmentFabricClockComponent, |
4913 | v->UrgentLatencyAdjustmentFabricClockReference, |
4914 | v->FabricClockPerState[i]); |
4915 | |
4916 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4917 | CalculateUrgentBurstFactor( |
4918 | v->swath_width_luma_ub_this_state[k], |
4919 | v->swath_width_chroma_ub_this_state[k], |
4920 | v->SwathHeightYThisState[k], |
4921 | v->SwathHeightCThisState[k], |
4922 | v->HTotal[k] / v->PixelClock[k], |
4923 | v->UrgLatency[i], |
4924 | v->CursorBufferSize, |
4925 | v->CursorWidth[k][0], |
4926 | v->CursorBPP[k][0], |
4927 | v->VRatio[k], |
4928 | v->VRatioChroma[k], |
4929 | v->BytePerPixelInDETY[k], |
4930 | v->BytePerPixelInDETC[k], |
4931 | v->DETBufferSizeYThisState[k], |
4932 | v->DETBufferSizeCThisState[k], |
4933 | &v->UrgentBurstFactorCursor[k], |
4934 | &v->UrgentBurstFactorLuma[k], |
4935 | &v->UrgentBurstFactorChroma[k], |
4936 | &NotUrgentLatencyHiding[k]); |
4937 | } |
4938 | |
4939 | v->NotEnoughUrgentLatencyHidingA[i][j] = false0; |
4940 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4941 | if (NotUrgentLatencyHiding[k]) { |
4942 | v->NotEnoughUrgentLatencyHidingA[i][j] = true1; |
4943 | } |
4944 | } |
4945 | |
4946 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4947 | v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] |
4948 | + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; |
4949 | v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; |
4950 | } |
4951 | |
4952 | v->TotalVActivePixelBandwidth[i][j] = 0; |
4953 | v->TotalVActiveCursorBandwidth[i][j] = 0; |
4954 | v->TotalMetaRowBandwidth[i][j] = 0; |
4955 | v->TotalDPTERowBandwidth[i][j] = 0; |
4956 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
4957 | v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; |
4958 | v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; |
4959 | v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; |
4960 | v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; |
4961 | } |
4962 | } |
4963 | } |
4964 | |
4965 | //Calculate Return BW |
4966 | for (i = 0; i < v->soc.num_states; ++i) { |
4967 | for (j = 0; j <= 1; ++j) { |
4968 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
4969 | if (v->BlendingAndTiming[k] == k) { |
4970 | if (v->WritebackEnable[k] == true1) { |
4971 | v->WritebackDelayTime[k] = v->WritebackLatency |
4972 | + CalculateWriteBackDelay( |
4973 | v->WritebackPixelFormat[k], |
4974 | v->WritebackHRatio[k], |
4975 | v->WritebackVRatio[k], |
4976 | v->WritebackVTaps[k], |
4977 | v->WritebackDestinationWidth[k], |
4978 | v->WritebackDestinationHeight[k], |
4979 | v->WritebackSourceHeight[k], |
4980 | v->HTotal[k]) / v->RequiredDISPCLK[i][j]; |
4981 | } else { |
4982 | v->WritebackDelayTime[k] = 0.0; |
4983 | } |
4984 | for (m = 0; m < v->NumberOfActivePlanes; m++) { |
4985 | if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true1) { |
4986 | v->WritebackDelayTime[k] = dml_max( |
4987 | v->WritebackDelayTime[k], |
4988 | v->WritebackLatency |
4989 | + CalculateWriteBackDelay( |
4990 | v->WritebackPixelFormat[m], |
4991 | v->WritebackHRatio[m], |
4992 | v->WritebackVRatio[m], |
4993 | v->WritebackVTaps[m], |
4994 | v->WritebackDestinationWidth[m], |
4995 | v->WritebackDestinationHeight[m], |
4996 | v->WritebackSourceHeight[m], |
4997 | v->HTotal[m]) / v->RequiredDISPCLK[i][j]); |
4998 | } |
4999 | } |
5000 | } |
5001 | } |
5002 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5003 | for (m = 0; m < v->NumberOfActivePlanes; m++) { |
5004 | if (v->BlendingAndTiming[k] == m) { |
5005 | v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; |
5006 | } |
5007 | } |
5008 | } |
5009 | v->MaxMaxVStartup[i][j] = 0; |
5010 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5011 | v->MaximumVStartup[i][j][k] = |
5012 | (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? |
5013 | dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : |
5014 | v->VTotal[k] - v->VActive[k] |
5015 | - dml_max( |
5016 | 1.0, |
5017 | dml_ceil( |
5018 | 1.0 * v->WritebackDelayTime[k] |
5019 | / (v->HTotal[k] |
5020 | / v->PixelClock[k]), |
5021 | 1.0)); |
5022 | if (v->MaximumVStartup[i][j][k] > 1023) |
5023 | v->MaximumVStartup[i][j][k] = 1023; |
5024 | v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); |
5025 | } |
5026 | } |
5027 | } |
5028 | |
5029 | ReorderingBytes = v->NumberOfChannels |
5030 | * dml_max3( |
5031 | v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, |
5032 | v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, |
5033 | v->UrgentOutOfOrderReturnPerChannelVMDataOnly); |
5034 | |
5035 | for (i = 0; i < v->soc.num_states; ++i) { |
5036 | for (j = 0; j <= 1; ++j) { |
5037 | v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; |
5038 | } |
5039 | } |
5040 | |
5041 | if (v->UseMinimumRequiredDCFCLK == true1) |
5042 | UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); |
5043 | |
5044 | for (i = 0; i < v->soc.num_states; ++i) { |
5045 | for (j = 0; j <= 1; ++j) { |
5046 | double IdealFabricAndSDPPortBandwidthPerState = dml_min( |
5047 | v->ReturnBusWidth * v->DCFCLKState[i][j], |
5048 | v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); |
5049 | double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; |
5050 | double PixelDataOnlyReturnBWPerState = dml_min( |
5051 | IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, |
5052 | IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); |
5053 | double PixelMixedWithVMDataReturnBWPerState = dml_min( |
5054 | IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, |
5055 | IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); |
5056 | |
5057 | if (v->HostVMEnable != true1) { |
5058 | v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; |
5059 | } else { |
5060 | v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; |
5061 | } |
5062 | } |
5063 | } |
5064 | |
5065 | //Re-ordering Buffer Support Check |
5066 | for (i = 0; i < v->soc.num_states; ++i) { |
5067 | for (j = 0; j <= 1; ++j) { |
5068 | if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] |
5069 | > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__(7 + 95)) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { |
5070 | v->ROBSupport[i][j] = true1; |
5071 | } else { |
5072 | v->ROBSupport[i][j] = false0; |
5073 | } |
5074 | } |
5075 | } |
5076 | |
5077 | //Vertical Active BW support check |
5078 | |
5079 | MaxTotalVActiveRDBandwidth = 0; |
5080 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5081 | MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; |
5082 | } |
5083 | |
5084 | for (i = 0; i < v->soc.num_states; ++i) { |
5085 | for (j = 0; j <= 1; ++j) { |
5086 | v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( |
5087 | dml_min( |
5088 | v->ReturnBusWidth * v->DCFCLKState[i][j], |
5089 | v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) |
5090 | * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, |
5091 | v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth |
5092 | * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); |
5093 | |
5094 | if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { |
5095 | v->TotalVerticalActiveBandwidthSupport[i][j] = true1; |
5096 | } else { |
5097 | v->TotalVerticalActiveBandwidthSupport[i][j] = false0; |
5098 | } |
5099 | } |
5100 | } |
5101 | |
5102 | v->UrgentLatency = CalculateUrgentLatency( |
5103 | v->UrgentLatencyPixelDataOnly, |
5104 | v->UrgentLatencyPixelMixedWithVMData, |
5105 | v->UrgentLatencyVMDataOnly, |
5106 | v->DoUrgentLatencyAdjustment, |
5107 | v->UrgentLatencyAdjustmentFabricClockComponent, |
5108 | v->UrgentLatencyAdjustmentFabricClockReference, |
5109 | v->FabricClock); |
5110 | //Prefetch Check |
5111 | for (i = 0; i < v->soc.num_states; ++i) { |
5112 | for (j = 0; j <= 1; ++j) { |
5113 | double VMDataOnlyReturnBWPerState; |
5114 | double HostVMInefficiencyFactor = 1; |
5115 | int NextPrefetchModeState = MinPrefetchMode; |
5116 | bool_Bool UnboundedRequestEnabledThisState = false0; |
5117 | int CompressedBufferSizeInkByteThisState = 0; |
5118 | double dummy; |
5119 | |
5120 | v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; |
5121 | |
5122 | v->BandwidthWithoutPrefetchSupported[i][j] = true1; |
5123 | if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] |
5124 | + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { |
5125 | v->BandwidthWithoutPrefetchSupported[i][j] = false0; |
5126 | } |
5127 | |
5128 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5129 | v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; |
5130 | v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; |
5131 | v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; |
5132 | v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; |
5133 | v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; |
5134 | v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; |
5135 | v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; |
5136 | v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; |
5137 | v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; |
5138 | } |
5139 | |
5140 | VMDataOnlyReturnBWPerState = dml_min( |
5141 | dml_min( |
5142 | v->ReturnBusWidth * v->DCFCLKState[i][j], |
5143 | v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) |
5144 | * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, |
5145 | v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth |
5146 | * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); |
5147 | if (v->GPUVMEnable && v->HostVMEnable) |
5148 | HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; |
5149 | |
5150 | v->ExtraLatency = CalculateExtraLatency( |
5151 | v->RoundTripPingLatencyCycles, |
5152 | ReorderingBytes, |
5153 | v->DCFCLKState[i][j], |
5154 | v->TotalNumberOfActiveDPP[i][j], |
5155 | v->PixelChunkSizeInKByte, |
5156 | v->TotalNumberOfDCCActiveDPP[i][j], |
5157 | v->MetaChunkSize, |
5158 | v->ReturnBWPerState[i][j], |
5159 | v->GPUVMEnable, |
5160 | v->HostVMEnable, |
5161 | v->NumberOfActivePlanes, |
5162 | v->NoOfDPPThisState, |
5163 | v->dpte_group_bytes, |
5164 | HostVMInefficiencyFactor, |
5165 | v->HostVMMinPageSize, |
5166 | v->HostVMMaxNonCachedPageTableLevels); |
5167 | |
5168 | v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; |
5169 | do { |
5170 | v->PrefetchModePerState[i][j] = NextPrefetchModeState; |
5171 | v->MaxVStartup = v->NextMaxVStartup; |
5172 | |
5173 | v->TWait = CalculateTWait( |
5174 | v->PrefetchModePerState[i][j], |
5175 | v->DRAMClockChangeLatency, |
5176 | v->UrgLatency[i], |
5177 | v->SREnterPlusExitTime); |
5178 | |
5179 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5180 | CalculatePrefetchSchedulePerPlane(mode_lib, |
5181 | HostVMInefficiencyFactor, |
5182 | i, j, k); |
5183 | } |
5184 | |
5185 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5186 | CalculateUrgentBurstFactor( |
5187 | v->swath_width_luma_ub_this_state[k], |
5188 | v->swath_width_chroma_ub_this_state[k], |
5189 | v->SwathHeightYThisState[k], |
5190 | v->SwathHeightCThisState[k], |
5191 | v->HTotal[k] / v->PixelClock[k], |
5192 | v->UrgentLatency, |
5193 | v->CursorBufferSize, |
5194 | v->CursorWidth[k][0], |
5195 | v->CursorBPP[k][0], |
5196 | v->VRatioPreY[i][j][k], |
5197 | v->VRatioPreC[i][j][k], |
5198 | v->BytePerPixelInDETY[k], |
5199 | v->BytePerPixelInDETC[k], |
5200 | v->DETBufferSizeYThisState[k], |
5201 | v->DETBufferSizeCThisState[k], |
5202 | &v->UrgentBurstFactorCursorPre[k], |
5203 | &v->UrgentBurstFactorLumaPre[k], |
5204 | &v->UrgentBurstFactorChromaPre[k], |
5205 | &v->NotUrgentLatencyHidingPre[k]); |
5206 | } |
5207 | |
5208 | v->MaximumReadBandwidthWithPrefetch = 0.0; |
5209 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5210 | v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 |
5211 | / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; |
5212 | |
5213 | v->MaximumReadBandwidthWithPrefetch = |
5214 | v->MaximumReadBandwidthWithPrefetch |
5215 | + dml_max3( |
5216 | v->VActivePixelBandwidth[i][j][k] |
5217 | + v->VActiveCursorBandwidth[i][j][k] |
5218 | + v->NoOfDPP[i][j][k] |
5219 | * (v->meta_row_bandwidth[i][j][k] |
5220 | + v->dpte_row_bandwidth[i][j][k]), |
5221 | v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], |
5222 | v->NoOfDPP[i][j][k] |
5223 | * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] |
5224 | * v->UrgentBurstFactorLumaPre[k] |
5225 | + v->RequiredPrefetchPixelDataBWChroma[i][j][k] |
5226 | * v->UrgentBurstFactorChromaPre[k]) |
5227 | + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); |
5228 | } |
5229 | |
5230 | v->NotEnoughUrgentLatencyHidingPre = false0; |
5231 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5232 | if (v->NotUrgentLatencyHidingPre[k] == true1) { |
5233 | v->NotEnoughUrgentLatencyHidingPre = true1; |
5234 | } |
5235 | } |
5236 | |
5237 | v->PrefetchSupported[i][j] = true1; |
5238 | if (v->BandwidthWithoutPrefetchSupported[i][j] == false0 || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] |
5239 | || v->NotEnoughUrgentLatencyHidingPre == 1) { |
5240 | v->PrefetchSupported[i][j] = false0; |
5241 | } |
5242 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5243 | if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 |
5244 | || v->NoTimeForPrefetch[i][j][k] == true1) { |
5245 | v->PrefetchSupported[i][j] = false0; |
5246 | } |
5247 | } |
5248 | |
5249 | v->DynamicMetadataSupported[i][j] = true1; |
5250 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5251 | if (v->NoTimeForDynamicMetadata[i][j][k] == true1) { |
5252 | v->DynamicMetadataSupported[i][j] = false0; |
5253 | } |
5254 | } |
5255 | |
5256 | v->VRatioInPrefetchSupported[i][j] = true1; |
5257 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5258 | if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true1) { |
5259 | v->VRatioInPrefetchSupported[i][j] = false0; |
5260 | } |
5261 | } |
5262 | v->AnyLinesForVMOrRowTooLarge = false0; |
5263 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5264 | if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { |
5265 | v->AnyLinesForVMOrRowTooLarge = true1; |
5266 | } |
5267 | } |
5268 | |
5269 | v->NextPrefetchMode = v->NextPrefetchMode + 1; |
5270 | |
5271 | if (v->PrefetchSupported[i][j] == true1 && v->VRatioInPrefetchSupported[i][j] == true1) { |
5272 | v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; |
5273 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5274 | v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip |
5275 | - dml_max( |
5276 | v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], |
5277 | v->NoOfDPP[i][j][k] |
5278 | * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] |
5279 | * v->UrgentBurstFactorLumaPre[k] |
5280 | + v->RequiredPrefetchPixelDataBWChroma[i][j][k] |
5281 | * v->UrgentBurstFactorChromaPre[k]) |
5282 | + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); |
5283 | } |
5284 | v->TotImmediateFlipBytes = 0.0; |
5285 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5286 | v->TotImmediateFlipBytes = v->TotImmediateFlipBytes |
5287 | + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] |
5288 | + v->DPTEBytesPerRow[i][j][k]; |
5289 | } |
5290 | |
5291 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5292 | CalculateFlipSchedule( |
5293 | mode_lib, |
5294 | k, |
5295 | HostVMInefficiencyFactor, |
5296 | v->ExtraLatency, |
5297 | v->UrgLatency[i], |
5298 | v->PDEAndMetaPTEBytesPerFrame[i][j][k], |
5299 | v->MetaRowBytes[i][j][k], |
5300 | v->DPTEBytesPerRow[i][j][k]); |
5301 | } |
5302 | v->total_dcn_read_bw_with_flip = 0.0; |
5303 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5304 | v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip |
5305 | + dml_max3( |
5306 | v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], |
5307 | v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] |
5308 | + v->VActiveCursorBandwidth[i][j][k], |
5309 | v->NoOfDPP[i][j][k] |
5310 | * (v->final_flip_bw[k] |
5311 | + v->RequiredPrefetchPixelDataBWLuma[i][j][k] |
5312 | * v->UrgentBurstFactorLumaPre[k] |
5313 | + v->RequiredPrefetchPixelDataBWChroma[i][j][k] |
5314 | * v->UrgentBurstFactorChromaPre[k]) |
5315 | + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); |
5316 | } |
5317 | v->ImmediateFlipSupportedForState[i][j] = true1; |
5318 | if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { |
5319 | v->ImmediateFlipSupportedForState[i][j] = false0; |
5320 | } |
5321 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5322 | if (v->ImmediateFlipSupportedForPipe[k] == false0) { |
5323 | v->ImmediateFlipSupportedForState[i][j] = false0; |
5324 | } |
5325 | } |
5326 | } else { |
5327 | v->ImmediateFlipSupportedForState[i][j] = false0; |
5328 | } |
5329 | |
5330 | if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__9 || v->AnyLinesForVMOrRowTooLarge == false0) { |
5331 | v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; |
5332 | NextPrefetchModeState = NextPrefetchModeState + 1; |
5333 | } else { |
5334 | v->NextMaxVStartup = v->NextMaxVStartup - 1; |
5335 | } |
5336 | v->NextPrefetchMode = v->NextPrefetchMode + 1; |
5337 | } while (!((v->PrefetchSupported[i][j] == true1 && v->DynamicMetadataSupported[i][j] == true1 && v->VRatioInPrefetchSupported[i][j] == true1 |
5338 | && ((v->HostVMEnable == false0 && |
5339 | v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) |
5340 | || v->ImmediateFlipSupportedForState[i][j] == true1)) |
5341 | || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); |
5342 | |
5343 | CalculateUnboundedRequestAndCompressedBufferSize( |
5344 | v->DETBufferSizeInKByte[0], |
5345 | v->ConfigReturnBufferSizeInKByte, |
5346 | v->UseUnboundedRequesting, |
5347 | v->TotalNumberOfActiveDPP[i][j], |
5348 | NoChroma, |
5349 | v->MaxNumDPP, |
5350 | v->CompressedBufferSegmentSizeInkByte, |
5351 | v->Output, |
5352 | &UnboundedRequestEnabledThisState, |
5353 | &CompressedBufferSizeInkByteThisState); |
5354 | |
5355 | CalculateWatermarksAndDRAMSpeedChangeSupport( |
5356 | mode_lib, |
5357 | v->PrefetchModePerState[i][j], |
5358 | v->DCFCLKState[i][j], |
5359 | v->ReturnBWPerState[i][j], |
5360 | v->UrgLatency[i], |
5361 | v->ExtraLatency, |
5362 | v->SOCCLKPerState[i], |
5363 | v->ProjectedDCFCLKDeepSleep[i][j], |
5364 | v->DETBufferSizeYThisState, |
5365 | v->DETBufferSizeCThisState, |
5366 | v->SwathHeightYThisState, |
5367 | v->SwathHeightCThisState, |
5368 | v->SwathWidthYThisState, |
5369 | v->SwathWidthCThisState, |
5370 | v->NoOfDPPThisState, |
5371 | v->BytePerPixelInDETY, |
5372 | v->BytePerPixelInDETC, |
5373 | UnboundedRequestEnabledThisState, |
5374 | CompressedBufferSizeInkByteThisState, |
5375 | &v->DRAMClockChangeSupport[i][j], |
5376 | &dummy, |
5377 | &dummy, |
5378 | &dummy, |
5379 | &dummy); |
5380 | } |
5381 | } |
5382 | |
5383 | /*PTE Buffer Size Check*/ |
5384 | for (i = 0; i < v->soc.num_states; i++) { |
5385 | for (j = 0; j < 2; j++) { |
5386 | v->PTEBufferSizeNotExceeded[i][j] = true1; |
5387 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5388 | if (v->PTEBufferSizeNotExceededY[i][j][k] == false0 || v->PTEBufferSizeNotExceededC[i][j][k] == false0) { |
5389 | v->PTEBufferSizeNotExceeded[i][j] = false0; |
5390 | } |
5391 | } |
5392 | } |
5393 | } |
5394 | |
5395 | /*Cursor Support Check*/ |
5396 | v->CursorSupport = true1; |
5397 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5398 | if (v->CursorWidth[k][0] > 0.0) { |
5399 | if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false0) { |
5400 | v->CursorSupport = false0; |
5401 | } |
5402 | } |
5403 | } |
5404 | |
5405 | /*Valid Pitch Check*/ |
5406 | v->PitchSupport = true1; |
5407 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5408 | v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); |
5409 | if (v->DCCEnable[k] == true1) { |
5410 | v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); |
5411 | } else { |
5412 | v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; |
5413 | } |
5414 | if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 |
5415 | && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe |
5416 | && v->SourcePixelFormat[k] != dm_mono_8) { |
5417 | v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); |
5418 | if (v->DCCEnable[k] == true1) { |
5419 | v->AlignedDCCMetaPitchC[k] = dml_ceil( |
5420 | dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), |
5421 | 64.0 * v->Read256BlockWidthC[k]); |
5422 | } else { |
5423 | v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; |
5424 | } |
5425 | } else { |
5426 | v->AlignedCPitch[k] = v->PitchC[k]; |
5427 | v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; |
5428 | } |
5429 | if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] |
5430 | || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { |
5431 | v->PitchSupport = false0; |
5432 | } |
5433 | } |
5434 | |
5435 | for (k = 0; k < v->NumberOfActivePlanes; k++) { |
5436 | if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { |
5437 | ViewportExceedsSurface = true1; |
5438 | if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 |
5439 | && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 |
5440 | && v->SourcePixelFormat[k] != dm_rgbe) { |
5441 | if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] |
5442 | || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { |
5443 | ViewportExceedsSurface = true1; |
5444 | } |
5445 | } |
5446 | } |
5447 | } |
5448 | |
5449 | /*Mode Support, Voltage State and SOC Configuration*/ |
5450 | for (i = v->soc.num_states - 1; i >= 0; i--) { |
5451 | for (j = 0; j < 2; j++) { |
5452 | if (v->ScaleRatioAndTapsSupport == true1 && v->SourceFormatPixelAndScanSupport == true1 && v->ViewportSizeSupport[i][j] == true1 |
5453 | && v->LinkCapacitySupport[i] == true1 && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP |
5454 | && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true1 && v->NotEnoughDSCUnits[i] == false0 |
5455 | && v->DTBCLKRequiredMoreThanSupported[i] == false0 |
5456 | && v->ROBSupport[i][j] == true1 && v->DISPCLK_DPPCLK_Support[i][j] == true1 |
5457 | && v->TotalAvailablePipesSupport[i][j] == true1 && EnoughWritebackUnits == true1 |
5458 | && v->WritebackLatencySupport == true1 && v->WritebackScaleRatioAndTapsSupport == true1 |
5459 | && v->CursorSupport == true1 && v->PitchSupport == true1 && ViewportExceedsSurface == false0 |
5460 | && v->PrefetchSupported[i][j] == true1 && v->DynamicMetadataSupported[i][j] == true1 |
5461 | && v->TotalVerticalActiveBandwidthSupport[i][j] == true1 && v->VRatioInPrefetchSupported[i][j] == true1 |
5462 | && v->PTEBufferSizeNotExceeded[i][j] == true1 && v->NonsupportedDSCInputBPC == false0 |
5463 | && ((v->HostVMEnable == false0 |
5464 | && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) |
5465 | || v->ImmediateFlipSupportedForState[i][j] == true1) |
5466 | && FMTBufferExceeded == false0) { |
5467 | v->ModeSupport[i][j] = true1; |
5468 | } else { |
5469 | v->ModeSupport[i][j] = false0; |
5470 | } |
5471 | } |
5472 | } |
5473 | |
5474 | { |
5475 | unsigned int MaximumMPCCombine = 0; |
5476 | for (i = v->soc.num_states; i >= 0; i--) { |
5477 | if (i == v->soc.num_states || v->ModeSupport[i][0] == true1 || v->ModeSupport[i][1] == true1) { |
5478 | v->VoltageLevel = i; |
5479 | v->ModeIsSupported = v->ModeSupport[i][0] == true1 || v->ModeSupport[i][1] == true1; |
5480 | if (v->ModeSupport[i][0] == true1) { |
5481 | MaximumMPCCombine = 0; |
5482 | } else { |
5483 | MaximumMPCCombine = 1; |
5484 | } |
5485 | } |
5486 | } |
5487 | v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; |
5488 | for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { |
5489 | v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; |
5490 | v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; |
5491 | } |
5492 | v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; |
5493 | v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; |
5494 | v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; |
5495 | v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; |
5496 | v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; |
5497 | v->maxMpcComb = MaximumMPCCombine; |
5498 | } |
5499 | } |
5500 | |
5501 | static void CalculateWatermarksAndDRAMSpeedChangeSupport( |
5502 | struct display_mode_lib *mode_lib, |
5503 | unsigned int PrefetchMode, |
5504 | double DCFCLK, |
5505 | double ReturnBW, |
5506 | double UrgentLatency, |
5507 | double ExtraLatency, |
5508 | double SOCCLK, |
5509 | double DCFCLKDeepSleep, |
5510 | unsigned int DETBufferSizeY[], |
5511 | unsigned int DETBufferSizeC[], |
5512 | unsigned int SwathHeightY[], |
5513 | unsigned int SwathHeightC[], |
5514 | double SwathWidthY[], |
5515 | double SwathWidthC[], |
5516 | unsigned int DPPPerPlane[], |
5517 | double BytePerPixelDETY[], |
5518 | double BytePerPixelDETC[], |
5519 | bool_Bool UnboundedRequestEnabled, |
5520 | int unsigned CompressedBufferSizeInkByte, |
5521 | enum clock_change_support *DRAMClockChangeSupport, |
5522 | double *StutterExitWatermark, |
5523 | double *StutterEnterPlusExitWatermark, |
5524 | double *Z8StutterExitWatermark, |
5525 | double *Z8StutterEnterPlusExitWatermark) |
5526 | { |
5527 | struct vba_vars_st *v = &mode_lib->vba; |
5528 | double EffectiveLBLatencyHidingY; |
5529 | double EffectiveLBLatencyHidingC; |
5530 | double LinesInDETY[DC__NUM_DPP__MAX8]; |
5531 | double LinesInDETC; |
5532 | unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX8]; |
5533 | unsigned int LinesInDETCRoundedDownToSwath; |
5534 | double FullDETBufferingTimeY; |
5535 | double FullDETBufferingTimeC; |
5536 | double ActiveDRAMClockChangeLatencyMarginY; |
5537 | double ActiveDRAMClockChangeLatencyMarginC; |
5538 | double WritebackDRAMClockChangeLatencyMargin; |
5539 | double PlaneWithMinActiveDRAMClockChangeMargin; |
5540 | double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; |
5541 | double WritebackDRAMClockChangeLatencyHiding; |
5542 | double TotalPixelBW = 0.0; |
5543 | int k, j; |
5544 | |
5545 | v->UrgentWatermark = UrgentLatency + ExtraLatency; |
5546 | |
5547 | #ifdef __DML_VBA_DEBUG__ |
5548 | dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency){do { } while(0); }; |
5549 | dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency){do { } while(0); }; |
5550 | dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark){do { } while(0); }; |
5551 | #endif |
5552 | |
5553 | v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; |
5554 | |
5555 | #ifdef __DML_VBA_DEBUG__ |
5556 | dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency){do { } while(0); }; |
5557 | dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark){do { } while(0); }; |
5558 | #endif |
5559 | |
5560 | v->TotalActiveWriteback = 0; |
5561 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5562 | if (v->WritebackEnable[k] == true1) { |
5563 | v->TotalActiveWriteback = v->TotalActiveWriteback + 1; |
5564 | } |
5565 | } |
5566 | |
5567 | if (v->TotalActiveWriteback <= 1) { |
5568 | v->WritebackUrgentWatermark = v->WritebackLatency; |
5569 | } else { |
5570 | v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; |
5571 | } |
5572 | |
5573 | if (v->TotalActiveWriteback <= 1) { |
5574 | v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; |
5575 | } else { |
5576 | v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; |
5577 | } |
5578 | |
5579 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5580 | TotalPixelBW = TotalPixelBW |
5581 | + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) |
5582 | / (v->HTotal[k] / v->PixelClock[k]); |
5583 | } |
5584 | |
5585 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5586 | double EffectiveDETBufferSizeY = DETBufferSizeY[k]; |
5587 | |
5588 | v->LBLatencyHidingSourceLinesY = dml_min( |
5589 | (double) v->MaxLineBufferLines, |
5590 | dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); |
5591 | |
5592 | v->LBLatencyHidingSourceLinesC = dml_min( |
5593 | (double) v->MaxLineBufferLines, |
5594 | dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); |
5595 | |
5596 | EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); |
5597 | |
5598 | EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); |
5599 | |
5600 | if (UnboundedRequestEnabled) { |
5601 | EffectiveDETBufferSizeY = EffectiveDETBufferSizeY |
5602 | + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; |
5603 | } |
5604 | |
5605 | LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; |
5606 | LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); |
5607 | FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; |
5608 | if (BytePerPixelDETC[k] > 0) { |
5609 | LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; |
5610 | LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); |
5611 | FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; |
5612 | } else { |
5613 | LinesInDETC = 0; |
Value stored to 'LinesInDETC' is never read | |
5614 | FullDETBufferingTimeC = 999999; |
5615 | } |
5616 | |
5617 | ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY |
5618 | - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; |
5619 | |
5620 | if (v->NumberOfActivePlanes > 1) { |
5621 | ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY |
5622 | - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; |
5623 | } |
5624 | |
5625 | if (BytePerPixelDETC[k] > 0) { |
5626 | ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC |
5627 | - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; |
5628 | |
5629 | if (v->NumberOfActivePlanes > 1) { |
5630 | ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC |
5631 | - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; |
5632 | } |
5633 | v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); |
5634 | } else { |
5635 | v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; |
5636 | } |
5637 | |
5638 | if (v->WritebackEnable[k] == true1) { |
5639 | WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 |
5640 | / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); |
5641 | if (v->WritebackPixelFormat[k] == dm_444_64) { |
5642 | WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; |
5643 | } |
5644 | WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; |
5645 | v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); |
5646 | } |
5647 | } |
5648 | |
5649 | v->MinActiveDRAMClockChangeMargin = 999999; |
5650 | PlaneWithMinActiveDRAMClockChangeMargin = 0; |
5651 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5652 | if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { |
5653 | v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; |
5654 | if (v->BlendingAndTiming[k] == k) { |
5655 | PlaneWithMinActiveDRAMClockChangeMargin = k; |
5656 | } else { |
5657 | for (j = 0; j < v->NumberOfActivePlanes; ++j) { |
5658 | if (v->BlendingAndTiming[k] == j) { |
5659 | PlaneWithMinActiveDRAMClockChangeMargin = j; |
5660 | } |
5661 | } |
5662 | } |
5663 | } |
5664 | } |
5665 | |
5666 | v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; |
5667 | |
5668 | SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; |
5669 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5670 | if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) |
5671 | && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { |
5672 | SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; |
5673 | } |
5674 | } |
5675 | |
5676 | v->TotalNumberOfActiveOTG = 0; |
5677 | |
5678 | for (k = 0; k < v->NumberOfActivePlanes; ++k) { |
5679 | if (v->BlendingAndTiming[k] == k) { |
5680 | v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; |
5681 | } |
5682 | } |
5683 | |
5684 | if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { |
5685 | *DRAMClockChangeSupport = dm_dram_clock_change_vactive; |
5686 | } else if ((v->SynchronizedVBlank == true1 || v->TotalNumberOfActiveOTG == 1 |
5687 | || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { |
5688 | *DRAMClockChangeSupport = dm_dram_clock_change_vblank; |
5689 | } else { |
5690 | *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; |
5691 | } |
5692 | |
5693 | *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; |
5694 | *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); |
5695 | *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; |
5696 | *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; |
5697 | |
5698 | #ifdef __DML_VBA_DEBUG__ |
5699 | dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark){do { } while(0); }; |
5700 | dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark){do { } while(0); }; |
5701 | dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark){do { } while(0); }; |
5702 | dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark){do { } while(0); }; |
5703 | #endif |
5704 | } |
5705 | |
5706 | static void CalculateDCFCLKDeepSleep( |
5707 | struct display_mode_lib *mode_lib, |
5708 | unsigned int NumberOfActivePlanes, |
5709 | int BytePerPixelY[], |
5710 | int BytePerPixelC[], |
5711 | double VRatio[], |
5712 | double VRatioChroma[], |
5713 | double SwathWidthY[], |
5714 | double SwathWidthC[], |
5715 | unsigned int DPPPerPlane[], |
5716 | double HRatio[], |
5717 | double HRatioChroma[], |
5718 | double PixelClock[], |
5719 | double PSCL_THROUGHPUT[], |
5720 | double PSCL_THROUGHPUT_CHROMA[], |
5721 | double DPPCLK[], |
5722 | double ReadBandwidthLuma[], |
5723 | double ReadBandwidthChroma[], |
5724 | int ReturnBusWidth, |
5725 | double *DCFCLKDeepSleep) |
5726 | { |
5727 | struct vba_vars_st *v = &mode_lib->vba; |
5728 | double DisplayPipeLineDeliveryTimeLuma; |
5729 | double DisplayPipeLineDeliveryTimeChroma; |
5730 | double ReadBandwidth = 0.0; |
5731 | int k; |
5732 | |
5733 | for (k = 0; k < NumberOfActivePlanes; ++k) { |
5734 | |
5735 | if (VRatio[k] <= 1) { |
5736 | DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; |
5737 | } else { |
5738 | DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; |
5739 | } |
5740 | if (BytePerPixelC[k] == 0) { |
5741 | DisplayPipeLineDeliveryTimeChroma = 0; |
5742 | } else { |
5743 | if (VRatioChroma[k] <= 1) { |
5744 | DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; |
5745 | } else { |
5746 | DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; |
5747 | } |
5748 | } |
5749 | |
5750 | if (BytePerPixelC[k] > 0) { |
5751 | v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__1.15 * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, |
5752 | __DML_MIN_DCFCLK_FACTOR__1.15 * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); |
5753 | } else { |
5754 | v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__1.15 * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; |
5755 | } |
5756 | v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); |
5757 | |
5758 | } |
5759 | |
5760 | for (k = 0; k < NumberOfActivePlanes; ++k) { |
5761 | ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; |
5762 | } |
5763 | |
5764 | *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__1.15 * ReadBandwidth / ReturnBusWidth); |
5765 | |
5766 | for (k = 0; k < NumberOfActivePlanes; ++k) { |
5767 | *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); |
5768 | } |
5769 | } |
5770 | |
/*
 * CalculateUrgentBurstFactor() - compute the urgent burst factors for the
 * cursor, luma and chroma of one plane.
 *
 * For each of the three buffers the amount of time it can cover is computed
 * as (lines held) * @LineTime / (vertical ratio). The burst factor is then
 * time / (time - @UrgentLatency). If the buffer cannot cover @UrgentLatency
 * the factor is set to 0 and @NotEnoughUrgentLatencyHiding is set to true.
 * A vertical ratio that is not positive yields a factor of 1.
 *
 * @VRatio is used for the cursor and luma, @VRatioC for chroma.
 *
 * Outputs:
 *   @UrgentBurstFactorCursor: written only when @CursorWidth > 0; otherwise
 *       the pointed-to value is left untouched.
 *   @UrgentBurstFactorLuma: always written.
 *   @UrgentBurstFactorChroma: written only when @BytePerPixelInDETC > 0;
 *       otherwise the pointed-to value is left untouched.
 *   @NotEnoughUrgentLatencyHiding: always written; false unless one of the
 *       evaluated buffers cannot hide @UrgentLatency.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/* Whole cursor lines that fit in the cursor buffer, rounded down to a power of two. */
		unsigned int LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (VRatio > 0) {
			double CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;

			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		double DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;

		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		double LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;

		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so the
		 * chroma buffer time is derived from VRatioC rather than from the
		 * luma ratio.
		 */
		if (VRatioC > 0) {
			double DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;

			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5844 | |
5845 | static void CalculatePixelDeliveryTimes( |
5846 | unsigned int NumberOfActivePlanes, |
5847 | double VRatio[], |
5848 | double VRatioChroma[], |
5849 | double VRatioPrefetchY[], |
5850 | double VRatioPrefetchC[], |
5851 | unsigned int swath_width_luma_ub[], |
5852 | unsigned int swath_width_chroma_ub[], |
5853 | unsigned int DPPPerPlane[], |
5854 | double HRatio[], |
5855 | double HRatioChroma[], |
5856 | double PixelClock[], |
5857 | double PSCL_THROUGHPUT[], |
5858 | double PSCL_THROUGHPUT_CHROMA[], |
5859 | double DPPCLK[], |
5860 | int BytePerPixelC[], |
5861 | enum scan_direction_class SourceScan[], |
5862 | unsigned int NumberOfCursors[], |
5863 | unsigned int CursorWidth[][DC__NUM_CURSOR__MAX2], |
5864 | unsigned int CursorBPP[][DC__NUM_CURSOR__MAX2], |
5865 | unsigned int BlockWidth256BytesY[], |
5866 | unsigned int BlockHeight256BytesY[], |
5867 | unsigned int BlockWidth256BytesC[], |
5868 | unsigned int BlockHeight256BytesC[], |
5869 | double DisplayPipeLineDeliveryTimeLuma[], |
5870 | double DisplayPipeLineDeliveryTimeChroma[], |
5871 | double DisplayPipeLineDeliveryTimeLumaPrefetch[], |
5872 | double DisplayPipeLineDeliveryTimeChromaPrefetch[], |
5873 | double DisplayPipeRequestDeliveryTimeLuma[], |
5874 | double DisplayPipeRequestDeliveryTimeChroma[], |
5875 | double DisplayPipeRequestDeliveryTimeLumaPrefetch[], |
5876 | double DisplayPipeRequestDeliveryTimeChromaPrefetch[], |
5877 | double CursorRequestDeliveryTime[], |
5878 | double CursorRequestDeliveryTimePrefetch[]) |
5879 | { |
5880 | double req_per_swath_ub; |
5881 | int k; |
5882 | |
5883 | for (k = 0; k < NumberOfActivePlanes; ++k) { |
5884 | if (VRatio[k] <= 1) { |
5885 | DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; |
5886 | } else { |
5887 | DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; |
5888 | } |
5889 | |
5890 | if (BytePerPixelC[k] == 0) { |
5891 | DisplayPipeLineDeliveryTimeChroma[k] = 0; |
5892 | } else { |
5893 | if (VRatioChroma[k] <= 1) { |
5894 | DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; |
5895 | } else { |
5896 | DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; |
5897 | } |
5898 | } |
5899 | |
5900 | if (VRatioPrefetchY[k] <= 1) { |
5901 | DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; |
5902 | } else { |
5903 | DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; |
5904 | } |
5905 | |
5906 | if (BytePerPixelC[k] == 0) { |
5907 | DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; |
5908 | } else { |
5909 | if (VRatioPrefetchC[k] <= 1) { |
5910 | DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; |
5911 | } else { |
5912 | DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; |
5913 | } |
5914 | } |
5915 | } |
5916 | |
5917 | for (k = 0; k < NumberOfActivePlanes; ++k) { |
5918 | if (SourceScan[k] != dm_vert) { |
5919 | req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; |
5920 | } else { |
5921 | req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; |
5922 | } |
5923 | DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; |
5924 | DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; |
5925 | if (BytePerPixelC[k] == 0) { |
5926 | DisplayPipeRequestDeliveryTimeChroma[k] = 0; |
5927 | DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; |
5928 | } else { |
5929 | if (SourceScan[k] != dm_vert) { |
5930 | req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; |
5931 | } else { |
5932 | req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; |
5933 | } |
5934 | DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; |
5935 | DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; |
5936 | } |
5937 | #ifdef __DML_VBA_DEBUG__ |
5938 | dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]){do { } while(0); }; |
5939 | dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]){do { } while(0); }; |
5940 | dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]){do { } while(0); }; |
5941 | dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]){do { } while(0); }; |
5942 | dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]){do { } while(0); }; |
5943 | dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]){do { } while(0); }; |
5944 | dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]){do { } while(0); }; |
5945 | dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]){do { } while(0); }; |
5946 | dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]){do { } while(0); }; |
5947 | dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]){do { } while(0); }; |
5948 | dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]){do { } while(0); }; |
5949 | dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]){do { } while(0); }; |
5950 | #endif |
5951 | } |
5952 | |
5953 | for (k = 0; k < NumberOfActivePlanes; ++k) { |
5954 | int cursor_req_per_width; |
5955 | cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); |
5956 | if (NumberOfCursors[k] > 0) { |
5957 | if (VRatio[k] <= 1) { |
5958 | CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; |
5959 | } else { |
5960 | CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; |
5961 | } |
5962 | if (VRatioPrefetchY[k] <= 1) { |
5963 | CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; |
5964 | } else { |
5965 | CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; |
5966 | } |
5967 | } else { |
5968 | CursorRequestDeliveryTime[k] = 0; |
5969 | CursorRequestDeliveryTimePrefetch[k] = 0; |
5970 | } |
5971 | #ifdef __DML_VBA_DEBUG__ |
5972 | dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]){do { } while(0); }; |
5973 | dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]){do { } while(0); }; |
5974 | dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]){do { } while(0); }; |
5975 | #endif |
5976 | } |
5977 | } |
5978 | |
5979 | static void CalculateMetaAndPTETimes( |
5980 | int NumberOfActivePlanes, |
5981 | bool_Bool GPUVMEnable, |
5982 | int MetaChunkSize, |
5983 | int MinMetaChunkSizeBytes, |
5984 | int HTotal[], |
5985 | double VRatio[], |
5986 | double VRatioChroma[], |
5987 | double DestinationLinesToRequestRowInVBlank[], |
5988 | double DestinationLinesToRequestRowInImmediateFlip[], |
5989 | bool_Bool DCCEnable[], |
5990 | double PixelClock[], |
5991 | int BytePerPixelY[], |
5992 | int BytePerPixelC[], |
5993 | enum scan_direction_class SourceScan[], |
5994 | int dpte_row_height[], |
5995 | int dpte_row_height_chroma[], |
5996 | int meta_row_width[], |
5997 | int meta_row_width_chroma[], |
5998 | int meta_row_height[], |
5999 | int meta_row_height_chroma[], |
6000 | int meta_req_width[], |
6001 | int meta_req_width_chroma[], |
6002 | int meta_req_height[], |
6003 | int meta_req_height_chroma[], |
6004 | int dpte_group_bytes[], |
6005 | int PTERequestSizeY[], |
6006 | int PTERequestSizeC[], |
6007 | int PixelPTEReqWidthY[], |
6008 | int PixelPTEReqHeightY[], |
6009 | int PixelPTEReqWidthC[], |
6010 | int PixelPTEReqHeightC[], |
6011 | int dpte_row_width_luma_ub[], |
6012 | int dpte_row_width_chroma_ub[], |
6013 | double DST_Y_PER_PTE_ROW_NOM_L[], |
6014 | double DST_Y_PER_PTE_ROW_NOM_C[], |
6015 | double DST_Y_PER_META_ROW_NOM_L[], |
6016 | double DST_Y_PER_META_ROW_NOM_C[], |
6017 | double TimePerMetaChunkNominal[], |
6018 | double TimePerChromaMetaChunkNominal[], |
6019 | double TimePerMetaChunkVBlank[], |
6020 | double TimePerChromaMetaChunkVBlank[], |
6021 | double TimePerMetaChunkFlip[], |
6022 | double TimePerChromaMetaChunkFlip[], |
6023 | double time_per_pte_group_nom_luma[], |
6024 | double time_per_pte_group_vblank_luma[], |
6025 | double time_per_pte_group_flip_luma[], |
6026 | double time_per_pte_group_nom_chroma[], |
6027 | double time_per_pte_group_vblank_chroma[], |
6028 | double time_per_pte_group_flip_chroma[]) |
6029 | { |
6030 | unsigned int meta_chunk_width; |
6031 | unsigned int min_meta_chunk_width; |
6032 | unsigned int meta_chunk_per_row_int; |
6033 | unsigned int meta_row_remainder; |
6034 | unsigned int meta_chunk_threshold; |
6035 | unsigned int meta_chunks_per_row_ub; |
6036 | unsigned int meta_chunk_width_chroma; |
6037 | unsigned int min_meta_chunk_width_chroma; |