File: | dev/pci/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c |
Warning: | line 5508, column 44 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | |||
2 | * Copyright 2017 Advanced Micro Devices, Inc. | |||
3 | * | |||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |||
5 | * copy of this software and associated documentation files (the "Software"), | |||
6 | * to deal in the Software without restriction, including without limitation | |||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |||
8 | * and/or sell copies of the Software, and to permit persons to whom the | |||
9 | * Software is furnished to do so, subject to the following conditions: | |||
10 | * | |||
11 | * The above copyright notice and this permission notice shall be included in | |||
12 | * all copies or substantial portions of the Software. | |||
13 | * | |||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |||
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |||
20 | * OTHER DEALINGS IN THE SOFTWARE. | |||
21 | * | |||
22 | * Authors: AMD | |||
23 | * | |||
24 | */ | |||
25 | ||||
26 | ||||
27 | #include "../display_mode_lib.h" | |||
28 | #include "../dml_inline_defs.h" | |||
29 | #include "../display_mode_vba.h" | |||
30 | #include "display_mode_vba_21.h" | |||
31 | ||||
32 | ||||
33 | /* | |||
34 | * NOTE: | |||
35 | * This file is gcc-parsable HW gospel, coming straight from HW engineers. | |||
36 | * | |||
37 | * It doesn't adhere to Linux kernel style and sometimes will do things in odd | |||
38 | * ways. Unless there is something clearly wrong with it the code should | |||
39 | * remain as-is as it provides us with a guarantee from HW that it is correct. | |||
40 | */ | |||
41 | typedef struct { | |||
42 | double DPPCLK; | |||
43 | double DISPCLK; | |||
44 | double PixelClock; | |||
45 | double DCFCLKDeepSleep; | |||
46 | unsigned int DPPPerPlane; | |||
47 | bool_Bool ScalerEnabled; | |||
48 | enum scan_direction_class SourceScan; | |||
49 | unsigned int BlockWidth256BytesY; | |||
50 | unsigned int BlockHeight256BytesY; | |||
51 | unsigned int BlockWidth256BytesC; | |||
52 | unsigned int BlockHeight256BytesC; | |||
53 | unsigned int InterlaceEnable; | |||
54 | unsigned int NumberOfCursors; | |||
55 | unsigned int VBlank; | |||
56 | unsigned int HTotal; | |||
57 | } Pipe; | |||
58 | ||||
/*
 * Host VM settings handed to CalculatePrefetchSchedule() as `HostVM *myHostVM`.
 *
 * When both GPUVM and Enable are true, the prefetch schedule takes
 * MaxPageTableLevels - CachedPageTableLevels as the number of dynamic
 * host-VM levels; otherwise it uses 0.
 */
typedef struct {
	bool Enable;
	unsigned int MaxPageTableLevels;
	unsigned int CachedPageTableLevels;
} HostVM;
64 | ||||
/*
 * Sentinel bits-per-pixel values.
 * NOTE(review): their users are outside this part of the file -- the names
 * suggest "no valid bpp" and "pipe is blended into another"; confirm.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
/*
 * Image width limits; they match the slice-width bounds quoted in
 * dscceComputeDelay() (5184/numSlices, or 4096/numSlices in 420 mode).
 */
#define DCN21_MAX_DSC_IMAGE_WIDTH 5184
#define DCN21_MAX_420_IMAGE_WIDTH 4096
69 | ||||
70 | static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); | |||
71 | static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( | |||
72 | struct display_mode_lib *mode_lib); | |||
73 | static unsigned int dscceComputeDelay( | |||
74 | unsigned int bpc, | |||
75 | double bpp, | |||
76 | unsigned int sliceWidth, | |||
77 | unsigned int numSlices, | |||
78 | enum output_format_class pixelFormat); | |||
79 | static unsigned int dscComputeDelay(enum output_format_class pixelFormat); | |||
80 | // Super monster function with over 60 arguments | |||
81 | static bool_Bool CalculatePrefetchSchedule( | |||
82 | struct display_mode_lib *mode_lib, | |||
83 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
84 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
85 | Pipe *myPipe, | |||
86 | unsigned int DSCDelay, | |||
87 | double DPPCLKDelaySubtotal, | |||
88 | double DPPCLKDelaySCL, | |||
89 | double DPPCLKDelaySCLLBOnly, | |||
90 | double DPPCLKDelayCNVCFormater, | |||
91 | double DPPCLKDelayCNVCCursor, | |||
92 | double DISPCLKDelaySubtotal, | |||
93 | unsigned int ScalerRecoutWidth, | |||
94 | enum output_format_class OutputFormat, | |||
95 | unsigned int MaxInterDCNTileRepeaters, | |||
96 | unsigned int VStartup, | |||
97 | unsigned int MaxVStartup, | |||
98 | unsigned int GPUVMPageTableLevels, | |||
99 | bool_Bool GPUVMEnable, | |||
100 | HostVM *myHostVM, | |||
101 | bool_Bool DynamicMetadataEnable, | |||
102 | int DynamicMetadataLinesBeforeActiveRequired, | |||
103 | unsigned int DynamicMetadataTransmittedBytes, | |||
104 | bool_Bool DCCEnable, | |||
105 | double UrgentLatency, | |||
106 | double UrgentExtraLatency, | |||
107 | double TCalc, | |||
108 | unsigned int PDEAndMetaPTEBytesFrame, | |||
109 | unsigned int MetaRowByte, | |||
110 | unsigned int PixelPTEBytesPerRow, | |||
111 | double PrefetchSourceLinesY, | |||
112 | unsigned int SwathWidthY, | |||
113 | double BytePerPixelDETY, | |||
114 | double VInitPreFillY, | |||
115 | unsigned int MaxNumSwathY, | |||
116 | double PrefetchSourceLinesC, | |||
117 | double BytePerPixelDETC, | |||
118 | double VInitPreFillC, | |||
119 | unsigned int MaxNumSwathC, | |||
120 | unsigned int SwathHeightY, | |||
121 | unsigned int SwathHeightC, | |||
122 | double TWait, | |||
123 | bool_Bool XFCEnabled, | |||
124 | double XFCRemoteSurfaceFlipDelay, | |||
125 | bool_Bool ProgressiveToInterlaceUnitInOPP, | |||
126 | double *DSTXAfterScaler, | |||
127 | double *DSTYAfterScaler, | |||
128 | double *DestinationLinesForPrefetch, | |||
129 | double *PrefetchBandwidth, | |||
130 | double *DestinationLinesToRequestVMInVBlank, | |||
131 | double *DestinationLinesToRequestRowInVBlank, | |||
132 | double *VRatioPrefetchY, | |||
133 | double *VRatioPrefetchC, | |||
134 | double *RequiredPrefetchPixDataBWLuma, | |||
135 | double *RequiredPrefetchPixDataBWChroma, | |||
136 | unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, | |||
137 | double *Tno_bw, | |||
138 | double *prefetch_vmrow_bw, | |||
139 | unsigned int *swath_width_luma_ub, | |||
140 | unsigned int *swath_width_chroma_ub, | |||
141 | unsigned int *VUpdateOffsetPix, | |||
142 | double *VUpdateWidthPix, | |||
143 | double *VReadyOffsetPix); | |||
144 | static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); | |||
145 | static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); | |||
146 | static double CalculateDCCConfiguration( | |||
147 | bool_Bool DCCEnabled, | |||
148 | bool_Bool DCCProgrammingAssumesScanDirectionUnknown, | |||
149 | unsigned int ViewportWidth, | |||
150 | unsigned int ViewportHeight, | |||
151 | unsigned int DETBufferSize, | |||
152 | unsigned int RequestHeight256Byte, | |||
153 | unsigned int SwathHeight, | |||
154 | enum dm_swizzle_mode TilingFormat, | |||
155 | unsigned int BytePerPixel, | |||
156 | enum scan_direction_class ScanOrientation, | |||
157 | unsigned int *MaxUncompressedBlock, | |||
158 | unsigned int *MaxCompressedBlock, | |||
159 | unsigned int *Independent64ByteBlock); | |||
160 | static double CalculatePrefetchSourceLines( | |||
161 | struct display_mode_lib *mode_lib, | |||
162 | double VRatio, | |||
163 | double vtaps, | |||
164 | bool_Bool Interlace, | |||
165 | bool_Bool ProgressiveToInterlaceUnitInOPP, | |||
166 | unsigned int SwathHeight, | |||
167 | unsigned int ViewportYStart, | |||
168 | double *VInitPreFill, | |||
169 | unsigned int *MaxNumSwath); | |||
170 | static unsigned int CalculateVMAndRowBytes( | |||
171 | struct display_mode_lib *mode_lib, | |||
172 | bool_Bool DCCEnable, | |||
173 | unsigned int BlockHeight256Bytes, | |||
174 | unsigned int BlockWidth256Bytes, | |||
175 | enum source_format_class SourcePixelFormat, | |||
176 | unsigned int SurfaceTiling, | |||
177 | unsigned int BytePerPixel, | |||
178 | enum scan_direction_class ScanDirection, | |||
179 | unsigned int ViewportWidth, | |||
180 | unsigned int ViewportHeight, | |||
181 | unsigned int SwathWidthY, | |||
182 | bool_Bool GPUVMEnable, | |||
183 | bool_Bool HostVMEnable, | |||
184 | unsigned int HostVMMaxPageTableLevels, | |||
185 | unsigned int HostVMCachedPageTableLevels, | |||
186 | unsigned int VMMPageSize, | |||
187 | unsigned int PTEBufferSizeInRequests, | |||
188 | unsigned int Pitch, | |||
189 | unsigned int DCCMetaPitch, | |||
190 | unsigned int *MacroTileWidth, | |||
191 | unsigned int *MetaRowByte, | |||
192 | unsigned int *PixelPTEBytesPerRow, | |||
193 | bool_Bool *PTEBufferSizeNotExceeded, | |||
194 | unsigned int *dpte_row_width_ub, | |||
195 | unsigned int *dpte_row_height, | |||
196 | unsigned int *MetaRequestWidth, | |||
197 | unsigned int *MetaRequestHeight, | |||
198 | unsigned int *meta_row_width, | |||
199 | unsigned int *meta_row_height, | |||
200 | unsigned int *vm_group_bytes, | |||
201 | unsigned int *dpte_group_bytes, | |||
202 | unsigned int *PixelPTEReqWidth, | |||
203 | unsigned int *PixelPTEReqHeight, | |||
204 | unsigned int *PTERequestSize, | |||
205 | unsigned int *DPDE0BytesFrame, | |||
206 | unsigned int *MetaPTEBytesFrame); | |||
207 | ||||
208 | static double CalculateTWait( | |||
209 | unsigned int PrefetchMode, | |||
210 | double DRAMClockChangeLatency, | |||
211 | double UrgentLatency, | |||
212 | double SREnterPlusExitTime); | |||
213 | static double CalculateRemoteSurfaceFlipDelay( | |||
214 | struct display_mode_lib *mode_lib, | |||
215 | double VRatio, | |||
216 | double SwathWidth, | |||
217 | double Bpp, | |||
218 | double LineTime, | |||
219 | double XFCTSlvVupdateOffset, | |||
220 | double XFCTSlvVupdateWidth, | |||
221 | double XFCTSlvVreadyOffset, | |||
222 | double XFCXBUFLatencyTolerance, | |||
223 | double XFCFillBWOverhead, | |||
224 | double XFCSlvChunkSize, | |||
225 | double XFCBusTransportTime, | |||
226 | double TCalc, | |||
227 | double TWait, | |||
228 | double *SrcActiveDrainRate, | |||
229 | double *TInitXFill, | |||
230 | double *TslvChk); | |||
231 | static void CalculateActiveRowBandwidth( | |||
232 | bool_Bool GPUVMEnable, | |||
233 | enum source_format_class SourcePixelFormat, | |||
234 | double VRatio, | |||
235 | bool_Bool DCCEnable, | |||
236 | double LineTime, | |||
237 | unsigned int MetaRowByteLuma, | |||
238 | unsigned int MetaRowByteChroma, | |||
239 | unsigned int meta_row_height_luma, | |||
240 | unsigned int meta_row_height_chroma, | |||
241 | unsigned int PixelPTEBytesPerRowLuma, | |||
242 | unsigned int PixelPTEBytesPerRowChroma, | |||
243 | unsigned int dpte_row_height_luma, | |||
244 | unsigned int dpte_row_height_chroma, | |||
245 | double *meta_row_bw, | |||
246 | double *dpte_row_bw); | |||
247 | static void CalculateFlipSchedule( | |||
248 | struct display_mode_lib *mode_lib, | |||
249 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
250 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
251 | double UrgentExtraLatency, | |||
252 | double UrgentLatency, | |||
253 | unsigned int GPUVMMaxPageTableLevels, | |||
254 | bool_Bool HostVMEnable, | |||
255 | unsigned int HostVMMaxPageTableLevels, | |||
256 | unsigned int HostVMCachedPageTableLevels, | |||
257 | bool_Bool GPUVMEnable, | |||
258 | double PDEAndMetaPTEBytesPerFrame, | |||
259 | double MetaRowBytes, | |||
260 | double DPTEBytesPerRow, | |||
261 | double BandwidthAvailableForImmediateFlip, | |||
262 | unsigned int TotImmediateFlipBytes, | |||
263 | enum source_format_class SourcePixelFormat, | |||
264 | double LineTime, | |||
265 | double VRatio, | |||
266 | double Tno_bw, | |||
267 | bool_Bool DCCEnable, | |||
268 | unsigned int dpte_row_height, | |||
269 | unsigned int meta_row_height, | |||
270 | unsigned int dpte_row_height_chroma, | |||
271 | unsigned int meta_row_height_chroma, | |||
272 | double *DestinationLinesToRequestVMInImmediateFlip, | |||
273 | double *DestinationLinesToRequestRowInImmediateFlip, | |||
274 | double *final_flip_bw, | |||
275 | bool_Bool *ImmediateFlipSupportedForPipe); | |||
276 | static double CalculateWriteBackDelay( | |||
277 | enum source_format_class WritebackPixelFormat, | |||
278 | double WritebackHRatio, | |||
279 | double WritebackVRatio, | |||
280 | unsigned int WritebackLumaHTaps, | |||
281 | unsigned int WritebackLumaVTaps, | |||
282 | unsigned int WritebackChromaHTaps, | |||
283 | unsigned int WritebackChromaVTaps, | |||
284 | unsigned int WritebackDestinationWidth); | |||
285 | static void CalculateWatermarksAndDRAMSpeedChangeSupport( | |||
286 | struct display_mode_lib *mode_lib, | |||
287 | unsigned int PrefetchMode, | |||
288 | unsigned int NumberOfActivePlanes, | |||
289 | unsigned int MaxLineBufferLines, | |||
290 | unsigned int LineBufferSize, | |||
291 | unsigned int DPPOutputBufferPixels, | |||
292 | unsigned int DETBufferSizeInKByte, | |||
293 | unsigned int WritebackInterfaceLumaBufferSize, | |||
294 | unsigned int WritebackInterfaceChromaBufferSize, | |||
295 | double DCFCLK, | |||
296 | double UrgentOutOfOrderReturn, | |||
297 | double ReturnBW, | |||
298 | bool_Bool GPUVMEnable, | |||
299 | int dpte_group_bytes[], | |||
300 | unsigned int MetaChunkSize, | |||
301 | double UrgentLatency, | |||
302 | double ExtraLatency, | |||
303 | double WritebackLatency, | |||
304 | double WritebackChunkSize, | |||
305 | double SOCCLK, | |||
306 | double DRAMClockChangeLatency, | |||
307 | double SRExitTime, | |||
308 | double SREnterPlusExitTime, | |||
309 | double DCFCLKDeepSleep, | |||
310 | int DPPPerPlane[], | |||
311 | bool_Bool DCCEnable[], | |||
312 | double DPPCLK[], | |||
313 | double SwathWidthSingleDPPY[], | |||
314 | unsigned int SwathHeightY[], | |||
315 | double ReadBandwidthPlaneLuma[], | |||
316 | unsigned int SwathHeightC[], | |||
317 | double ReadBandwidthPlaneChroma[], | |||
318 | unsigned int LBBitPerPixel[], | |||
319 | double SwathWidthY[], | |||
320 | double HRatio[], | |||
321 | unsigned int vtaps[], | |||
322 | unsigned int VTAPsChroma[], | |||
323 | double VRatio[], | |||
324 | unsigned int HTotal[], | |||
325 | double PixelClock[], | |||
326 | unsigned int BlendingAndTiming[], | |||
327 | double BytePerPixelDETY[], | |||
328 | double BytePerPixelDETC[], | |||
329 | bool_Bool WritebackEnable[], | |||
330 | enum source_format_class WritebackPixelFormat[], | |||
331 | double WritebackDestinationWidth[], | |||
332 | double WritebackDestinationHeight[], | |||
333 | double WritebackSourceHeight[], | |||
334 | enum clock_change_support *DRAMClockChangeSupport, | |||
335 | double *UrgentWatermark, | |||
336 | double *WritebackUrgentWatermark, | |||
337 | double *DRAMClockChangeWatermark, | |||
338 | double *WritebackDRAMClockChangeWatermark, | |||
339 | double *StutterExitWatermark, | |||
340 | double *StutterEnterPlusExitWatermark, | |||
341 | double *MinActiveDRAMClockChangeLatencySupported); | |||
342 | static void CalculateDCFCLKDeepSleep( | |||
343 | struct display_mode_lib *mode_lib, | |||
344 | unsigned int NumberOfActivePlanes, | |||
345 | double BytePerPixelDETY[], | |||
346 | double BytePerPixelDETC[], | |||
347 | double VRatio[], | |||
348 | double SwathWidthY[], | |||
349 | int DPPPerPlane[], | |||
350 | double HRatio[], | |||
351 | double PixelClock[], | |||
352 | double PSCL_THROUGHPUT[], | |||
353 | double PSCL_THROUGHPUT_CHROMA[], | |||
354 | double DPPCLK[], | |||
355 | double *DCFCLKDeepSleep); | |||
356 | static void CalculateDETBufferSize( | |||
357 | unsigned int DETBufferSizeInKByte, | |||
358 | unsigned int SwathHeightY, | |||
359 | unsigned int SwathHeightC, | |||
360 | unsigned int *DETBufferSizeY, | |||
361 | unsigned int *DETBufferSizeC); | |||
362 | static void CalculateUrgentBurstFactor( | |||
363 | unsigned int DETBufferSizeInKByte, | |||
364 | unsigned int SwathHeightY, | |||
365 | unsigned int SwathHeightC, | |||
366 | unsigned int SwathWidthY, | |||
367 | double LineTime, | |||
368 | double UrgentLatency, | |||
369 | double CursorBufferSize, | |||
370 | unsigned int CursorWidth, | |||
371 | unsigned int CursorBPP, | |||
372 | double VRatio, | |||
373 | double VRatioPreY, | |||
374 | double VRatioPreC, | |||
375 | double BytePerPixelInDETY, | |||
376 | double BytePerPixelInDETC, | |||
377 | double *UrgentBurstFactorCursor, | |||
378 | double *UrgentBurstFactorCursorPre, | |||
379 | double *UrgentBurstFactorLuma, | |||
380 | double *UrgentBurstFactorLumaPre, | |||
381 | double *UrgentBurstFactorChroma, | |||
382 | double *UrgentBurstFactorChromaPre, | |||
383 | unsigned int *NotEnoughUrgentLatencyHiding, | |||
384 | unsigned int *NotEnoughUrgentLatencyHidingPre); | |||
385 | ||||
386 | static void CalculatePixelDeliveryTimes( | |||
387 | unsigned int NumberOfActivePlanes, | |||
388 | double VRatio[], | |||
389 | double VRatioPrefetchY[], | |||
390 | double VRatioPrefetchC[], | |||
391 | unsigned int swath_width_luma_ub[], | |||
392 | unsigned int swath_width_chroma_ub[], | |||
393 | int DPPPerPlane[], | |||
394 | double HRatio[], | |||
395 | double PixelClock[], | |||
396 | double PSCL_THROUGHPUT[], | |||
397 | double PSCL_THROUGHPUT_CHROMA[], | |||
398 | double DPPCLK[], | |||
399 | double BytePerPixelDETC[], | |||
400 | enum scan_direction_class SourceScan[], | |||
401 | unsigned int BlockWidth256BytesY[], | |||
402 | unsigned int BlockHeight256BytesY[], | |||
403 | unsigned int BlockWidth256BytesC[], | |||
404 | unsigned int BlockHeight256BytesC[], | |||
405 | double DisplayPipeLineDeliveryTimeLuma[], | |||
406 | double DisplayPipeLineDeliveryTimeChroma[], | |||
407 | double DisplayPipeLineDeliveryTimeLumaPrefetch[], | |||
408 | double DisplayPipeLineDeliveryTimeChromaPrefetch[], | |||
409 | double DisplayPipeRequestDeliveryTimeLuma[], | |||
410 | double DisplayPipeRequestDeliveryTimeChroma[], | |||
411 | double DisplayPipeRequestDeliveryTimeLumaPrefetch[], | |||
412 | double DisplayPipeRequestDeliveryTimeChromaPrefetch[]); | |||
413 | ||||
414 | static void CalculateMetaAndPTETimes( | |||
415 | unsigned int NumberOfActivePlanes, | |||
416 | bool_Bool GPUVMEnable, | |||
417 | unsigned int MetaChunkSize, | |||
418 | unsigned int MinMetaChunkSizeBytes, | |||
419 | unsigned int GPUVMMaxPageTableLevels, | |||
420 | unsigned int HTotal[], | |||
421 | double VRatio[], | |||
422 | double VRatioPrefetchY[], | |||
423 | double VRatioPrefetchC[], | |||
424 | double DestinationLinesToRequestRowInVBlank[], | |||
425 | double DestinationLinesToRequestRowInImmediateFlip[], | |||
426 | double DestinationLinesToRequestVMInVBlank[], | |||
427 | double DestinationLinesToRequestVMInImmediateFlip[], | |||
428 | bool_Bool DCCEnable[], | |||
429 | double PixelClock[], | |||
430 | double BytePerPixelDETY[], | |||
431 | double BytePerPixelDETC[], | |||
432 | enum scan_direction_class SourceScan[], | |||
433 | unsigned int dpte_row_height[], | |||
434 | unsigned int dpte_row_height_chroma[], | |||
435 | unsigned int meta_row_width[], | |||
436 | unsigned int meta_row_height[], | |||
437 | unsigned int meta_req_width[], | |||
438 | unsigned int meta_req_height[], | |||
439 | int dpte_group_bytes[], | |||
440 | unsigned int PTERequestSizeY[], | |||
441 | unsigned int PTERequestSizeC[], | |||
442 | unsigned int PixelPTEReqWidthY[], | |||
443 | unsigned int PixelPTEReqHeightY[], | |||
444 | unsigned int PixelPTEReqWidthC[], | |||
445 | unsigned int PixelPTEReqHeightC[], | |||
446 | unsigned int dpte_row_width_luma_ub[], | |||
447 | unsigned int dpte_row_width_chroma_ub[], | |||
448 | unsigned int vm_group_bytes[], | |||
449 | unsigned int dpde0_bytes_per_frame_ub_l[], | |||
450 | unsigned int dpde0_bytes_per_frame_ub_c[], | |||
451 | unsigned int meta_pte_bytes_per_frame_ub_l[], | |||
452 | unsigned int meta_pte_bytes_per_frame_ub_c[], | |||
453 | double DST_Y_PER_PTE_ROW_NOM_L[], | |||
454 | double DST_Y_PER_PTE_ROW_NOM_C[], | |||
455 | double DST_Y_PER_META_ROW_NOM_L[], | |||
456 | double TimePerMetaChunkNominal[], | |||
457 | double TimePerMetaChunkVBlank[], | |||
458 | double TimePerMetaChunkFlip[], | |||
459 | double time_per_pte_group_nom_luma[], | |||
460 | double time_per_pte_group_vblank_luma[], | |||
461 | double time_per_pte_group_flip_luma[], | |||
462 | double time_per_pte_group_nom_chroma[], | |||
463 | double time_per_pte_group_vblank_chroma[], | |||
464 | double time_per_pte_group_flip_chroma[], | |||
465 | double TimePerVMGroupVBlank[], | |||
466 | double TimePerVMGroupFlip[], | |||
467 | double TimePerVMRequestVBlank[], | |||
468 | double TimePerVMRequestFlip[]); | |||
469 | ||||
470 | static double CalculateExtraLatency( | |||
471 | double UrgentRoundTripAndOutOfOrderLatency, | |||
472 | int TotalNumberOfActiveDPP, | |||
473 | int PixelChunkSizeInKByte, | |||
474 | int TotalNumberOfDCCActiveDPP, | |||
475 | int MetaChunkSize, | |||
476 | double ReturnBW, | |||
477 | bool_Bool GPUVMEnable, | |||
478 | bool_Bool HostVMEnable, | |||
479 | int NumberOfActivePlanes, | |||
480 | int NumberOfDPP[], | |||
481 | int dpte_group_bytes[], | |||
482 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
483 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
484 | int HostVMMaxPageTableLevels, | |||
485 | int HostVMCachedPageTableLevels); | |||
486 | ||||
/*
 * dml21_recalculate - public entry point: rerun the dcn21 calculations.
 *
 * Invokes four stages back to back, each taking only @mode_lib, so every
 * input and result travels through that structure. The call order is kept
 * exactly as provided.
 * NOTE(review): the first two stages are not declared in this file; they
 * presumably come from ../display_mode_vba.h -- confirm.
 */
void dml21_recalculate(struct display_mode_lib *mode_lib)
{
	/* Stages implemented outside this file. */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);

	/* Stages implemented as static functions in this file. */
	DisplayPipeConfiguration(mode_lib);
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
494 | ||||
495 | static unsigned int dscceComputeDelay( | |||
496 | unsigned int bpc, | |||
497 | double bpp, | |||
498 | unsigned int sliceWidth, | |||
499 | unsigned int numSlices, | |||
500 | enum output_format_class pixelFormat) | |||
501 | { | |||
502 | // valid bpc = source bits per component in the set of {8, 10, 12} | |||
503 | // valid bpp = increments of 1/16 of a bit | |||
504 | // min = 6/7/8 in N420/N422/444, respectively | |||
505 | // max = such that compression is 1:1 | |||
506 | //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) | |||
507 | //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} | |||
508 | //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} | |||
509 | ||||
510 | // fixed value | |||
511 | unsigned int rcModelSize = 8192; | |||
512 | ||||
513 | // N422/N420 operate at 2 pixels per clock | |||
514 | unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, S, ix, wx, p, l0, a, ax, l, | |||
515 | Delay, pixels; | |||
516 | ||||
517 | if (pixelFormat == dm_n422 || pixelFormat == dm_420) | |||
518 | pixelsPerClock = 2; | |||
519 | // #all other modes operate at 1 pixel per clock | |||
520 | else | |||
521 | pixelsPerClock = 1; | |||
522 | ||||
523 | //initial transmit delay as per PPS | |||
524 | initalXmitDelay = dml_round(rcModelSize / 2.0 / bpp / pixelsPerClock); | |||
525 | ||||
526 | //compute ssm delay | |||
527 | if (bpc == 8) | |||
528 | D = 81; | |||
529 | else if (bpc == 10) | |||
530 | D = 89; | |||
531 | else | |||
532 | D = 113; | |||
533 | ||||
534 | //divide by pixel per cycle to compute slice width as seen by DSC | |||
535 | w = sliceWidth / pixelsPerClock; | |||
536 | ||||
537 | //422 mode has an additional cycle of delay | |||
538 | if (pixelFormat == dm_s422) | |||
539 | S = 1; | |||
540 | else | |||
541 | S = 0; | |||
542 | ||||
543 | //main calculation for the dscce | |||
544 | ix = initalXmitDelay + 45; | |||
545 | wx = (w + 2) / 3; | |||
546 | p = 3 * wx - w; | |||
547 | l0 = ix / w; | |||
548 | a = ix + p * l0; | |||
549 | ax = (a + 2) / 3 + D + 6 + 1; | |||
550 | l = (ax + wx - 1) / wx; | |||
551 | if ((ix % w) == 0 && p != 0) | |||
552 | lstall = 1; | |||
553 | else | |||
554 | lstall = 0; | |||
555 | Delay = l * wx * (numSlices - 1) + ax + S + lstall + 22; | |||
556 | ||||
557 | //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels | |||
558 | pixels = Delay * 3 * pixelsPerClock; | |||
559 | return pixels; | |||
560 | } | |||
561 | ||||
562 | static unsigned int dscComputeDelay(enum output_format_class pixelFormat) | |||
563 | { | |||
564 | unsigned int Delay = 0; | |||
565 | ||||
566 | if (pixelFormat == dm_420) { | |||
567 | // sfr | |||
568 | Delay = Delay + 2; | |||
569 | // dsccif | |||
570 | Delay = Delay + 0; | |||
571 | // dscc - input deserializer | |||
572 | Delay = Delay + 3; | |||
573 | // dscc gets pixels every other cycle | |||
574 | Delay = Delay + 2; | |||
575 | // dscc - input cdc fifo | |||
576 | Delay = Delay + 12; | |||
577 | // dscc gets pixels every other cycle | |||
578 | Delay = Delay + 13; | |||
579 | // dscc - cdc uncertainty | |||
580 | Delay = Delay + 2; | |||
581 | // dscc - output cdc fifo | |||
582 | Delay = Delay + 7; | |||
583 | // dscc gets pixels every other cycle | |||
584 | Delay = Delay + 3; | |||
585 | // dscc - cdc uncertainty | |||
586 | Delay = Delay + 2; | |||
587 | // dscc - output serializer | |||
588 | Delay = Delay + 1; | |||
589 | // sft | |||
590 | Delay = Delay + 1; | |||
591 | } else if (pixelFormat == dm_n422) { | |||
592 | // sfr | |||
593 | Delay = Delay + 2; | |||
594 | // dsccif | |||
595 | Delay = Delay + 1; | |||
596 | // dscc - input deserializer | |||
597 | Delay = Delay + 5; | |||
598 | // dscc - input cdc fifo | |||
599 | Delay = Delay + 25; | |||
600 | // dscc - cdc uncertainty | |||
601 | Delay = Delay + 2; | |||
602 | // dscc - output cdc fifo | |||
603 | Delay = Delay + 10; | |||
604 | // dscc - cdc uncertainty | |||
605 | Delay = Delay + 2; | |||
606 | // dscc - output serializer | |||
607 | Delay = Delay + 1; | |||
608 | // sft | |||
609 | Delay = Delay + 1; | |||
610 | } else { | |||
611 | // sfr | |||
612 | Delay = Delay + 2; | |||
613 | // dsccif | |||
614 | Delay = Delay + 0; | |||
615 | // dscc - input deserializer | |||
616 | Delay = Delay + 3; | |||
617 | // dscc - input cdc fifo | |||
618 | Delay = Delay + 12; | |||
619 | // dscc - cdc uncertainty | |||
620 | Delay = Delay + 2; | |||
621 | // dscc - output cdc fifo | |||
622 | Delay = Delay + 7; | |||
623 | // dscc - output serializer | |||
624 | Delay = Delay + 1; | |||
625 | // dscc - cdc uncertainty | |||
626 | Delay = Delay + 2; | |||
627 | // sft | |||
628 | Delay = Delay + 1; | |||
629 | } | |||
630 | ||||
631 | return Delay; | |||
632 | } | |||
633 | ||||
634 | static bool_Bool CalculatePrefetchSchedule( | |||
635 | struct display_mode_lib *mode_lib, | |||
636 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
637 | double PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
638 | Pipe *myPipe, | |||
639 | unsigned int DSCDelay, | |||
640 | double DPPCLKDelaySubtotal, | |||
641 | double DPPCLKDelaySCL, | |||
642 | double DPPCLKDelaySCLLBOnly, | |||
643 | double DPPCLKDelayCNVCFormater, | |||
644 | double DPPCLKDelayCNVCCursor, | |||
645 | double DISPCLKDelaySubtotal, | |||
646 | unsigned int ScalerRecoutWidth, | |||
647 | enum output_format_class OutputFormat, | |||
648 | unsigned int MaxInterDCNTileRepeaters, | |||
649 | unsigned int VStartup, | |||
650 | unsigned int MaxVStartup, | |||
651 | unsigned int GPUVMPageTableLevels, | |||
652 | bool_Bool GPUVMEnable, | |||
653 | HostVM *myHostVM, | |||
654 | bool_Bool DynamicMetadataEnable, | |||
655 | int DynamicMetadataLinesBeforeActiveRequired, | |||
656 | unsigned int DynamicMetadataTransmittedBytes, | |||
657 | bool_Bool DCCEnable, | |||
658 | double UrgentLatency, | |||
659 | double UrgentExtraLatency, | |||
660 | double TCalc, | |||
661 | unsigned int PDEAndMetaPTEBytesFrame, | |||
662 | unsigned int MetaRowByte, | |||
663 | unsigned int PixelPTEBytesPerRow, | |||
664 | double PrefetchSourceLinesY, | |||
665 | unsigned int SwathWidthY, | |||
666 | double BytePerPixelDETY, | |||
667 | double VInitPreFillY, | |||
668 | unsigned int MaxNumSwathY, | |||
669 | double PrefetchSourceLinesC, | |||
670 | double BytePerPixelDETC, | |||
671 | double VInitPreFillC, | |||
672 | unsigned int MaxNumSwathC, | |||
673 | unsigned int SwathHeightY, | |||
674 | unsigned int SwathHeightC, | |||
675 | double TWait, | |||
676 | bool_Bool XFCEnabled, | |||
677 | double XFCRemoteSurfaceFlipDelay, | |||
678 | bool_Bool ProgressiveToInterlaceUnitInOPP, | |||
679 | double *DSTXAfterScaler, | |||
680 | double *DSTYAfterScaler, | |||
681 | double *DestinationLinesForPrefetch, | |||
682 | double *PrefetchBandwidth, | |||
683 | double *DestinationLinesToRequestVMInVBlank, | |||
684 | double *DestinationLinesToRequestRowInVBlank, | |||
685 | double *VRatioPrefetchY, | |||
686 | double *VRatioPrefetchC, | |||
687 | double *RequiredPrefetchPixDataBWLuma, | |||
688 | double *RequiredPrefetchPixDataBWChroma, | |||
689 | unsigned int *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, | |||
690 | double *Tno_bw, | |||
691 | double *prefetch_vmrow_bw, | |||
692 | unsigned int *swath_width_luma_ub, | |||
693 | unsigned int *swath_width_chroma_ub, | |||
694 | unsigned int *VUpdateOffsetPix, | |||
695 | double *VUpdateWidthPix, | |||
696 | double *VReadyOffsetPix) | |||
697 | { | |||
698 | bool_Bool MyError = false0; | |||
699 | unsigned int DPPCycles, DISPCLKCycles; | |||
700 | double DSTTotalPixelsAfterScaler, TotalRepeaterDelayTime; | |||
701 | double Tdm, LineTime, Tsetup; | |||
702 | double dst_y_prefetch_equ; | |||
703 | double Tsw_oto; | |||
704 | double prefetch_bw_oto; | |||
705 | double Tvm_oto; | |||
706 | double Tr0_oto; | |||
707 | double Tvm_oto_lines; | |||
708 | double Tr0_oto_lines; | |||
709 | double Tsw_oto_lines; | |||
710 | double dst_y_prefetch_oto; | |||
711 | double TimeForFetchingMetaPTE = 0; | |||
712 | double TimeForFetchingRowInVBlank = 0; | |||
713 | double LinesToRequestPrefetchPixelData = 0; | |||
714 | double HostVMInefficiencyFactor; | |||
715 | unsigned int HostVMDynamicLevels; | |||
716 | ||||
717 | if (GPUVMEnable == true1 && myHostVM->Enable == true1) { | |||
718 | HostVMInefficiencyFactor = | |||
719 | PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData | |||
720 | / PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly; | |||
721 | HostVMDynamicLevels = myHostVM->MaxPageTableLevels | |||
722 | - myHostVM->CachedPageTableLevels; | |||
723 | } else { | |||
724 | HostVMInefficiencyFactor = 1; | |||
725 | HostVMDynamicLevels = 0; | |||
726 | } | |||
727 | ||||
728 | if (myPipe->ScalerEnabled) | |||
729 | DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCL; | |||
730 | else | |||
731 | DPPCycles = DPPCLKDelaySubtotal + DPPCLKDelaySCLLBOnly; | |||
732 | ||||
733 | DPPCycles = DPPCycles + DPPCLKDelayCNVCFormater + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; | |||
734 | ||||
735 | DISPCLKCycles = DISPCLKDelaySubtotal; | |||
736 | ||||
737 | if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) | |||
738 | return true1; | |||
739 | ||||
740 | *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK | |||
741 | + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; | |||
742 | ||||
743 | if (myPipe->DPPPerPlane > 1) | |||
744 | *DSTXAfterScaler = *DSTXAfterScaler + ScalerRecoutWidth; | |||
745 | ||||
746 | if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && ProgressiveToInterlaceUnitInOPP)) | |||
747 | *DSTYAfterScaler = 1; | |||
748 | else | |||
749 | *DSTYAfterScaler = 0; | |||
750 | ||||
751 | DSTTotalPixelsAfterScaler = ((double) (*DSTYAfterScaler * myPipe->HTotal)) + *DSTXAfterScaler; | |||
752 | *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); | |||
753 | *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); | |||
754 | ||||
755 | *VUpdateOffsetPix = dml_ceil(myPipe->HTotal / 4.0, 1); | |||
756 | TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2.0 / myPipe->DPPCLK + 3.0 / myPipe->DISPCLK); | |||
757 | *VUpdateWidthPix = (14.0 / myPipe->DCFCLKDeepSleep + 12.0 / myPipe->DPPCLK + TotalRepeaterDelayTime) | |||
758 | * myPipe->PixelClock; | |||
759 | ||||
760 | *VReadyOffsetPix = dml_max( | |||
761 | 150.0 / myPipe->DPPCLK, | |||
762 | TotalRepeaterDelayTime + 20.0 / myPipe->DCFCLKDeepSleep + 10.0 / myPipe->DPPCLK) | |||
763 | * myPipe->PixelClock; | |||
764 | ||||
765 | Tsetup = (double) (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / myPipe->PixelClock; | |||
766 | ||||
767 | LineTime = (double) myPipe->HTotal / myPipe->PixelClock; | |||
768 | ||||
769 | if (DynamicMetadataEnable) { | |||
770 | double Tdmbf, Tdmec, Tdmsks; | |||
771 | ||||
772 | Tdm = dml_max(0.0, UrgentExtraLatency - TCalc); | |||
773 | Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / myPipe->DISPCLK; | |||
774 | Tdmec = LineTime; | |||
775 | if (DynamicMetadataLinesBeforeActiveRequired == -1) | |||
776 | Tdmsks = myPipe->VBlank * LineTime / 2.0; | |||
777 | else | |||
778 | Tdmsks = DynamicMetadataLinesBeforeActiveRequired * LineTime; | |||
779 | if (myPipe->InterlaceEnable && !ProgressiveToInterlaceUnitInOPP) | |||
780 | Tdmsks = Tdmsks / 2; | |||
781 | if (VStartup * LineTime | |||
782 | < Tsetup + TWait + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) { | |||
783 | MyError = true1; | |||
784 | *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = (Tsetup + TWait | |||
785 | + UrgentExtraLatency + Tdmbf + Tdmec + Tdmsks) / LineTime; | |||
786 | } else | |||
787 | *VStartupRequiredWhenNotEnoughTimeForDynamicMetadata = 0.0; | |||
788 | } else | |||
789 | Tdm = 0; | |||
790 | ||||
791 | if (GPUVMEnable) { | |||
792 | if (GPUVMPageTableLevels >= 3) | |||
793 | *Tno_bw = UrgentExtraLatency + UrgentLatency * ((GPUVMPageTableLevels - 2) * (myHostVM->MaxPageTableLevels + 1) - 1); | |||
794 | else | |||
795 | *Tno_bw = 0; | |||
796 | } else if (!DCCEnable) | |||
797 | *Tno_bw = LineTime; | |||
798 | else | |||
799 | *Tno_bw = LineTime / 4; | |||
800 | ||||
801 | dst_y_prefetch_equ = VStartup - dml_max(TCalc + TWait, XFCRemoteSurfaceFlipDelay) / LineTime | |||
802 | - (Tsetup + Tdm) / LineTime | |||
803 | - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); | |||
804 | ||||
805 | Tsw_oto = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; | |||
806 | ||||
807 | if (myPipe->SourceScan == dm_horz) { | |||
808 | *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY; | |||
809 | if (myPipe->BlockWidth256BytesC > 0) | |||
810 | *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC; | |||
811 | } else { | |||
812 | *swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY; | |||
813 | if (myPipe->BlockHeight256BytesC > 0) | |||
814 | *swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC; | |||
815 | } | |||
816 | ||||
817 | prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto; | |||
818 | ||||
819 | ||||
820 | if (GPUVMEnable == true1) { | |||
821 | Tvm_oto = dml_max(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, | |||
822 | dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), | |||
823 | LineTime / 4.0)); | |||
824 | } else | |||
825 | Tvm_oto = LineTime / 4.0; | |||
826 | ||||
827 | if ((GPUVMEnable == true1 || DCCEnable == true1)) { | |||
828 | Tr0_oto = dml_max( | |||
829 | (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, | |||
830 | dml_max(UrgentLatency * (HostVMDynamicLevels + 1), dml_max(LineTime - Tvm_oto, LineTime / 4))); | |||
831 | } else | |||
832 | Tr0_oto = (LineTime - Tvm_oto) / 2.0; | |||
833 | ||||
834 | Tvm_oto_lines = dml_ceil(4 * Tvm_oto / LineTime, 1) / 4.0; | |||
835 | Tr0_oto_lines = dml_ceil(4 * Tr0_oto / LineTime, 1) / 4.0; | |||
836 | Tsw_oto_lines = dml_ceil(4 * Tsw_oto / LineTime, 1) / 4.0; | |||
837 | dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Tsw_oto_lines + 0.75; | |||
838 | ||||
839 | dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; | |||
840 | ||||
841 | if (dst_y_prefetch_oto < dst_y_prefetch_equ) | |||
842 | *DestinationLinesForPrefetch = dst_y_prefetch_oto; | |||
843 | else | |||
844 | *DestinationLinesForPrefetch = dst_y_prefetch_equ; | |||
845 | ||||
846 | // Limit to prevent overflow in DST_Y_PREFETCH register | |||
847 | *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75); | |||
848 | ||||
849 | dml_print("DML: VStartup: %d\n", VStartup){do { } while(0); }; | |||
850 | dml_print("DML: TCalc: %f\n", TCalc){do { } while(0); }; | |||
851 | dml_print("DML: TWait: %f\n", TWait){do { } while(0); }; | |||
852 | dml_print("DML: XFCRemoteSurfaceFlipDelay: %f\n", XFCRemoteSurfaceFlipDelay){do { } while(0); }; | |||
853 | dml_print("DML: LineTime: %f\n", LineTime){do { } while(0); }; | |||
854 | dml_print("DML: Tsetup: %f\n", Tsetup){do { } while(0); }; | |||
855 | dml_print("DML: Tdm: %f\n", Tdm){do { } while(0); }; | |||
856 | dml_print("DML: DSTYAfterScaler: %f\n", *DSTYAfterScaler){do { } while(0); }; | |||
857 | dml_print("DML: DSTXAfterScaler: %f\n", *DSTXAfterScaler){do { } while(0); }; | |||
858 | dml_print("DML: HTotal: %d\n", myPipe->HTotal){do { } while(0); }; | |||
859 | ||||
860 | *PrefetchBandwidth = 0; | |||
861 | *DestinationLinesToRequestVMInVBlank = 0; | |||
862 | *DestinationLinesToRequestRowInVBlank = 0; | |||
863 | *VRatioPrefetchY = 0; | |||
864 | *VRatioPrefetchC = 0; | |||
865 | *RequiredPrefetchPixDataBWLuma = 0; | |||
866 | if (*DestinationLinesForPrefetch > 1) { | |||
867 | double PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte | |||
868 | + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor | |||
869 | + PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) | |||
870 | + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) | |||
871 | / (*DestinationLinesForPrefetch * LineTime - *Tno_bw); | |||
872 | ||||
873 | double PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * | |||
874 | HostVMInefficiencyFactor + PrefetchSourceLinesY * | |||
875 | *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + | |||
876 | PrefetchSourceLinesC * *swath_width_chroma_ub * | |||
877 | dml_ceil(BytePerPixelDETC, 2)) / | |||
878 | (*DestinationLinesForPrefetch * LineTime - *Tno_bw - 2 * | |||
879 | UrgentLatency * (1 + HostVMDynamicLevels)); | |||
880 | ||||
881 | double PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow | |||
882 | * HostVMInefficiencyFactor + PrefetchSourceLinesY * | |||
883 | *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + | |||
884 | PrefetchSourceLinesC * *swath_width_chroma_ub * | |||
885 | dml_ceil(BytePerPixelDETC, 2)) / | |||
886 | (*DestinationLinesForPrefetch * LineTime - | |||
887 | UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels | |||
888 | * (HostVMDynamicLevels + 1) - 1)); | |||
889 | ||||
890 | double PrefetchBandwidth4 = (PrefetchSourceLinesY * *swath_width_luma_ub * | |||
891 | dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * | |||
892 | *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / | |||
893 | (*DestinationLinesForPrefetch * LineTime - | |||
894 | UrgentExtraLatency - UrgentLatency * (GPUVMPageTableLevels | |||
895 | * (HostVMDynamicLevels + 1) - 1) - 2 * UrgentLatency * | |||
896 | (1 + HostVMDynamicLevels)); | |||
897 | ||||
898 | if (VStartup == MaxVStartup && (PrefetchBandwidth1 > 4 * prefetch_bw_oto) && (*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw > 0) { | |||
899 | PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / ((*DestinationLinesForPrefetch - dml_ceil(Tsw_oto_lines, 1) / 4.0 - 0.75) * LineTime - *Tno_bw); | |||
900 | } | |||
901 | if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= UrgentLatency * (1 + HostVMDynamicLevels)) { | |||
902 | *PrefetchBandwidth = PrefetchBandwidth1; | |||
903 | } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < UrgentLatency * (1 + HostVMDynamicLevels)) { | |||
904 | *PrefetchBandwidth = PrefetchBandwidth2; | |||
905 | } else if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1) && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= UrgentLatency * (1 + HostVMDynamicLevels)) { | |||
906 | *PrefetchBandwidth = PrefetchBandwidth3; | |||
907 | } else { | |||
908 | *PrefetchBandwidth = PrefetchBandwidth4; | |||
909 | } | |||
910 | ||||
911 | if (GPUVMEnable) { | |||
912 | TimeForFetchingMetaPTE = dml_max(*Tno_bw + (double) PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / *PrefetchBandwidth, | |||
913 | dml_max(UrgentExtraLatency + UrgentLatency * (GPUVMPageTableLevels * (HostVMDynamicLevels + 1) - 1), LineTime / 4)); | |||
914 | } else { | |||
915 | // 5/30/2018 - This was an optimization requested from Sy but now NumberOfCursors is no longer a factor | |||
916 | // so if this needs to be reinstated, then it should be officially done in the VBA code as well. | |||
917 | // if (mode_lib->NumberOfCursors > 0 || XFCEnabled) | |||
918 | TimeForFetchingMetaPTE = LineTime / 4; | |||
919 | // else | |||
920 | // TimeForFetchingMetaPTE = 0.0; | |||
921 | } | |||
922 | ||||
923 | if ((GPUVMEnable == true1 || DCCEnable == true1)) { | |||
924 | TimeForFetchingRowInVBlank = | |||
925 | dml_max( | |||
926 | (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) | |||
927 | / *PrefetchBandwidth, | |||
928 | dml_max( | |||
929 | UrgentLatency * (1 + HostVMDynamicLevels), | |||
930 | dml_max( | |||
931 | (LineTime | |||
932 | - TimeForFetchingMetaPTE) / 2.0, | |||
933 | LineTime | |||
934 | / 4.0))); | |||
935 | } else { | |||
936 | // See note above dated 5/30/2018 | |||
937 | // if (NumberOfCursors > 0 || XFCEnabled) | |||
938 | TimeForFetchingRowInVBlank = (LineTime - TimeForFetchingMetaPTE) / 2.0; | |||
939 | // else // TODO: Did someone else add this?? | |||
940 | // TimeForFetchingRowInVBlank = 0.0; | |||
941 | } | |||
942 | ||||
943 | *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; | |||
944 | ||||
945 | *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; | |||
946 | ||||
947 | LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch | |||
948 | // See note above dated 5/30/2018 | |||
949 | // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? | |||
950 | - ((GPUVMEnable || DCCEnable) ? | |||
951 | (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : | |||
952 | 0.0); // TODO: Did someone else add this?? | |||
953 | ||||
954 | if (LinesToRequestPrefetchPixelData > 0) { | |||
955 | ||||
956 | *VRatioPrefetchY = (double) PrefetchSourceLinesY | |||
957 | / LinesToRequestPrefetchPixelData; | |||
958 | *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); | |||
959 | if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { | |||
960 | if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { | |||
961 | *VRatioPrefetchY = | |||
962 | dml_max( | |||
963 | (double) PrefetchSourceLinesY | |||
964 | / LinesToRequestPrefetchPixelData, | |||
965 | (double) MaxNumSwathY | |||
966 | * SwathHeightY | |||
967 | / (LinesToRequestPrefetchPixelData | |||
968 | - (VInitPreFillY | |||
969 | - 3.0) | |||
970 | / 2.0)); | |||
971 | *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); | |||
972 | } else { | |||
973 | MyError = true1; | |||
974 | *VRatioPrefetchY = 0; | |||
975 | } | |||
976 | } | |||
977 | ||||
978 | *VRatioPrefetchC = (double) PrefetchSourceLinesC | |||
979 | / LinesToRequestPrefetchPixelData; | |||
980 | *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); | |||
981 | ||||
982 | if ((SwathHeightC > 4)) { | |||
983 | if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { | |||
984 | *VRatioPrefetchC = | |||
985 | dml_max( | |||
986 | *VRatioPrefetchC, | |||
987 | (double) MaxNumSwathC | |||
988 | * SwathHeightC | |||
989 | / (LinesToRequestPrefetchPixelData | |||
990 | - (VInitPreFillC | |||
991 | - 3.0) | |||
992 | / 2.0)); | |||
993 | *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); | |||
994 | } else { | |||
995 | MyError = true1; | |||
996 | *VRatioPrefetchC = 0; | |||
997 | } | |||
998 | } | |||
999 | ||||
1000 | *RequiredPrefetchPixDataBWLuma = myPipe->DPPPerPlane | |||
1001 | * (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData | |||
1002 | * dml_ceil(BytePerPixelDETY, 1) | |||
1003 | * *swath_width_luma_ub / LineTime; | |||
1004 | *RequiredPrefetchPixDataBWChroma = myPipe->DPPPerPlane | |||
1005 | * (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData | |||
1006 | * dml_ceil(BytePerPixelDETC, 2) | |||
1007 | * *swath_width_chroma_ub / LineTime; | |||
1008 | } else { | |||
1009 | MyError = true1; | |||
1010 | *VRatioPrefetchY = 0; | |||
1011 | *VRatioPrefetchC = 0; | |||
1012 | *RequiredPrefetchPixDataBWLuma = 0; | |||
1013 | *RequiredPrefetchPixDataBWChroma = 0; | |||
1014 | } | |||
1015 | ||||
1016 | dml_print("DML: Tvm: %fus\n", TimeForFetchingMetaPTE){do { } while(0); }; | |||
1017 | dml_print("DML: Tr0: %fus\n", TimeForFetchingRowInVBlank){do { } while(0); }; | |||
1018 | dml_print("DML: Tsw: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime - TimeForFetchingMetaPTE - TimeForFetchingRowInVBlank){do { } while(0); }; | |||
1019 | dml_print("DML: Tpre: %fus\n", (double)(*DestinationLinesForPrefetch) * LineTime){do { } while(0); }; | |||
1020 | dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow){do { } while(0); }; | |||
1021 | ||||
1022 | } else { | |||
1023 | MyError = true1; | |||
1024 | } | |||
1025 | ||||
1026 | { | |||
1027 | double prefetch_vm_bw; | |||
1028 | double prefetch_row_bw; | |||
1029 | ||||
1030 | if (PDEAndMetaPTEBytesFrame == 0) { | |||
1031 | prefetch_vm_bw = 0; | |||
1032 | } else if (*DestinationLinesToRequestVMInVBlank > 0) { | |||
1033 | prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); | |||
1034 | } else { | |||
1035 | prefetch_vm_bw = 0; | |||
1036 | MyError = true1; | |||
1037 | } | |||
1038 | if (MetaRowByte + PixelPTEBytesPerRow == 0) { | |||
1039 | prefetch_row_bw = 0; | |||
1040 | } else if (*DestinationLinesToRequestRowInVBlank > 0) { | |||
1041 | prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); | |||
1042 | } else { | |||
1043 | prefetch_row_bw = 0; | |||
1044 | MyError = true1; | |||
1045 | } | |||
1046 | ||||
1047 | *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); | |||
1048 | } | |||
1049 | ||||
1050 | if (MyError) { | |||
1051 | *PrefetchBandwidth = 0; | |||
1052 | TimeForFetchingMetaPTE = 0; | |||
1053 | TimeForFetchingRowInVBlank = 0; | |||
1054 | *DestinationLinesToRequestVMInVBlank = 0; | |||
1055 | *DestinationLinesToRequestRowInVBlank = 0; | |||
1056 | *DestinationLinesForPrefetch = 0; | |||
1057 | LinesToRequestPrefetchPixelData = 0; | |||
1058 | *VRatioPrefetchY = 0; | |||
1059 | *VRatioPrefetchC = 0; | |||
1060 | *RequiredPrefetchPixDataBWLuma = 0; | |||
1061 | *RequiredPrefetchPixDataBWChroma = 0; | |||
1062 | } | |||
1063 | ||||
1064 | return MyError; | |||
1065 | } | |||
1066 | ||||
/*
 * Returns (4 * VCOSpeed) divided by the value dml_floor() produces for
 * (4 * VCOSpeed) / Clock at a granularity of 1.
 *
 * NOTE(review): assuming dml_floor() rounds down to a whole divider, the
 * result is the nearest divider-derived clock at or above Clock - confirm
 * against the helper. A Clock of 0, or a Clock above 4 * VCOSpeed, makes
 * one of the divisions operate on zero; callers are expected to avoid that.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_floor(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1071 | ||||
/*
 * Returns (4 * VCOSpeed) divided by the value dml_ceil() produces for
 * (4 * VCOSpeed) / Clock at a granularity of 1.
 *
 * NOTE(review): assuming dml_ceil() rounds up to a whole divider, the
 * result is the nearest divider-derived clock at or below Clock - confirm
 * against the helper. A Clock of 0 divides by zero; callers are expected
 * to avoid that.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double ScaledVCO = VCOSpeed * 4;
	const double Divider = dml_ceil(ScaledVCO / Clock, 1);

	return ScaledVCO / Divider;
}
1076 | ||||
1077 | static double CalculateDCCConfiguration( | |||
1078 | bool_Bool DCCEnabled, | |||
1079 | bool_Bool DCCProgrammingAssumesScanDirectionUnknown, | |||
1080 | unsigned int ViewportWidth, | |||
1081 | unsigned int ViewportHeight, | |||
1082 | unsigned int DETBufferSize, | |||
1083 | unsigned int RequestHeight256Byte, | |||
1084 | unsigned int SwathHeight, | |||
1085 | enum dm_swizzle_mode TilingFormat, | |||
1086 | unsigned int BytePerPixel, | |||
1087 | enum scan_direction_class ScanOrientation, | |||
1088 | unsigned int *MaxUncompressedBlock, | |||
1089 | unsigned int *MaxCompressedBlock, | |||
1090 | unsigned int *Independent64ByteBlock) | |||
1091 | { | |||
1092 | double MaximumDCCCompressionSurface = 0.0; | |||
1093 | enum { | |||
1094 | REQ_256Bytes, | |||
1095 | REQ_128BytesNonContiguous, | |||
1096 | REQ_128BytesContiguous, | |||
1097 | REQ_NA | |||
1098 | } Request = REQ_NA; | |||
1099 | ||||
1100 | if (DCCEnabled == true1) { | |||
1101 | if (DCCProgrammingAssumesScanDirectionUnknown == true1) { | |||
1102 | if (DETBufferSize >= RequestHeight256Byte * ViewportWidth * BytePerPixel | |||
1103 | && DETBufferSize | |||
1104 | >= 256 / RequestHeight256Byte | |||
1105 | * ViewportHeight) { | |||
1106 | Request = REQ_256Bytes; | |||
1107 | } else if ((DETBufferSize | |||
1108 | < RequestHeight256Byte * ViewportWidth * BytePerPixel | |||
1109 | && (BytePerPixel == 2 || BytePerPixel == 4)) | |||
1110 | || (DETBufferSize | |||
1111 | < 256 / RequestHeight256Byte | |||
1112 | * ViewportHeight | |||
1113 | && BytePerPixel == 8 | |||
1114 | && (TilingFormat == dm_sw_4kb_d | |||
1115 | || TilingFormat | |||
1116 | == dm_sw_4kb_d_x | |||
1117 | || TilingFormat | |||
1118 | == dm_sw_var_d | |||
1119 | || TilingFormat | |||
1120 | == dm_sw_var_d_x | |||
1121 | || TilingFormat | |||
1122 | == dm_sw_64kb_d | |||
1123 | || TilingFormat | |||
1124 | == dm_sw_64kb_d_x | |||
1125 | || TilingFormat | |||
1126 | == dm_sw_64kb_d_t | |||
1127 | || TilingFormat | |||
1128 | == dm_sw_64kb_r_x))) { | |||
1129 | Request = REQ_128BytesNonContiguous; | |||
1130 | } else { | |||
1131 | Request = REQ_128BytesContiguous; | |||
1132 | } | |||
1133 | } else { | |||
1134 | if (BytePerPixel == 1) { | |||
1135 | if (ScanOrientation == dm_vert || SwathHeight == 16) { | |||
1136 | Request = REQ_256Bytes; | |||
1137 | } else { | |||
1138 | Request = REQ_128BytesContiguous; | |||
1139 | } | |||
1140 | } else if (BytePerPixel == 2) { | |||
1141 | if ((ScanOrientation == dm_vert && SwathHeight == 16) || (ScanOrientation != dm_vert && SwathHeight == 8)) { | |||
1142 | Request = REQ_256Bytes; | |||
1143 | } else if (ScanOrientation == dm_vert) { | |||
1144 | Request = REQ_128BytesContiguous; | |||
1145 | } else { | |||
1146 | Request = REQ_128BytesNonContiguous; | |||
1147 | } | |||
1148 | } else if (BytePerPixel == 4) { | |||
1149 | if (SwathHeight == 8) { | |||
1150 | Request = REQ_256Bytes; | |||
1151 | } else if (ScanOrientation == dm_vert) { | |||
1152 | Request = REQ_128BytesContiguous; | |||
1153 | } else { | |||
1154 | Request = REQ_128BytesNonContiguous; | |||
1155 | } | |||
1156 | } else if (BytePerPixel == 8) { | |||
1157 | if (TilingFormat == dm_sw_4kb_d || TilingFormat == dm_sw_4kb_d_x | |||
1158 | || TilingFormat == dm_sw_var_d | |||
1159 | || TilingFormat == dm_sw_var_d_x | |||
1160 | || TilingFormat == dm_sw_64kb_d | |||
1161 | || TilingFormat == dm_sw_64kb_d_x | |||
1162 | || TilingFormat == dm_sw_64kb_d_t | |||
1163 | || TilingFormat == dm_sw_64kb_r_x) { | |||
1164 | if ((ScanOrientation == dm_vert && SwathHeight == 8) | |||
1165 | || (ScanOrientation != dm_vert | |||
1166 | && SwathHeight == 4)) { | |||
1167 | Request = REQ_256Bytes; | |||
1168 | } else if (ScanOrientation != dm_vert) { | |||
1169 | Request = REQ_128BytesContiguous; | |||
1170 | } else { | |||
1171 | Request = REQ_128BytesNonContiguous; | |||
1172 | } | |||
1173 | } else { | |||
1174 | if (ScanOrientation != dm_vert || SwathHeight == 8) { | |||
1175 | Request = REQ_256Bytes; | |||
1176 | } else { | |||
1177 | Request = REQ_128BytesContiguous; | |||
1178 | } | |||
1179 | } | |||
1180 | } | |||
1181 | } | |||
1182 | } else { | |||
1183 | Request = REQ_NA; | |||
1184 | } | |||
1185 | ||||
1186 | if (Request == REQ_256Bytes) { | |||
1187 | *MaxUncompressedBlock = 256; | |||
1188 | *MaxCompressedBlock = 256; | |||
1189 | *Independent64ByteBlock = false0; | |||
1190 | MaximumDCCCompressionSurface = 4.0; | |||
1191 | } else if (Request == REQ_128BytesContiguous) { | |||
1192 | *MaxUncompressedBlock = 128; | |||
1193 | *MaxCompressedBlock = 128; | |||
1194 | *Independent64ByteBlock = false0; | |||
1195 | MaximumDCCCompressionSurface = 2.0; | |||
1196 | } else if (Request == REQ_128BytesNonContiguous) { | |||
1197 | *MaxUncompressedBlock = 256; | |||
1198 | *MaxCompressedBlock = 64; | |||
1199 | *Independent64ByteBlock = true1; | |||
1200 | MaximumDCCCompressionSurface = 4.0; | |||
1201 | } else { | |||
1202 | *MaxUncompressedBlock = 0; | |||
1203 | *MaxCompressedBlock = 0; | |||
1204 | *Independent64ByteBlock = 0; | |||
1205 | MaximumDCCCompressionSurface = 0.0; | |||
1206 | } | |||
1207 | ||||
1208 | return MaximumDCCCompressionSurface; | |||
1209 | } | |||
1210 | ||||
1211 | static double CalculatePrefetchSourceLines( | |||
1212 | struct display_mode_lib *mode_lib, | |||
1213 | double VRatio, | |||
1214 | double vtaps, | |||
1215 | bool_Bool Interlace, | |||
1216 | bool_Bool ProgressiveToInterlaceUnitInOPP, | |||
1217 | unsigned int SwathHeight, | |||
1218 | unsigned int ViewportYStart, | |||
1219 | double *VInitPreFill, | |||
1220 | unsigned int *MaxNumSwath) | |||
1221 | { | |||
1222 | unsigned int MaxPartialSwath; | |||
1223 | ||||
1224 | if (ProgressiveToInterlaceUnitInOPP) | |||
1225 | *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); | |||
1226 | else | |||
1227 | *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); | |||
1228 | ||||
1229 | if (!mode_lib->vba.IgnoreViewportPositioning) { | |||
1230 | ||||
1231 | *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; | |||
1232 | ||||
1233 | if (*VInitPreFill > 1.0) | |||
1234 | MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; | |||
1235 | else | |||
1236 | MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) | |||
1237 | % SwathHeight; | |||
1238 | MaxPartialSwath = dml_max(1U, MaxPartialSwath); | |||
1239 | ||||
1240 | } else { | |||
1241 | ||||
1242 | if (ViewportYStart != 0) | |||
1243 | dml_print({do { } while(0); } | |||
1244 | "WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"){do { } while(0); }; | |||
1245 | ||||
1246 | *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); | |||
1247 | ||||
1248 | if (*VInitPreFill > 1.0) | |||
1249 | MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; | |||
1250 | else | |||
1251 | MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) | |||
1252 | % SwathHeight; | |||
1253 | } | |||
1254 | ||||
1255 | return *MaxNumSwath * SwathHeight + MaxPartialSwath; | |||
1256 | } | |||
1257 | ||||
1258 | static unsigned int CalculateVMAndRowBytes( | |||
1259 | struct display_mode_lib *mode_lib, | |||
1260 | bool_Bool DCCEnable, | |||
1261 | unsigned int BlockHeight256Bytes, | |||
1262 | unsigned int BlockWidth256Bytes, | |||
1263 | enum source_format_class SourcePixelFormat, | |||
1264 | unsigned int SurfaceTiling, | |||
1265 | unsigned int BytePerPixel, | |||
1266 | enum scan_direction_class ScanDirection, | |||
1267 | unsigned int ViewportWidth, | |||
1268 | unsigned int ViewportHeight, | |||
1269 | unsigned int SwathWidth, | |||
1270 | bool_Bool GPUVMEnable, | |||
1271 | bool_Bool HostVMEnable, | |||
1272 | unsigned int HostVMMaxPageTableLevels, | |||
1273 | unsigned int HostVMCachedPageTableLevels, | |||
1274 | unsigned int VMMPageSize, | |||
1275 | unsigned int PTEBufferSizeInRequests, | |||
1276 | unsigned int Pitch, | |||
1277 | unsigned int DCCMetaPitch, | |||
1278 | unsigned int *MacroTileWidth, | |||
1279 | unsigned int *MetaRowByte, | |||
1280 | unsigned int *PixelPTEBytesPerRow, | |||
1281 | bool_Bool *PTEBufferSizeNotExceeded, | |||
1282 | unsigned int *dpte_row_width_ub, | |||
1283 | unsigned int *dpte_row_height, | |||
1284 | unsigned int *MetaRequestWidth, | |||
1285 | unsigned int *MetaRequestHeight, | |||
1286 | unsigned int *meta_row_width, | |||
1287 | unsigned int *meta_row_height, | |||
1288 | unsigned int *vm_group_bytes, | |||
1289 | unsigned int *dpte_group_bytes, | |||
1290 | unsigned int *PixelPTEReqWidth, | |||
1291 | unsigned int *PixelPTEReqHeight, | |||
1292 | unsigned int *PTERequestSize, | |||
1293 | unsigned int *DPDE0BytesFrame, | |||
1294 | unsigned int *MetaPTEBytesFrame) | |||
1295 | { | |||
1296 | unsigned int MPDEBytesFrame; | |||
1297 | unsigned int DCCMetaSurfaceBytes; | |||
1298 | unsigned int MacroTileSizeBytes; | |||
1299 | unsigned int MacroTileHeight; | |||
1300 | unsigned int ExtraDPDEBytesFrame; | |||
1301 | unsigned int PDEAndMetaPTEBytesFrame; | |||
1302 | unsigned int PixelPTEReqHeightPTEs = 0; | |||
1303 | ||||
1304 | if (DCCEnable == true1) { | |||
1305 | *MetaRequestHeight = 8 * BlockHeight256Bytes; | |||
1306 | *MetaRequestWidth = 8 * BlockWidth256Bytes; | |||
1307 | if (ScanDirection == dm_horz) { | |||
1308 | *meta_row_height = *MetaRequestHeight; | |||
1309 | *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) | |||
1310 | + *MetaRequestWidth; | |||
1311 | *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; | |||
1312 | } else { | |||
1313 | *meta_row_height = *MetaRequestWidth; | |||
1314 | *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) | |||
1315 | + *MetaRequestHeight; | |||
1316 | *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; | |||
1317 | } | |||
1318 | if (ScanDirection == dm_horz) { | |||
1319 | DCCMetaSurfaceBytes = DCCMetaPitch | |||
1320 | * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) | |||
1321 | + 64 * BlockHeight256Bytes) * BytePerPixel | |||
1322 | / 256; | |||
1323 | } else { | |||
1324 | DCCMetaSurfaceBytes = DCCMetaPitch | |||
1325 | * (dml_ceil( | |||
1326 | (double) ViewportHeight - 1, | |||
1327 | 64 * BlockHeight256Bytes) | |||
1328 | + 64 * BlockHeight256Bytes) * BytePerPixel | |||
1329 | / 256; | |||
1330 | } | |||
1331 | if (GPUVMEnable == true1) { | |||
1332 | *MetaPTEBytesFrame = (dml_ceil( | |||
1333 | (double) (DCCMetaSurfaceBytes - VMMPageSize) | |||
1334 | / (8 * VMMPageSize), | |||
1335 | 1) + 1) * 64; | |||
1336 | MPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 2); | |||
1337 | } else { | |||
1338 | *MetaPTEBytesFrame = 0; | |||
1339 | MPDEBytesFrame = 0; | |||
1340 | } | |||
1341 | } else { | |||
1342 | *MetaPTEBytesFrame = 0; | |||
1343 | MPDEBytesFrame = 0; | |||
1344 | *MetaRowByte = 0; | |||
1345 | } | |||
1346 | ||||
1347 | if (SurfaceTiling == dm_sw_linear || SurfaceTiling == dm_sw_gfx7_2d_thin_gl || SurfaceTiling == dm_sw_gfx7_2d_thin_l_vp) { | |||
1348 | MacroTileSizeBytes = 256; | |||
1349 | MacroTileHeight = BlockHeight256Bytes; | |||
1350 | } else if (SurfaceTiling == dm_sw_4kb_s || SurfaceTiling == dm_sw_4kb_s_x | |||
1351 | || SurfaceTiling == dm_sw_4kb_d || SurfaceTiling == dm_sw_4kb_d_x) { | |||
1352 | MacroTileSizeBytes = 4096; | |||
1353 | MacroTileHeight = 4 * BlockHeight256Bytes; | |||
1354 | } else if (SurfaceTiling == dm_sw_64kb_s || SurfaceTiling == dm_sw_64kb_s_t | |||
1355 | || SurfaceTiling == dm_sw_64kb_s_x || SurfaceTiling == dm_sw_64kb_d | |||
1356 | || SurfaceTiling == dm_sw_64kb_d_t || SurfaceTiling == dm_sw_64kb_d_x | |||
1357 | || SurfaceTiling == dm_sw_64kb_r_x) { | |||
1358 | MacroTileSizeBytes = 65536; | |||
1359 | MacroTileHeight = 16 * BlockHeight256Bytes; | |||
1360 | } else { | |||
1361 | MacroTileSizeBytes = 262144; | |||
1362 | MacroTileHeight = 32 * BlockHeight256Bytes; | |||
1363 | } | |||
1364 | *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; | |||
1365 | ||||
1366 | if (GPUVMEnable == true1 && (mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) > 2) { | |||
1367 | if (ScanDirection == dm_horz) { | |||
1368 | *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1); | |||
1369 | } else { | |||
1370 | *DPDE0BytesFrame = 64 * (dml_ceil(((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) / (8 * 2097152), 1) + 1); | |||
1371 | } | |||
1372 | ExtraDPDEBytesFrame = 128 * ((mode_lib->vba.GPUVMMaxPageTableLevels + 1) * (mode_lib->vba.HostVMMaxPageTableLevels + 1) - 3); | |||
1373 | } else { | |||
1374 | *DPDE0BytesFrame = 0; | |||
1375 | ExtraDPDEBytesFrame = 0; | |||
1376 | } | |||
1377 | ||||
1378 | PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame | |||
1379 | + ExtraDPDEBytesFrame; | |||
1380 | ||||
1381 | if (HostVMEnable == true1) { | |||
1382 | PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)); | |||
1383 | } | |||
1384 | ||||
1385 | if (GPUVMEnable == true1) { | |||
1386 | double FractionOfPTEReturnDrop; | |||
1387 | ||||
1388 | if (SurfaceTiling == dm_sw_linear) { | |||
1389 | PixelPTEReqHeightPTEs = 1; | |||
1390 | *PixelPTEReqHeight = 1; | |||
1391 | *PixelPTEReqWidth = 8.0 * VMMPageSize / BytePerPixel; | |||
1392 | *PTERequestSize = 64; | |||
1393 | FractionOfPTEReturnDrop = 0; | |||
1394 | } else if (MacroTileSizeBytes == 4096) { | |||
1395 | PixelPTEReqHeightPTEs = 1; | |||
1396 | *PixelPTEReqHeight = MacroTileHeight; | |||
1397 | *PixelPTEReqWidth = 8 * *MacroTileWidth; | |||
1398 | *PTERequestSize = 64; | |||
1399 | if (ScanDirection == dm_horz) | |||
1400 | FractionOfPTEReturnDrop = 0; | |||
1401 | else | |||
1402 | FractionOfPTEReturnDrop = 7 / 8; | |||
1403 | } else if (VMMPageSize == 4096 && MacroTileSizeBytes > 4096) { | |||
1404 | PixelPTEReqHeightPTEs = 16; | |||
1405 | *PixelPTEReqHeight = 16 * BlockHeight256Bytes; | |||
1406 | *PixelPTEReqWidth = 16 * BlockWidth256Bytes; | |||
1407 | *PTERequestSize = 128; | |||
1408 | FractionOfPTEReturnDrop = 0; | |||
1409 | } else { | |||
1410 | PixelPTEReqHeightPTEs = 1; | |||
1411 | *PixelPTEReqHeight = MacroTileHeight; | |||
1412 | *PixelPTEReqWidth = 8 * *MacroTileWidth; | |||
1413 | *PTERequestSize = 64; | |||
1414 | FractionOfPTEReturnDrop = 0; | |||
1415 | } | |||
1416 | ||||
1417 | if (SurfaceTiling == dm_sw_linear) { | |||
1418 | *dpte_row_height = dml_min(128, | |||
1419 | 1 << (unsigned int) dml_floor( | |||
1420 | dml_log2( | |||
1421 | (double) PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), | |||
1422 | 1)); | |||
1423 | *dpte_row_width_ub = (dml_ceil((double) (Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; | |||
1424 | *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; | |||
1425 | } else if (ScanDirection == dm_horz) { | |||
1426 | *dpte_row_height = *PixelPTEReqHeight; | |||
1427 | *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; | |||
1428 | *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; | |||
1429 | } else { | |||
1430 | *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); | |||
1431 | *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; | |||
1432 | *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; | |||
1433 | } | |||
1434 | if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) | |||
1435 | <= 64 * PTEBufferSizeInRequests) { | |||
1436 | *PTEBufferSizeNotExceeded = true1; | |||
1437 | } else { | |||
1438 | *PTEBufferSizeNotExceeded = false0; | |||
1439 | } | |||
1440 | } else { | |||
1441 | *PixelPTEBytesPerRow = 0; | |||
1442 | *PTEBufferSizeNotExceeded = true1; | |||
1443 | } | |||
1444 | dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %d\n", *MetaPTEBytesFrame){do { } while(0); }; | |||
1445 | ||||
1446 | if (HostVMEnable == true1) { | |||
1447 | *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * (HostVMMaxPageTableLevels - HostVMCachedPageTableLevels)); | |||
1448 | } | |||
1449 | ||||
1450 | if (HostVMEnable == true1) { | |||
1451 | *vm_group_bytes = 512; | |||
1452 | *dpte_group_bytes = 512; | |||
1453 | } else if (GPUVMEnable == true1) { | |||
1454 | *vm_group_bytes = 2048; | |||
1455 | if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection != dm_horz) { | |||
1456 | *dpte_group_bytes = 512; | |||
1457 | } else { | |||
1458 | *dpte_group_bytes = 2048; | |||
1459 | } | |||
1460 | } else { | |||
1461 | *vm_group_bytes = 0; | |||
1462 | *dpte_group_bytes = 0; | |||
1463 | } | |||
1464 | ||||
1465 | return PDEAndMetaPTEBytesFrame; | |||
1466 | } | |||
1467 | ||||
1468 | static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( | |||
1469 | struct display_mode_lib *mode_lib) | |||
1470 | { | |||
1471 | struct vba_vars_st *locals = &mode_lib->vba; | |||
1472 | unsigned int j, k; | |||
1473 | ||||
1474 | mode_lib->vba.WritebackDISPCLK = 0.0; | |||
1475 | mode_lib->vba.DISPCLKWithRamping = 0; | |||
1476 | mode_lib->vba.DISPCLKWithoutRamping = 0; | |||
1477 | mode_lib->vba.GlobalDPPCLK = 0.0; | |||
1478 | ||||
1479 | // DISPCLK and DPPCLK Calculation | |||
1480 | // | |||
1481 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
| ||||
1482 | if (mode_lib->vba.WritebackEnable[k]) { | |||
1483 | mode_lib->vba.WritebackDISPCLK = | |||
1484 | dml_max( | |||
1485 | mode_lib->vba.WritebackDISPCLK, | |||
1486 | CalculateWriteBackDISPCLK( | |||
1487 | mode_lib->vba.WritebackPixelFormat[k], | |||
1488 | mode_lib->vba.PixelClock[k], | |||
1489 | mode_lib->vba.WritebackHRatio[k], | |||
1490 | mode_lib->vba.WritebackVRatio[k], | |||
1491 | mode_lib->vba.WritebackLumaHTaps[k], | |||
1492 | mode_lib->vba.WritebackLumaVTaps[k], | |||
1493 | mode_lib->vba.WritebackChromaHTaps[k], | |||
1494 | mode_lib->vba.WritebackChromaVTaps[k], | |||
1495 | mode_lib->vba.WritebackDestinationWidth[k], | |||
1496 | mode_lib->vba.HTotal[k], | |||
1497 | mode_lib->vba.WritebackChromaLineBufferWidth)); | |||
1498 | } | |||
1499 | } | |||
1500 | ||||
1501 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1502 | if (mode_lib->vba.HRatio[k] > 1) { | |||
1503 | locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( | |||
1504 | mode_lib->vba.MaxDCHUBToPSCLThroughput, | |||
1505 | mode_lib->vba.MaxPSCLToLBThroughput | |||
1506 | * mode_lib->vba.HRatio[k] | |||
1507 | / dml_ceil( | |||
1508 | mode_lib->vba.htaps[k] | |||
1509 | / 6.0, | |||
1510 | 1)); | |||
1511 | } else { | |||
1512 | locals->PSCL_THROUGHPUT_LUMA[k] = dml_min( | |||
1513 | mode_lib->vba.MaxDCHUBToPSCLThroughput, | |||
1514 | mode_lib->vba.MaxPSCLToLBThroughput); | |||
1515 | } | |||
1516 | ||||
1517 | mode_lib->vba.DPPCLKUsingSingleDPPLuma = | |||
1518 | mode_lib->vba.PixelClock[k] | |||
1519 | * dml_max( | |||
1520 | mode_lib->vba.vtaps[k] / 6.0 | |||
1521 | * dml_min( | |||
1522 | 1.0, | |||
1523 | mode_lib->vba.HRatio[k]), | |||
1524 | dml_max( | |||
1525 | mode_lib->vba.HRatio[k] | |||
1526 | * mode_lib->vba.VRatio[k] | |||
1527 | / locals->PSCL_THROUGHPUT_LUMA[k], | |||
1528 | 1.0)); | |||
1529 | ||||
1530 | if ((mode_lib->vba.htaps[k] > 6 || mode_lib->vba.vtaps[k] > 6) | |||
1531 | && mode_lib->vba.DPPCLKUsingSingleDPPLuma | |||
1532 | < 2 * mode_lib->vba.PixelClock[k]) { | |||
1533 | mode_lib->vba.DPPCLKUsingSingleDPPLuma = 2 * mode_lib->vba.PixelClock[k]; | |||
1534 | } | |||
1535 | ||||
1536 | if ((mode_lib->vba.SourcePixelFormat[k] != dm_420_8 | |||
1537 | && mode_lib->vba.SourcePixelFormat[k] != dm_420_10)) { | |||
1538 | locals->PSCL_THROUGHPUT_CHROMA[k] = 0.0; | |||
1539 | locals->DPPCLKUsingSingleDPP[k] = | |||
1540 | mode_lib->vba.DPPCLKUsingSingleDPPLuma; | |||
1541 | } else { | |||
1542 | if (mode_lib->vba.HRatio[k] > 1) { | |||
1543 | locals->PSCL_THROUGHPUT_CHROMA[k] = | |||
1544 | dml_min( | |||
1545 | mode_lib->vba.MaxDCHUBToPSCLThroughput, | |||
1546 | mode_lib->vba.MaxPSCLToLBThroughput | |||
1547 | * mode_lib->vba.HRatio[k] | |||
1548 | / 2 | |||
1549 | / dml_ceil( | |||
1550 | mode_lib->vba.HTAPsChroma[k] | |||
1551 | / 6.0, | |||
1552 | 1.0)); | |||
1553 | } else { | |||
1554 | locals->PSCL_THROUGHPUT_CHROMA[k] = dml_min( | |||
1555 | mode_lib->vba.MaxDCHUBToPSCLThroughput, | |||
1556 | mode_lib->vba.MaxPSCLToLBThroughput); | |||
1557 | } | |||
1558 | mode_lib->vba.DPPCLKUsingSingleDPPChroma = | |||
1559 | mode_lib->vba.PixelClock[k] | |||
1560 | * dml_max( | |||
1561 | mode_lib->vba.VTAPsChroma[k] | |||
1562 | / 6.0 | |||
1563 | * dml_min( | |||
1564 | 1.0, | |||
1565 | mode_lib->vba.HRatio[k] | |||
1566 | / 2), | |||
1567 | dml_max( | |||
1568 | mode_lib->vba.HRatio[k] | |||
1569 | * mode_lib->vba.VRatio[k] | |||
1570 | / 4 | |||
1571 | / locals->PSCL_THROUGHPUT_CHROMA[k], | |||
1572 | 1.0)); | |||
1573 | ||||
1574 | if ((mode_lib->vba.HTAPsChroma[k] > 6 || mode_lib->vba.VTAPsChroma[k] > 6) | |||
1575 | && mode_lib->vba.DPPCLKUsingSingleDPPChroma | |||
1576 | < 2 * mode_lib->vba.PixelClock[k]) { | |||
1577 | mode_lib->vba.DPPCLKUsingSingleDPPChroma = 2 | |||
1578 | * mode_lib->vba.PixelClock[k]; | |||
1579 | } | |||
1580 | ||||
1581 | locals->DPPCLKUsingSingleDPP[k] = dml_max( | |||
1582 | mode_lib->vba.DPPCLKUsingSingleDPPLuma, | |||
1583 | mode_lib->vba.DPPCLKUsingSingleDPPChroma); | |||
1584 | } | |||
1585 | } | |||
1586 | ||||
1587 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1588 | if (mode_lib->vba.BlendingAndTiming[k] != k) | |||
1589 | continue; | |||
1590 | if (mode_lib->vba.ODMCombineEnabled[k]) { | |||
1591 | mode_lib->vba.DISPCLKWithRamping = | |||
1592 | dml_max( | |||
1593 | mode_lib->vba.DISPCLKWithRamping, | |||
1594 | mode_lib->vba.PixelClock[k] / 2 | |||
1595 | * (1 | |||
1596 | + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading | |||
1597 | / 100) | |||
1598 | * (1 | |||
1599 | + mode_lib->vba.DISPCLKRampingMargin | |||
1600 | / 100)); | |||
1601 | mode_lib->vba.DISPCLKWithoutRamping = | |||
1602 | dml_max( | |||
1603 | mode_lib->vba.DISPCLKWithoutRamping, | |||
1604 | mode_lib->vba.PixelClock[k] / 2 | |||
1605 | * (1 | |||
1606 | + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading | |||
1607 | / 100)); | |||
1608 | } else if (!mode_lib->vba.ODMCombineEnabled[k]) { | |||
1609 | mode_lib->vba.DISPCLKWithRamping = | |||
1610 | dml_max( | |||
1611 | mode_lib->vba.DISPCLKWithRamping, | |||
1612 | mode_lib->vba.PixelClock[k] | |||
1613 | * (1 | |||
1614 | + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading | |||
1615 | / 100) | |||
1616 | * (1 | |||
1617 | + mode_lib->vba.DISPCLKRampingMargin | |||
1618 | / 100)); | |||
1619 | mode_lib->vba.DISPCLKWithoutRamping = | |||
1620 | dml_max( | |||
1621 | mode_lib->vba.DISPCLKWithoutRamping, | |||
1622 | mode_lib->vba.PixelClock[k] | |||
1623 | * (1 | |||
1624 | + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading | |||
1625 | / 100)); | |||
1626 | } | |||
1627 | } | |||
1628 | ||||
1629 | mode_lib->vba.DISPCLKWithRamping = dml_max( | |||
1630 | mode_lib->vba.DISPCLKWithRamping, | |||
1631 | mode_lib->vba.WritebackDISPCLK); | |||
1632 | mode_lib->vba.DISPCLKWithoutRamping = dml_max( | |||
1633 | mode_lib->vba.DISPCLKWithoutRamping, | |||
1634 | mode_lib->vba.WritebackDISPCLK); | |||
1635 | ||||
1636 | ASSERT(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0)do { if (({ static int __warned; int __ret = !!(!(mode_lib-> vba.DISPCLKDPPCLKVCOSpeed != 0)); if (__ret && !__warned ) { printf("WARNING %s failed at %s:%d\n", "!(mode_lib->vba.DISPCLKDPPCLKVCOSpeed != 0)" , "/usr/src/sys/dev/pci/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c" , 1636); __warned = 1; } __builtin_expect(!!(__ret), 0); })) do {} while (0); } while (0); | |||
1637 | mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( | |||
1638 | mode_lib->vba.DISPCLKWithRamping, | |||
1639 | mode_lib->vba.DISPCLKDPPCLKVCOSpeed); | |||
1640 | mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp( | |||
1641 | mode_lib->vba.DISPCLKWithoutRamping, | |||
1642 | mode_lib->vba.DISPCLKDPPCLKVCOSpeed); | |||
1643 | mode_lib->vba.MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( | |||
1644 | mode_lib->vba.soc.clock_limits[mode_lib->vba.soc.num_states - 1].dispclk_mhz, | |||
1645 | mode_lib->vba.DISPCLKDPPCLKVCOSpeed); | |||
1646 | if (mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity | |||
1647 | > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { | |||
1648 | mode_lib->vba.DISPCLK_calculated = | |||
1649 | mode_lib->vba.DISPCLKWithoutRampingRoundedToDFSGranularity; | |||
1650 | } else if (mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity | |||
1651 | > mode_lib->vba.MaxDispclkRoundedToDFSGranularity) { | |||
1652 | mode_lib->vba.DISPCLK_calculated = mode_lib->vba.MaxDispclkRoundedToDFSGranularity; | |||
1653 | } else { | |||
1654 | mode_lib->vba.DISPCLK_calculated = | |||
1655 | mode_lib->vba.DISPCLKWithRampingRoundedToDFSGranularity; | |||
1656 | } | |||
1657 | DTRACE(" dispclk_mhz (calculated) = %f", mode_lib->vba.DISPCLK_calculated){do { } while(0); }; | |||
1658 | ||||
1659 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1660 | mode_lib->vba.DPPCLK_calculated[k] = locals->DPPCLKUsingSingleDPP[k] | |||
1661 | / mode_lib->vba.DPPPerPlane[k] | |||
1662 | * (1 + mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading / 100); | |||
1663 | mode_lib->vba.GlobalDPPCLK = dml_max( | |||
1664 | mode_lib->vba.GlobalDPPCLK, | |||
1665 | mode_lib->vba.DPPCLK_calculated[k]); | |||
1666 | } | |||
1667 | mode_lib->vba.GlobalDPPCLK = RoundToDFSGranularityUp( | |||
1668 | mode_lib->vba.GlobalDPPCLK, | |||
1669 | mode_lib->vba.DISPCLKDPPCLKVCOSpeed); | |||
1670 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1671 | mode_lib->vba.DPPCLK_calculated[k] = mode_lib->vba.GlobalDPPCLK / 255 | |||
1672 | * dml_ceil( | |||
1673 | mode_lib->vba.DPPCLK_calculated[k] * 255 | |||
1674 | / mode_lib->vba.GlobalDPPCLK, | |||
1675 | 1); | |||
1676 | DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, mode_lib->vba.DPPCLK_calculated[k]){do { } while(0); }; | |||
1677 | } | |||
1678 | ||||
1679 | // Urgent and B P-State/DRAM Clock Change Watermark | |||
1680 | DTRACE(" dcfclk_mhz = %f", mode_lib->vba.DCFCLK){do { } while(0); }; | |||
1681 | DTRACE(" return_bw_to_dcn = %f", mode_lib->vba.ReturnBandwidthToDCN){do { } while(0); }; | |||
1682 | DTRACE(" return_bus_bw = %f", mode_lib->vba.ReturnBW){do { } while(0); }; | |||
1683 | ||||
1684 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1685 | bool_Bool MainPlaneDoesODMCombine = false0; | |||
1686 | ||||
1687 | if (mode_lib->vba.SourceScan[k] == dm_horz) | |||
1688 | locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportWidth[k]; | |||
1689 | else | |||
1690 | locals->SwathWidthSingleDPPY[k] = mode_lib->vba.ViewportHeight[k]; | |||
1691 | ||||
1692 | if (mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) | |||
1693 | MainPlaneDoesODMCombine = true1; | |||
1694 | for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) | |||
1695 | if (mode_lib->vba.BlendingAndTiming[k] == j | |||
1696 | && mode_lib->vba.ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) | |||
1697 | MainPlaneDoesODMCombine = true1; | |||
1698 | ||||
1699 | if (MainPlaneDoesODMCombine == true1) | |||
1700 | locals->SwathWidthY[k] = dml_min( | |||
1701 | (double) locals->SwathWidthSingleDPPY[k], | |||
1702 | dml_round( | |||
1703 | mode_lib->vba.HActive[k] / 2.0 | |||
1704 | * mode_lib->vba.HRatio[k])); | |||
1705 | else | |||
1706 | locals->SwathWidthY[k] = locals->SwathWidthSingleDPPY[k] | |||
1707 | / mode_lib->vba.DPPPerPlane[k]; | |||
1708 | } | |||
1709 | ||||
1710 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1711 | if (mode_lib->vba.SourcePixelFormat[k] == dm_444_64) { | |||
1712 | locals->BytePerPixelDETY[k] = 8; | |||
1713 | locals->BytePerPixelDETC[k] = 0; | |||
1714 | } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_32) { | |||
1715 | locals->BytePerPixelDETY[k] = 4; | |||
1716 | locals->BytePerPixelDETC[k] = 0; | |||
1717 | } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_16 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_16) { | |||
1718 | locals->BytePerPixelDETY[k] = 2; | |||
1719 | locals->BytePerPixelDETC[k] = 0; | |||
1720 | } else if (mode_lib->vba.SourcePixelFormat[k] == dm_444_8 || mode_lib->vba.SourcePixelFormat[k] == dm_mono_8) { | |||
1721 | locals->BytePerPixelDETY[k] = 1; | |||
1722 | locals->BytePerPixelDETC[k] = 0; | |||
1723 | } else if (mode_lib->vba.SourcePixelFormat[k] == dm_420_8) { | |||
1724 | locals->BytePerPixelDETY[k] = 1; | |||
1725 | locals->BytePerPixelDETC[k] = 2; | |||
1726 | } else { // dm_420_10 | |||
1727 | locals->BytePerPixelDETY[k] = 4.0 / 3.0; | |||
1728 | locals->BytePerPixelDETC[k] = 8.0 / 3.0; | |||
1729 | } | |||
1730 | } | |||
1731 | ||||
1732 | mode_lib->vba.TotalDataReadBandwidth = 0.0; | |||
1733 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1734 | locals->ReadBandwidthPlaneLuma[k] = locals->SwathWidthSingleDPPY[k] | |||
1735 | * dml_ceil(locals->BytePerPixelDETY[k], 1) | |||
1736 | / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) | |||
1737 | * mode_lib->vba.VRatio[k]; | |||
1738 | locals->ReadBandwidthPlaneChroma[k] = locals->SwathWidthSingleDPPY[k] | |||
1739 | / 2 * dml_ceil(locals->BytePerPixelDETC[k], 2) | |||
1740 | / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) | |||
1741 | * mode_lib->vba.VRatio[k] / 2; | |||
1742 | DTRACE({do { } while(0); } | |||
1743 | " read_bw[%i] = %fBps",{do { } while(0); } | |||
1744 | k,{do { } while(0); } | |||
1745 | locals->ReadBandwidthPlaneLuma[k]{do { } while(0); } | |||
1746 | + locals->ReadBandwidthPlaneChroma[k]){do { } while(0); }; | |||
1747 | mode_lib->vba.TotalDataReadBandwidth += locals->ReadBandwidthPlaneLuma[k] | |||
1748 | + locals->ReadBandwidthPlaneChroma[k]; | |||
1749 | } | |||
1750 | ||||
1751 | // DCFCLK Deep Sleep | |||
1752 | CalculateDCFCLKDeepSleep( | |||
1753 | mode_lib, | |||
1754 | mode_lib->vba.NumberOfActivePlanes, | |||
1755 | locals->BytePerPixelDETY, | |||
1756 | locals->BytePerPixelDETC, | |||
1757 | mode_lib->vba.VRatio, | |||
1758 | locals->SwathWidthY, | |||
1759 | mode_lib->vba.DPPPerPlane, | |||
1760 | mode_lib->vba.HRatio, | |||
1761 | mode_lib->vba.PixelClock, | |||
1762 | locals->PSCL_THROUGHPUT_LUMA, | |||
1763 | locals->PSCL_THROUGHPUT_CHROMA, | |||
1764 | locals->DPPCLK, | |||
1765 | &mode_lib->vba.DCFCLKDeepSleep); | |||
1766 | ||||
1767 | // DSCCLK | |||
1768 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1769 | if ((mode_lib->vba.BlendingAndTiming[k] != k) || !mode_lib->vba.DSCEnabled[k]) { | |||
1770 | locals->DSCCLK_calculated[k] = 0.0; | |||
1771 | } else { | |||
1772 | if (mode_lib->vba.OutputFormat[k] == dm_420 | |||
1773 | || mode_lib->vba.OutputFormat[k] == dm_n422) | |||
1774 | mode_lib->vba.DSCFormatFactor = 2; | |||
1775 | else | |||
1776 | mode_lib->vba.DSCFormatFactor = 1; | |||
1777 | if (mode_lib->vba.ODMCombineEnabled[k]) | |||
1778 | locals->DSCCLK_calculated[k] = | |||
1779 | mode_lib->vba.PixelClockBackEnd[k] / 6 | |||
1780 | / mode_lib->vba.DSCFormatFactor | |||
1781 | / (1 | |||
1782 | - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading | |||
1783 | / 100); | |||
1784 | else | |||
1785 | locals->DSCCLK_calculated[k] = | |||
1786 | mode_lib->vba.PixelClockBackEnd[k] / 3 | |||
1787 | / mode_lib->vba.DSCFormatFactor | |||
1788 | / (1 | |||
1789 | - mode_lib->vba.DISPCLKDPPCLKDSCCLKDownSpreading | |||
1790 | / 100); | |||
1791 | } | |||
1792 | } | |||
1793 | ||||
1794 | // DSC Delay | |||
1795 | // TODO | |||
1796 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1797 | double bpp = mode_lib->vba.OutputBpp[k]; | |||
1798 | unsigned int slices = mode_lib->vba.NumberOfDSCSlices[k]; | |||
1799 | ||||
1800 | if (mode_lib->vba.DSCEnabled[k] && bpp != 0) { | |||
1801 | if (!mode_lib->vba.ODMCombineEnabled[k]) { | |||
1802 | locals->DSCDelay[k] = | |||
1803 | dscceComputeDelay( | |||
1804 | mode_lib->vba.DSCInputBitPerComponent[k], | |||
1805 | bpp, | |||
1806 | dml_ceil( | |||
1807 | (double) mode_lib->vba.HActive[k] | |||
1808 | / mode_lib->vba.NumberOfDSCSlices[k], | |||
1809 | 1), | |||
1810 | slices, | |||
1811 | mode_lib->vba.OutputFormat[k]) | |||
1812 | + dscComputeDelay( | |||
1813 | mode_lib->vba.OutputFormat[k]); | |||
1814 | } else { | |||
1815 | locals->DSCDelay[k] = | |||
1816 | 2 | |||
1817 | * (dscceComputeDelay( | |||
1818 | mode_lib->vba.DSCInputBitPerComponent[k], | |||
1819 | bpp, | |||
1820 | dml_ceil( | |||
1821 | (double) mode_lib->vba.HActive[k] | |||
1822 | / mode_lib->vba.NumberOfDSCSlices[k], | |||
1823 | 1), | |||
1824 | slices / 2.0, | |||
1825 | mode_lib->vba.OutputFormat[k]) | |||
1826 | + dscComputeDelay( | |||
1827 | mode_lib->vba.OutputFormat[k])); | |||
1828 | } | |||
1829 | locals->DSCDelay[k] = locals->DSCDelay[k] | |||
1830 | * mode_lib->vba.PixelClock[k] | |||
1831 | / mode_lib->vba.PixelClockBackEnd[k]; | |||
1832 | } else { | |||
1833 | locals->DSCDelay[k] = 0; | |||
1834 | } | |||
1835 | } | |||
1836 | ||||
1837 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) | |||
1838 | for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) // NumberOfPlanes | |||
1839 | if (j != k && mode_lib->vba.BlendingAndTiming[k] == j | |||
1840 | && mode_lib->vba.DSCEnabled[j]) | |||
1841 | locals->DSCDelay[k] = locals->DSCDelay[j]; | |||
1842 | ||||
1843 | // Prefetch | |||
1844 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
1845 | unsigned int PDEAndMetaPTEBytesFrameY; | |||
1846 | unsigned int PixelPTEBytesPerRowY; | |||
1847 | unsigned int MetaRowByteY; | |||
1848 | unsigned int MetaRowByteC; | |||
1849 | unsigned int PDEAndMetaPTEBytesFrameC; | |||
1850 | unsigned int PixelPTEBytesPerRowC; | |||
1851 | bool_Bool PTEBufferSizeNotExceededY; | |||
1852 | bool_Bool PTEBufferSizeNotExceededC; | |||
1853 | ||||
1854 | Calculate256BBlockSizes( | |||
1855 | mode_lib->vba.SourcePixelFormat[k], | |||
1856 | mode_lib->vba.SurfaceTiling[k], | |||
1857 | dml_ceil(locals->BytePerPixelDETY[k], 1), | |||
1858 | dml_ceil(locals->BytePerPixelDETC[k], 2), | |||
1859 | &locals->BlockHeight256BytesY[k], | |||
1860 | &locals->BlockHeight256BytesC[k], | |||
1861 | &locals->BlockWidth256BytesY[k], | |||
1862 | &locals->BlockWidth256BytesC[k]); | |||
1863 | ||||
1864 | locals->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( | |||
1865 | mode_lib, | |||
1866 | mode_lib->vba.VRatio[k], | |||
1867 | mode_lib->vba.vtaps[k], | |||
1868 | mode_lib->vba.Interlace[k], | |||
1869 | mode_lib->vba.ProgressiveToInterlaceUnitInOPP, | |||
1870 | mode_lib->vba.SwathHeightY[k], | |||
1871 | mode_lib->vba.ViewportYStartY[k], | |||
1872 | &locals->VInitPreFillY[k], | |||
1873 | &locals->MaxNumSwathY[k]); | |||
1874 | ||||
1875 | if ((mode_lib->vba.SourcePixelFormat[k] != dm_444_64 | |||
1876 | && mode_lib->vba.SourcePixelFormat[k] != dm_444_32 | |||
1877 | && mode_lib->vba.SourcePixelFormat[k] != dm_444_16 | |||
1878 | && mode_lib->vba.SourcePixelFormat[k] != dm_444_8)) { | |||
1879 | PDEAndMetaPTEBytesFrameC = | |||
1880 | CalculateVMAndRowBytes( | |||
1881 | mode_lib, | |||
1882 | mode_lib->vba.DCCEnable[k], | |||
1883 | locals->BlockHeight256BytesC[k], | |||
1884 | locals->BlockWidth256BytesC[k], | |||
1885 | mode_lib->vba.SourcePixelFormat[k], | |||
1886 | mode_lib->vba.SurfaceTiling[k], | |||
1887 | dml_ceil( | |||
1888 | locals->BytePerPixelDETC[k], | |||
1889 | 2), | |||
1890 | mode_lib->vba.SourceScan[k], | |||
1891 | mode_lib->vba.ViewportWidth[k] / 2, | |||
1892 | mode_lib->vba.ViewportHeight[k] / 2, | |||
1893 | locals->SwathWidthY[k] / 2, | |||
1894 | mode_lib->vba.GPUVMEnable, | |||
1895 | mode_lib->vba.HostVMEnable, | |||
1896 | mode_lib->vba.HostVMMaxPageTableLevels, | |||
1897 | mode_lib->vba.HostVMCachedPageTableLevels, | |||
1898 | mode_lib->vba.VMMPageSize, | |||
1899 | mode_lib->vba.PTEBufferSizeInRequestsChroma, | |||
1900 | mode_lib->vba.PitchC[k], | |||
1901 | mode_lib->vba.DCCMetaPitchC[k], | |||
1902 | &locals->MacroTileWidthC[k], | |||
1903 | &MetaRowByteC, | |||
1904 | &PixelPTEBytesPerRowC, | |||
1905 | &PTEBufferSizeNotExceededC, | |||
1906 | &locals->dpte_row_width_chroma_ub[k], | |||
1907 | &locals->dpte_row_height_chroma[k], | |||
1908 | &locals->meta_req_width_chroma[k], | |||
1909 | &locals->meta_req_height_chroma[k], | |||
1910 | &locals->meta_row_width_chroma[k], | |||
1911 | &locals->meta_row_height_chroma[k], | |||
1912 | &locals->vm_group_bytes_chroma, | |||
1913 | &locals->dpte_group_bytes_chroma, | |||
1914 | &locals->PixelPTEReqWidthC[k], | |||
1915 | &locals->PixelPTEReqHeightC[k], | |||
1916 | &locals->PTERequestSizeC[k], | |||
1917 | &locals->dpde0_bytes_per_frame_ub_c[k], | |||
1918 | &locals->meta_pte_bytes_per_frame_ub_c[k]); | |||
1919 | ||||
1920 | locals->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( | |||
1921 | mode_lib, | |||
1922 | mode_lib->vba.VRatio[k] / 2, | |||
1923 | mode_lib->vba.VTAPsChroma[k], | |||
1924 | mode_lib->vba.Interlace[k], | |||
1925 | mode_lib->vba.ProgressiveToInterlaceUnitInOPP, | |||
1926 | mode_lib->vba.SwathHeightC[k], | |||
1927 | mode_lib->vba.ViewportYStartC[k], | |||
1928 | &locals->VInitPreFillC[k], | |||
1929 | &locals->MaxNumSwathC[k]); | |||
1930 | } else { | |||
1931 | PixelPTEBytesPerRowC = 0; | |||
1932 | PDEAndMetaPTEBytesFrameC = 0; | |||
1933 | MetaRowByteC = 0; | |||
1934 | locals->MaxNumSwathC[k] = 0; | |||
1935 | locals->PrefetchSourceLinesC[k] = 0; | |||
1936 | locals->PTEBufferSizeInRequestsForLuma = mode_lib->vba.PTEBufferSizeInRequestsLuma + mode_lib->vba.PTEBufferSizeInRequestsChroma; | |||
1937 | } | |||
1938 | ||||
1939 | PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( | |||
1940 | mode_lib, | |||
1941 | mode_lib->vba.DCCEnable[k], | |||
1942 | locals->BlockHeight256BytesY[k], | |||
1943 | locals->BlockWidth256BytesY[k], | |||
1944 | mode_lib->vba.SourcePixelFormat[k], | |||
1945 | mode_lib->vba.SurfaceTiling[k], | |||
1946 | dml_ceil(locals->BytePerPixelDETY[k], 1), | |||
1947 | mode_lib->vba.SourceScan[k], | |||
1948 | mode_lib->vba.ViewportWidth[k], | |||
1949 | mode_lib->vba.ViewportHeight[k], | |||
1950 | locals->SwathWidthY[k], | |||
1951 | mode_lib->vba.GPUVMEnable, | |||
1952 | mode_lib->vba.HostVMEnable, | |||
1953 | mode_lib->vba.HostVMMaxPageTableLevels, | |||
1954 | mode_lib->vba.HostVMCachedPageTableLevels, | |||
1955 | mode_lib->vba.VMMPageSize, | |||
1956 | locals->PTEBufferSizeInRequestsForLuma, | |||
1957 | mode_lib->vba.PitchY[k], | |||
1958 | mode_lib->vba.DCCMetaPitchY[k], | |||
1959 | &locals->MacroTileWidthY[k], | |||
1960 | &MetaRowByteY, | |||
1961 | &PixelPTEBytesPerRowY, | |||
1962 | &PTEBufferSizeNotExceededY, | |||
1963 | &locals->dpte_row_width_luma_ub[k], | |||
1964 | &locals->dpte_row_height[k], | |||
1965 | &locals->meta_req_width[k], | |||
1966 | &locals->meta_req_height[k], | |||
1967 | &locals->meta_row_width[k], | |||
1968 | &locals->meta_row_height[k], | |||
1969 | &locals->vm_group_bytes[k], | |||
1970 | &locals->dpte_group_bytes[k], | |||
1971 | &locals->PixelPTEReqWidthY[k], | |||
1972 | &locals->PixelPTEReqHeightY[k], | |||
1973 | &locals->PTERequestSizeY[k], | |||
1974 | &locals->dpde0_bytes_per_frame_ub_l[k], | |||
1975 | &locals->meta_pte_bytes_per_frame_ub_l[k]); | |||
1976 | ||||
1977 | locals->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; | |||
1978 | locals->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY | |||
1979 | + PDEAndMetaPTEBytesFrameC; | |||
1980 | locals->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; | |||
1981 | ||||
1982 | CalculateActiveRowBandwidth( | |||
1983 | mode_lib->vba.GPUVMEnable, | |||
1984 | mode_lib->vba.SourcePixelFormat[k], | |||
1985 | mode_lib->vba.VRatio[k], | |||
1986 | mode_lib->vba.DCCEnable[k], | |||
1987 | mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], | |||
1988 | MetaRowByteY, | |||
1989 | MetaRowByteC, | |||
1990 | locals->meta_row_height[k], | |||
1991 | locals->meta_row_height_chroma[k], | |||
1992 | PixelPTEBytesPerRowY, | |||
1993 | PixelPTEBytesPerRowC, | |||
1994 | locals->dpte_row_height[k], | |||
1995 | locals->dpte_row_height_chroma[k], | |||
1996 | &locals->meta_row_bw[k], | |||
1997 | &locals->dpte_row_bw[k]); | |||
1998 | } | |||
1999 | ||||
2000 | mode_lib->vba.TotalDCCActiveDPP = 0; | |||
2001 | mode_lib->vba.TotalActiveDPP = 0; | |||
2002 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2003 | mode_lib->vba.TotalActiveDPP = mode_lib->vba.TotalActiveDPP | |||
2004 | + mode_lib->vba.DPPPerPlane[k]; | |||
2005 | if (mode_lib->vba.DCCEnable[k]) | |||
2006 | mode_lib->vba.TotalDCCActiveDPP = mode_lib->vba.TotalDCCActiveDPP | |||
2007 | + mode_lib->vba.DPPPerPlane[k]; | |||
2008 | } | |||
2009 | ||||
2010 | mode_lib->vba.UrgentOutOfOrderReturnPerChannel = dml_max3( | |||
2011 | mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, | |||
2012 | mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, | |||
2013 | mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); | |||
2014 | ||||
2015 | mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency = | |||
2016 | (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLK | |||
2017 | + mode_lib->vba.UrgentOutOfOrderReturnPerChannel | |||
2018 | * mode_lib->vba.NumberOfChannels | |||
2019 | / mode_lib->vba.ReturnBW; | |||
2020 | ||||
2021 | mode_lib->vba.UrgentExtraLatency = CalculateExtraLatency( | |||
2022 | mode_lib->vba.UrgentRoundTripAndOutOfOrderLatency, | |||
2023 | mode_lib->vba.TotalActiveDPP, | |||
2024 | mode_lib->vba.PixelChunkSizeInKByte, | |||
2025 | mode_lib->vba.TotalDCCActiveDPP, | |||
2026 | mode_lib->vba.MetaChunkSize, | |||
2027 | mode_lib->vba.ReturnBW, | |||
2028 | mode_lib->vba.GPUVMEnable, | |||
2029 | mode_lib->vba.HostVMEnable, | |||
2030 | mode_lib->vba.NumberOfActivePlanes, | |||
2031 | mode_lib->vba.DPPPerPlane, | |||
2032 | locals->dpte_group_bytes, | |||
2033 | mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
2034 | mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
2035 | mode_lib->vba.HostVMMaxPageTableLevels, | |||
2036 | mode_lib->vba.HostVMCachedPageTableLevels); | |||
2037 | ||||
2038 | ||||
2039 | mode_lib->vba.TCalc = 24.0 / mode_lib->vba.DCFCLKDeepSleep; | |||
2040 | ||||
2041 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2042 | if (mode_lib->vba.BlendingAndTiming[k] == k) { | |||
2043 | if (mode_lib->vba.WritebackEnable[k] == true1) { | |||
2044 | locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = | |||
2045 | mode_lib->vba.WritebackLatency | |||
2046 | + CalculateWriteBackDelay( | |||
2047 | mode_lib->vba.WritebackPixelFormat[k], | |||
2048 | mode_lib->vba.WritebackHRatio[k], | |||
2049 | mode_lib->vba.WritebackVRatio[k], | |||
2050 | mode_lib->vba.WritebackLumaHTaps[k], | |||
2051 | mode_lib->vba.WritebackLumaVTaps[k], | |||
2052 | mode_lib->vba.WritebackChromaHTaps[k], | |||
2053 | mode_lib->vba.WritebackChromaVTaps[k], | |||
2054 | mode_lib->vba.WritebackDestinationWidth[k]) | |||
2055 | / mode_lib->vba.DISPCLK; | |||
2056 | } else | |||
2057 | locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = 0; | |||
2058 | for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) { | |||
2059 | if (mode_lib->vba.BlendingAndTiming[j] == k | |||
2060 | && mode_lib->vba.WritebackEnable[j] == true1) { | |||
2061 | locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = | |||
2062 | dml_max( | |||
2063 | locals->WritebackDelay[mode_lib->vba.VoltageLevel][k], | |||
2064 | mode_lib->vba.WritebackLatency | |||
2065 | + CalculateWriteBackDelay( | |||
2066 | mode_lib->vba.WritebackPixelFormat[j], | |||
2067 | mode_lib->vba.WritebackHRatio[j], | |||
2068 | mode_lib->vba.WritebackVRatio[j], | |||
2069 | mode_lib->vba.WritebackLumaHTaps[j], | |||
2070 | mode_lib->vba.WritebackLumaVTaps[j], | |||
2071 | mode_lib->vba.WritebackChromaHTaps[j], | |||
2072 | mode_lib->vba.WritebackChromaVTaps[j], | |||
2073 | mode_lib->vba.WritebackDestinationWidth[j]) | |||
2074 | / mode_lib->vba.DISPCLK); | |||
2075 | } | |||
2076 | } | |||
2077 | } | |||
2078 | } | |||
2079 | ||||
2080 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) | |||
2081 | for (j = 0; j < mode_lib->vba.NumberOfActivePlanes; ++j) | |||
2082 | if (mode_lib->vba.BlendingAndTiming[k] == j) | |||
2083 | locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] = | |||
2084 | locals->WritebackDelay[mode_lib->vba.VoltageLevel][j]; | |||
2085 | ||||
2086 | mode_lib->vba.VStartupLines = 13; | |||
2087 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2088 | locals->MaxVStartupLines[k] = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k] - dml_max(1.0, dml_ceil(locals->WritebackDelay[mode_lib->vba.VoltageLevel][k] / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]), 1)); | |||
2089 | } | |||
2090 | ||||
2091 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) | |||
2092 | locals->MaximumMaxVStartupLines = dml_max(locals->MaximumMaxVStartupLines, locals->MaxVStartupLines[k]); | |||
2093 | ||||
2094 | // We don't really care to iterate between the various prefetch modes | |||
2095 | //mode_lib->vba.PrefetchERROR = CalculateMinAndMaxPrefetchMode(mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &mode_lib->vba.MinPrefetchMode, &mode_lib->vba.MaxPrefetchMode); | |||
2096 | mode_lib->vba.UrgentLatency = dml_max3(mode_lib->vba.UrgentLatencyPixelDataOnly, mode_lib->vba.UrgentLatencyPixelMixedWithVMData, mode_lib->vba.UrgentLatencyVMDataOnly); | |||
2097 | ||||
2098 | do { | |||
2099 | double MaxTotalRDBandwidth = 0; | |||
2100 | double MaxTotalRDBandwidthNoUrgentBurst = 0; | |||
2101 | bool_Bool DestinationLineTimesForPrefetchLessThan2 = false0; | |||
2102 | bool_Bool VRatioPrefetchMoreThan4 = false0; | |||
2103 | double TWait = CalculateTWait( | |||
2104 | mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], | |||
2105 | mode_lib->vba.DRAMClockChangeLatency, | |||
2106 | mode_lib->vba.UrgentLatency, | |||
2107 | mode_lib->vba.SREnterPlusExitTime); | |||
2108 | ||||
2109 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2110 | Pipe myPipe; | |||
2111 | HostVM myHostVM; | |||
2112 | ||||
2113 | if (mode_lib->vba.XFCEnabled[k] == true1) { | |||
2114 | mode_lib->vba.XFCRemoteSurfaceFlipDelay = | |||
2115 | CalculateRemoteSurfaceFlipDelay( | |||
2116 | mode_lib, | |||
2117 | mode_lib->vba.VRatio[k], | |||
2118 | locals->SwathWidthY[k], | |||
2119 | dml_ceil( | |||
2120 | locals->BytePerPixelDETY[k], | |||
2121 | 1), | |||
2122 | mode_lib->vba.HTotal[k] | |||
2123 | / mode_lib->vba.PixelClock[k], | |||
2124 | mode_lib->vba.XFCTSlvVupdateOffset, | |||
2125 | mode_lib->vba.XFCTSlvVupdateWidth, | |||
2126 | mode_lib->vba.XFCTSlvVreadyOffset, | |||
2127 | mode_lib->vba.XFCXBUFLatencyTolerance, | |||
2128 | mode_lib->vba.XFCFillBWOverhead, | |||
2129 | mode_lib->vba.XFCSlvChunkSize, | |||
2130 | mode_lib->vba.XFCBusTransportTime, | |||
2131 | mode_lib->vba.TCalc, | |||
2132 | TWait, | |||
2133 | &mode_lib->vba.SrcActiveDrainRate, | |||
2134 | &mode_lib->vba.TInitXFill, | |||
2135 | &mode_lib->vba.TslvChk); | |||
2136 | } else { | |||
2137 | mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0; | |||
2138 | } | |||
2139 | ||||
2140 | myPipe.DPPCLK = locals->DPPCLK[k]; | |||
2141 | myPipe.DISPCLK = mode_lib->vba.DISPCLK; | |||
2142 | myPipe.PixelClock = mode_lib->vba.PixelClock[k]; | |||
2143 | myPipe.DCFCLKDeepSleep = mode_lib->vba.DCFCLKDeepSleep; | |||
2144 | myPipe.DPPPerPlane = mode_lib->vba.DPPPerPlane[k]; | |||
2145 | myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; | |||
2146 | myPipe.SourceScan = mode_lib->vba.SourceScan[k]; | |||
2147 | myPipe.BlockWidth256BytesY = locals->BlockWidth256BytesY[k]; | |||
2148 | myPipe.BlockHeight256BytesY = locals->BlockHeight256BytesY[k]; | |||
2149 | myPipe.BlockWidth256BytesC = locals->BlockWidth256BytesC[k]; | |||
2150 | myPipe.BlockHeight256BytesC = locals->BlockHeight256BytesC[k]; | |||
2151 | myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; | |||
2152 | myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; | |||
2153 | myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; | |||
2154 | myPipe.HTotal = mode_lib->vba.HTotal[k]; | |||
2155 | ||||
2156 | ||||
2157 | myHostVM.Enable = mode_lib->vba.HostVMEnable; | |||
2158 | myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels; | |||
2159 | myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels; | |||
2160 | ||||
2161 | mode_lib->vba.ErrorResult[k] = | |||
2162 | CalculatePrefetchSchedule( | |||
2163 | mode_lib, | |||
2164 | mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
2165 | mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
2166 | &myPipe, | |||
2167 | locals->DSCDelay[k], | |||
2168 | mode_lib->vba.DPPCLKDelaySubtotal, | |||
2169 | mode_lib->vba.DPPCLKDelaySCL, | |||
2170 | mode_lib->vba.DPPCLKDelaySCLLBOnly, | |||
2171 | mode_lib->vba.DPPCLKDelayCNVCFormater, | |||
2172 | mode_lib->vba.DPPCLKDelayCNVCCursor, | |||
2173 | mode_lib->vba.DISPCLKDelaySubtotal, | |||
2174 | (unsigned int) (locals->SwathWidthY[k] | |||
2175 | / mode_lib->vba.HRatio[k]), | |||
2176 | mode_lib->vba.OutputFormat[k], | |||
2177 | mode_lib->vba.MaxInterDCNTileRepeaters, | |||
2178 | dml_min(mode_lib->vba.VStartupLines, locals->MaxVStartupLines[k]), | |||
2179 | locals->MaxVStartupLines[k], | |||
2180 | mode_lib->vba.GPUVMMaxPageTableLevels, | |||
2181 | mode_lib->vba.GPUVMEnable, | |||
2182 | &myHostVM, | |||
2183 | mode_lib->vba.DynamicMetadataEnable[k], | |||
2184 | mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k], | |||
2185 | mode_lib->vba.DynamicMetadataTransmittedBytes[k], | |||
2186 | mode_lib->vba.DCCEnable[k], | |||
2187 | mode_lib->vba.UrgentLatency, | |||
2188 | mode_lib->vba.UrgentExtraLatency, | |||
2189 | mode_lib->vba.TCalc, | |||
2190 | locals->PDEAndMetaPTEBytesFrame[k], | |||
2191 | locals->MetaRowByte[k], | |||
2192 | locals->PixelPTEBytesPerRow[k], | |||
2193 | locals->PrefetchSourceLinesY[k], | |||
2194 | locals->SwathWidthY[k], | |||
2195 | locals->BytePerPixelDETY[k], | |||
2196 | locals->VInitPreFillY[k], | |||
2197 | locals->MaxNumSwathY[k], | |||
2198 | locals->PrefetchSourceLinesC[k], | |||
2199 | locals->BytePerPixelDETC[k], | |||
2200 | locals->VInitPreFillC[k], | |||
2201 | locals->MaxNumSwathC[k], | |||
2202 | mode_lib->vba.SwathHeightY[k], | |||
2203 | mode_lib->vba.SwathHeightC[k], | |||
2204 | TWait, | |||
2205 | mode_lib->vba.XFCEnabled[k], | |||
2206 | mode_lib->vba.XFCRemoteSurfaceFlipDelay, | |||
2207 | mode_lib->vba.ProgressiveToInterlaceUnitInOPP, | |||
2208 | &locals->DSTXAfterScaler[k], | |||
2209 | &locals->DSTYAfterScaler[k], | |||
2210 | &locals->DestinationLinesForPrefetch[k], | |||
2211 | &locals->PrefetchBandwidth[k], | |||
2212 | &locals->DestinationLinesToRequestVMInVBlank[k], | |||
2213 | &locals->DestinationLinesToRequestRowInVBlank[k], | |||
2214 | &locals->VRatioPrefetchY[k], | |||
2215 | &locals->VRatioPrefetchC[k], | |||
2216 | &locals->RequiredPrefetchPixDataBWLuma[k], | |||
2217 | &locals->RequiredPrefetchPixDataBWChroma[k], | |||
2218 | &locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata, | |||
2219 | &locals->Tno_bw[k], | |||
2220 | &locals->prefetch_vmrow_bw[k], | |||
2221 | &locals->swath_width_luma_ub[k], | |||
2222 | &locals->swath_width_chroma_ub[k], | |||
2223 | &mode_lib->vba.VUpdateOffsetPix[k], | |||
2224 | &mode_lib->vba.VUpdateWidthPix[k], | |||
2225 | &mode_lib->vba.VReadyOffsetPix[k]); | |||
2226 | if (mode_lib->vba.BlendingAndTiming[k] == k) { | |||
2227 | locals->VStartup[k] = dml_min( | |||
2228 | mode_lib->vba.VStartupLines, | |||
2229 | locals->MaxVStartupLines[k]); | |||
2230 | if (locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata | |||
2231 | != 0) { | |||
2232 | locals->VStartup[k] = | |||
2233 | locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata; | |||
2234 | } | |||
2235 | } else { | |||
2236 | locals->VStartup[k] = | |||
2237 | dml_min( | |||
2238 | mode_lib->vba.VStartupLines, | |||
2239 | locals->MaxVStartupLines[mode_lib->vba.BlendingAndTiming[k]]); | |||
2240 | } | |||
2241 | } | |||
2242 | ||||
2243 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2244 | unsigned int m; | |||
2245 | ||||
2246 | locals->cursor_bw[k] = 0; | |||
2247 | locals->cursor_bw_pre[k] = 0; | |||
2248 | for (m = 0; m < mode_lib->vba.NumberOfCursors[k]; m++) { | |||
2249 | locals->cursor_bw[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; | |||
2250 | locals->cursor_bw_pre[k] += mode_lib->vba.CursorWidth[k][m] * mode_lib->vba.CursorBPP[k][m] / 8.0 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * locals->VRatioPrefetchY[k]; | |||
2251 | } | |||
2252 | ||||
2253 | CalculateUrgentBurstFactor( | |||
2254 | mode_lib->vba.DETBufferSizeInKByte[0], | |||
2255 | mode_lib->vba.SwathHeightY[k], | |||
2256 | mode_lib->vba.SwathHeightC[k], | |||
2257 | locals->SwathWidthY[k], | |||
2258 | mode_lib->vba.HTotal[k] / | |||
2259 | mode_lib->vba.PixelClock[k], | |||
2260 | mode_lib->vba.UrgentLatency, | |||
2261 | mode_lib->vba.CursorBufferSize, | |||
2262 | mode_lib->vba.CursorWidth[k][0] + mode_lib->vba.CursorWidth[k][1], | |||
2263 | dml_max(mode_lib->vba.CursorBPP[k][0], mode_lib->vba.CursorBPP[k][1]), | |||
2264 | mode_lib->vba.VRatio[k], | |||
2265 | locals->VRatioPrefetchY[k], | |||
2266 | locals->VRatioPrefetchC[k], | |||
2267 | locals->BytePerPixelDETY[k], | |||
2268 | locals->BytePerPixelDETC[k], | |||
2269 | &locals->UrgentBurstFactorCursor[k], | |||
2270 | &locals->UrgentBurstFactorCursorPre[k], | |||
2271 | &locals->UrgentBurstFactorLuma[k], | |||
2272 | &locals->UrgentBurstFactorLumaPre[k], | |||
2273 | &locals->UrgentBurstFactorChroma[k], | |||
2274 | &locals->UrgentBurstFactorChromaPre[k], | |||
2275 | &locals->NotEnoughUrgentLatencyHiding[0][0], | |||
2276 | &locals->NotEnoughUrgentLatencyHidingPre); | |||
2277 | ||||
2278 | if (mode_lib->vba.UseUrgentBurstBandwidth == false0) { | |||
2279 | locals->UrgentBurstFactorLuma[k] = 1; | |||
2280 | locals->UrgentBurstFactorChroma[k] = 1; | |||
2281 | locals->UrgentBurstFactorCursor[k] = 1; | |||
2282 | locals->UrgentBurstFactorLumaPre[k] = 1; | |||
2283 | locals->UrgentBurstFactorChromaPre[k] = 1; | |||
2284 | locals->UrgentBurstFactorCursorPre[k] = 1; | |||
2285 | } | |||
2286 | ||||
2287 | MaxTotalRDBandwidth = MaxTotalRDBandwidth + | |||
2288 | dml_max3(locals->prefetch_vmrow_bw[k], | |||
2289 | locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] | |||
2290 | + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] | |||
2291 | * locals->UrgentBurstFactorCursor[k] + locals->meta_row_bw[k] + locals->dpte_row_bw[k], | |||
2292 | locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + locals->RequiredPrefetchPixDataBWChroma[k] | |||
2293 | * locals->UrgentBurstFactorChromaPre[k] + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); | |||
2294 | ||||
2295 | MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst + | |||
2296 | dml_max3(locals->prefetch_vmrow_bw[k], | |||
2297 | locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k] | |||
2298 | + locals->meta_row_bw[k] + locals->dpte_row_bw[k], | |||
2299 | locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); | |||
2300 | ||||
2301 | if (locals->DestinationLinesForPrefetch[k] < 2) | |||
2302 | DestinationLineTimesForPrefetchLessThan2 = true1; | |||
2303 | if (locals->VRatioPrefetchY[k] > 4 || locals->VRatioPrefetchC[k] > 4) | |||
2304 | VRatioPrefetchMoreThan4 = true1; | |||
2305 | } | |||
2306 | mode_lib->vba.FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / mode_lib->vba.ReturnBW; | |||
2307 | ||||
2308 | if (MaxTotalRDBandwidth <= mode_lib->vba.ReturnBW && locals->NotEnoughUrgentLatencyHiding[0][0] == 0 && | |||
2309 | locals->NotEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 | |||
2310 | && !DestinationLineTimesForPrefetchLessThan2) | |||
2311 | mode_lib->vba.PrefetchModeSupported = true1; | |||
2312 | else { | |||
2313 | mode_lib->vba.PrefetchModeSupported = false0; | |||
2314 | dml_print({do { } while(0); } | |||
2315 | "DML: CalculatePrefetchSchedule ***failed***. Bandwidth violation. Results are NOT valid\n"){do { } while(0); }; | |||
2316 | } | |||
2317 | ||||
2318 | if (mode_lib->vba.PrefetchModeSupported
| |||
2319 | mode_lib->vba.BandwidthAvailableForImmediateFlip = mode_lib->vba.ReturnBW; | |||
2320 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2321 | mode_lib->vba.BandwidthAvailableForImmediateFlip = | |||
2322 | mode_lib->vba.BandwidthAvailableForImmediateFlip | |||
2323 | - dml_max( | |||
2324 | locals->ReadBandwidthPlaneLuma[k] * locals->UrgentBurstFactorLuma[k] | |||
2325 | + locals->ReadBandwidthPlaneChroma[k] * locals->UrgentBurstFactorChroma[k] | |||
2326 | + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], | |||
2327 | locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] + | |||
2328 | locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] + | |||
2329 | locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); | |||
2330 | } | |||
2331 | ||||
2332 | mode_lib->vba.TotImmediateFlipBytes = 0; | |||
2333 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2334 | mode_lib->vba.TotImmediateFlipBytes = mode_lib->vba.TotImmediateFlipBytes + locals->PDEAndMetaPTEBytesFrame[k] + locals->MetaRowByte[k] + locals->PixelPTEBytesPerRow[k]; | |||
2335 | } | |||
2336 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2337 | CalculateFlipSchedule( | |||
2338 | mode_lib, | |||
2339 | mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData, | |||
2340 | mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly, | |||
2341 | mode_lib->vba.UrgentExtraLatency, | |||
2342 | mode_lib->vba.UrgentLatency, | |||
2343 | mode_lib->vba.GPUVMMaxPageTableLevels, | |||
2344 | mode_lib->vba.HostVMEnable, | |||
2345 | mode_lib->vba.HostVMMaxPageTableLevels, | |||
2346 | mode_lib->vba.HostVMCachedPageTableLevels, | |||
2347 | mode_lib->vba.GPUVMEnable, | |||
2348 | locals->PDEAndMetaPTEBytesFrame[k], | |||
2349 | locals->MetaRowByte[k], | |||
2350 | locals->PixelPTEBytesPerRow[k], | |||
2351 | mode_lib->vba.BandwidthAvailableForImmediateFlip, | |||
2352 | mode_lib->vba.TotImmediateFlipBytes, | |||
2353 | mode_lib->vba.SourcePixelFormat[k], | |||
2354 | mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k], | |||
2355 | mode_lib->vba.VRatio[k], | |||
2356 | locals->Tno_bw[k], | |||
2357 | mode_lib->vba.DCCEnable[k], | |||
2358 | locals->dpte_row_height[k], | |||
2359 | locals->meta_row_height[k], | |||
2360 | locals->dpte_row_height_chroma[k], | |||
2361 | locals->meta_row_height_chroma[k], | |||
2362 | &locals->DestinationLinesToRequestVMInImmediateFlip[k], | |||
2363 | &locals->DestinationLinesToRequestRowInImmediateFlip[k], | |||
2364 | &locals->final_flip_bw[k], | |||
2365 | &locals->ImmediateFlipSupportedForPipe[k]); | |||
2366 | } | |||
2367 | mode_lib->vba.total_dcn_read_bw_with_flip = 0.0; | |||
2368 | mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; | |||
2369 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2370 | mode_lib->vba.total_dcn_read_bw_with_flip = | |||
2371 | mode_lib->vba.total_dcn_read_bw_with_flip + dml_max3( | |||
2372 | locals->prefetch_vmrow_bw[k], | |||
2373 | locals->final_flip_bw[k] + locals->ReadBandwidthLuma[k] * locals->UrgentBurstFactorLuma[k] | |||
2374 | + locals->ReadBandwidthChroma[k] * locals->UrgentBurstFactorChroma[k] + locals->cursor_bw[k] * locals->UrgentBurstFactorCursor[k], | |||
2375 | locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] * locals->UrgentBurstFactorLumaPre[k] | |||
2376 | + locals->RequiredPrefetchPixDataBWChroma[k] * locals->UrgentBurstFactorChromaPre[k] | |||
2377 | + locals->cursor_bw_pre[k] * locals->UrgentBurstFactorCursorPre[k]); | |||
2378 | mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst = | |||
2379 | mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst + | |||
2380 | dml_max3(locals->prefetch_vmrow_bw[k], | |||
2381 | locals->final_flip_bw[k] + locals->ReadBandwidthPlaneLuma[k] + locals->ReadBandwidthPlaneChroma[k] + locals->cursor_bw[k], | |||
2382 | locals->final_flip_bw[k] + locals->RequiredPrefetchPixDataBWLuma[k] + locals->RequiredPrefetchPixDataBWChroma[k] + locals->cursor_bw_pre[k]); | |||
2383 | ||||
2384 | } | |||
2385 | mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip = mode_lib->vba.total_dcn_read_bw_with_flip_no_urgent_burst / mode_lib->vba.ReturnBW; | |||
2386 | ||||
2387 | mode_lib->vba.ImmediateFlipSupported = true1; | |||
2388 | if (mode_lib->vba.total_dcn_read_bw_with_flip > mode_lib->vba.ReturnBW) { | |||
2389 | mode_lib->vba.ImmediateFlipSupported = false0; | |||
2390 | } | |||
2391 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2392 | if (locals->ImmediateFlipSupportedForPipe[k] == false0) { | |||
2393 | mode_lib->vba.ImmediateFlipSupported = false0; | |||
2394 | } | |||
2395 | } | |||
2396 | } else { | |||
2397 | mode_lib->vba.ImmediateFlipSupported = false0; | |||
2398 | } | |||
2399 | ||||
2400 | for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { | |||
2401 | if (mode_lib->vba.ErrorResult[k]) { | |||
2402 | mode_lib->vba.PrefetchModeSupported = false0; | |||
2403 | dml_print({do { } while(0); } | |||
2404 | "DML: CalculatePrefetchSchedule ***failed***. Prefetch schedule violation. Results are NOT valid\n"){do { } while(0); }; | |||
2405 | } | |||
2406 | } | |||
2407 | ||||
2408 | mode_lib->vba.VStartupLines = mode_lib->vba.VStartupLines + 1; | |||
2409 | } while (!((mode_lib->vba.PrefetchModeSupported |
33.1 | Field 'PrefetchModeSupported' is false |
39.1 | Field 'TotalActiveWriteback' is <= 1 |
40.1 | Field 'TotalActiveWriteback' is <= 1 |
48 | Assigned value is garbage or undefined |