;//////////////////////////////////////////////////////////////////////////// ;// ;// INTEL CORPORATION PROPRIETARY INFORMATION ;// ;// This software is supplied under the terms of a license ;// agreement or nondisclosure agreement with Intel Corporation ;// and may not be copied or disclosed except in accordance ;// with the terms of that agreement. ;// ;//////////////////////////////////////////////////////////////////////////// ;// ;// $Header: S:\h26x\src\enc\exedtq.inv 1.15 06 Nov 1996 16:18:34 BNICKERS $ ;// ;// $Log: S:\h26x\src\enc\exedtq.inv $ ;// ;// Rev 1.15 06 Nov 1996 16:18:34 BNICKERS ;// Improve performance. ;// ;// Rev 1.14 18 Oct 1996 16:57:14 BNICKERS ;// Fixes for EMV ;// ;// Rev 1.13 10 Oct 1996 16:42:54 BNICKERS ;// Initial debugging of Extended Motion Vectors. ;// ;// Rev 1.12 04 Oct 1996 08:48:00 BNICKERS ;// Add EMV. ;// ;// Rev 1.11 12 Sep 1996 10:56:18 BNICKERS ;// Add arguments for thresholds and differentials. ;// ;// Rev 1.10 22 Jul 1996 15:23:32 BNICKERS ;// Reduce code size. Implement H261 spatial filter. ;// ;// Rev 1.9 25 Jun 1996 14:24:54 BNICKERS ;// Implement heuristic motion estimation for MMX, AP mode. ;// ;// Rev 1.8 14 May 1996 12:18:54 BNICKERS ;// Initial debugging of MMx B-Frame ME. ;// ;// Rev 1.7 03 May 1996 14:03:46 BNICKERS ;// ;// Minor bug fixes and integration refinements. ;// ;// Rev 1.6 02 May 1996 12:00:58 BNICKERS ;// Initial integration of B Frame ME, MMX version. ;// ;// Rev 1.5 16 Apr 1996 16:41:02 BNICKERS ;// Start adding storage for B frame ME. ;// ;// Rev 1.4 10 Apr 1996 13:14:12 BNICKERS ;// Recoding of Motion Estimation, Advanced Prediction. ;// ;// Rev 1.3 05 Apr 1996 12:27:54 BNICKERS ;// Improvements to baseline half pel ME. ;// ;// Rev 1.2 26 Mar 1996 12:00:20 BNICKERS ;// Did some tuning for MMx encode. ;// ;// Rev 1.1 20 Mar 1996 15:26:56 KLILLEVO ;// changed quantization to match IA quantization ;// ;// Rev 1.0 15 Mar 1996 15:54:14 BECHOLS ;// Initial revision. 
;//
;//    Rev 1.0   16 Feb 1996 17:12:12   BNICKERS
;// Initial revision.
;//
;////////////////////////////////////////////////////////////////////////////
;
; exEDTQ.inc -- Include file for MMx versions of Motion Estimation and Frame
;               Differencing, Forward DCT (FDCT), and Quant/RLE.
;
; Every name defined here with TEXTEQU is a text alias for an esp-relative
; memory operand.  StackOffset (and CONST_384, used further on) are not
; defined in this part of the file and must be supplied by the includer.
;
; Storage on local stack frame for variables that survive only for the duration
; of one of the four phases (Motion Est, Frame Diff, FDCT, Quant RLE).  This
; storage is prime in that it is accessed by the 3-byte addressing form,
; esp+8_bit_Disp.  This is particularly important for MMx instructions, which
; would be 8 bytes long if a 32-bit Displacement was used.  There's a penalty
; for such a long instruction.
; (128 bytes; 32:159)

;                          ********************************************
; Motion Estimation Locals * THAT DO NOT SURVIVE DURING OTHER PASSES. *
;                          ********************************************
;
; Several of the names below deliberately share an offset (for example the
; three names at +32 and the three names at +48); they are different views of
; the same scratch storage.

HalfPelMBMESWDAccum        TEXTEQU <[esp+ 0+StackOffset]>; 4 QWORDs + bit bucket
BestOfFourStartingPoints   TEXTEQU HalfPelMBMESWDAccum+4
BitBucket1                 TEXTEQU <[esp+ 32+StackOffset]>; 8 bytes (QWORD)
StashMM6                   TEXTEQU <[esp+ 32+StackOffset]>; QWORD, same slot as BitBucket1
PartSWDForLLBlk            TEXTEQU <[esp+ 32+StackOffset]>; QWORD, same slot as BitBucket1
SWDULandLR                 TEXTEQU <[esp+ 40+StackOffset]>; QWORD
BitBucket2                 TEXTEQU <[esp+ 48+StackOffset]>; QWORD
PartSWDForLRBlk            TEXTEQU <[esp+ 48+StackOffset]>; QWORD, same slot as BitBucket2
Addr0MVRefBlk              TEXTEQU <[esp+ 48+StackOffset]>; DWORD, low half of the +48 QWORD
LimitForSWDForBlkMV        TEXTEQU <[esp+ 52+StackOffset]>; DWORD, high half of the +48 QWORD
SWDURandLL                 TEXTEQU <[esp+ 56+StackOffset]>; QWORD
PartSWDForURBlk            TEXTEQU <[esp+ 64+StackOffset]>; QWORD
SWD0MVURandLL              TEXTEQU <[esp+ 72+StackOffset]>; QWORD
SWD0MVULandLR              TEXTEQU <[esp+ 80+StackOffset]>; QWORD
SWDForNon0MVToBeat         TEXTEQU <[esp+ 88+StackOffset]>; DWORD
BestMBFullPelSWD           TEXTEQU <[esp+ 92+StackOffset]>; DWORD
BestMBHalfPelSWD           TEXTEQU <[esp+ 96+StackOffset]>; DWORD
BestMBHalfPelRefAddr       TEXTEQU <[esp+ 100+StackOffset]>; DWORD
BestHalfPelHorzSWD         TEXTEQU <[esp+ 104+StackOffset]>; DWORD
BestHalfPelVertSWD         TEXTEQU <[esp+ 108+StackOffset]>; DWORD
Addr0MVRef                 TEXTEQU <[esp+ 112+StackOffset]>; DWORD
BestBlockRefAddrVP1        TEXTEQU <[esp+ 116+StackOffset]>; DWORD
BestBlkFullPelSWD          TEXTEQU <[esp+ 120+StackOffset]>; DWORD
SWDForBlock2Or4            TEXTEQU <[esp+ 124+StackOffset]>; DWORD

; Frame Differencing Locals, passed to FDCT.
;
; The output of frame differencing is the input to the forward DCT.
; The intermediate coefficients are also stored here.  This keeps the
; addressing forms as small as possible.  This is particularly important
; for MMx instructions, to keep them 7 bytes or shorter.
; (32:167)

PelDiffs                   TEXTEQU <[esp+StackOffset]>   ; Must stay here!
; NOTE(review): PelDiffsLine0 through PelDiffsLine7 have no operand in this
; copy of the file, so each one defines an empty text macro.  The operands
; look like they were lost when the file was extracted -- TODO restore them
; from the authoritative source before assembling.
PelDiffsLine0              TEXTEQU
PelDiffsLine1              TEXTEQU
PelDiffsLine2              TEXTEQU
PelDiffsLine3              TEXTEQU
PelDiffsLine4              TEXTEQU
PelDiffsLine5              TEXTEQU
PelDiffsLine6              TEXTEQU
PelDiffsLine7              TEXTEQU
Coeffs                     TEXTEQU <[esp+StackOffset+8]> ; 16 QWORDs

;                 *****************************************
; Local variables * THAT SURVIVE FROM ONE PASS TO ANOTHER *
;                 *****************************************
;
; QWORD aligned:
; (184:191)

BlockAbove                 TEXTEQU <[esp+StackOffset+152]> ; 2 DWORDs

;                            ************************************************
; Frame Differencing Locals  * THAT NEED NOT SURVIVE OTHER PASSES (but do). *
;                            ************************************************

; These three blocks of 8*8 storage are needed for the left, right, and
; central remote prediction contributions.
; (192:383)

LeftPred                   TEXTEQU <[esp+StackOffset+160]>
RightPred                  TEXTEQU <[esp+StackOffset+224]>
CentralPred                TEXTEQU <[esp+CONST_384*1+StackOffset-96]>

; Temp space used by Heuristic ME.
TargetSigContribForRowPairs TEXTEQU CentralPred          ; alias: reuses the CentralPred storage

;                 *****************************************
; Local variables * THAT SURVIVE FROM ONE PASS TO ANOTHER *
;                 *****************************************
; (384:511)
;
; All of these are addressed relative to esp+CONST_384*1+StackOffset, with
; displacements from -32 up to +64 in this group.
;
; NOTE(review): SpatiallyFilteredMB, SpatialFiltThreshold,
; SpatialFiltDifferential and CandidateMV have no operand in this copy of the
; file, so each one defines an empty text macro.  The operands look like they
; were lost when the file was extracted -- TODO restore them from the
; authoritative source before assembling.

DoHalfPelME                TEXTEQU <[esp+CONST_384*1+StackOffset- 32]>
; The next four names are one byte apart (-28, -27, -26, -25).
DoBlockLevelVectors        TEXTEQU <[esp+CONST_384*1+StackOffset- 28]>
DoAdvancedPrediction       TEXTEQU <[esp+CONST_384*1+StackOffset- 27]>
DoSpatialFiltering         TEXTEQU <[esp+CONST_384*1+StackOffset- 26]>
IsPlainPFrame              TEXTEQU <[esp+CONST_384*1+StackOffset- 25]>
TargetFrameBaseAddress     TEXTEQU <[esp+CONST_384*1+StackOffset- 24]>
PreviousFrameBaseAddress   TEXTEQU <[esp+CONST_384*1+StackOffset- 20]>
TargToRef                  TEXTEQU <[esp+CONST_384*1+StackOffset- 16]>
BFrameBaseAddress          TEXTEQU <[esp+CONST_384*1+StackOffset- 12]>
SpatiallyFilteredMB        TEXTEQU
BFrameToFuture             TEXTEQU <[esp+CONST_384*1+StackOffset- 8]>
SpatialFiltThreshold       TEXTEQU
PendingOBMC                TEXTEQU <[esp+CONST_384*1+StackOffset- 4]>
SpatialFiltDifferential    TEXTEQU
DistToBADforBlockAbove     TEXTEQU <[esp+CONST_384*1+StackOffset+ 0]>
DistToBADforBlockBelow     TEXTEQU <[esp+CONST_384*1+StackOffset+ 4]>
AddrOfLeftPred             TEXTEQU <[esp+CONST_384*1+StackOffset+ 8]>
AddrOfRightPred            TEXTEQU <[esp+CONST_384*1+StackOffset+ 12]>
Recip2QPToUse              TEXTEQU <[esp+CONST_384*1+StackOffset+ 16]>
QPDiv2                     TEXTEQU <[esp+CONST_384*1+StackOffset+ 20]>
BRecip2QPToUse             TEXTEQU <[esp+CONST_384*1+StackOffset+ 24]>
BQPDiv2                    TEXTEQU <[esp+CONST_384*1+StackOffset+ 28]>
CodeStreamCursor           TEXTEQU <[esp+CONST_384*1+StackOffset+ 32]>
BCodeStreamCursor          TEXTEQU <[esp+CONST_384*1+StackOffset+ 36]>
C00Copy                    TEXTEQU <[esp+CONST_384*1+StackOffset+ 40]>
StashBlockType             TEXTEQU <[esp+CONST_384*1+StackOffset+ 44]>
TargetMacroBlockBaseAddr   TEXTEQU <[esp+CONST_384*1+StackOffset+ 48]>
BestMV                     TEXTEQU <[esp+CONST_384*1+StackOffset+ 52]>
BestMBHalfPelMV            TEXTEQU <[esp+CONST_384*1+StackOffset+ 56]>
CandidateMV                TEXTEQU
SWDTotal                   TEXTEQU <[esp+CONST_384*1+StackOffset+ 60]>
BSWDTotal                  TEXTEQU <[esp+CONST_384*1+StackOffset+ 64]>
; More stack-frame locals addressed relative to esp+CONST_384*1+StackOffset.
BlockActionDescrCursor     TEXTEQU <[esp+CONST_384*1+StackOffset+ 68]>
MBlockActionStream         TEXTEQU BlockActionDescrCursor   ; alias for the same slot
BFrmCBP                    TEXTEQU <[esp+CONST_384*1+StackOffset+ 72]>
PastRefPitchDiv4           TEXTEQU <[esp+CONST_384*1+StackOffset+ 76]>
CurrSWDState               TEXTEQU <[esp+CONST_384*1+StackOffset+ 80]>
StashPartialRefBlkAddr     TEXTEQU <[esp+CONST_384*1+StackOffset+ 84]>
; No name is defined here for displacement +88.
StashESP                   TEXTEQU <[esp+CONST_384*1+StackOffset+ 92]>

; These two arrays use esp+384+96:esp+384+223, and esp+384*2-96:esp+384*2+31.
; (512:639, 704:831)
;
; NOTE(review): the comment above places the second array at esp+384*2-96,
; but WeightBackwardMotion below is defined at esp+384+160, which falls inside
; the first range -- confirm which one is intended.
; NOTE(review): these two definitions use the literal 384 where the rest of
; the file uses CONST_384; whether the two are interchangeable is not visible
; from here -- confirm before changing either.

WeightForwardMotion        TEXTEQU <[esp+384+StackOffset+96]>
WeightBackwardMotion       TEXTEQU <[esp+384+StackOffset+160]>

; 32 more bytes of local variables here:
; (832:863)

DoHeuristicME              TEXTEQU <[esp+CONST_384*2+StackOffset+ 32]>
TargetToSig_Debiased       TEXTEQU <[esp+CONST_384*2+StackOffset+ 36]>
SigToTarget                TEXTEQU <[esp+CONST_384*2+StackOffset+ 40]>
BFrmZeroVectorThreshold    TEXTEQU <[esp+CONST_384*2+StackOffset+ 44]>
EMVLimitsForThisMB         TEXTEQU <[esp+CONST_384*2+StackOffset+ 48]> ; 8 (the next local is 8 bytes further on)
DoExtendedMotionVectors    TEXTEQU <[esp+CONST_384*2+StackOffset+ 56]>
StackSpaceAvailable        TEXTEQU <[esp+CONST_384*2+StackOffset+ 60]>

; Symbols declared here with EXTERNDEF; none of them is defined in this
; include file.

EXTERNDEF C0100010001000100:DWORD
EXTERNDEF C1:DWORD
EXTERNDEF C2:DWORD
EXTERNDEF C3:DWORD
EXTERNDEF C4:DWORD
EXTERNDEF C5:DWORD
EXTERNDEF C6:DWORD
EXTERNDEF C7:DWORD
EXTERNDEF Diff_IdxRefWts:BYTE
EXTERNDEF FutureWt_FF_or_00:DWORD
EXTERNDEF BFrmSWDState:BYTE
EXTERNDEF Pel_Rnd:DWORD
EXTERNDEF LeftRightBlkPosition:DWORD
EXTERNDEF UpDownBlkPosition:DWORD
EXTERNDEF BlkEmptyFlag:BYTE
EXTERNDEF NextZigZagCoeff:BYTE

; Cr0 / Cr4 numeric offsets: Cr0 = 16*r and Cr4 = 16*r + 8, for r = 0..7.
; NOTE(review): presumably byte offsets into the Coeffs area -- confirm
; against the code that uses them.

C00 = 0
C04 = 8
C10 = 16
C14 = 24
C20 = 32
C24 = 40
C30 = 48
C34 = 56
C40 = 64
C44 = 72
C50 = 80
C54 = 88
C60 = 96
C64 = 104
C70 = 112
C74 = 120

; Qrc numeric offsets.  The active definition is Qrc = Cc0 + r.  The trailing
; comment on each line shows a different expression (Cr0 + c, i.e. with the
; two indices the other way round); the reason it is kept is not visible
; here.

Q00 = C00   ; C00
Q01 = C10   ; C00+1
Q02 = C20   ; C00+2
Q03 = C30   ; C00+3
Q04 = C40   ; C00+4
Q05 = C50   ; C00+5
Q06 = C60   ; C00+6
Q07 = C70   ; C00+7
Q10 = C00+1 ; C10
Q11 = C10+1 ; C10+1
Q12 = C20+1 ; C10+2
Q13 = C30+1 ; C10+3
Q14 = C40+1 ; C10+4
Q15 = C50+1 ; C10+5
Q16 = C60+1 ; C10+6
Q17 = C70+1 ; C10+7
Q20 = C00+2 ; C20
Q21 = C10+2 ; C20+1
Q22 = C20+2 ; C20+2
Q23 = C30+2 ; C20+3
Q24 = C40+2 ; C20+4
Q25 = C50+2 ; C20+5
Q26 = C60+2 ; C20+6
Q27 = C70+2 ; C20+7
Q30 = C00+3 ; C30
Q31 = C10+3 ; C30+1
Q32 = C20+3 ; C30+2
Q33 = C30+3 ; C30+3
Q34 = C40+3 ; C30+4
Q35 = C50+3 ; C30+5
Q36 = C60+3 ; C30+6
Q37 = C70+3 ; C30+7
Q40 = C00+4 ; C40
Q41 = C10+4 ; C40+1
Q42 = C20+4 ; C40+2
Q43 = C30+4 ; C40+3
Q44 = C40+4 ; C40+4
Q45 = C50+4 ; C40+5
Q46 = C60+4 ; C40+6
Q47 = C70+4 ; C40+7
Q50 = C00+5 ; C50
Q51 = C10+5 ; C50+1
Q52 = C20+5 ; C50+2
Q53 = C30+5 ; C50+3
Q54 = C40+5 ; C50+4
Q55 = C50+5 ; C50+5
Q56 = C60+5 ; C50+6
Q57 = C70+5 ; C50+7
Q60 = C00+6 ; C60
Q61 = C10+6 ; C60+1
Q62 = C20+6 ; C60+2
Q63 = C30+6 ; C60+3
Q64 = C40+6 ; C60+4
Q65 = C50+6 ; C60+5
Q66 = C60+6 ; C60+6
Q67 = C70+6 ; C60+7
Q70 = C00+7 ; C70
Q71 = C10+7 ; C70+1
Q72 = C20+7 ; C70+2
Q73 = C30+7 ; C70+3
Q74 = C40+7 ; C70+4
Q75 = C50+7 ; C70+5
Q76 = C60+7 ; C70+6
Q77 = C70+7 ; C70+7