Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

315 lines
13 KiB

  1. ;////////////////////////////////////////////////////////////////////////////
  2. ;//
  3. ;// INTEL CORPORATION PROPRIETARY INFORMATION
  4. ;//
  5. ;// This software is supplied under the terms of a license
  6. ;// agreement or nondisclosure agreement with Intel Corporation
  7. ;// and may not be copied or disclosed except in accordance
  8. ;// with the terms of that agreement.
  9. ;//
  10. ;////////////////////////////////////////////////////////////////////////////
  11. ;//
  12. ;// $Header: S:\h26x\src\enc\exedtq.inv 1.15 06 Nov 1996 16:18:34 BNICKERS $
  13. ;//
  14. ;// $Log: S:\h26x\src\enc\exedtq.inv $
  15. ;//
  16. ;// Rev 1.15 06 Nov 1996 16:18:34 BNICKERS
  17. ;// Improve performance.
  18. ;//
  19. ;// Rev 1.14 18 Oct 1996 16:57:14 BNICKERS
  20. ;// Fixes for EMV
  21. ;//
  22. ;// Rev 1.13 10 Oct 1996 16:42:54 BNICKERS
  23. ;// Initial debugging of Extended Motion Vectors.
  24. ;//
  25. ;// Rev 1.12 04 Oct 1996 08:48:00 BNICKERS
  26. ;// Add EMV.
  27. ;//
  28. ;// Rev 1.11 12 Sep 1996 10:56:18 BNICKERS
  29. ;// Add arguments for thresholds and differentials.
  30. ;//
  31. ;// Rev 1.10 22 Jul 1996 15:23:32 BNICKERS
  32. ;// Reduce code size. Implement H261 spatial filter.
  33. ;//
  34. ;// Rev 1.9 25 Jun 1996 14:24:54 BNICKERS
  35. ;// Implement heuristic motion estimation for MMX, AP mode.
  36. ;//
  37. ;// Rev 1.8 14 May 1996 12:18:54 BNICKERS
  38. ;// Initial debugging of MMx B-Frame ME.
  39. ;//
  40. ;// Rev 1.7 03 May 1996 14:03:46 BNICKERS
  41. ;//
  42. ;// Minor bug fixes and integration refinements.
  43. ;//
  44. ;// Rev 1.6 02 May 1996 12:00:58 BNICKERS
  45. ;// Initial integration of B Frame ME, MMX version.
  46. ;//
  47. ;// Rev 1.5 16 Apr 1996 16:41:02 BNICKERS
  48. ;// Start adding storage for B frame ME.
  49. ;//
  50. ;// Rev 1.4 10 Apr 1996 13:14:12 BNICKERS
  51. ;// Recoding of Motion Estimation, Advanced Prediction.
  52. ;//
  53. ;// Rev 1.3 05 Apr 1996 12:27:54 BNICKERS
  54. ;// Improvements to baseline half pel ME.
  55. ;//
  56. ;// Rev 1.2 26 Mar 1996 12:00:20 BNICKERS
  57. ;// Did some tuning for MMx encode.
  58. ;//
  59. ;// Rev 1.1 20 Mar 1996 15:26:56 KLILLEVO
  60. ;// changed quantization to match IA quantization
  61. ;//
  62. ;// Rev 1.0 15 Mar 1996 15:54:14 BECHOLS
  63. ;// Initial revision.
  64. ;//
  65. ;// Rev 1.0 16 Feb 1996 17:12:12 BNICKERS
  66. ;// Initial revision.
  67. ;//
  68. ;////////////////////////////////////////////////////////////////////////////
  69. ;
  70. ; exEDTQ.inc -- Include file for MMx versions of Motion Estimation and Frame
  71. ; Differencing, Forward DCT (FDCT), and Quant/RLE.
  72. ;
  73. ; Storage on local stack frame for variables that survive only for the duration
  74. ; of one of the four phases (Motion Est, Frame Diff, FDCT, Quant RLE). This
  75. ; storage is prime in that it is accessed by the 3-byte addressing form,
  76. ; esp+8_bit_Disp. This is particularly important for MMx instructions, which
  77. ; would be 8 bytes long if a 32-bit Displacement was used. There's a penalty
  78. ; for such a long instruction.
  79. ; (128 bytes; 32:159)
  80. ; ********************************************
  81. ; Motion Estimation Locals * THAT DO NOT SURVIVE DURING OTHER PASSES. *
  82. ; ********************************************
  ; Motion-estimation scratch slots.  Every name below is a text macro that
  ; expands to an esp-relative memory operand biased by StackOffset (which is
  ; defined outside this view).  The offsets used here run from 0 to 127.
  ; NOTE(review): the "(32:159)" range in the header comment implies that
  ; StackOffset is 32 -- confirm against the including file.
  ;
  ; Names that share an offset alias the same storage:
  ;   offset 32: BitBucket1, StashMM6, PartSWDForLLBlk
  ;   offset 48: BitBucket2, PartSWDForLRBlk, Addr0MVRefBlk
  83. HalfPelMBMESWDAccum TEXTEQU <[esp+ 0+StackOffset]>; 4 QWORDs + bit bucket
  ; NOTE(review): the operand below is not wrapped in <>, unlike the other
  ; "+offset" definitions in this file (e.g. PelDiffsLine1) -- confirm that the
  ; assembler in use accepts this form and yields the intended operand.
  84. BestOfFourStartingPoints TEXTEQU HalfPelMBMESWDAccum+4
  85. BitBucket1 TEXTEQU <[esp+ 32+StackOffset]>; 8 bytes (QWORD)
  86. StashMM6 TEXTEQU <[esp+ 32+StackOffset]>; QWORD
  87. PartSWDForLLBlk TEXTEQU <[esp+ 32+StackOffset]>; QWORD
  88. SWDULandLR TEXTEQU <[esp+ 40+StackOffset]>; QWORD
  89. BitBucket2 TEXTEQU <[esp+ 48+StackOffset]>; QWORD
  90. PartSWDForLRBlk TEXTEQU <[esp+ 48+StackOffset]>; QWORD
  ; Addr0MVRefBlk and LimitForSWDForBlkMV are the two DWORD halves (48, 52) of
  ; the QWORD slot that BitBucket2 / PartSWDForLRBlk name at offset 48.
  91. Addr0MVRefBlk TEXTEQU <[esp+ 48+StackOffset]>; DWORD
  92. LimitForSWDForBlkMV TEXTEQU <[esp+ 52+StackOffset]>; DWORD
  93. SWDURandLL TEXTEQU <[esp+ 56+StackOffset]>; QWORD
  94. PartSWDForURBlk TEXTEQU <[esp+ 64+StackOffset]>; QWORD
  95. SWD0MVURandLL TEXTEQU <[esp+ 72+StackOffset]>; QWORD
  96. SWD0MVULandLR TEXTEQU <[esp+ 80+StackOffset]>; QWORD
  ; From here on the slots are DWORDs packed at 4-byte intervals (88..124).
  97. SWDForNon0MVToBeat TEXTEQU <[esp+ 88+StackOffset]>; DWORD
  98. BestMBFullPelSWD TEXTEQU <[esp+ 92+StackOffset]>; DWORD
  99. BestMBHalfPelSWD TEXTEQU <[esp+ 96+StackOffset]>; DWORD
  100. BestMBHalfPelRefAddr TEXTEQU <[esp+ 100+StackOffset]>; DWORD
  101. BestHalfPelHorzSWD TEXTEQU <[esp+ 104+StackOffset]>; DWORD
  102. BestHalfPelVertSWD TEXTEQU <[esp+ 108+StackOffset]>; DWORD
  103. Addr0MVRef TEXTEQU <[esp+ 112+StackOffset]>; DWORD
  104. BestBlockRefAddrVP1 TEXTEQU <[esp+ 116+StackOffset]>; DWORD
  105. BestBlkFullPelSWD TEXTEQU <[esp+ 120+StackOffset]>; DWORD
  106. SWDForBlock2Or4 TEXTEQU <[esp+ 124+StackOffset]>; DWORD
  107. ; Frame Differencing Locals, passed to FDCT.
  108. ;
  109. ; The output of frame differencing is the input to the forward DCT.
  110. ; The intermediate coefficients are also stored here. This keeps the
  111. ; addressing forms as small as possible. This is particularly important
  112. ; for MMx instructions, to keep them 7 bytes or shorter.
  113. ; (32:167)
  ; PelDiffs sits at offset 0 of the frame, the same offset the motion
  ; estimation locals start at (HalfPelMBMESWDAccum), so the two sets of names
  ; overlay the same stack bytes.
  ; PelDiffsLineN is the text "PelDiffs+16*N": eight lines at a 16-byte pitch,
  ; covering offsets 0..127.
  114. PelDiffs TEXTEQU <[esp+StackOffset]> ; Must stay here!
  115. PelDiffsLine0 TEXTEQU <PelDiffs>
  116. PelDiffsLine1 TEXTEQU <PelDiffs+16>
  117. PelDiffsLine2 TEXTEQU <PelDiffs+32>
  118. PelDiffsLine3 TEXTEQU <PelDiffs+48>
  119. PelDiffsLine4 TEXTEQU <PelDiffs+64>
  120. PelDiffsLine5 TEXTEQU <PelDiffs+80>
  121. PelDiffsLine6 TEXTEQU <PelDiffs+96>
  122. PelDiffsLine7 TEXTEQU <PelDiffs+112>
  ; Coeffs begins 8 bytes into the PelDiffs area, so the two regions overlap;
  ; 16 QWORDs from offset 8 end at offset 135.
  123. Coeffs TEXTEQU <[esp+StackOffset+8]> ; 16 QWORDs
  124. ; *****************************************
  125. ; Local variables * THAT SURVIVE FROM ONE PASS TO ANOTHER *
  126. ; *****************************************
  127. ;
  128. ; QWORD aligned:
  129. ; (184:191)
  ; BlockAbove occupies offsets 152..159 (two DWORDs), directly below LeftPred.
  130. BlockAbove TEXTEQU <[esp+StackOffset+152]> ; 2 DWORDs
  131. ; ************************************************
  132. ; Frame Differencing Locals * THAT NEED NOT SURVIVE OTHER PASSES (but do). *
  133. ; ************************************************
  134. ; These three blocks of 8*8 storage are needed for the left, right, and
  135. ; central remote prediction contributions.
  136. ; (192:383)
  ; LeftPred and RightPred are 64 bytes apart (160, 224).  CentralPred is
  ; expressed relative to CONST_384 (defined outside this view).
  ; NOTE(review): if CONST_384 is 384, CentralPred lands at offset 288, 64
  ; bytes after RightPred, which matches the (192:383) range above when
  ; StackOffset is 32 -- confirm both constants.
  137. LeftPred TEXTEQU <[esp+StackOffset+160]>
  138. RightPred TEXTEQU <[esp+StackOffset+224]>
  139. CentralPred TEXTEQU <[esp+CONST_384*1+StackOffset-96]>
  140. ; Temp space used by Heuristic ME.
  ; TargetSigContribForRowPairs is an alias: it expands to the same operand as
  ; CentralPred and therefore reuses that storage.
  141. TargetSigContribForRowPairs TEXTEQU CentralPred
  142. ; *****************************************
  143. ; Local variables * THAT SURVIVE FROM ONE PASS TO ANOTHER *
  144. ; *****************************************
  145. ; (384:511)
  ; Locals addressed relative to CONST_384 (defined outside this view), using
  ; small positive or negative offsets from it.
  ; NOTE(review): CONST_384 is presumably 384; with StackOffset = 32 that
  ; reproduces the absolute ranges quoted in the comments -- confirm.
  ;
  ; DoBlockLevelVectors, DoAdvancedPrediction, DoSpatialFiltering and
  ; IsPlainPFrame sit at consecutive byte offsets (-28..-25); DoHalfPelME is at
  ; -32, four bytes before them.
  146. DoHalfPelME TEXTEQU <[esp+CONST_384*1+StackOffset- 32]>
  147. DoBlockLevelVectors TEXTEQU <[esp+CONST_384*1+StackOffset- 28]>
  148. DoAdvancedPrediction TEXTEQU <[esp+CONST_384*1+StackOffset- 27]>
  149. DoSpatialFiltering TEXTEQU <[esp+CONST_384*1+StackOffset- 26]>
  150. IsPlainPFrame TEXTEQU <[esp+CONST_384*1+StackOffset- 25]>
  151. TargetFrameBaseAddress TEXTEQU <[esp+CONST_384*1+StackOffset- 24]>
  152. PreviousFrameBaseAddress TEXTEQU <[esp+CONST_384*1+StackOffset- 20]>
  153. TargToRef TEXTEQU <[esp+CONST_384*1+StackOffset- 16]>
  ; The next three slots each carry a second name that expands to the same
  ; operand (SpatiallyFilteredMB, SpatialFiltThreshold,
  ; SpatialFiltDifferential), so each pair shares one storage location.
  154. BFrameBaseAddress TEXTEQU <[esp+CONST_384*1+StackOffset- 12]>
  155. SpatiallyFilteredMB TEXTEQU <BFrameBaseAddress>
  156. BFrameToFuture TEXTEQU <[esp+CONST_384*1+StackOffset- 8]>
  157. SpatialFiltThreshold TEXTEQU <BFrameToFuture>
  158. PendingOBMC TEXTEQU <[esp+CONST_384*1+StackOffset- 4]>
  159. SpatialFiltDifferential TEXTEQU <PendingOBMC>
  ; Slots at 4-byte intervals from +0 upward.
  160. DistToBADforBlockAbove TEXTEQU <[esp+CONST_384*1+StackOffset+ 0]>
  161. DistToBADforBlockBelow TEXTEQU <[esp+CONST_384*1+StackOffset+ 4]>
  162. AddrOfLeftPred TEXTEQU <[esp+CONST_384*1+StackOffset+ 8]>
  163. AddrOfRightPred TEXTEQU <[esp+CONST_384*1+StackOffset+ 12]>
  164. Recip2QPToUse TEXTEQU <[esp+CONST_384*1+StackOffset+ 16]>
  165. QPDiv2 TEXTEQU <[esp+CONST_384*1+StackOffset+ 20]>
  166. BRecip2QPToUse TEXTEQU <[esp+CONST_384*1+StackOffset+ 24]>
  167. BQPDiv2 TEXTEQU <[esp+CONST_384*1+StackOffset+ 28]>
  168. CodeStreamCursor TEXTEQU <[esp+CONST_384*1+StackOffset+ 32]>
  169. BCodeStreamCursor TEXTEQU <[esp+CONST_384*1+StackOffset+ 36]>
  170. C00Copy TEXTEQU <[esp+CONST_384*1+StackOffset+ 40]>
  171. StashBlockType TEXTEQU <[esp+CONST_384*1+StackOffset+ 44]>
  172. TargetMacroBlockBaseAddr TEXTEQU <[esp+CONST_384*1+StackOffset+ 48]>
  173. BestMV TEXTEQU <[esp+CONST_384*1+StackOffset+ 52]>
  ; CandidateMV is a second name for the BestMBHalfPelMV slot.
  174. BestMBHalfPelMV TEXTEQU <[esp+CONST_384*1+StackOffset+ 56]>
  175. CandidateMV TEXTEQU <BestMBHalfPelMV>
  176. SWDTotal TEXTEQU <[esp+CONST_384*1+StackOffset+ 60]>
  177. BSWDTotal TEXTEQU <[esp+CONST_384*1+StackOffset+ 64]>
  ; MBlockActionStream is a second name for the BlockActionDescrCursor slot.
  178. BlockActionDescrCursor TEXTEQU <[esp+CONST_384*1+StackOffset+ 68]>
  179. MBlockActionStream TEXTEQU BlockActionDescrCursor
  180. BFrmCBP TEXTEQU <[esp+CONST_384*1+StackOffset+ 72]>
  181. PastRefPitchDiv4 TEXTEQU <[esp+CONST_384*1+StackOffset+ 76]>
  182. CurrSWDState TEXTEQU <[esp+CONST_384*1+StackOffset+ 80]>
  ; No name is defined at +88: the definitions go from +84 straight to +92.
  183. StashPartialRefBlkAddr TEXTEQU <[esp+CONST_384*1+StackOffset+ 84]>
  184. StashESP TEXTEQU <[esp+CONST_384*1+StackOffset+ 92]>
  185. ; These two arrays use esp+384+96:esp+384+223, and esp+384*2-96:esp+384*2+31.
  186. ; (512:639, 704:831)
  ; Unlike their neighbours, the two definitions below use the literal 384
  ; rather than CONST_384*1.
  ; NOTE(review): WeightBackwardMotion is defined at 384+160, i.e. 64 bytes
  ; after WeightForwardMotion and inside the first range quoted above; no name
  ; in this view is defined at esp+384*2-96.  Either the comment or the offset
  ; is out of date -- confirm the array sizes against the code that indexes
  ; them before relying on the ranges above.
  187. WeightForwardMotion TEXTEQU <[esp+384+StackOffset+96]>
  188. WeightBackwardMotion TEXTEQU <[esp+384+StackOffset+160]>
  189. ; 32 more bytes of local variables here:
  190. ; (832:863)
  ; These slots are addressed relative to CONST_384*2, at offsets +32..+60.
  ; EMVLimitsForThisMB is followed by the next name 8 bytes later (+48, +56).
  191. DoHeuristicME TEXTEQU <[esp+CONST_384*2+StackOffset+ 32]>
  192. TargetToSig_Debiased TEXTEQU <[esp+CONST_384*2+StackOffset+ 36]>
  193. SigToTarget TEXTEQU <[esp+CONST_384*2+StackOffset+ 40]>
  194. BFrmZeroVectorThreshold TEXTEQU <[esp+CONST_384*2+StackOffset+ 44]>
  195. EMVLimitsForThisMB TEXTEQU <[esp+CONST_384*2+StackOffset+ 48]> ; 8
  196. DoExtendedMotionVectors TEXTEQU <[esp+CONST_384*2+StackOffset+ 56]>
  197. StackSpaceAvailable TEXTEQU <[esp+CONST_384*2+StackOffset+ 60]>
  ; External symbol declarations.  Each EXTERNDEF names a symbol together with
  ; the type (DWORD or BYTE) it is declared with here; none of these symbols
  ; is defined in this view.
  198. EXTERNDEF C0100010001000100:DWORD
  199. EXTERNDEF C1:DWORD
  200. EXTERNDEF C2:DWORD
  201. EXTERNDEF C3:DWORD
  202. EXTERNDEF C4:DWORD
  203. EXTERNDEF C5:DWORD
  204. EXTERNDEF C6:DWORD
  205. EXTERNDEF C7:DWORD
  206. EXTERNDEF Diff_IdxRefWts:BYTE
  207. EXTERNDEF FutureWt_FF_or_00:DWORD
  208. EXTERNDEF BFrmSWDState:BYTE
  209. EXTERNDEF Pel_Rnd:DWORD
  210. EXTERNDEF LeftRightBlkPosition:DWORD
  211. EXTERNDEF UpDownBlkPosition:DWORD
  212. EXTERNDEF BlkEmptyFlag:BYTE
  213. EXTERNDEF NextZigZagCoeff:BYTE
  ; Numeric equates Crc.  Only columns 0 and 4 are defined for each row r; the
  ; values rise in steps of 8 from C00 = 0 to C74 = 120, i.e. Cr0 = 16*r and
  ; Cr4 = 16*r + 8.
  ; NOTE(review): presumably byte offsets into the coefficient storage
  ; (16 bytes per row) -- confirm against the code that uses them.
  214. C00 = 0
  215. C04 = 8
  216. C10 = 16
  217. C14 = 24
  218. C20 = 32
  219. C24 = 40
  220. C30 = 48
  221. C34 = 56
  222. C40 = 64
  223. C44 = 72
  224. C50 = 80
  225. C54 = 88
  226. C60 = 96
  227. C64 = 104
  228. C70 = 112
  229. C74 = 120
  ; Numeric equates Qij.  The active definition is Qij = Cj0 + i, which
  ; evaluates to 16*j + i.  The trailing comment on each line shows the
  ; expression with the indices the other way round (Ci0 + j); that form is
  ; not in effect.
  230. Q00 = C00 ; C00
  231. Q01 = C10 ; C00+1
  232. Q02 = C20 ; C00+2
  233. Q03 = C30 ; C00+3
  234. Q04 = C40 ; C00+4
  235. Q05 = C50 ; C00+5
  236. Q06 = C60 ; C00+6
  237. Q07 = C70 ; C00+7
  238. Q10 = C00+1 ; C10
  239. Q11 = C10+1 ; C10+1
  240. Q12 = C20+1 ; C10+2
  241. Q13 = C30+1 ; C10+3
  242. Q14 = C40+1 ; C10+4
  243. Q15 = C50+1 ; C10+5
  244. Q16 = C60+1 ; C10+6
  245. Q17 = C70+1 ; C10+7
  246. Q20 = C00+2 ; C20
  247. Q21 = C10+2 ; C20+1
  248. Q22 = C20+2 ; C20+2
  249. Q23 = C30+2 ; C20+3
  250. Q24 = C40+2 ; C20+4
  251. Q25 = C50+2 ; C20+5
  252. Q26 = C60+2 ; C20+6
  253. Q27 = C70+2 ; C20+7
  254. Q30 = C00+3 ; C30
  255. Q31 = C10+3 ; C30+1
  256. Q32 = C20+3 ; C30+2
  257. Q33 = C30+3 ; C30+3
  258. Q34 = C40+3 ; C30+4
  259. Q35 = C50+3 ; C30+5
  260. Q36 = C60+3 ; C30+6
  261. Q37 = C70+3 ; C30+7
  262. Q40 = C00+4 ; C40
  263. Q41 = C10+4 ; C40+1
  264. Q42 = C20+4 ; C40+2
  265. Q43 = C30+4 ; C40+3
  266. Q44 = C40+4 ; C40+4
  267. Q45 = C50+4 ; C40+5
  268. Q46 = C60+4 ; C40+6
  269. Q47 = C70+4 ; C40+7
  270. Q50 = C00+5 ; C50
  271. Q51 = C10+5 ; C50+1
  272. Q52 = C20+5 ; C50+2
  273. Q53 = C30+5 ; C50+3
  274. Q54 = C40+5 ; C50+4
  275. Q55 = C50+5 ; C50+5
  276. Q56 = C60+5 ; C50+6
  277. Q57 = C70+5 ; C50+7
  278. Q60 = C00+6 ; C60
  279. Q61 = C10+6 ; C60+1
  280. Q62 = C20+6 ; C60+2
  281. Q63 = C30+6 ; C60+3
  282. Q64 = C40+6 ; C60+4
  283. Q65 = C50+6 ; C60+5
  284. Q66 = C60+6 ; C60+6
  285. Q67 = C70+6 ; C60+7
  286. Q70 = C00+7 ; C70
  287. Q71 = C10+7 ; C70+1
  288. Q72 = C20+7 ; C70+2
  289. Q73 = C30+7 ; C70+3
  290. Q74 = C40+7 ; C70+4
  291. Q75 = C50+7 ; C70+5
  292. Q76 = C60+7 ; C70+6
  293. Q77 = C70+7 ; C70+7