Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4144 lines
147 KiB

  1. ;/* *************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;** *************************************************************************
  13. ;*/
  14. ;////////////////////////////////////////////////////////////////////////////
  15. ;//
  16. ;// $Header: R:\h26x\h26x\src\enc\ex5fdct.asv 1.5 14 May 1996 12:18:50 BNICKERS $
  17. ;// $Log: R:\h26x\h26x\src\enc\ex5fdct.asv $
  18. ;//
  19. ;// Rev 1.5 14 May 1996 12:18:50 BNICKERS
  20. ;// Initial debugging of MMx B-Frame ME.
  21. ;//
  22. ;// Rev 1.4 11 Apr 1996 16:02:06 AKASAI
  23. ;// Updated H261 encoder to new interface and macroblock action stream
  24. ;// data structure in e3mbad.inc for FORWARDDCT. Files updated together
  25. ;// e1enc.cpp, e1enc.h, ex5fdct.asm, e3mbad.inc.
  26. ;//
  27. ;// Added IFNDEF H261 in ex5fdct so that code used only in H263 is
  28. ;// not assembled for H261.
  29. ;//
  30. ;// Rev 1.3 24 Jan 1996 13:21:28 BNICKERS
  31. ;// Implement OBMC
  32. ;//
  33. ;// Rev 1.1 27 Dec 1995 15:32:42 RMCKENZX
  34. ;// Added copyright notice
  35. ;//
  36. ;////////////////////////////////////////////////////////////////////////////
  37. ;
  38. ; e35fdct -- This function performs a Forward Discrete Cosine Transform for H263, on a stream of macroblocks comprised
  39. ; of 8*8 blocks of pels or pel diffs. This version is tuned for the Pentium Microprocessor.
  40. ;
  41. ; Arguments:
  42. ;
  43. ; MBlockActionStream (Input)
  44. ;
  45. ; A stream of MacroBlock Action Descriptors. Each descriptor indicates which blocks of a macroblock are non-empty
  46. ; and thus need to be transformed. There are from 0 to 12 non-empty blocks in each macroblock.
  47. ;
  48. ; Processing commences with the macroblock described by the first descriptor in the stream (regardless of whether
  49. ; its End-Of-Stream bit is set). Processing continues up to but not including the next descriptor that has the
  50. ; End-Of-Stream bit set.
  51. ;
  52. ; This function requires each descriptor in the MBlockActionStream to be 16-byte aligned. Moreover, each of the
  53. ; T_Blk elements in the descriptor must also be 16-byte aligned, and ordered as they are now. (Note that I am
  54. ; talking about the address of these pointer variables, not the alignment of the data they point to.)
  55. ;
  56. ; Best performance will be attained when 8*8 blocks are (or usually are) DWORD aligned. MMx implementations will
  57. ; probably prefer 8-byte alignment.
  58. ;
  59. ; The complete format of the MacroBlock Action Descriptors is provided in e3mbad.inc.
  60. ;
  61. ; TargetFrameBaseAddress -- Address of upper left viewable pel in the target Y plane. When doing B frames, this
  62. ; is the Target B Frame Base Address.
  63. ;
  64. ; PreviousFrameBaseAddress -- Address of the reconstructed previous frame. This really isn't needed for P-frame
  65. ; processing, since the address of each block's prediction was recorded by
  66. ; MotionEstimation. It's only used by B-frame processing.
  67. ;
  68. ; FutureFrameBaseAddress -- Address of the reconstructed future (a.k.a. current) P-frame. Only used when processing
  69. ; B frames.
  70. ;
  71. ; CoeffStream (Output)
  72. ;
  73. ; A stream of storage blocks which receive the DCT output coefficient
  74. ; blocks for each non-empty block described in the MBlockActionStream.
  75. ; Each coefficient block is 128 bytes. The stream must be large enough
  76. ; to hold all the output coefficient blocks.
  77. ;
  78. ; Best performance will be attained by assuring the storage is 32-byte
  79. ; aligned. Best performance will be attained by using the output before
  80. ; the data cache gets changed by other data. Consuming the coefficient
  81. ; blocks in forward order is best, since they are defined in reverse
  82. ; order (and thus the first blocks are most likely to be in cache).
  83. ;
  84. ; The complete format of the coefficient blocks is provided in encdctc.inc.
  85. ;
  86. ; IsBFrame (Input)
  87. ;
  88. ; 0 (False) if doing Key or P frame. 1 (True) if doing B frame.
  89. OPTION PROLOGUE:None
  90. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro
  91. OPTION M510
  92. include e3inst.inc ; Encoder instance data
  93. include e3mbad.inc ; MacroBlock Action Descriptor struct layout
  94. include e3dctc.inc ; DCT Coefficient block layout
  95. .xlist
  96. include memmodel.inc
  97. .list
  98. .DATA
  ; InitTbl -- Emit a 256-entry lookup table labelled TableLabel. Each entry is a
  ; pair of DWORDs, so one table occupies 256 * 8 = 2048 bytes.
  ;
  ; Arguments:
  ;   WeightHi   -- Multiplier for the term that is reduced to a multiple of 010000H
  ;                 (i.e. positioned in the upper 16 bits of each DWORD).
  ;   WeightLo   -- Multiplier for the term that is added to that value in the first
  ;                 DWORD of an entry and subtracted from it in the second DWORD.
  ;   TableLabel -- Name given to the first DWORD of the table.
  ;
  ; Entries are generated for CNT = -128, -127, ..., 127, in that order. Before each
  ; product is divided by 010000H a bias of 08000H is applied: it is subtracted in
  ; the first REPEAT (CNT < 0) and added in the second REPEAT (CNT >= 0). Note that
  ; the choice of bias follows the sign of CNT, not the sign of the product.
  ; NOTE(review): this looks like round-to-nearest scaling of 16.16 fixed-point
  ; weights, which relies on the assembler's constant division truncating toward
  ; zero -- confirm against the assembler in use.
  99. InitTbl MACRO WeightHi,WeightLo,TableLabel
  100. TableLabel LABEL DWORD
  ;; First half of the table: CNT runs from -128 up to -1.
  101. CNT = -128
  102. REPEAT 128
  ;; First DWORD of the entry: WeightHi term plus WeightLo term.
  103. DWORD ((WeightHi*CNT-08000H)/010000H*010000H)+((WeightLo*CNT-08000H)/010000H)
  ;; Second DWORD of the entry: WeightHi term minus WeightLo term.
  104. DWORD ((WeightHi*CNT-08000H)/010000H*010000H)-((WeightLo*CNT-08000H)/010000H)
  105. CNT = CNT + 1
  106. ENDM
  ;; Second half of the table: CNT continues from 0 up to 127, with the bias sign flipped.
  107. REPEAT 128
  108. DWORD ((WeightHi*CNT+08000H)/010000H*010000H)+((WeightLo*CNT+08000H)/010000H)
  109. DWORD ((WeightHi*CNT+08000H)/010000H*010000H)-((WeightLo*CNT+08000H)/010000H)
  110. CNT = CNT + 1
  111. ENDM
  112. ENDM
  ; Six weight tables built with InitTbl. In each table label the first half names
  ; the WeightHi argument and the second half names the WeightLo argument, with a
  ; leading P or N giving the sign of that argument as passed here.
  ;
  ; The symbol defined right after each table is the table address plus 4, so it
  ; addresses the second DWORD of every 8-byte entry (the WeightHi term minus the
  ; WeightLo term). That is why the sign letter on the second half of its name is
  ; flipped relative to the table label.
  ;
  ; The 680-byte uninitialized gaps separate the pairs of tables; per the original
  ; comment they exist for cache placement.
  ; NOTE(review): the exact cache geometry this targets is not stated here.
  113. InitTbl 080000H,04545FH,P80000_P4545F
  114. P80000_N4545F = P80000_P4545F + 4
  115. InitTbl 080000H,0A73D7H,P80000_PA73D7
  116. P80000_NA73D7 = P80000_PA73D7 + 4
  117. BYTE 680 DUP (?) ; To assure that tables interleave nicely in cache.
  118. InitTbl 02350BH, 06491AH,P2350B_P6491A
  119. P2350B_N6491A = P2350B_P6491A + 4
  120. InitTbl -0B18A8H,-096831H,NB18A8_N96831
  121. NB18A8_P96831 = NB18A8_N96831 + 4
  122. BYTE 680 DUP (?) ; To assure that tables interleave nicely in cache.
  123. InitTbl -096831H, 02350BH,N96831_P2350B
  124. N96831_N2350B = N96831_P2350B + 4
  125. InitTbl 06491AH, 0B18A8H,P6491A_PB18A8
  126. P6491A_NB18A8 = P6491A_PB18A8 + 4
  127. IFNDEF H261
  ; ColsDefined -- 17 DWORDs (68 bytes) in which every byte is either 07FH or 000H.
  ; NOTE(review): these look like per-pel byte masks read as two consecutive DWORDs
  ; (8 bytes, one per column of an 8-wide block) starting at a byte offset of
  ; 0, 4, 8, ..., 60. The byte tables that follow in this file contain exactly those
  ; offset values, which suggests they index into this table; the code that does
  ; the lookup is not visible here, so confirm before relying on this.
  128. ColsDefined DD 000000000H,000000000H,07F7F7F7FH,07F7F7F7FH
  129. DD 000000000H,07F7F7F00H,07F7F7F7FH,00000007FH
  130. DD 000000000H,07F7F0000H,07F7F7F7FH,000007F7FH
  131. DD 000000000H,07F000000H,07F7F7F7FH,0007F7F7FH,000000000H
  ; Column-definition bytes. This is one contiguous run of bytes with three entry
  ; labels: LeftYBlkColsDef, RightYBlkColsDef (16 bytes after LeftYBlkColsDef) and
  ; ChromaColsDef. Consecutive bytes correspond to steps of one half pel, and each
  ; label marks displacement 0 for its own block type, so a label can be indexed
  ; with negative as well as positive displacements.
  ;
  ; The numbers in the trailing comments are the displacement of that byte relative
  ; to the RightY, LeftY and Chroma labels, in that order, for whichever of the
  ; three are in range. The original column alignment of these comments has been
  ; lost, so on a line with fewer than three numbers the owning label has to be
  ; worked out from the line's distance to each label.
  ;
  ; Every value is 0 or a multiple of 4 up to 60.
  ; NOTE(review): presumably a byte offset into ColsDefined (0 selecting an
  ; all-zero mask); the lookup code is not visible here -- confirm.
  132. ; Right Left Chroma
  133. DB 0 ; -22.0
  134. DB 0 ; -21.5
  135. DB 0 ; -21.0
  136. DB 0 ; -20.5
  137. DB 0 ; -20.0
  138. DB 0 ; -19.5
  139. DB 0 ; -19.0
  140. DB 0 ; -18.5
  141. DB 0 ; -18.0
  142. DB 0 ; -17.5
  143. DB 0 ; -17.0
  144. DB 0 ; -16.5
  145. DB 0 ; -16.0
  146. DB 0 ; -15.5
  147. DB 0 ; -15.0
  148. DB 0 ; -14.5
  149. DB 0 ; -22.0 -14.0
  150. DB 0 ; -21.5 -13.5
  151. DB 0 ; -21.0 -13.0
  152. DB 0 ; -20.5 -12.5
  153. DB 0 ; -20.0 -12.0
  154. DB 0 ; -19.5 -11.5
  155. DB 0 ; -19.0 -11.0
  156. DB 0 ; -18.5 -10.5
  157. DB 0 ; -18.0 -10.0
  158. DB 0 ; -17.5 -9.5
  159. DB 0 ; -17.0 -9.0
  160. DB 0 ; -16.5 -8.5
  161. DB 0 ; -16.0 -8.0
  162. DB 0 ; -15.5 -7.5
  163. DB 48 ; -15.0 -7.0
  164. DB 48 ; -14.5 -6.5
  165. DB 32 ; -14.0 -6.0
  166. DB 32 ; -13.5 -5.5
  167. DB 16 ; -13.0 -5.0
  168. DB 16 ; -12.5 -4.5
  169. DB 4 ; -12.0 -4.0
  170. DB 4 ; -11.5 -3.5
  171. DB 52 ; -11.0 -3.0
  172. DB 52 ; -10.5 -2.5
  173. DB 36 ; -10.0 -2.0
  174. DB 36 ; -9.5 -1.5
  175. DB 20 ; -9.0 -1.0
  176. DB 20 ; -8.5 -.5
  177. LeftYBlkColsDef DB 8 ; -8.0 0
  178. DB 8 ; -7.5 .5
  179. DB 8 ; -7.0 1.0
  180. DB 8 ; -6.5 1.5
  181. DB 8 ; -6.0 2.0
  182. DB 8 ; -5.5 2.5
  183. DB 8 ; -5.0 3.0
  184. DB 8 ; -4.5 3.5
  185. DB 8 ; -4.0 4.0
  186. DB 8 ; -3.5 4.5
  187. DB 8 ; -3.0 5.0
  188. DB 8 ; -2.5 5.5
  189. DB 8 ; -2.0 6.0
  190. DB 8 ; -1.5 6.5
  191. DB 8 ; -1.0 7.0
  192. DB 8 ; -.5 7.5
  193. RightYBlkColsDef DB 8 ; 0 8.0
  194. DB 56 ; .5 8.5
  195. DB 56 ; 1.0 9.0
  196. DB 40 ; 1.5 9.5
  197. DB 40 ; 2.0 10.0
  198. DB 24 ; 2.5 10.5
  199. DB 24 ; 3.0 11.0
  200. DB 12 ; 3.5 11.5
  201. DB 12 ; 4.0 12.0
  202. DB 60 ; 4.5 12.5
  203. DB 60 ; 5.0 13.0
  204. DB 44 ; 5.5 13.5
  205. DB 44 ; 6.0 14.0
  206. DB 28 ; 6.5 14.5
  207. DB 28 ; 7.0 15.0
  208. DB 0 ; 7.5 15.5
  209. DB 0 ; 8.0 16.0
  210. DB 0 ; 8.5 16.5
  211. DB 0 ; 9.0 17.0
  212. DB 0 ; 9.5 17.5
  213. DB 0 ; 10.0 18.0
  214. DB 0 ; 10.5 18.5
  215. DB 0 ; 11.0 19.0
  216. DB 0 ; 11.5 19.5
  217. DB 0 ; 12.0 20.0
  218. DB 0 ; 12.5 20.5
  219. DB 0 ; 13.0 21.0
  220. DB 0 ; 13.5 21.5
  221. DB 0 ; 14.0 22.0
  222. DB 0 ; 14.5
  223. DB 0 ; 15.0
  224. DB 0 ; 15.5
  225. DB 0 ; 16.0
  226. DB 0 ; 16.5
  227. DB 0 ; 17.0
  228. DB 0 ; 17.5
  229. DB 0 ; 18.0
  230. DB 0 ; 18.5 -11.0
  231. DB 0 ; 19.0 -10.5
  232. DB 0 ; 19.5 -10.0
  233. DB 0 ; 20.0 -9.5
  234. DB 0 ; 20.5 -9.0
  235. DB 0 ; 21.0 -8.5
  236. DB 0 ; 21.5 -8.0
  237. DB 0 ; 22.0 -7.5
  238. DB 48 ; -7.0
  239. DB 48 ; -6.5
  240. DB 32 ; -6.0
  241. DB 32 ; -5.5
  242. DB 16 ; -5.0
  243. DB 16 ; -4.5
  244. DB 4 ; -4.0
  245. DB 4 ; -3.5
  246. DB 52 ; -3.0
  247. DB 52 ; -2.5
  248. DB 36 ; -2.0
  249. DB 36 ; -1.5
  250. DB 20 ; -1.0
  251. DB 20 ; -.5
  252. ChromaColsDef DB 8 ; 0
  253. DB 56 ; .5
  254. DB 56 ; 1.0
  255. DB 40 ; 1.5
  256. DB 40 ; 2.0
  257. DB 24 ; 2.5
  258. DB 24 ; 3.0
  259. DB 12 ; 3.5
  260. DB 12 ; 4.0
  261. DB 60 ; 4.5
  262. DB 60 ; 5.0
  263. DB 44 ; 5.5
  264. DB 44 ; 6.0
  265. DB 28 ; 6.5
  266. DB 28 ; 7.0
  267. DB 0 ; 7.5
  268. DB 0 ; 8.0
  269. DB 0 ; 8.5
  270. DB 0 ; 9.0
  271. DB 0 ; 9.5
  272. DB 0 ; 10.0
  273. DB 0 ; 10.5
  274. DB 0 ; 11.0
  ; Line-definition bytes. This is one contiguous run of bytes with three entry
  ; labels: UpperYBlkLinesDef, LowerYBlkLinesDef (16 bytes after UpperYBlkLinesDef)
  ; and ChromaLinesDef. Consecutive bytes correspond to steps of one half pel, and
  ; each label marks displacement 0 for its own block type, so a label can be
  ; indexed with negative as well as positive displacements.
  ;
  ; The numbers in the trailing comments are the displacement of that byte relative
  ; to the LowerY, UpperY and Chroma labels, in that order, for whichever of the
  ; three are in range. The original column alignment of these comments has been
  ; lost, so on a line with fewer than three numbers the owning label has to be
  ; worked out from the line's distance to each label.
  ;
  ; Values are 8-bit patterns made of a contiguous run of set bits anchored at the
  ; low end (001H .. 07FH), at the high end (0FEH .. 080H), all set (0FFH) or none
  ; set (000H).
  ; NOTE(review): presumably one bit per line of an 8-line block, set when that
  ; line is defined; the code that consumes these bytes is not visible here.
  275. ; Lower Upper Chroma
  276. DB 000H ; -22.0
  277. DB 000H ; -21.5
  278. DB 000H ; -21.0
  279. DB 000H ; -20.5
  280. DB 000H ; -20.0
  281. DB 000H ; -19.5
  282. DB 000H ; -19.0
  283. DB 000H ; -18.5
  284. DB 000H ; -18.0
  285. DB 000H ; -17.5
  286. DB 000H ; -17.0
  287. DB 000H ; -16.5
  288. DB 000H ; -16.0
  289. DB 000H ; -15.5
  290. DB 000H ; -15.0
  291. DB 000H ; -14.5
  292. DB 000H ; -22.0 -14.0
  293. DB 000H ; -21.5 -13.5
  294. DB 000H ; -21.0 -13.0
  295. DB 000H ; -20.5 -12.5
  296. DB 000H ; -20.0 -12.0
  297. DB 000H ; -19.5 -11.5
  298. DB 000H ; -19.0 -11.0
  299. DB 000H ; -18.5 -10.5
  300. DB 000H ; -18.0 -10.0
  301. DB 000H ; -17.5 -9.5
  302. DB 000H ; -17.0 -9.0
  303. DB 000H ; -16.5 -8.5
  304. DB 000H ; -16.0 -8.0
  305. DB 000H ; -15.5 -7.5
  306. DB 001H ; -15.0 -7.0
  307. DB 001H ; -14.5 -6.5
  308. DB 003H ; -14.0 -6.0
  309. DB 003H ; -13.5 -5.5
  310. DB 007H ; -13.0 -5.0
  311. DB 007H ; -12.5 -4.5
  312. DB 00FH ; -12.0 -4.0
  313. DB 00FH ; -11.5 -3.5
  314. DB 01FH ; -11.0 -3.0
  315. DB 01FH ; -10.5 -2.5
  316. DB 03FH ; -10.0 -2.0
  317. DB 03FH ; -9.5 -1.5
  318. DB 07FH ; -9.0 -1.0
  319. DB 07FH ; -8.5 -.5
  320. UpperYBlkLinesDef DB 0FFH ; -8.0 0
  321. DB 0FFH ; -7.5 .5
  322. DB 0FFH ; -7.0 1.0
  323. DB 0FFH ; -6.5 1.5
  324. DB 0FFH ; -6.0 2.0
  325. DB 0FFH ; -5.5 2.5
  326. DB 0FFH ; -5.0 3.0
  327. DB 0FFH ; -4.5 3.5
  328. DB 0FFH ; -4.0 4.0
  329. DB 0FFH ; -3.5 4.5
  330. DB 0FFH ; -3.0 5.0
  331. DB 0FFH ; -2.5 5.5
  332. DB 0FFH ; -2.0 6.0
  333. DB 0FFH ; -1.5 6.5
  334. DB 0FFH ; -1.0 7.0
  335. DB 0FFH ; -.5 7.5
  336. LowerYBlkLinesDef DB 0FFH ; 0 8.0
  337. DB 0FEH ; .5 8.5
  338. DB 0FEH ; 1.0 9.0
  339. DB 0FCH ; 1.5 9.5
  340. DB 0FCH ; 2.0 10.0
  341. DB 0F8H ; 2.5 10.5
  342. DB 0F8H ; 3.0 11.0
  343. DB 0F0H ; 3.5 11.5
  344. DB 0F0H ; 4.0 12.0
  345. DB 0E0H ; 4.5 12.5
  346. DB 0E0H ; 5.0 13.0
  347. DB 0C0H ; 5.5 13.5
  348. DB 0C0H ; 6.0 14.0
  349. DB 080H ; 6.5 14.5
  350. DB 080H ; 7.0 15.0
  351. DB 000H ; 7.5 15.5
  352. DB 000H ; 8.0 16.0
  353. DB 000H ; 8.5 16.5
  354. DB 000H ; 9.0 17.0
  355. DB 000H ; 9.5 17.5
  356. DB 000H ; 10.0 18.0
  357. DB 000H ; 10.5 18.5
  358. DB 000H ; 11.0 19.0
  359. DB 000H ; 11.5 19.5
  360. DB 000H ; 12.0 20.0
  361. DB 000H ; 12.5 20.5
  362. DB 000H ; 13.0 21.0
  363. DB 000H ; 13.5 21.5
  364. DB 000H ; 14.0 22.0
  365. DB 000H ; 14.5
  366. DB 000H ; 15.0
  367. DB 000H ; 15.5
  368. DB 000H ; 16.0
  369. DB 000H ; 16.5
  370. DB 000H ; 17.0
  371. DB 000H ; 17.5
  372. DB 000H ; 18.0
  373. DB 000H ; 18.5 -11.0
  374. DB 000H ; 19.0 -10.5
  375. DB 000H ; 19.5 -10.0
  376. DB 000H ; 20.0 -9.5
  377. DB 000H ; 20.5 -9.0
  378. DB 000H ; 21.0 -8.5
  379. DB 000H ; 21.5 -8.0
  380. DB 000H ; 22.0 -7.5
  381. DB 001H ; -7.0
  382. DB 001H ; -6.5
  383. DB 003H ; -6.0
  384. DB 003H ; -5.5
  385. DB 007H ; -5.0
  386. DB 007H ; -4.5
  387. DB 00FH ; -4.0
  388. DB 00FH ; -3.5
  389. DB 01FH ; -3.0
  390. DB 01FH ; -2.5
  391. DB 03FH ; -2.0
  392. DB 03FH ; -1.5
  393. DB 07FH ; -1.0
  394. DB 07FH ; -.5
  395. ChromaLinesDef DB 0FFH ; 0
  396. DB 0FEH ; .5
  397. DB 0FEH ; 1.0
  398. DB 0FCH ; 1.5
  399. DB 0FCH ; 2.0
  400. DB 0F8H ; 2.5
  401. DB 0F8H ; 3.0
  402. DB 0F0H ; 3.5
  403. DB 0F0H ; 4.0
  404. DB 0E0H ; 4.5
  405. DB 0E0H ; 5.0
  406. DB 0C0H ; 5.5
  407. DB 0C0H ; 6.0
  408. DB 080H ; 6.5
  409. DB 080H ; 7.0
  410. DB 000H ; 7.5
  411. DB 000H ; 8.0
  412. DB 000H ; 8.5
  413. DB 000H ; 9.0
  414. DB 000H ; 9.5
  415. DB 000H ; 10.0
  416. DB 000H ; 10.5
  417. DB 000H ; 11.0
  418. ENDIF
  419. .CODE
  420. ;ASSUME cs : FLAT
  421. ;ASSUME ds : FLAT
  422. ;ASSUME es : FLAT
  423. ;ASSUME fs : FLAT
  424. ;ASSUME gs : FLAT
  425. ;ASSUME ss : FLAT
  426. FORWARDDCT proc C AMBlockActionStream: DWORD,
  427. ATargetFrameBaseAddress: DWORD, APreviousFrameBaseAddress: DWORD,
  428. AFutureFrameBaseAddress: DWORD, ACoeffStream: DWORD, AIsBFrame: DWORD,
  429. AIsAdvancedPrediction: DWORD, AIsPOfPBPair: DWORD, AScratchBlocks: DWORD,
  430. ANumMBlksInGOB: DWORD
  431. LocalFrameSize = 196
  432. RegisterStorageSize = 16
  433. ; Arguments:
  434. MBlockActionStream = RegisterStorageSize + 4
  435. TargetFrameBaseAddress_arg = RegisterStorageSize + 8
  436. PreviousFrameBaseAddress_arg = RegisterStorageSize + 12
  437. FutureFrameBaseAddress_arg = RegisterStorageSize + 16
  438. CoeffStream_arg = RegisterStorageSize + 20
  439. IsBFrame = RegisterStorageSize + 24
  440. IsAdvancedPrediction = RegisterStorageSize + 28
  441. IsPOfPBPair = RegisterStorageSize + 32
  442. ScratchBlocks = RegisterStorageSize + 36
  443. NumMBlksInGOB = RegisterStorageSize + 40
  444. EndOfArgList = RegisterStorageSize + 44
  445. ; Locals (on local stack frame)
  446. P00 EQU [esp+ 8] ; Biased Pels or Biased Pel Differences
  447. P01 EQU [esp+ 9]
  448. P02 EQU [esp+ 10]
  449. P03 EQU [esp+ 11]
  450. P04 EQU [esp+ 12]
  451. P05 EQU [esp+ 13]
  452. P06 EQU [esp+ 14]
  453. P07 EQU [esp+ 15]
  454. P10 EQU [esp+ 16]
  455. P11 EQU [esp+ 17]
  456. P12 EQU [esp+ 18]
  457. P13 EQU [esp+ 19]
  458. P14 EQU [esp+ 20]
  459. P15 EQU [esp+ 21]
  460. P16 EQU [esp+ 22]
  461. P17 EQU [esp+ 23]
  462. P20 EQU [esp+ 24]
  463. P21 EQU [esp+ 25]
  464. P22 EQU [esp+ 26]
  465. P23 EQU [esp+ 27]
  466. P24 EQU [esp+ 28]
  467. P25 EQU [esp+ 29]
  468. P26 EQU [esp+ 30]
  469. P27 EQU [esp+ 31]
  470. P30 EQU [esp+ 32]
  471. P31 EQU [esp+ 33]
  472. P32 EQU [esp+ 34]
  473. P33 EQU [esp+ 35]
  474. P34 EQU [esp+ 36]
  475. P35 EQU [esp+ 37]
  476. P36 EQU [esp+ 38]
  477. P37 EQU [esp+ 39]
  478. P40 EQU [esp+ 40]
  479. P41 EQU [esp+ 41]
  480. P42 EQU [esp+ 42]
  481. P43 EQU [esp+ 43]
  482. P44 EQU [esp+ 44]
  483. P45 EQU [esp+ 45]
  484. P46 EQU [esp+ 46]
  485. P47 EQU [esp+ 47]
  486. P50 EQU [esp+ 48]
  487. P51 EQU [esp+ 49]
  488. P52 EQU [esp+ 50]
  489. P53 EQU [esp+ 51]
  490. P54 EQU [esp+ 52]
  491. P55 EQU [esp+ 53]
  492. P56 EQU [esp+ 54]
  493. P57 EQU [esp+ 55]
  494. P60 EQU [esp+ 56]
  495. P61 EQU [esp+ 57]
  496. P62 EQU [esp+ 58]
  497. P63 EQU [esp+ 59]
  498. P64 EQU [esp+ 60]
  499. P65 EQU [esp+ 61]
  500. P66 EQU [esp+ 62]
  501. P67 EQU [esp+ 63]
  502. P70 EQU [esp+ 64]
  503. P71 EQU [esp+ 65]
  504. P72 EQU [esp+ 66]
  505. P73 EQU [esp+ 67]
  506. P74 EQU [esp+ 68]
  507. P75 EQU [esp+ 69]
  508. P76 EQU [esp+ 70]
  509. P77 EQU [esp+ 71]
  510. I00I02 EQU P00 ; Intermed for row 0, columns 0 and 2.
  511. I01I03 EQU P04 ; Share storage with pels.
  512. I04I06 EQU [esp+ 72]
  513. Mask00 EQU [esp+ 72]
  514. I07I05 EQU [esp+ 76]
  515. Mask04 EQU [esp+ 76]
  516. I10I12 EQU P10
  517. I11I13 EQU P14
  518. I14I16 EQU [esp+ 80]
  519. Mask10 EQU [esp+ 80]
  520. I17I15 EQU [esp+ 84]
  521. Mask14 EQU [esp+ 84]
  522. I20I22 EQU P20
  523. I21I23 EQU P24
  524. I24I26 EQU [esp+ 88]
  525. Mask20 EQU [esp+ 88]
  526. I27I25 EQU [esp+ 92]
  527. Mask24 EQU [esp+ 92]
  528. I30I32 EQU P30
  529. I31I33 EQU P34
  530. I34I36 EQU [esp+ 96]
  531. Mask30 EQU [esp+ 96]
  532. I37I35 EQU [esp+100]
  533. Mask34 EQU [esp+100]
  534. I40I42 EQU P40
  535. I41I43 EQU P44
  536. I44I46 EQU [esp+104]
  537. Mask40 EQU [esp+104]
  538. I47I45 EQU [esp+108]
  539. Mask44 EQU [esp+108]
  540. I50I52 EQU P50
  541. I51I53 EQU P54
  542. I54I56 EQU [esp+112]
  543. Mask50 EQU [esp+112]
  544. I57I55 EQU [esp+116]
  545. Mask54 EQU [esp+116]
  546. I60I62 EQU P60
  547. I61I63 EQU P64
  548. I64I66 EQU [esp+120]
  549. Mask60 EQU [esp+120]
  550. I67I65 EQU [esp+124]
  551. Mask64 EQU [esp+124]
  552. I70I72 EQU P70
  553. I71I73 EQU P74
  554. I74I76 EQU [esp+128]
  555. Mask70 EQU [esp+128]
  556. I77I75 EQU [esp+132]
  557. Mask74 EQU [esp+132]
  558. S4 EQU I10I12 ; Temp storage, shared.
  559. S7 EQU I00I02 ; Temp storage, shared.
  560. S3 EQU I30I32 ; Temp storage, shared.
  561. S0 EQU I40I42 ; Temp storage, shared.
  562. CoeffStreamStart EQU [esp+ 0]
  563. CoeffStream EQU [esp+ 4]
  564. BlkActionDescrAddr EQU [esp+136]
  565. FutureFrameBaseAddress EQU [esp+140]
  566. DistFromTargetToPastP EQU [esp+144]
  567. TargetFrameBaseAddress EQU [esp+148]
  568. PredictionsBaseAddress EQU [esp+152]
  569. IsPlainPFrame EQU [esp+156]
  570. PreviousFrameBaseAddress EQU [esp+160]
  571. DistToBlockToLeft EQU [esp+164]
  572. DistToBlockAbove EQU [esp+168]
  573. DistToBlockToRight EQU [esp+172]
  574. DistToBlockBelow EQU [esp+176]
  575. DistFromBlk1ToBlk3Above EQU [esp+180]
  576. MBActionCursor EQU [esp+184]
  577. CentralRefAddrAndInterps EQU [esp+188]
  578. StashESP EQU [esp+192]
  579. push esi
  580. push edi
  581. push ebp
  582. push ebx
  583. mov ebx,esp
  584. sub esp,LocalFrameSize+4
  585. mov edi,[ebx+CoeffStream_arg] ; Get address of storage for coeffs.
  586. and esp,0FFFFFFC0H ; Get 64-byte aligned.
  587. xor ebp,ebp
  588. add esp,4 ; esp at cache line plus 4.
  589. mov esi,[ebx+MBlockActionStream] ; Get address of MB action stream.
  590. mov StashESP,ebx
  591. mov edx,[ebx+TargetFrameBaseAddress_arg]
  592. mov TargetFrameBaseAddress,edx
  593. mov eax,[ebx+PreviousFrameBaseAddress_arg]
  594. mov PreviousFrameBaseAddress,eax
  595. sub eax,edx
  596. mov ecx,[ebx+FutureFrameBaseAddress_arg]
  597. mov FutureFrameBaseAddress,ecx
  598. mov DistFromTargetToPastP,eax
  599. mov CoeffStreamStart,edi
  600. xor eax,eax
  601. xor ecx,ecx
  602. IFNDEF H261
  603. ;; H261 does not execute the OBMC code so it is included only when H261 is not defined
  604. ;;
  605. cmp ebp,[ebx+IsBFrame]
  606. mov edx,PITCH
  607. jne NextBMacroBlock
  608. cmp ebp,[ebx+IsAdvancedPrediction]
  609. je NextMacroBlock
  610. mov eax,[ebx+ScratchBlocks] ; We must do OBMC.
  611. mov ecx,[esi].BlkY1.BlkOffset
  612. sub eax,ecx
  613. mov ebp,[ebx+IsPOfPBPair]
  614. xor ebp,1
  615. mov PredictionsBaseAddress,eax
  616. mov IsPlainPFrame,ebp
  617. mov ebp,[ebx+NumMBlksInGOB]
  618. imul ebp,-SIZEOF T_MacroBlockActionDescr
  619. add ebp,2*SIZEOF T_Blk
  620. mov DistFromBlk1ToBlk3Above,ebp
  621. ;===============================================================================
  622. ;===============================================================================
  623. ; First pass builds block action stream from macroblock action stream.
  624. ;===============================================================================
  625. ;===============================================================================
  626. ; esi -- MacroBlockActionStream cursor
  627. ; edi -- BlockActionStream cursor
  628. ; edx -- Address of a block to do
  629. ; bl -- BlockType;
  630. ; MB edge condition: 1 off if left edge | 2: right | 4: top | 8: bottom
  631. ; eax -- Coded block pattern for P block;
  632. ; (Block_number - 1) * SIZEOF T_Blk
  633. NextMacroBlock_OBMC:
  634. mov bl,PB [esi].BlockType
  635. mov al,PB [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  636. ; Bit 4 set for non-empty U blk.
  637. ; Bit 5 set for non-empty V blk.
  638. ; Bit 6 clear except at stream end.
  639. ; Bit 7 clear. Unused.
  640. and bl,IsINTRA
  641. jne MBIsIntraCoded_OBMC
  642. lea edx,[esi].BlkY1+12 ; Addr of block addr (plus 12).
  643. test al,1 ; Check if block 1 empty.
  644. mov [edi].BlockAddr,edx ; Store address of block address.
  645. je Block1DescrBuilt
  646. mov al,[esi].MBEdgeType
  647. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  648. shl eax,31
  649. mov ecx,-SIZEOF T_MacroBlockActionDescr + SIZEOF T_Blk
  650. sar eax,31
  651. mov CoeffStream,edi ; Stash block descriptor ptr.
  652. and ecx,eax ; Blk to left is blk 2 of mb to the left, or off edge.
  653. mov al,[esi].MBEdgeType
  654. shl eax,29
  655. mov DistToBlockToLeft,ecx
  656. sar eax,31
  657. mov ecx,DistFromBlk1ToBlk3Above
  658. and ecx,eax ; Blk above is in macroblock above, or off upper edge.
  659. mov eax,SIZEOF T_Blk ; Blk to right is blk 2 of current macroblock.
  660. mov DistToBlockAbove,ecx
  661. mov ecx,2*SIZEOF T_Blk; Blk below is blk 3 of current macroblock.
  662. mov DistToBlockToRight,eax
  663. mov DistToBlockBelow,ecx
  664. mov ebp,T_MacroBlockActionDescr.BlkY1
  665. jmp BuildOBMCPrediction
  666. Block1DescrBuilt:
  667. test al,2 ; Check if block 2 empty.
  668. lea edx,[esi].BlkY2+12 ; Addr of block addr (plus 12).
  669. mov [edi].BlockAddr,edx ; Store address of block address.
  670. je Block2DescrBuilt
  671. mov al,[esi].MBEdgeType
  672. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  673. shl eax,30
  674. mov ecx,SIZEOF T_MacroBlockActionDescr - SIZEOF T_Blk
  675. sar eax,31
  676. mov CoeffStream,edi ; Stash block descriptor ptr.
  677. and ecx,eax ; Blk to right is blk 1 of mb to right, or off edge.
  678. mov al,[esi].MBEdgeType
  679. shl eax,29
  680. mov DistToBlockToRight,ecx
  681. sar eax,31
  682. mov ecx,DistFromBlk1ToBlk3Above
  683. and ecx,eax ; Blk above is in macroblock above, or off upper edge.
  684. mov eax,-SIZEOF T_Blk ; Blk to left is blk 1 of current macroblock.
  685. mov DistToBlockAbove,ecx
  686. mov ecx,2*SIZEOF T_Blk; Blk below is blk 4 of current macroblock.
  687. mov DistToBlockToLeft,eax
  688. mov DistToBlockBelow,ecx
  689. mov ebp,T_MacroBlockActionDescr.BlkY2
  690. jmp BuildOBMCPrediction
  691. Block1or2DescrBuilt:
  692. mov al,PB [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  693. mov edi,CoeffStream ; Restore block descriptor ptr.
  694. jl Block1DescrBuilt
  695. Block2DescrBuilt:
  696. test al,4 ; Check if block 3 empty.
  697. lea edx,[esi].BlkY3+12 ; Addr of block addr (plus 12).
  698. mov [edi].BlockAddr,edx ; Store address of block address.
  699. je Block3DescrBuilt
  700. mov al,[esi].MBEdgeType
  701. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  702. shl eax,31
  703. mov ecx,-SIZEOF T_MacroBlockActionDescr + SIZEOF T_Blk
  704. sar eax,31
  705. mov CoeffStream,edi ; Stash block descriptor ptr.
  706. and eax,ecx ; Blk to left is blk 4 of mb to the left, or off edge.
  707. mov ecx,-2*SIZEOF T_Blk ; Blk above is blk 1 of current mb.
  708. mov DistToBlockToLeft,eax
  709. mov eax,SIZEOF T_Blk ; Blk to right is blk 4 of current macroblock.
  710. mov DistToBlockAbove,ecx
  711. xor ecx,ecx ; Blk below is current block.
  712. mov DistToBlockToRight,eax
  713. mov DistToBlockBelow,ecx
  714. mov ebp,T_MacroBlockActionDescr.BlkY3
  715. jmp BuildOBMCPrediction
  716. Block3DescrBuilt:
  717. test al,8 ; Check if block 4 empty.
  718. lea edx,[esi].BlkY4+12 ; Addr of block addr (plus 12).
  719. mov [edi].BlockAddr,edx ; Store address of block address.
  720. je Block4DescrBuilt
  721. mov al,[esi].MBEdgeType
  722. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  723. shl eax,30
  724. mov ecx,SIZEOF T_MacroBlockActionDescr - SIZEOF T_Blk
  725. sar eax,31
  726. mov CoeffStream,edi ; Stash block descriptor ptr.
  727. and eax,ecx ; Blk to right is blk 3 of mb to right, or off edge.
  728. mov ecx,-2*SIZEOF T_Blk ; Blk above is blk 2 of current mb.
  729. mov DistToBlockToRight,eax
  730. mov eax,-SIZEOF T_Blk ; Blk to left is blk 3 of current macroblock.
  731. mov DistToBlockAbove,ecx
  732. xor ecx,ecx ; Blk below is current block.
  733. mov DistToBlockToLeft,eax
  734. mov DistToBlockBelow,ecx
  735. mov ebp,T_MacroBlockActionDescr.BlkY4
  736. BuildOBMCPrediction:
  737. ; esi -- MacroBlockActionStream cursor
  738. ; ebp -- T_MacroBlockActionDescr.BlkYN
  739. ; edi -- Address at which to put prediction block
  740. mov edi,PredictionsBaseAddress
  741. mov eax,[esi+ebp*1].T_Blk.BlkOffset; BlkOffset
  742. add edi,eax ; Compute addr at which to put OBMC pred.
  743. mov eax,[esi+ebp*1].T_Blk.MVs ; al = horz MV; ah = vert MV.
  744. test eax,1
  745. mov edx,[esi+ebp*1].T_Blk.PastRef ; Fetch address for ref block.
  746. mov MBActionCursor,esi
  747. jne HorzInterpInCentralPred
  748. mov [esi+ebp*1].T_Blk.PastRef,edi ; Update address for ref block.
  749. test eax,0100H
  750. mov ecx,PITCH
  751. jne VertInterpInCentralPred
  752. ; No half pel interpolation for central point required. Just copy it.
  753. @@:
  754. mov eax,[edx+0]
  755. mov ebx,[edx+4]
  756. mov [edi+ 0],eax
  757. mov [edi+ 4],ebx
  758. mov [edi+ 8],eax
  759. mov [edi+12],ebx
  760. mov [edi+28],eax
  761. mov [edi+32],ebx
  762. add edx,PITCH
  763. add edi,PITCH
  764. add ebp,020000000H
  765. jnc @b
  766. sub edi,PITCH*8
  767. sub edx,PITCH*8-080000000H ; Address of ref, xor 10 in high 2 bits.
  768. jmp CentralPredGottenForOBMC
  769. HorzInterpInCentralPred:
  770. mov [esi+ebp*1].T_Blk.PastRef,edi ; Update address for ref block.
  771. test eax,0100H
  772. mov ecx,1
  773. jne BothInterpInCentralPred
  774. VertInterpInCentralPred:
  775. @@:
  776. mov eax,[edx+0]
  777. mov ebx,[edx+4]
  778. add eax,[edx+ecx+0]
  779. add ebx,[edx+ecx+4]
  780. add eax,001010101H
  781. add ebx,001010101H
  782. shr eax,1
  783. and ebx,0FEFEFEFEH
  784. shr ebx,1
  785. and eax,07F7F7F7FH
  786. mov [edi+ 0],eax
  787. mov [edi+ 4],ebx
  788. mov [edi+ 8],eax
  789. mov [edi+12],ebx
  790. mov [edi+28],eax
  791. mov [edi+32],ebx
  792. add edx,PITCH
  793. add edi,PITCH
  794. add ebp,020000000H
  795. jnc @b
  796. sub edi,PITCH*8
  797. sub edx,PITCH*8
  798. shl ecx,30
  799. xor edx,ecx ; Address of ref, xor 00 in high 2 bits if vertically
  800. ; ; interpolated; xor 01 if horizontally interpolated.
  801. jmp CentralPredGottenForOBMC
  802. BothInterpInCentralPred:
  803. @@:
  804. mov eax,[edx+1] ; <P04 P03 P02 P01> prediction pels.
  805. mov esi,001010101H ; Get 001010101H mask.
  806. mov ebx,[edx] ; <P03 P02 P01 P00>.
  807. add edi,4 ; Pre-increment OBMC prediction block pointer.
  808. mov ecx,[edx+PITCH+1] ; <P14 P13 P12 P11>.
  809. add eax,ebx ; <P04+P03 P03+P02 P02+P01 P01+P00>.
  810. mov ebx,[edx+PITCH] ; <P13 P12 P11 P10>.
  811. and esi,eax ; <(P04+P03)&1 ...>.
  812. shr eax,1 ; <(P04+P03)/2 ...> (dirty).
  813. add ebx,ecx ; <P14+P13 P13+P12 P12+P11 P11+P10>.
  814. and eax,07F7F7F7FH ; <(P04+P03)/2 ...> (clean).
  815. add ebx,esi ; <P14+P13+((P04+P03)&1) ...>.
  816. shr ebx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  817. add edx,4 ; Advance reference block pointer.
  818. and ebx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  819. add eax,001010101H ; <(P04+P03)/2+1 ...>.
  820. add ebx,eax ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  821. mov eax,4
  822. shr ebx,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  823. mov esi,MBActionCursor ; Speculatively restore esi.
  824. and ebx,07F7F7F7FH ; Interpolated prediction.
  825. and eax,edi
  826. mov [edi-4],ebx
  827. mov [edi+8-4],ebx
  828. mov [edi+28-4],ebx
  829. jne @b
  830. add edi,PITCH-8 ; Advance to next line of block.
  831. add edx,PITCH-8 ; Advance to next line of block.
  832. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  833. jnc @b
  834. sub edx,PITCH*8
  835. xor edx,0C0000000H ; Address of ref, xor 11 in high 2 bits.
  836. sub edi,PITCH*8
  837. CentralPredGottenForOBMC:
  838. ; At this point, the central contribution to OBMC prediction is in its scratch
  839. ; block, whose address has been written to PastRef in the block action descr.
  840. ;
  841. ; esi -- MacroBlockActionStream cursor
  842. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  843. ; edi -- Address at which to put prediction block
  844. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  845. ; 00 -- If central ref was interpolated vertically.
  846. ; 01 -- If central ref was interpolated horizontally.
  847. ; 10 -- If central ref was not interpolated.
  848. ; 11 -- If central ref was interpolated both ways.
  849. ; eax -- Offset to block descriptor for block to left.
  850. mov eax,DistToBlockToLeft
  851. lea ebx,[esi+ebp]
  852. add ebx,eax ; Address of block descriptor for block to the left.
  853. mov ecx,-SIZEOF T_MacroBlockActionDescr
  854. and ecx,ebx ; Address of macroblock descr for block to the left.
  855. mov ah,IsPlainPFrame ; 0 if P of PB; 1 if run-of-the-mill P frame.
  856. mov ebx,[ebx].T_Blk.MVs
  857. mov CentralRefAddrAndInterps,edx ; Stash function of ref addr and interps.
  858. mov al,[ecx].BlockType ; Bottom bit set if left neighbor is INTRA.
  859. mov cl,bh
  860. and al,ah ; 0 if PB frame or if not INTRA
  861. jne LeftPredGottenForOBMC ; Jump if INTRA in plain P frame. (Use central)
  862. shl ebx,24 ; Get horz MV in [24:31].
  863. mov eax,[esi+ebp*1].T_Blk.BlkOffset
  864. sar ecx,1 ; CF==1 if interp vertically.
  865. jc InterpVertForTheLeftContrib
  866. shl ecx,25
  867. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  868. jc InterpHorzForTheLeftContrib
  869. IF PITCH-384
  870. **** Magic leaks out if pitch not equal to 384
  871. ENDIF
  872. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to affect mult by 384)
  873. add eax,ebx ; Start accumulating left ref addr in eax.
  874. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  875. mov ebx,PreviousFrameBaseAddress
  876. add eax,ebx ; Continue to accumulate left ref addr in eax.
  877. xor edx,080000000H ; Hi 2 bits of central ref same as this ref if
  878. ; ; central ref also was not interpolated.
  879. add ecx,eax ; Finish accumulating left ref addr in ecx.
  880. cmp ecx,edx ; Is central ref the same?
  881. je LeftPredGottenForOBMC
  882. mov ebx,[ecx+PITCH*0]
  883. mov [edi+PITCH*0+8],ebx
  884. mov ebx,[ecx+PITCH*1]
  885. mov [edi+PITCH*1+8],ebx
  886. mov ebx,[ecx+PITCH*2]
  887. mov [edi+PITCH*2+8],ebx
  888. mov ebx,[ecx+PITCH*3]
  889. mov [edi+PITCH*3+8],ebx
  890. mov ebx,[ecx+PITCH*4]
  891. mov [edi+PITCH*4+8],ebx
  892. mov ebx,[ecx+PITCH*5]
  893. mov [edi+PITCH*5+8],ebx
  894. mov ebx,[ecx+PITCH*6]
  895. mov [edi+PITCH*6+8],ebx
  896. mov ebx,[ecx+PITCH*7]
  897. mov [edi+PITCH*7+8],ebx
  898. jmp LeftPredGottenForOBMC
  ; Left OBMC contribution when the vertical MV has a half-pel part (CF was
  ; set by the "sar ecx,1" that dispatched here).  If the horizontal MV is also
  ; half-pel, control moves on to InterpBothForTheLeftContrib.  Otherwise the
  ; reference address is formed and, unless it matches the central reference,
  ; 8 lines of 4 pels are averaged with the line below into the left scratch
  ; half block at [edi+8].  The 2-tap loop is shared with the horizontal case,
  ; which enters at DoInterpHorzForTheLeftContrib with ebx == 1.
  899. InterpVertForTheLeftContrib:
  900. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  901. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  902. jc InterpBothForTheLeftContrib
  903. IF PITCH-384
  904. **** Magic leaks out if pitch not equal to 384
  905. ENDIF
  906. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  907. add eax,ebx ; Start accumulating left ref addr in eax.
  908. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  909. mov ebx,PreviousFrameBaseAddress
  910. add ebx,eax ; Continue to accumulate left ref addr (sum is now in ebx).
  911. ; ; Hi 2 bits of central ref same as this ref if
  912. ; ; central ref also interpolated vertically.
  913. add ecx,ebx ; Finish accumulating left ref addr in ecx.
  914. mov ebx,PITCH ; Second tap is one line down (vertical interpolation).
  915. cmp ecx,edx ; Is central ref the same?
  916. je LeftPredGottenForOBMC
  917. DoInterpHorzForTheLeftContrib:
  ; 2-tap average loop.  ecx = reference address, ebx = distance to the second
  ; tap (PITCH or 1).  Clobbers eax and edx.
  ; NOTE(review): the packed byte adds rely on each byte sum plus 1 fitting in
  ; 8 bits (pels presumably 7 bits) -- confirm.
  918. @@:
  919. mov eax,[ecx+0] ; 4 reference pels of this line.
  920. add edi,PITCH ; Pre-increment OBMC prediction block pointer.
  921. mov edx,[ecx+ebx+0] ; 4 pels at the second tap.
  922. add eax,001010101H ; Rounding: +1 in each byte.
  923. add eax,edx ; Per-byte sum of the two taps, plus 1.
  924. add ecx,PITCH ; Advance reference block pointer.
  925. shr eax,1 ; Halve (dirty: low bit of each byte lands in its neighbor).
  926. ;
  927. and eax,07F7F7F7FH ; Clean: clear the bit shifted in from the byte above.
  928. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  929. mov [edi+ 8-PITCH],eax ; Store to left scratch half block (mov keeps CF).
  930. jnc @b
  931. sub edi,PITCH*8 ; Undo the 8 pre-increments of edi.
  932. jmp LeftPredGottenForOBMC
  ; Left OBMC contribution when both MV components have a half-pel part.
  ; Forms the reference address and, unless it matches the central reference
  ; (edx with its top two bits flipped), produces 8 lines of 4 pels into the
  ; left scratch half block at [edi+8].  Each output pel is built from the
  ; four pels P(r,c), P(r,c+1), P(r+1,c), P(r+1,c+1) by two cascaded halvings,
  ; as spelled out in the per-line comments.  esi is borrowed for the
  ; 001010101H mask and reloaded from MBActionCursor on every iteration.
  933. InterpBothForTheLeftContrib:
  934. IF PITCH-384
  935. **** Magic leaks out if pitch not equal to 384
  936. ENDIF
  937. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  938. add eax,ebx ; Start accumulating left ref addr in eax.
  939. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  940. mov ebx,PreviousFrameBaseAddress
  941. add eax,ebx ; Continue to accumulate left ref addr in eax.
  942. xor edx,0C0000000H ; Hi 2 bits of central ref same as this ref if
  943. ; ; central ref also interpolated both ways.
  944. add ecx,eax ; Finish accumulating left ref addr in ecx.
  945. cmp ecx,edx ; Is central ref the same?
  946. je LeftPredGottenForOBMC
  947. @@:
  948. mov eax,[ecx+1] ; <P04 P03 P02 P01> prediction pels.
  949. mov esi,001010101H ; Get 001010101H mask.
  950. mov ebx,[ecx] ; <P03 P02 P01 P00>.
  951. add edi,PITCH ; Pre-increment OBMC prediction block pointer.
  952. mov edx,[ecx+PITCH+1] ; <P14 P13 P12 P11>.
  953. add eax,ebx ; <P04+P03 P03+P02 P02+P01 P01+P00>.
  954. mov ebx,[ecx+PITCH] ; <P13 P12 P11 P10>.
  955. and esi,eax ; <(P04+P03)&1 ...>.
  956. shr eax,1 ; <(P04+P03)/2 ...> (dirty).
  957. add ebx,edx ; <P14+P13 P13+P12 P12+P11 P11+P10>.
  958. and eax,07F7F7F7FH ; <(P04+P03)/2 ...> (clean).
  959. add ebx,esi ; <P14+P13+((P04+P03)&1) ...>.
  960. shr ebx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  961. add ecx,PITCH ; Advance reference block pointer.
  962. and ebx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  963. add eax,001010101H ; <(P04+P03)/2+1 ...>.
  964. add ebx,eax ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  965. shr ebx,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  966. mov esi,MBActionCursor ; Speculatively restore esi.
  967. and ebx,07F7F7F7FH ; Interpolated prediction.
  968. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  969. mov [edi+8-PITCH],ebx ; Store to left scratch half block (mov keeps CF).
  970. jnc @b
  971. sub edi,PITCH*8 ; Undo the 8 pre-increments of edi.
  972. jmp LeftPredGottenForOBMC
  ; Left OBMC contribution when only the horizontal MV has a half-pel part.
  ; Forms the reference address in ecx, sets the second-tap distance to 1 pel
  ; and reuses the 2-tap loop at DoInterpHorzForTheLeftContrib.  If the
  ; reference matches the central reference, nothing is written and control
  ; falls through to LeftPredGottenForOBMC.
  973. InterpHorzForTheLeftContrib:
  974. IF PITCH-384
  975. **** Magic leaks out if pitch not equal to 384
  976. ENDIF
  977. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  978. add eax,ebx ; Start accumulating left ref addr in eax.
  979. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  980. mov ebx,PreviousFrameBaseAddress
  981. add eax,ebx ; Continue to accumulate left ref addr in eax.
  982. xor edx,040000000H ; Hi 2 bits of central ref same as this ref if
  983. ; ; central ref also interpolated horizontally.
  984. add ecx,eax ; Finish accumulating left ref addr in ecx.
  985. mov ebx,1 ; Second tap is the pel to the right (mov keeps flags).
  986. cmp ecx,edx ; Is central ref the same?
  987. jne DoInterpHorzForTheLeftContrib
  988. LeftPredGottenForOBMC:
  989. ; At this point, the left contribution to OBMC prediction is in its scratch
  990. ; half block. Now do the right contribution.
  991. ;
  992. ; esi -- MacroBlockActionStream cursor
  993. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  994. ; edi -- Address at which to put prediction block
  995. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  996. ; 00 -- If central ref was interpolated vertically.
  997. ; 01 -- If central ref was interpolated horizontally.
  998. ; 10 -- If central ref was not interpolated.
  999. ; 11 -- If central ref was interpolated both ways.
  1000. ; eax -- Offset to block descriptor for block to right.
  ;
  ; The right contribution covers pels 4-7 of all 8 lines: it is read from
  ; [ref+4] and written to the right scratch half block at [edi+12].
  1001. mov eax,DistToBlockToRight
  1002. lea ebx,[esi+ebp] ; Address of current block's descriptor.
  1003. add ebx,eax ; Address of descriptor for block to right.
  1004. mov ecx,-SIZEOF T_MacroBlockActionDescr
  1005. and ecx,ebx ; Round down to a multiple of the MB descriptor size
  ; ; (presumably the start of the neighbor's MB descriptor).
  1006. mov ah,IsPlainPFrame
  1007. mov ebx,[ebx].T_Blk.MVs ; bl = horz MV, bh = vert MV of block to right.
  1008. mov edx,CentralRefAddrAndInterps ; Reload function of ref addr and interps.
  1009. mov al,[ecx].BlockType
  1010. mov cl,bh ; Vert MV into cl.
  1011. and al,ah
  1012. jne RightPredGottenForOBMC ; Jump if INTRA in plain P frame. (Use central)
  1013. shl ebx,24 ; Get horz MV in [24:31].
  1014. mov eax,[esi+ebp*1].T_Blk.BlkOffset ; Offset of the current block.
  1015. sar ecx,1 ; CF==1 if interp vertically.
  1016. jc InterpVertForTheRightContrib
  1017. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  1018. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1019. jc InterpHorzForTheRightContrib
  1020. IF PITCH-384
  1021. **** Magic leaks out if pitch not equal to 384
  1022. ENDIF
  1023. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1024. add eax,ebx ; Start accumulating right ref addr in eax.
  1025. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1026. mov ebx,PreviousFrameBaseAddress
  1027. add eax,ebx ; Continue to accumulate right ref addr in eax.
  1028. xor edx,080000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also was not interpolated.
  1029. add ecx,eax ; Finish accumulating right ref addr in ecx.
  1030. cmp ecx,edx ; Is central ref the same?
  1031. je RightPredGottenForOBMC
  ; No interpolation: copy pels 4-7 of 8 lines to the right scratch half block.
  1032. mov ebx,[ecx+PITCH*0+4]
  1033. mov [edi+PITCH*0+12],ebx
  1034. mov ebx,[ecx+PITCH*1+4]
  1035. mov [edi+PITCH*1+12],ebx
  1036. mov ebx,[ecx+PITCH*2+4]
  1037. mov [edi+PITCH*2+12],ebx
  1038. mov ebx,[ecx+PITCH*3+4]
  1039. mov [edi+PITCH*3+12],ebx
  1040. mov ebx,[ecx+PITCH*4+4]
  1041. mov [edi+PITCH*4+12],ebx
  1042. mov ebx,[ecx+PITCH*5+4]
  1043. mov [edi+PITCH*5+12],ebx
  1044. mov ebx,[ecx+PITCH*6+4]
  1045. mov [edi+PITCH*6+12],ebx
  1046. mov ebx,[ecx+PITCH*7+4]
  1047. mov [edi+PITCH*7+12],ebx
  1048. jmp RightPredGottenForOBMC
  ; Right OBMC contribution when the vertical MV has a half-pel part.  If the
  ; horizontal MV is half-pel too, control moves on to
  ; InterpBothForTheRightContrib.  Otherwise pels 4-7 of 8 lines are averaged
  ; with the line below into the right scratch half block at [edi+12], unless
  ; the reference matches the central reference.  The horizontal case enters
  ; the shared 2-tap loop at DoInterpHorzForTheRightContrib with ebx == 1.
  1049. InterpVertForTheRightContrib:
  1050. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  1051. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1052. jc InterpBothForTheRightContrib
  1053. IF PITCH-384
  1054. **** Magic leaks out if pitch not equal to 384
  1055. ENDIF
  1056. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1057. add eax,ebx ; Start accumulating right ref addr in eax.
  1058. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1059. mov ebx,PreviousFrameBaseAddress
  1060. add ebx,eax ; Continue to accumulate right ref addr (sum is in ebx).
  1061. add ecx,ebx ; Finish accumulating right ref addr in ecx.
  1062. mov ebx,PITCH ; Second tap is one line down (vertical interpolation).
  1063. cmp ecx,edx ; Is central ref the same?  (edx not xor'ed: vertical case.)
  1064. je RightPredGottenForOBMC
  1065. DoInterpHorzForTheRightContrib:
  ; 2-tap average loop.  ecx = reference address, ebx = distance to the second
  ; tap (PITCH or 1).  Clobbers eax and edx.
  1066. @@:
  1067. mov eax,[ecx+4] ; Pels 4-7 of this reference line.
  1068. add edi,PITCH ; Pre-increment OBMC prediction block pointer.
  1069. mov edx,[ecx+ebx+4] ; Pels 4-7 at the second tap.
  1070. add eax,001010101H ; Rounding: +1 in each byte.
  1071. add eax,edx ; Per-byte sum of the two taps, plus 1.
  1072. add ecx,PITCH ; Advance reference block pointer.
  1073. shr eax,1 ; Halve (dirty).
  1074. ;
  1075. and eax,07F7F7F7FH ; Clean.
  1076. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  1077. mov [edi+12-PITCH],eax ; Store to right scratch half block (mov keeps CF).
  1078. jnc @b
  1079. sub edi,PITCH*8 ; Undo the 8 pre-increments of edi.
  1080. jmp RightPredGottenForOBMC
  ; Right OBMC contribution when both MV components have a half-pel part.
  ; Unless the reference matches the central reference, produces pels 4-7 of
  ; 8 lines into the right scratch half block at [edi+12].  Each output pel is
  ; built from P(r,c), P(r,c+1), P(r+1,c), P(r+1,c+1) by two cascaded halvings.
  ; esi is borrowed for the 001010101H mask and reloaded from MBActionCursor
  ; on every iteration.
  1081. InterpBothForTheRightContrib:
  1082. IF PITCH-384
  1083. **** Magic leaks out if pitch not equal to 384
  1084. ENDIF
  1085. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1086. add eax,ebx ; Start accumulating right ref addr in eax.
  1087. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1088. mov ebx,PreviousFrameBaseAddress
  1089. add eax,ebx ; Continue to accumulate right ref addr in eax.
  1090. xor edx,0C0000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also interpolated both ways.
  1091. add ecx,eax ; Finish accumulating right ref addr in ecx.
  1092. cmp ecx,edx ; Is central ref the same?
  1093. je RightPredGottenForOBMC
  1094. @@:
  1095. mov eax,[ecx+5] ; <P08 P07 P06 P05> prediction pels.
  1096. mov esi,001010101H ; Get 001010101H mask.
  1097. mov ebx,[ecx+4] ; <P07 P06 P05 P04>.
  1098. add edi,PITCH ; Pre-increment OBMC prediction block pointer.
  1099. mov edx,[ecx+PITCH+5] ; <P18 P17 P16 P15>.
  1100. add eax,ebx ; Sums of horizontally adjacent pels, this line.
  1101. mov ebx,[ecx+PITCH+4] ; <P17 P16 P15 P14>.
  1102. and esi,eax ; Low bit of each upper-line sum.
  1103. shr eax,1 ; Upper-line sums halved (dirty).
  1104. add ebx,edx ; Sums of horizontally adjacent pels, next line.
  1105. and eax,07F7F7F7FH ; Upper-line sums halved (clean).
  1106. add ebx,esi ; Add back the low bits dropped from the upper line.
  1107. shr ebx,1 ; Lower-line sums halved (dirty).
  1108. add ecx,PITCH ; Advance reference block pointer.
  1109. and ebx,07F7F7F7FH ; Lower-line sums halved (clean).
  1110. add eax,001010101H ; Rounding: +1 in each byte.
  1111. add ebx,eax ; Combine the two lines.
  1112. shr ebx,1 ; Halve again (dirty).
  1113. mov esi,MBActionCursor ; Speculatively restore esi.
  1114. and ebx,07F7F7F7FH ; Interpolated prediction.
  1115. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  1116. mov [edi+12-PITCH],ebx ; Store to right scratch half block (mov keeps CF).
  1117. jnc @b
  1118. sub edi,PITCH*8 ; Undo the 8 pre-increments of edi.
  1119. jmp RightPredGottenForOBMC
  ; Right OBMC contribution when only the horizontal MV has a half-pel part.
  ; Forms the reference address in ecx, sets the second-tap distance to 1 pel
  ; and reuses the 2-tap loop at DoInterpHorzForTheRightContrib.  If the
  ; reference matches the central reference, nothing is written and control
  ; falls through to RightPredGottenForOBMC.
  1120. InterpHorzForTheRightContrib:
  1121. IF PITCH-384
  1122. **** Magic leaks out if pitch not equal to 384
  1123. ENDIF
  1124. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1125. add eax,ebx ; Start accumulating right ref addr in eax.
  1126. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1127. mov ebx,PreviousFrameBaseAddress
  1128. add eax,ebx ; Continue to accumulate right ref addr in eax.
  1129. xor edx,040000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also interpolated horizontally.
  1130. add ecx,eax ; Finish accumulating right ref addr in ecx.
  1131. mov ebx,1 ; Second tap is the pel to the right (mov keeps flags).
  1132. cmp ecx,edx ; Is central ref the same?
  1133. jne DoInterpHorzForTheRightContrib
  1134. RightPredGottenForOBMC:
  1135. ; At this point, the left and right contributions to OBMC prediction are in
  1136. ; their scratch half blocks. Now do the contribution for the block above.
  1137. ;
  1138. ; esi -- MacroBlockActionStream cursor
  1139. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  1140. ; edi -- Address at which to put prediction block
  1141. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  1142. ; 00 -- If central ref was interpolated vertically.
  1143. ; 01 -- If central ref was interpolated horizontally.
  1144. ; 10 -- If central ref was not interpolated.
  1145. ; 11 -- If central ref was interpolated both ways.
  1146. ; eax -- Offset to block descriptor for block above.
  ;
  ; The above contribution covers all 8 pels of lines 0-3; it is written to
  ; the scratch half block at [edi+28] (pels 0-3) and [edi+32] (pels 4-7).
  1147. mov eax,DistToBlockAbove
  1148. lea ebx,[esi+ebp] ; Address of current block's descriptor.
  1149. add ebx,eax ; Address of descriptor for block above.
  1150. mov ecx,-SIZEOF T_MacroBlockActionDescr
  1151. and ecx,ebx ; Round down to a multiple of the MB descriptor size
  ; ; (presumably the start of the neighbor's MB descriptor).
  1152. mov ah,IsPlainPFrame
  1153. mov ebx,[ebx].T_Blk.MVs ; bl = horz MV, bh = vert MV of block above.
  1154. mov edx,CentralRefAddrAndInterps ; Reload function of ref addr and interps.
  1155. mov al,[ecx].BlockType
  1156. mov cl,bh ; Vert MV into cl.
  1157. and al,ah
  1158. jne AbovePredGottenForOBMC ; Jump if INTRA in plain P frame. (Use central)
  1159. shl ebx,24 ; Get horz MV in [24:31].
  1160. mov eax,[esi+ebp*1].T_Blk.BlkOffset ; Offset of the current block.
  1161. sar ecx,1 ; CF==1 if interp vertically.
  1162. jc InterpVertForTheAboveContrib
  1163. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  1164. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1165. jc InterpHorzForTheAboveContrib
  1166. IF PITCH-384
  1167. **** Magic leaks out if pitch not equal to 384
  1168. ENDIF
  1169. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1170. add eax,ebx ; Start accumulating above ref addr in eax.
  1171. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1172. mov ebx,PreviousFrameBaseAddress
  1173. add eax,ebx ; Continue to accumulate above ref addr in eax.
  1174. xor edx,080000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also was not interpolated.
  1175. add ecx,eax ; Finish accumulating above ref addr in ecx.
  1176. cmp ecx,edx ; Is central ref the same?
  1177. je AbovePredGottenForOBMC
  ; No interpolation: copy all 8 pels of lines 0-3 to the above scratch area.
  1178. mov edx,[ecx+PITCH*0+0]
  1179. mov ebx,[ecx+PITCH*0+4]
  1180. mov [edi+PITCH*0+28],edx
  1181. mov [edi+PITCH*0+32],ebx
  1182. mov edx,[ecx+PITCH*1+0]
  1183. mov ebx,[ecx+PITCH*1+4]
  1184. mov [edi+PITCH*1+32],ebx
  1185. mov [edi+PITCH*1+28],edx
  1186. mov edx,[ecx+PITCH*2+0]
  1187. mov ebx,[ecx+PITCH*2+4]
  1188. mov [edi+PITCH*2+28],edx
  1189. mov [edi+PITCH*2+32],ebx
  1190. mov edx,[ecx+PITCH*3+0]
  1191. mov ebx,[ecx+PITCH*3+4]
  1192. mov [edi+PITCH*3+32],ebx
  1193. mov [edi+PITCH*3+28],edx
  1194. jmp AbovePredGottenForOBMC
  ; Above OBMC contribution when the vertical MV has a half-pel part.  If the
  ; horizontal MV is half-pel too, control moves on to
  ; InterpBothForTheAboveContrib.  Otherwise all 8 pels of lines 0-3 are
  ; averaged with the line below into [edi+28] / [edi+32], unless the
  ; reference matches the central reference.  The horizontal case enters the
  ; shared 2-tap loop at DoInterpHorzForTheAboveContrib with ebx == 1.
  1195. InterpVertForTheAboveContrib:
  1196. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  1197. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1198. jc InterpBothForTheAboveContrib
  1199. IF PITCH-384
  1200. **** Magic leaks out if pitch not equal to 384
  1201. ENDIF
  1202. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1203. add eax,ebx ; Start accumulating above ref addr in eax.
  1204. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1205. mov ebx,PreviousFrameBaseAddress
  1206. add ebx,eax ; Continue to accumulate above ref addr (sum is in ebx).
  1207. add ecx,ebx ; Finish accumulating above ref addr in ecx.
  1208. mov ebx,PITCH ; Second tap is one line down (vertical interpolation).
  1209. cmp ecx,edx ; Is central ref the same?  (edx not xor'ed: vertical case.)
  1210. je AbovePredGottenForOBMC
  1211. DoInterpHorzForTheAboveContrib:
  ; 2-tap average loop over 4 lines of 8 pels.  ecx = reference address,
  ; ebx = distance to the second tap (PITCH or 1).  Clobbers eax and edx.
  1212. @@:
  1213. mov eax,[ecx+0] ; Pels 0-3 of this reference line.
  1214. mov edx,[ecx+4] ; Pels 4-7 of this reference line.
  1215. add eax,[ecx+ebx+0] ; Add pels 0-3 at the second tap.
  1216. add edx,[ecx+ebx+4] ; Add pels 4-7 at the second tap.
  1217. add eax,001010101H ; Rounding: +1 in each byte.
  1218. add edx,001010101H ; Rounding: +1 in each byte.
  1219. shr eax,1 ; Halve (dirty).
  1220. and edx,0FEFEFEFEH ; Clear low bit of each byte so the shift is clean.
  1221. shr edx,1 ; Halve (already clean).
  1222. and eax,07F7F7F7FH ; Clean.
  1223. mov [edi+28],eax ; Store pels 0-3 of above contribution.
  1224. mov [edi+32],edx ; Store pels 4-7 of above contribution.
  1225. add ecx,PITCH ; Advance reference block pointer.
  1226. add edi,PITCH ; Advance OBMC prediction block pointer.
  1227. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1228. jnc @b
  1229. sub edi,PITCH*4 ; Undo the 4 increments of edi.
  1230. jmp AbovePredGottenForOBMC
  ; Above OBMC contribution when both MV components have a half-pel part.
  ; Unless the reference matches the central reference, produces all 8 pels of
  ; lines 0-3 into [edi+28..35].  The loop handles 4 pels per pass: bit 2 of
  ; edi selects between a second pass on the same line ("jne @b") and the
  ; move to the next line, which the top bits of ebp count ("jnc @b").
  ; NOTE(review): this relies on edi having bit 2 clear on entry -- confirm.
  ; esi is borrowed for the 001010101H mask and reloaded from MBActionCursor
  ; on every pass.
  1231. InterpBothForTheAboveContrib:
  1232. IF PITCH-384
  1233. **** Magic leaks out if pitch not equal to 384
  1234. ENDIF
  1235. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1236. add eax,ebx ; Start accumulating above ref addr in eax.
  1237. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1238. mov ebx,PreviousFrameBaseAddress
  1239. add eax,ebx ; Continue to accumulate above ref addr in eax.
  1240. xor edx,0C0000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also interpolated both ways.
  1241. add ecx,eax ; Finish accumulating above ref addr in ecx.
  1242. cmp ecx,edx ; Is central ref the same?
  1243. je AbovePredGottenForOBMC
  1244. @@:
  1245. mov eax,[ecx+1] ; 4 pels one to the right, this line.
  1246. mov esi,001010101H ; Get 001010101H mask.
  1247. mov ebx,[ecx] ; 4 pels, this line.
  1248. add edi,4 ; Pre-increment prediction pointer by 4 pels.
  1249. mov edx,[ecx+PITCH+1] ; 4 pels one to the right, next line.
  1250. add eax,ebx ; Sums of horizontally adjacent pels, this line.
  1251. mov ebx,[ecx+PITCH] ; 4 pels, next line.
  1252. and esi,eax ; Low bit of each upper-line sum.
  1253. shr eax,1 ; Upper-line sums halved (dirty).
  1254. add ebx,edx ; Sums of horizontally adjacent pels, next line.
  1255. and eax,07F7F7F7FH ; Upper-line sums halved (clean).
  1256. add ebx,esi ; Add back the low bits dropped from the upper line.
  1257. shr ebx,1 ; Lower-line sums halved (dirty).
  1258. add ecx,4 ; Advance reference pointer by 4 pels.
  1259. and ebx,07F7F7F7FH ; Lower-line sums halved (clean).
  1260. add eax,001010101H ; Rounding: +1 in each byte.
  1261. add ebx,eax ; Combine the two lines.
  1262. mov eax,4 ; Mask to test bit 2 of edi.
  1263. shr ebx,1 ; Halve again (dirty).
  1264. mov esi,MBActionCursor ; Speculatively restore esi.
  1265. and ebx,07F7F7F7FH ; Interpolated prediction.
  1266. and eax,edi ; ZF==0 after the first 4 pels of a line.
  1267. mov [edi+28-4],ebx ; Store 4 pels of above contribution (mov keeps ZF).
  1268. jne @b ; Do pels 4-7 of the same line.
  1269. add edi,PITCH-8 ; Move prediction pointer to start of next line.
  1270. add ecx,PITCH-8 ; Move reference pointer to start of next line.
  1271. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1272. jnc @b
  1273. sub edi,PITCH*4 ; Undo the 4 line advances of edi.
  1274. jmp AbovePredGottenForOBMC
  ; Above OBMC contribution when only the horizontal MV has a half-pel part.
  ; Forms the reference address in ecx, sets the second-tap distance to 1 pel
  ; and reuses the 2-tap loop at DoInterpHorzForTheAboveContrib.  If the
  ; reference matches the central reference, nothing is written and control
  ; falls through to AbovePredGottenForOBMC.
  1275. InterpHorzForTheAboveContrib:
  1276. IF PITCH-384
  1277. **** Magic leaks out if pitch not equal to 384
  1278. ENDIF
  1279. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1280. add eax,ebx ; Start accumulating above ref addr in eax.
  1281. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1282. mov ebx,PreviousFrameBaseAddress
  1283. add eax,ebx ; Continue to accumulate above ref addr in eax.
  1284. xor edx,040000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also interpolated horizontally.
  1285. add ecx,eax ; Finish accumulating above ref addr in ecx.
  1286. mov ebx,1 ; Second tap is the pel to the right (mov keeps flags).
  1287. cmp ecx,edx ; Is central ref the same?
  1288. jne DoInterpHorzForTheAboveContrib
  1289. AbovePredGottenForOBMC:
  1290. ; At this point, the left, right, and above contributions to OBMC prediction
  1291. ; are in their scratch half blocks. Now do contribution for the block below.
  1292. ;
  1293. ; esi -- MacroBlockActionStream cursor
  1294. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  1295. ; edi -- Address at which to put prediction block
  1296. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  1297. ; 00 -- If central ref was interpolated vertically.
  1298. ; 01 -- If central ref was interpolated horizontally.
  1299. ; 10 -- If central ref was not interpolated.
  1300. ; 11 -- If central ref was interpolated both ways.
  1301. ; eax -- Offset to block descriptor for block below.
  ;
  ; The below contribution covers all 8 pels of lines 4-7; it is written to
  ; [edi+PITCH*n+28] (pels 0-3) and [edi+PITCH*n+32] (pels 4-7), n = 4..7.
  1302. mov eax,DistToBlockBelow
  1303. lea ebx,[esi+ebp] ; Address of current block's descriptor.
  1304. add ebx,eax ; Address of descriptor for block below.
  1305. mov ecx,-SIZEOF T_MacroBlockActionDescr
  1306. and ecx,ebx ; Round down to a multiple of the MB descriptor size
  ; ; (presumably the start of the neighbor's MB descriptor).
  1307. mov ah,IsPlainPFrame
  1308. mov ebx,[ebx].T_Blk.MVs ; bl = horz MV, bh = vert MV of block below.
  1309. mov edx,CentralRefAddrAndInterps ; Reload function of ref addr and interps.
  1310. mov al,[ecx].BlockType
  1311. mov cl,bh ; Vert MV into cl.
  1312. and al,ah
  1313. jne BelowPredGottenForOBMC ; Jump if INTRA in plain P frame. (Use central)
  1314. shl ebx,24 ; Get horz MV in [24:31].
  1315. mov eax,[esi+ebp*1].T_Blk.BlkOffset ; Offset of the current block.
  1316. sar ecx,1 ; CF==1 if interp vertically.
  1317. jc InterpVertForTheBelowContrib
  1318. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  1319. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1320. jc InterpHorzForTheBelowContrib
  1321. IF PITCH-384
  1322. **** Magic leaks out if pitch not equal to 384
  1323. ENDIF
  1324. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1325. add eax,ebx ; Start accumulating below ref addr in eax.
  1326. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1327. mov ebx,PreviousFrameBaseAddress
  1328. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1329. xor edx,080000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also was not interpolated.
  1330. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1331. cmp ecx,edx ; Is central ref the same?
  1332. je BelowPredGottenForOBMC
  ; No interpolation: copy all 8 pels of lines 4-7 to the below scratch area.
  1333. mov edx,[ecx+PITCH*4+0]
  1334. mov ebx,[ecx+PITCH*4+4]
  1335. mov [edi+PITCH*4+28],edx
  1336. mov [edi+PITCH*4+32],ebx
  1337. mov edx,[ecx+PITCH*5+0]
  1338. mov ebx,[ecx+PITCH*5+4]
  1339. mov [edi+PITCH*5+32],ebx
  1340. mov [edi+PITCH*5+28],edx
  1341. mov edx,[ecx+PITCH*6+0]
  1342. mov ebx,[ecx+PITCH*6+4]
  1343. mov [edi+PITCH*6+28],edx
  1344. mov [edi+PITCH*6+32],ebx
  1345. mov edx,[ecx+PITCH*7+0]
  1346. mov ebx,[ecx+PITCH*7+4]
  1347. mov [edi+PITCH*7+32],ebx
  1348. mov [edi+PITCH*7+28],edx
  1349. jmp BelowPredGottenForOBMC
  ; Below OBMC contribution when the vertical MV has a half-pel part.  If the
  ; horizontal MV is half-pel too, control moves on to
  ; InterpBothForTheBelowContrib.  Otherwise all 8 pels of lines 4-7 are
  ; averaged with the line below into [edi+PITCH*n+28] / [edi+PITCH*n+32],
  ; unless the reference matches the central reference.  The horizontal case
  ; enters the shared 2-tap loop at DoInterpHorzForTheBelowContrib with
  ; ebx == 1.
  1350. InterpVertForTheBelowContrib:
  1351. shl ecx,25 ; Move vert MV to the top of ecx; bits above it are dropped.
  1352. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1353. jc InterpBothForTheBelowContrib
  1354. IF PITCH-384
  1355. **** Magic leaks out if pitch not equal to 384
  1356. ENDIF
  1357. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1358. add eax,ebx ; Start accumulating below ref addr in eax.
  1359. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1360. mov ebx,PreviousFrameBaseAddress
  1361. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1362. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1363. mov ebx,PITCH ; Second tap is one line down (vertical interpolation).
  1364. cmp ecx,edx ; Is central ref the same?  (edx not xor'ed: vertical case.)
  1365. je BelowPredGottenForOBMC
  1366. DoInterpHorzForTheBelowContrib:
  ; 2-tap average loop over lines 4-7, 8 pels each.  ecx = reference address
  ; of line 0, ebx = distance to the second tap (PITCH or 1).  Clobbers eax
  ; and edx.
  1367. @@:
  1368. mov eax,[ecx+PITCH*4+0] ; Pels 0-3 of this reference line.
  1369. mov edx,[ecx+PITCH*4+4] ; Pels 4-7 of this reference line.
  1370. add eax,[ecx+ebx+PITCH*4+0] ; Add pels 0-3 at the second tap.
  1371. add edx,[ecx+ebx+PITCH*4+4] ; Add pels 4-7 at the second tap.
  1372. add eax,001010101H ; Rounding: +1 in each byte.
  1373. add edx,001010101H ; Rounding: +1 in each byte.
  1374. shr eax,1 ; Halve (dirty).
  1375. and edx,0FEFEFEFEH ; Clear low bit of each byte so the shift is clean.
  1376. shr edx,1 ; Halve (already clean).
  1377. and eax,07F7F7F7FH ; Clean.
  1378. mov [edi+PITCH*4+28],eax ; Store pels 0-3 of below contribution.
  1379. mov [edi+PITCH*4+32],edx ; Store pels 4-7 of below contribution.
  1380. add ecx,PITCH ; Advance reference block pointer.
  1381. add edi,PITCH ; Advance OBMC prediction block pointer.
  1382. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1383. jnc @b
  1384. sub edi,PITCH*4 ; Undo the 4 increments of edi.
  1385. jmp BelowPredGottenForOBMC
  ; Below OBMC contribution when both MV components have a half-pel part.
  ; Unless the reference matches the central reference, produces all 8 pels of
  ; lines 4-7 into [edi+PITCH*n+28..35].  The loop handles 4 pels per pass:
  ; bit 2 of edi selects between a second pass on the same line ("jne @b")
  ; and the move to the next line, which the top bits of ebp count ("jnc @b").
  ; NOTE(review): this relies on edi having bit 2 clear on entry -- confirm.
  ; esi is borrowed for the 001010101H mask and reloaded from MBActionCursor
  ; on every pass.
  1386. InterpBothForTheBelowContrib:
  1387. IF PITCH-384
  1388. **** Magic leaks out if pitch not equal to 384
  1389. ENDIF
  1390. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1391. add eax,ebx ; Start accumulating below ref addr in eax.
  1392. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1393. mov ebx,PreviousFrameBaseAddress
  1394. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1395. xor edx,0C0000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also interpolated both ways.
  1396. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1397. cmp ecx,edx ; Is central ref the same?
  1398. je BelowPredGottenForOBMC
  1399. @@:
  1400. mov eax,[ecx+PITCH*4+1] ; 4 pels one to the right, this line.
  1401. mov esi,001010101H ; Get 001010101H mask.
  1402. mov ebx,[ecx+PITCH*4] ; 4 pels, this line.
  1403. add edi,4 ; Pre-increment prediction pointer by 4 pels.
  1404. mov edx,[ecx+PITCH*5+1] ; 4 pels one to the right, next line.
  1405. add eax,ebx ; Sums of horizontally adjacent pels, this line.
  1406. mov ebx,[ecx+PITCH*5] ; 4 pels, next line.
  1407. and esi,eax ; Low bit of each upper-line sum.
  1408. shr eax,1 ; Upper-line sums halved (dirty).
  1409. add ebx,edx ; Sums of horizontally adjacent pels, next line.
  1410. and eax,07F7F7F7FH ; Upper-line sums halved (clean).
  1411. add ebx,esi ; Add back the low bits dropped from the upper line.
  1412. shr ebx,1 ; Lower-line sums halved (dirty).
  1413. add ecx,4 ; Advance reference pointer by 4 pels.
  1414. and ebx,07F7F7F7FH ; Lower-line sums halved (clean).
  1415. add eax,001010101H ; Rounding: +1 in each byte.
  1416. add ebx,eax ; Combine the two lines.
  1417. mov eax,4 ; Mask to test bit 2 of edi.
  1418. shr ebx,1 ; Halve again (dirty).
  1419. mov esi,MBActionCursor ; Speculatively restore esi.
  1420. and ebx,07F7F7F7FH ; Interpolated prediction.
  1421. and eax,edi ; ZF==0 after the first 4 pels of a line.
  1422. mov [edi+PITCH*4+28-4],ebx ; Store 4 pels of below contribution (mov keeps ZF).
  1423. jne @b ; Do pels 4-7 of the same line.
  1424. add edi,PITCH-8 ; Move prediction pointer to start of next line.
  1425. add ecx,PITCH-8 ; Move reference pointer to start of next line.
  1426. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1427. jnc @b
  1428. sub edi,PITCH*4 ; Undo the 4 line advances of edi.
  1429. jmp BelowPredGottenForOBMC
  ; Below OBMC contribution when only the horizontal MV has a half-pel part.
  ; Forms the reference address in ecx, sets the second-tap distance to 1 pel
  ; and reuses the 2-tap loop at DoInterpHorzForTheBelowContrib.  If the
  ; reference matches the central reference, nothing is written and control
  ; falls through to BelowPredGottenForOBMC.
  1430. InterpHorzForTheBelowContrib:
  1431. IF PITCH-384
  1432. **** Magic leaks out if pitch not equal to 384
  1433. ENDIF
  1434. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1435. add eax,ebx ; Start accumulating below ref addr in eax.
  1436. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1437. mov ebx,PreviousFrameBaseAddress
  1438. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1439. xor edx,040000000H ; Hi 2 bits of central ref same as this ref if
  ; ; central ref also interpolated horizontally.
  1440. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1441. mov ebx,1 ; Second tap is the pel to the right (mov keeps flags).
  1442. cmp ecx,edx ; Is central ref the same?
  1443. jne DoInterpHorzForTheBelowContrib
  1444. BelowPredGottenForOBMC:
  1445. ; At this point all the contributions to OBMC prediction are in their scratch
  1446. ; half blocks. Now combine them to get the OBMC prediction.
  1447. ;
  1448. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  1449. ; edi -- Address at which to put prediction block
  ;
  ; Per-line layout used below (offsets from the start of a prediction line):
  ; +0/+4 central (C), +8 left (L), +12 right (R), +28/+32 above or below
  ; (A/B).  Three passes, each counted in the top bits of ebp: lines 0 and 7,
  ; then lines 1 and 6, then lines 2 thru 5.  The result overwrites C in place.
  ; First pass: lines 0 and 7.  esi is used as scratch here.
  1450. @@:
  1451. mov eax,[edi+4] ; <C07 C06 C05 C04> or <C77 C76 C75 C74>
  1452. mov ebx,[edi+12] ; <R07 R06 R05 R04> or <R77 R76 R75 R74>
  1453. mov ecx,[edi+32] ; <A07 A06 A05 A04> or <B77 B76 B75 B74>
  1454. mov esi,[edi] ; <C03 C02 C01 C00> or <C73 C72 C71 C70>
  1455. lea edx,[eax+ebx] ; <junk C6+R6 C5+R5 C4+R4>
  1456. and ebx,0FF000000H ; <R7 __ __ __>
  1457. shr edx,1 ; <junk (C6+R6)/2 (C5+R5)/2 (C4+R4)/2> dirty
  1458. add ecx,ebx ; <A7+R7 A6 A5 A4>
  1459. and edx,0007F7F7FH ; <__ (C6+R6)/2 (C5+R5)/2 (C4+R4)/2> clean
  1460. mov ebx,[edi+8] ; <L03 L02 L01 L00> or <L73 L72 L71 L70>
  1461. add edx,ecx ; <(2A7+2R7)/2 (2A6+C5+R5)/2 ...>
  1462. add edi,PITCH*7 ; Move from line 0 to 7 (or 7 to 14)
  1463. shr edx,1 ; <(2A7+2R7)/4 (2A6+C5+R5)/4 ...> dirty
  1464. add ebx,esi ; <C3+L3 C2+L2 C1+L1 junk>
  1465. shr ebx,1 ; <(C3+L3)/2 (C2+L2)/2 (C1+L1)/2 junk> dirty
  1466. and edx,07F7F7F7FH ; <(2A7+2R7)/4 (2A6+C5+R5)/4 ...> clean
  1467. and ebx,07F7F7F7FH ; <(C3+L3)/2 (C2+L2)/2 (C1+L1)/2 junk> clean
  1468. mov ecx,[edi+28-PITCH*7] ; <A03 A02 A01 A00> or <B73 B72 B71 B70>
  1469. lea eax,[eax+edx+001010101H]; <(2A7+4C7+2R7+4)/4 (2A6+5C5+R5+4)/4 ...>
  1470. mov bl,[edi+8-PITCH*7] ; <(C3+L3)/2 (C2+L2)/2 (C1+L1)/2 L0>
  1471. shr eax,1 ; <(2A7+4C7+2R7+4)/8 (2A6+5C5+R5+4)/8 ...> dirty
  1472. add ebx,ecx ; <... (2A1+C1+L1)/2 (2A0+2L0)/2>
  1473. shr ebx,1 ; <... (2A1+C1+L1)/4 (2A0+2L0)/4> dirty
  1474. and eax,07F7F7F7FH ; <(2A7+4C7+2R7+4)/8 (2A6+5C5+R5+4)/8 ...> clean
  1475. and ebx,07F7F7F7FH ; <... (2A1+C1+L1)/4 (2A0+2L0)/4> clean
  1476. add esi,001010101H ; <C3+1 C2+1 C1+1 C0+1>
  1477. add ebx,esi ; <... (2A1+5C1+L1+4)/4 (2A0+4C0+2L0+4)/4>
  1478. mov [edi+4-PITCH*7],eax ; Store OBMC pred for pels 4-7 of line 0 or 7.
  1479. shr ebx,1 ; <... (2A1+5C1+L1+4)/8 (2A0+4C0+2L0+4)/8> dirty
  1480. lea esi,[edi-PITCH*13] ; Speculatively advance to line 1.
  1481. and ebx,07F7F7F7FH ; <... (2A1+5C1+L1+4)/8 (2A0+4C0+2L0+4)/8> clean
  1482. add ebp,080000000H ; Iterate twice. Quit when carry flag gets set.
  1483. mov [edi-PITCH*7],ebx ; Store OBMC pred for pels 0-3 of line 0 or 7.
  1484. jnc @b
  ; Second pass: lines 1 and 6.  esi is the line pointer, edi is scratch.
  1485. @@:
  1486. mov edx,[esi+28] ; <A13 A12 A11 A10> or <B63 B62 B61 B60>
  1487. mov eax,[esi+8] ; <L13 L12 L11 L10> or <L63 L62 L61 L60>
  1488. mov ecx,[esi+32] ; <A17 A16 A15 A14> or <B67 B66 B65 B64>
  1489. mov ebx,[esi+12] ; <R17 R16 R15 R14> or <R67 R66 R65 R64>
  1490. mov edi,[esi] ; <C13 C12 C11 C10> or <C63 C62 C61 C60>
  1491. add esi,PITCH*5 ; Move from line 1 to 6 (or 6 to 11)
  1492. xchg dx,ax ; edx: <A3 A2 L1 L0> eax: <L3 L2 A1 A0>
  1493. xchg cx,bx ; ecx: <A7 A6 R5 R4> ebx: <R7 R6 A5 A4>
  1494. add eax,edi ; <C3+L3 C2+L2 C1+A1 C0+A0>
  1495. mov edi,[esi+4-PITCH*5] ; <C17 C16 C15 C14> or <C67 C66 C65 C64>
  1496. shr eax,1 ; <(C3+L3)/2 (C2+L2)/2 (C1+A1)/2 (C0+A0)/2>dirty
  1497. add ecx,edi ; <C7+A7 C6+A6 C5+R5 C4+R4>
  1498. shr ecx,1 ; <(C7+A7)/2 (C6+A6)/2 (C5+R5)/2 (C4+R4)/2>dirty
  1499. and eax,07F7F7F7FH ; <(C3+L3)/2 (C2+L2)/2 (C1+A1)/2 (C0+A0)/2>clean
  1500. add eax,edx ; <(C3+L3+2A3)/2 ... (C1+2L1+A1)/2 ...>
  1501. and ecx,07F7F7F7FH ; <(C7+A7)/2 (C6+A6)/2 (C5+R5)/2 (C4+R4)/2>clean
  1502. shr eax,1 ; <(C3+L3+2A3)/4 ... (C1+2L1+A1)/4 ...> dirty
  1503. add ecx,ebx ; <(C7+2R7+A7)/2 ... (C5+R5+2A5)/2 ...>
  1504. mov ebx,[esi-PITCH*5] ; <C13 C12 C11 C10> or <C63 C62 C61 C60>
  1505. and eax,07F7F7F7FH ; <(C3+L3+2A3)/4 ... (C1+2L1+A1)/4 ...> clean
  1506. shr ecx,1 ; <(C7+2R7+A7)/4 ... (C5+R5+2A5)/4 ...> dirty
  1507. add edi,001010101H ; <C7+1 C6+1 C5+1 C4+1>
  1508. and ecx,07F7F7F7FH ; <(C7+2R7+A7)/4 ... (C5+R5+2A5)/4 ...> clean
  1509. lea eax,[eax+ebx+001010101H]; <(5C3+L3+2A3+4)/4 ... (5C1+2L1+A1)/4 ...>
  1510. shr eax,1 ; <(5C3+L3+2A3+4)/8 ... (5C1+2L1+A1)/8 ...>dirty
  1511. add ecx,edi ; <(5C7+2R7+A7+4)/4 ... (5C5+R5+2A5)/4 ...>
  1512. shr ecx,1 ; <(5C7+2R7+A7+4)/8 ... (5C5+R5+2A5)/8 ...>dirty
  1513. and eax,07F7F7F7FH ; <(5C3+L3+2A3+4)/8 ... (5C1+2L1+A1)/8 ...>clean
  1514. and ecx,07F7F7F7FH ; <(5C7+2R7+A7+4)/8 ... (5C5+R5+2A5)/8 ...>clean
  1515. mov [esi-PITCH*5],eax ; Store OBMC pred for pels 0-3 of line 1 or 6.
  1516. mov [esi+4-PITCH*5],ecx ; Store OBMC pred for pels 4-7 of line 1 or 6.
  1517. lea edi,[esi-PITCH*9] ; Speculatively advance to line 2.
  1518. add ebp,080000000H ; Iterate twice. Quit when carry flag gets set.
  1519. jnc @b
  ; Third pass: lines 2 thru 5.  edi is the line pointer, esi is scratch.
  1520. @@:
  1521. mov eax,[edi+4] ; <C27 C26 C25 C24> ... <C57 C56 C55 C54>
  1522. mov ebx,[edi+12] ; <R27 R26 R25 R24> ... <R57 R56 R55 R54>
  1523. add bl,al ; <R7 R6 R5 C4+R4>
  1524. mov ecx,[edi] ; <C23 C22 C21 C20> ... <C53 C52 C51 C50>
  1525. shr bl,1 ; <R7 R6 R5 (C4+R4)/2>
  1526. mov edx,[edi+8] ; <L23 L22 L21 L20> ... <L53 L52 L51 L50>
  1527. add bh,ah ; <R7 R6 C5+R5 (C4+R4)/2>
  1528. add edx,ecx ; <C3+L3 C2+L2 junk junk>
  1529. shr bh,1 ; <2R7/2 2R6/2 (C5+R5)/2 (C4+R4)/2>
  1530. mov esi,[edi+32] ; <A27 A26 A25 A24> ... <B57 B56 B55 B54>
  1531. shr edx,1 ; <(C3+L3)/2 (C2+L2)/2 junk junk> dirty
  1532. add esi,eax ; <C7+A7 C6+A6 C5+A5 C4+A4>
  1533. shr esi,1 ; <(C7+A7)/2 (C6+A6)/2 (C5+A5)/2 (C4+A4)/2>dirty
  1534. and edx,07F7F7F7FH ; <(C3+L3)/2 (C2+L2)/2 junk junk> clean
  1535. and esi,07F7F7F7FH ; <(C7+A7)/2 (C6+A6)/2 (C5+A5)/2 (C4+A4)/2>clean
  1536. mov dl,[edi+8] ; <(C3+L3)/2 (C2+L2)/2 junk 2L0/2>
  1537. add esi,ebx ; <(C7+2R7+A7)/2 ... (2C5+R5+A5)/2 ...>
  1538. mov ebx,[edi+28] ; <A23 A22 A21 A20> ... <B53 B52 B51 B50>
  1539. shr esi,1 ; <(C7+2R7+A7)/4 ... (2C5+R5+A5)/4 ...> dirty
  1540. add ebx,ecx ; <C3+A3 C2+A2 C1+A1 C0+A0>
  1541. shr ebx,1 ; <(C3+A3)/2 (C2+A2)/2 (C1+A1)/2 (C0+A0)/2>dirty
  1542. and esi,07F7F7F7FH ; <(C7+2R7+A7)/4 ... (2C5+R5+A5)/4 ...> clean
  1543. and ebx,07F7F7F7FH ; <(C3+A3)/2 (C2+A2)/2 (C1+A1)/2 (C0+A0)/2>clean
  1544. mov dh,[edi+9] ; <(C3+L3)/2 (C2+L2)/2 2L1/2 2L0/2>
  1545. add ebx,edx ; <(2C3+L3+A3)/2 ... (C1+2L1+A1)/2 ...>
  1546. lea eax,[eax+esi+001010101H]; <(5C7+2R7+A7+4)/4 ... (6C5+R5+A5+4)/4 ...>
  1547. shr ebx,1 ; <(2C3+L3+A3)/4 ... (C1+2L1+A1)/4 ...> dirty
  1548. add ecx,001010101H ; <C3+1 C2+1 C1+1 C0+1>
  1549. shr eax,1 ; <(5C7+2R7+A7+4)/8 ... (6C5+R5+A5+4)/8...>dirty
  1550. and ebx,07F7F7F7FH ; <(2C3+L3+A3)/4 ... (C1+2L1+A1)/4 ...> clean
  1551. add ebx,ecx ; <(6C3+L3+A3+4)/4 ... (5C1+2L1+A1+4)/4 ...>
  1552. and eax,07F7F7F7FH ; <(5C7+2R7+A7+4)/8 ... (6C5+R5+A5+4)/8...>clean
  1553. shr ebx,1 ; <(6C3+L3+A3+4)/8 ... (5C1+2L1+A1+4)/8...>dirty
  1554. mov [edi+4],eax ; Store OBMC pred for pels 4-7 of line 2 thru 5.
  1555. and ebx,07F7F7F7FH ; <(6C3+L3+A3+4)/8 ... (5C1+2L1+A1+4)/8...>clean
  1556. mov [edi],ebx ; Store OBMC pred for pels 0-3 of line 2 thru 5.
  1557. add edi,PITCH ; Advance to next line.
  1558. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1559. jnc @b
  ; OBMC prediction done.  Dispatch on which block was just handled, using the
  ; block's descriptor offset in ebp.
  1560. mov esi,MBActionCursor ; Restore MacroBlockActionStream cursor.
  1561. cmp ebp,T_MacroBlockActionDescr.BlkY2
  1562. jle Block1or2DescrBuilt
  1563. mov al,PB [esi].CodedBlocks
  1564. mov edi,CoeffStream ; Restore block descriptor ptr.
  1565. cmp ebp,T_MacroBlockActionDescr.BlkY3
  1566. je Block3DescrBuilt
  ; Entered after the fourth luma block of the macroblock has been handled,
  ; with al = CodedBlocks, esi = MacroBlockActionStream cursor and edi = block
  ; descriptor ptr.  Records block action descriptors for the U and V blocks
  ; (block address + 4, i.e. the inter tag), moves esi to the next macroblock,
  ; then either loops to NextMacroBlock_OBMC or, at end-of-stream, finishes
  ; the first pass.
  1567. Block4DescrBuilt:
  1568. shr al,5 ; Check if block 5 (U) empty.
  1569. lea edx,[esi].BlkU+4 ; Addr of block addr (plus 4).
  1570. sbb ebp,ebp ; -1 iff block not empty.
  1571. mov [edi].BlockAddr,edx ; Store address of block address.
  1572. shr al,1 ; Check if block 6 (V) empty.
  1573. lea edx,[esi].BlkV+4 ; Addr of block addr (plus 4).
  1574. sbb ebx,ebx ; -1 iff block not empty.
  1575. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1576. and ebx,T_CoeffBlk ; 0 iff block empty, else inc.
  1577. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next macroblock descriptor.
  1578. mov [edi+ebp*1].BlockAddr,edx ; Store address of block address.
  1579. add edi,ebp ; Inc block descr ptr if blk non-empty.
  1580. add edi,ebx ; Inc block descr ptr if blk non-empty.
  1581. xor ebp,ebp ; No pending block descr ptr increment.
  1582. and al,1 ; Are we at end-of-stream?
  1583. je NextMacroBlock_OBMC
  1584. sub edi,SIZEOF T_CoeffBlk ; Back up to the last block descr recorded.
  1585. jmp BlockActionStreamBuilt
  1586. ;; partial end of section only defined when H261 not defined.
  1587. ENDIF
  ; BuildBlockActionDescr -- record a block action descriptor for one block.
  ;   BlockNumber -- index (0..5) of the block within the macroblock descriptor.
  ;   AddrOffset  -- tag added to the block address; this file passes 0 for
  ;                  intra and 4 for inter.
  ; On entry: al has this block's coded bit in bit 0, esi is the
  ; MacroBlockActionStream cursor, edi the BlockActionStream cursor and ebp
  ; the increment left pending by the previous block.
  ; On exit: al is shifted right by one, edi has the pending increment
  ; applied, [edi].BlockAddr holds the tagged address, and ebp is T_CoeffBlk
  ; if the block is non-empty, else 0.  An empty block's entry is therefore
  ; overwritten by the next one.  Clobbers edx.
  1588. BuildBlockActionDescr MACRO BlockNumber,AddrOffset
  1589. shr al,1 ; Check if block empty.
  1590. lea edi,[edi+ebp] ; Adjust BlockActionDescr cursor.
  1591. sbb ebp,ebp ; -1 iff block not empty.
  1592. lea edx,[esi].Blk[BlockNumber*SIZEOF T_Blk]+AddrOffset ; Addr of block addr.
  1593. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1594. mov [edi].BlockAddr,edx ; Store address of block address.
  1595. ENDM
  1596. IFNDEF H261
  1597. ;; more code only used when H261 not defined
  ; Intra macroblock in the OBMC first pass: record block action descriptors
  ; for all six blocks with the intra tag (address + 0).  Unlike the non-OBMC
  ; loops, the pending increment in ebp is applied to edi before looping to
  ; NextMacroBlock_OBMC.
  1598. MBIsIntraCoded_OBMC:
  1599. shr al,1 ; Same as BuildBlockActionDescr macro, except don't inc edi.
  1600. sbb ebp,ebp ; -1 iff block 1 not empty.
  1601. lea edx,[esi].BlkY1 ; Addr of block addr (intra: no tag added).
  1602. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1603. mov [edi].BlockAddr,edx ; Store address of block address.
  1604. BuildBlockActionDescr 1,0 ; If blk 2 non-empty, record BAD to do as intra.
  1605. BuildBlockActionDescr 2,0 ; blk 3
  1606. BuildBlockActionDescr 3,0 ; blk 4
  1607. BuildBlockActionDescr 4,0 ; blk 5
  1608. BuildBlockActionDescr 5,0 ; blk 6
  1609. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1610. add edi,ebp ; Apply pending increment for blk 6.
  1611. test al,1 ; Are we at end-of-stream?
  1612. je NextMacroBlock_OBMC
  1613. sub edi,SIZEOF T_CoeffBlk ; Back up to the last block descr recorded.
  1614. jmp BlockActionStreamBuilt
  1615. ;; end of section only defined when H261 not defined.
  1616. ENDIF
  1617. ;===============================================================================
  1618. ;===============================================================================
  1619. ; First pass builds block action stream from macroblock action stream.
  1620. ;===============================================================================
  1621. ;===============================================================================
  1622. ; esi -- MacroBlockActionStream cursor
  1623. ; edi -- BlockActionStream cursor
  1624. ; ebp -- Increment for BlockActionStream cursor
  1625. ; edx -- Address of a block to do
  1626. ; al -- Coded block pattern for I or P block
  1627. ; bl -- BlockType
  ;
  ; One iteration per macroblock.  Inter macroblocks are handled inline with
  ; the inter tag (address + 4); intra macroblocks branch to MBIsIntraCoded.
  ; After the six shifts done by the macros, the end-of-stream bit (bit 6 of
  ; CodedBlocks) sits in bit 0 of al.
  1628. NextMacroBlock:
  1629. mov bl,PB [esi].BlockType
  1630. mov al,PB [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  1631. ; Bit 4 set for non-empty U blk.
  1632. ; Bit 5 set for non-empty V blk.
  1633. ; Bit 6 clear except at stream end.
  1634. ; Bit 7 clear. Unused.
  1635. and bl,IsINTRA
  1636. jne MBIsIntraCoded
  1637. BuildBlockActionDescr 0,4 ; If blk 1 non-empty, record BAD to do as inter.
  1638. BuildBlockActionDescr 1,4 ; blk 2
  1639. BuildBlockActionDescr 2,4 ; blk 3
  1640. BuildBlockActionDescr 3,4 ; blk 4
  1641. BuildBlockActionDescr 4,4 ; blk 5
  1642. BuildBlockActionDescr 5,4 ; blk 6
  1643. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1644. and al,1 ; Are we at end-of-stream?
  1645. je NextMacroBlock
  1646. add edi,ebp ; Apply pending increment for blk 6.
  1647. sub edi,SIZEOF T_CoeffBlk ; Back up to the last block descr recorded.
  1648. jmp BlockActionStreamBuilt
  ; Intra macroblock in the first pass: record block action descriptors for
  ; all six blocks with the intra tag (address + 0), then continue with the
  ; next macroblock or, at end-of-stream, finish the first pass.
  1649. MBIsIntraCoded:
  1650. BuildBlockActionDescr 0,0 ; If blk 1 non-empty, record BAD to do as intra.
  1651. BuildBlockActionDescr 1,0 ; blk 2
  1652. BuildBlockActionDescr 2,0 ; blk 3
  1653. BuildBlockActionDescr 3,0 ; blk 4
  1654. BuildBlockActionDescr 4,0 ; blk 5
  1655. BuildBlockActionDescr 5,0 ; blk 6
  1656. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1657. and al,1 ; Are we at end-of-stream?
  1658. je NextMacroBlock
  1659. add edi,ebp ; Apply pending increment for blk 6.
  1660. sub edi,SIZEOF T_CoeffBlk ; Back up to the last block descr recorded.
  1661. jmp BlockActionStreamBuilt
  1662. IFNDEF H261
  ; First pass for B frames: one iteration per macroblock, recording a block
  ; action descriptor with the BiDi tag (address + 8) for each of the six
  ; blocks, together with masks for the lines and columns to do bidi.
  ; NOTE(review): the table lookups index with ecx and eax after loading only
  ; cl and al, so the upper 24 bits of both are presumably zero on entry --
  ; confirm against the code that jumps here.
  1663. NextBMacroBlock:
  1664. ; esi -- MacroBlockActionStream cursor
  1665. ; edi -- BlockActionStream cursor
  1666. ; ebp -- Increment for BlockActionStream cursor
  1667. ; edx -- Address of a block to do
  1668. ; cl -- Used to compute defined columns mask case.
  1669. ; bh -- Coded block pattern for B block
  1670. ; bl -- Coded block pattern for I or P block
  1671. ; al -- Used to compute defined rows mask.
  ;
  ; BuildBBlockActionDescr -- record one B-frame block action descriptor.
  ;   BlkNum              -- index (0..5) of the block in the MB descriptor.
  ;   LinesDefFutureFrame -- table indexed by (VMVb - 96), giving the lines mask.
  ;   ColsDefFutureFrame  -- table indexed by (HMVb - 96), giving the column
  ;                          case used to index ColsDefined.
  1672. BuildBBlockActionDescr MACRO BlkNum,LinesDefFutureFrame,ColsDefFutureFrame
  1673. shr bh,1 ; Check if block empty.
  1674. mov cl,[esi].Blk[BlkNum*SIZEOF T_Blk].BestHMVb ; HMVb for block.
  1675. lea edi,[edi+ebp] ; Adjust BlockActionDescr.
  1676. mov al,[esi].Blk[BlkNum*SIZEOF T_Blk].BestVMVb ; VMVb for block.
  1677. sbb ebp,ebp ; -1 iff block not empty.
  1678. mov cl,ColsDefFutureFrame[ecx-96] ; Case of columns to do bidi.
  1679. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1680. mov al,LinesDefFutureFrame[eax-96] ; Mask for lines to do bidi.
  1681. mov [edi].LinesDefined,al ; Stash it.
  1682. mov edx,ColsDefined[ecx] ; Mask for columns 0-3 for this case.
  1683. mov [edi].Cols03Defined,edx ; Stash it.
  1684. mov edx,ColsDefined[ecx+4] ; Mask for columns 4-7 for this case.
  1685. mov [edi].Cols47Defined,edx ; Stash it.
  1686. lea edx,[esi].Blk[BlkNum*SIZEOF T_Blk]+8 ; Addr of block addr.
  1687. mov [edi].BlockAddr,edx ; Store address of blk address.
  1688. ENDM
  1689. mov ebx,PD [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  1690. ; Bit 4 set for non-empty U blk.
  1691. ; Bit 5 set for non-empty V blk.
  1692. ; Bit 6 clear except at stream end.
  1693. ; Bit 7 clear. Unused.
  1694. ; Bits 8-13 like bits 0-5, but for B frame.
  1695. ; Bit 14-15 clear. Unused.
  1696. BuildBBlockActionDescr 0, UpperYBlkLinesDef, LeftYBlkColsDef
  1697. BuildBBlockActionDescr 1, UpperYBlkLinesDef, RightYBlkColsDef
  1698. BuildBBlockActionDescr 2, LowerYBlkLinesDef, LeftYBlkColsDef
  1699. BuildBBlockActionDescr 3, LowerYBlkLinesDef, RightYBlkColsDef
  1700. BuildBBlockActionDescr 4, ChromaLinesDef, ChromaColsDef
  1701. BuildBBlockActionDescr 5, ChromaLinesDef, ChromaColsDef
  1702. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1703. and bl,040H ; Are we at end-of-stream?
  1704. je NextBMacroBlock
  1705. add edi,ebp ; Apply pending increment for blk 6.
  1706. sub edi,SIZEOF T_CoeffBlk ; Back up to the last block descr recorded.
  1707. ENDIF
  1708. BlockActionStreamBuilt:
  1709. mov CoeffStream,edi ; Stash address of last block of coeffs.
  1710. NextBlock:
  1711. ;===============================================================================
  1712. ;===============================================================================
  1713. ; Second pass performs frame differencing of Inters and Forward DCT.
  1714. ;===============================================================================
  1715. ;===============================================================================
  ; edi -- BlockActionStream cursor.  The low bits of [edi].BlockAddr carry
  ; the tag added by the first pass: bit 2 (value 4) set for inter/OBMC
  ; blocks, bit 3 (value 8) set for BiDi blocks, neither set for intra blocks.
  ; Both BlkOffset loads are done before the block type is fully known; the
  ; intra path falls through to IntraBlock.
  1716. mov eax,[edi].BlockAddr ; Fetch address of block to do
  1717. mov ebp,PITCH
  1718. test eax,4 ; Is it an Inter block.
  1719. jne InterOrOBMCBlock ; Jump if doing inter block.
  1720. mov edx,[eax].T_Blk.BlkOffset ; BlkOffset if INTRA; BestMVs if BiDi.
  1721. mov ecx,TargetFrameBaseAddress
  1722. add ecx,edx ; Target block address if INTRA
  1723. mov esi,[eax-8].T_Blk.BlkOffset ; Addr of BlkOffset if BiDi
  1724. IFNDEF H261
  1725. ;; H261 does not execute the BiDi code so it is included only when H261 is not defined
  1726. ;;
  1727. test eax,8 ; Is it a BiDi block?
  1728. jne BiDiBlock ; Jump if doing BiDi block.
  1729. ENDIF
  ; Intra path: copy the 8 lines of 8 bytes at ecx (line stride PITCH) into the
  ; locals P00..P74 unchanged, then enter the DCT with P0 in ebx and P3 in ecx.
  1730. IntraBlock:
  1731. ; Register usage:
  1732. ; ecx,edi -- Address of block.
  1733. ; ebp -- Pitch.
  1734. ; ebx, eax -- Scratch.
  1735. mov ebx,[ecx] ; Line 0, columns 0-3 (kept in ebx for P0 below).
  1736. mov eax,[ecx+4]
  1737. mov P00,ebx
  1738. mov P04,eax
  1739. mov eax,[ecx+ebp*1]
  1740. mov edx,[ecx+ebp*1+4]
  1741. lea edi,[ecx+PITCH*5] ; edi -> line 5 (used for lines 5, 6, 7).
  1742. lea ecx,[ecx+ebp*2] ; ecx -> line 2 (used for lines 2, 3, 4).
  1743. mov P10,eax
  1744. mov P14,edx
  1745. mov eax,[ecx]
  1746. mov edx,[ecx+4]
  1747. mov P20,eax
  1748. mov P24,edx
  1749. mov eax,[ecx+ebp*1]
  1750. mov edx,[ecx+ebp*1+4]
  1751. mov P30,eax
  1752. mov P34,edx
  1753. mov eax,[ecx+ebp*2]
  1754. mov edx,[ecx+ebp*2+4]
  1755. mov P40,eax
  1756. mov P44,edx
  1757. mov eax,[edi]
  1758. mov edx,[edi+4]
  1759. mov P50,eax
  1760. mov P54,edx
  1761. mov eax,[edi+ebp*1]
  1762. mov edx,[edi+ebp*1+4]
  1763. mov P60,eax
  1764. mov P64,edx
  1765. mov eax,[edi+ebp*2]
  1766. mov edx,[edi+ebp*2+4]
  1767. mov P74,edx
  1768. xor ecx,ecx
  1769. and ebx,00000007FH ; Fetch P0: low 7 bits of the dword loaded from line 0.
  1770. mov cl,P03 ; Fetch P3.
  1771. mov P70,eax
  1772. jmp DoForwardDCT
  1773. IFNDEF H261
  1774. ;; H261 does not execute the BiDi code so it is included only when H261 is not defined
  1775. ;;
  ; BiDi path, part 1: build the future-frame prediction.
  ; On entry: eax = block address with the BiDi flag (8) still added,
  ;           edx = dword holding the backward MVs, esi = BlkOffset,
  ;           edi = current block action stream entry.
  ; The carry produced by "shr edx,25" is consumed by "jc" several instructions
  ; later; every instruction in between is a mov or lea, which leave flags alone.
  ; The same holds for "shr dl,1" and the second "jc".
  ; This fall-through case handles integer-pel VMVb and HMVb: future pels are
  ; copied, masked by line/column range, into P00.. and the masks into Mask00..
  ; NOTE(review): esp is toggled by 4 and used as the two-pass loop flag; this
  ; relies on the esp alignment set up outside this excerpt -- confirm.
  1776. BiDiBlock:
  1777. mov BlkActionDescrAddr,eax ; Save flagged block address for later MV fetches.
  1778. mov ebp,FutureFrameBaseAddress
  1779. shr edx,25 ; Extract VMVb/2. CF == 1 iff VMVb is half pel.
  1780. mov bl,[edi].LinesDefined
  1781. lea esi,[esi+ebp-48] ; Addr 0-MV blk in Future P Frame.
  1782. mov ebp,[edi].Cols47Defined
  1783. IF PITCH-384
  1784. **** Magic leaks out if pitch not equal to 384
  1785. ENDIF
  1786. lea ecx,[edx+edx*2-48*3] ; Mult integer pel VMVb by PITCH (x3 here, x128 by the shl below).
  1787. mov edi,[edi].Cols03Defined
  1788. mov dl,[eax-8].T_Blk.BestHMVb ; Fetch HMVb.
  1789. jc InterpVert_FuturePFrame
  1790. shl ecx,7
  1791. shr dl,1 ; CF == 1 iff HMVb is half pel.
  1792. mov bh,bl ; Keep a copy of the line mask for the second pass.
  1793. lea esi,[esi+ecx] ; Add VMVb contrib to block addr.
  1794. jc InterpHorz_FuturePFrame
  1795. add esi,edx ; Add HMVb contrib to block addr.
  1796. ; esi -- Future P Frame block address.
  1797. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  1798. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  1799. ; bl -- Mask of lines that are in range.
  ; Each pass handles 8 lines of 4 columns.  Step tags: digit = line, letter = step.
  1800. @@:
  1801. xor esp,4
  1802. add bl,bl ; 0A CF == 1 iff line 0 in range.
  1803. sbb eax,eax ; 0B eax == -1 if line 0 in range.
  1804. mov ecx,[esi] ; 0C Fetch Future P00:P03.
  1805. and eax,edi ; 0D In range among P00,P01,P02,P03.
  1806. add bl,bl ; 1A
  1807. sbb edx,edx ; 1B
  1808. mov Mask00+4,eax ; 0E Stash Mask for use with past pred.
  1809. and eax,ecx ; 0F Select in-range pels.
  1810. mov ecx,[esi+PITCH*1] ; 1C
  1811. mov P00+4,eax ; 0G Stash in-range pels.
  1812. and edx,edi ; 1D
  1813. mov Mask10+4,edx ; 1E
  1814. add bl,bl ; 2A
  1815. sbb eax,eax ; 2B
  1816. and edx,ecx ; 1F
  1817. mov P10+4,edx ; 1G
  1818. mov ecx,[esi+PITCH*2] ; 2C
  1819. and eax,edi ; 2D
  1820. add bl,bl ; 3A
  1821. sbb edx,edx ; 3B
  1822. mov Mask20+4,eax ; 2E
  1823. and eax,ecx ; 2F
  1824. mov ecx,[esi+PITCH*3] ; 3C
  1825. mov P20+4,eax ; 2G
  1826. and edx,edi ; 3D
  1827. mov Mask30+4,edx ; 3E
  1828. add bl,bl ; 4A
  1829. sbb eax,eax ; 4B
  1830. and edx,ecx ; 3F
  1831. mov P30+4,edx ; 3G
  1832. mov ecx,[esi+PITCH*4] ; 4C
  1833. and eax,edi ; 4D
  1834. add bl,bl ; 5A
  1835. sbb edx,edx ; 5B
  1836. mov Mask40+4,eax ; 4E
  1837. and eax,ecx ; 4F
  1838. mov ecx,[esi+PITCH*5] ; 5C
  1839. mov P40+4,eax ; 4G
  1840. and edx,edi ; 5D
  1841. mov Mask50+4,edx ; 5E
  1842. add bl,bl ; 6A
  1843. sbb eax,eax ; 6B
  1844. and edx,ecx ; 5F
  1845. mov P50+4,edx ; 5G
  1846. mov ecx,[esi+PITCH*6] ; 6C
  1847. and eax,edi ; 6D
  1848. add bl,bl ; 7A
  1849. sbb edx,edx ; 7B
  1850. mov Mask60+4,eax ; 6E
  1851. and eax,ecx ; 6F
  1852. mov ecx,[esi+PITCH*7] ; 7C
  1853. mov P60+4,eax ; 6G
  1854. and edx,edi ; 7D
  1855. mov Mask70+4,edx ; 7E
  1856. and edx,ecx ; 7F
  1857. mov P70+4,edx ; 7G
  1858. mov edi,ebp ; Second pass uses the columns 4-7 mask.
  1859. mov edx,BlkActionDescrAddr
  1860. add esi,4 ; Move to right 4 columns.
  1861. mov ecx,4
  1862. mov bl,bh ; Reload line mask.
  1863. and ecx,esp
  1864. je @b ; Loop once more while bit 2 of esp is clear.
  1865. mov edi,[edx-8].T_Blk.BlkOffset
  1866. xor eax,eax
  1867. mov al,[edx-8].T_Blk.BestVMVf
  1868. jmp BiDiFuturePredDone
  ; BiDi future prediction, VMVb half pel.  Finishes the address computation,
  ; branches to InterpBoth_FuturePFrame if HMVb is half pel too, otherwise
  ; averages each line with the line below it (sum + 001010101H, shifted right 1)
  ; and stores the masked result in P00.. and the masks in Mask00..
  ; Two passes of 8 lines by 4 columns, as in the integer-pel case.
  ; NOTE(review): esp bit 2 is the pass flag; depends on the esp alignment set
  ; up outside this excerpt -- confirm.
  1869. InterpVert_FuturePFrame:
  1870. shl ecx,7
  1871. shr dl,1 ; CF == 1 iff HMVb is half pel.
  1872. mov bh,bl ; Keep a copy of the line mask for the second pass.
  1873. lea esi,[esi+ecx] ; Add VMVb contrib to block addr.
  1874. jc InterpBoth_FuturePFrame
  1875. add esi,edx ; Add HMVb contrib to block addr.
  1876. ; esi -- Future P Frame block address.
  1877. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  1878. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  1879. ; bl -- Mask of lines that are in range.
  1880. ; Interpolate Future Prediction Vertically.
  1881. @@:
  1882. xor esp,4
  1883. add bl,bl ; 0A CF == 1 iff line 0 in range.
  1884. sbb eax,eax ; 0B eax == -1 if line 0 in range.
  1885. mov ecx,[esi] ; 0C Fetch Future P00:P03.
  1886. and eax,edi ; 0D In range among P00,P01,P02,P03.
  1887. mov edx,[esi+PITCH*1] ; 0E Fetch Future P10:P13.
  1888. mov Mask00+4,eax ; 0F Stash Mask for use with past pred.
  1889. add ecx,edx ; 0G Add P00:P03 and P10:P13.
  1890. add ecx,001010101H ; 0H Add rounding.
  1891. shr ecx,1 ; 0I Interpolate (divide by 2).
  1892. add bl,bl ; 1A
  1893. sbb edx,edx ; 1B
  1894. and eax,ecx ; 0J Select in-range pels (and clean).
  1895. mov P00+4,eax ; 0K Stash in-range pels.
  1896. mov ecx,[esi+PITCH*1] ; 1C
  1897. and edx,edi ; 1D
  1898. mov eax,[esi+PITCH*2] ; 1E
  1899. mov Mask10+4,edx ; 1F
  1900. add ecx,eax ; 1G
  1901. add ecx,001010101H ; 1H
  1902. shr ecx,1 ; 1I
  1903. add bl,bl ; 2A
  1904. sbb eax,eax ; 2B
  1905. and edx,ecx ; 1J
  1906. mov P10+4,edx ; 1K
  1907. mov ecx,[esi+PITCH*2] ; 2C
  1908. and eax,edi ; 2D
  1909. mov edx,[esi+PITCH*3] ; 2E
  1910. mov Mask20+4,eax ; 2F
  1911. add ecx,edx ; 2G
  1912. add ecx,001010101H ; 2H
  1913. shr ecx,1 ; 2I
  1914. add bl,bl ; 3A
  1915. sbb edx,edx ; 3B
  1916. and eax,ecx ; 2J
  1917. mov P20+4,eax ; 2K
  1918. mov ecx,[esi+PITCH*3] ; 3C
  1919. and edx,edi ; 3D
  1920. mov eax,[esi+PITCH*4] ; 3E
  1921. mov Mask30+4,edx ; 3F
  1922. add ecx,eax ; 3G
  1923. add ecx,001010101H ; 3H
  1924. shr ecx,1 ; 3I
  1925. add bl,bl ; 4A
  1926. sbb eax,eax ; 4B
  1927. and edx,ecx ; 3J
  1928. mov P30+4,edx ; 3K
  1929. mov ecx,[esi+PITCH*4] ; 4C
  1930. and eax,edi ; 4D
  1931. mov edx,[esi+PITCH*5] ; 4E
  1932. mov Mask40+4,eax ; 4F
  1933. add ecx,edx ; 4G
  1934. add ecx,001010101H ; 4H
  1935. shr ecx,1 ; 4I
  1936. add bl,bl ; 5A
  1937. sbb edx,edx ; 5B
  1938. and eax,ecx ; 4J
  1939. mov P40+4,eax ; 4K
  1940. mov ecx,[esi+PITCH*5] ; 5C
  1941. and edx,edi ; 5D
  1942. mov eax,[esi+PITCH*6] ; 5E
  1943. mov Mask50+4,edx ; 5F
  1944. add ecx,eax ; 5G
  1945. add ecx,001010101H ; 5H
  1946. shr ecx,1 ; 5I
  1947. add bl,bl ; 6A
  1948. sbb eax,eax ; 6B
  1949. and edx,ecx ; 5J
  1950. mov P50+4,edx ; 5K
  1951. mov ecx,[esi+PITCH*6] ; 6C
  1952. and eax,edi ; 6D
  1953. mov edx,[esi+PITCH*7] ; 6E
  1954. mov Mask60+4,eax ; 6F
  1955. add ecx,edx ; 6G
  1956. add ecx,001010101H ; 6H
  1957. add esi,4 ; Move to right 4 columns early; line 7 loads compensate with -4.
  1958. shr ecx,1 ; 6I
  1959. add bl,bl ; 7A
  1960. sbb edx,edx ; 7B
  1961. and eax,ecx ; 6J
  1962. mov P60+4,eax ; 6K
  1963. mov ecx,[esi+PITCH*7-4] ; 7C
  1964. and edx,edi ; 7D
  1965. mov eax,[esi+PITCH*8-4] ; 7E
  1966. mov Mask70+4,edx ; 7F
  1967. add ecx,eax ; 7G
  1968. add ecx,001010101H ; 7H
  1969. mov bl,bh ; Reload line mask.
  1970. shr ecx,1 ; 7I
  1971. and edx,ecx ; 7J
  1972. mov P70+4,edx ; 7K
  1973. mov edi,ebp ; Second pass uses the columns 4-7 mask.
  1974. mov edx,BlkActionDescrAddr
  1975. mov ecx,4
  1976. and ecx,esp
  1977. je @b ; Loop once more while bit 2 of esp is clear.
  1978. mov edi,[edx-8].T_Blk.BlkOffset
  1979. xor eax,eax
  1980. mov al,[edx-8].T_Blk.BestVMVf
  1981. jmp BiDiFuturePredDone
  ; BiDi future prediction, HMVb half pel and VMVb integer pel.  Each dword of
  ; 4 pels is averaged with the dword one byte to its right
  ; (sum + 001010101H, shifted right 1), masked by line/column range, and
  ; stored in P00..; the masks go to Mask00..
  ; Two passes of 8 lines by 4 columns.
  ; NOTE(review): esp bit 2 is the pass flag; depends on the esp alignment set
  ; up outside this excerpt -- confirm.
  1982. InterpHorz_FuturePFrame:
  1983. ; esi -- Future P Frame block address.
  1984. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  1985. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  1986. ; bl -- Mask of lines that are in range.
  1987. ; Interpolate Future Prediction Horizontally.
  1988. add esi,edx ; Add HMVb contrib to block addr.
  1989. @@:
  1990. xor esp,4
  1991. add bl,bl ; 0A CF == 1 iff line 0 in range.
  1992. sbb eax,eax ; 0B eax == -1 if line 0 in range.
  1993. mov ecx,[esi] ; 0C Fetch Future P00:P03.
  1994. and eax,edi ; 0D In range among P00,P01,P02,P03.
  1995. mov edx,[esi+1] ; 0E Fetch Future P01:P04.
  1996. mov Mask00+4,eax ; 0F Stash Mask for use with past pred.
  1997. add ecx,edx ; 0G Add P00:P03 and P01:P04.
  1998. add ecx,001010101H ; 0H Add rounding.
  1999. shr ecx,1 ; 0I Interpolate (divide by 2).
  2000. add bl,bl ; 1A
  2001. sbb edx,edx ; 1B
  2002. and eax,ecx ; 0J Select in-range pels (and clean).
  2003. mov P00+4,eax ; 0K Stash in-range pels.
  2004. mov ecx,[esi+PITCH*1] ; 1C
  2005. and edx,edi ; 1D
  2006. mov eax,[esi+PITCH*1+1] ; 1E
  2007. mov Mask10+4,edx ; 1F
  2008. add ecx,eax ; 1G
  2009. add ecx,001010101H ; 1H
  2010. shr ecx,1 ; 1I
  2011. add bl,bl ; 2A
  2012. sbb eax,eax ; 2B
  2013. and edx,ecx ; 1J
  2014. mov P10+4,edx ; 1K
  2015. mov ecx,[esi+PITCH*2] ; 2C
  2016. and eax,edi ; 2D
  2017. mov edx,[esi+PITCH*2+1] ; 2E
  2018. mov Mask20+4,eax ; 2F
  2019. add ecx,edx ; 2G
  2020. add ecx,001010101H ; 2H
  2021. shr ecx,1 ; 2I
  2022. add bl,bl ; 3A
  2023. sbb edx,edx ; 3B
  2024. and eax,ecx ; 2J
  2025. mov P20+4,eax ; 2K
  2026. mov ecx,[esi+PITCH*3] ; 3C
  2027. and edx,edi ; 3D
  2028. mov eax,[esi+PITCH*3+1] ; 3E
  2029. mov Mask30+4,edx ; 3F
  2030. add ecx,eax ; 3G
  2031. add ecx,001010101H ; 3H
  2032. shr ecx,1 ; 3I
  2033. add bl,bl ; 4A
  2034. sbb eax,eax ; 4B
  2035. and edx,ecx ; 3J
  2036. mov P30+4,edx ; 3K
  2037. mov ecx,[esi+PITCH*4] ; 4C
  2038. and eax,edi ; 4D
  2039. mov edx,[esi+PITCH*4+1] ; 4E
  2040. mov Mask40+4,eax ; 4F
  2041. add ecx,edx ; 4G
  2042. add ecx,001010101H ; 4H
  2043. shr ecx,1 ; 4I
  2044. add bl,bl ; 5A
  2045. sbb edx,edx ; 5B
  2046. and eax,ecx ; 4J
  2047. mov P40+4,eax ; 4K
  2048. mov ecx,[esi+PITCH*5] ; 5C
  2049. and edx,edi ; 5D
  2050. mov eax,[esi+PITCH*5+1] ; 5E
  2051. mov Mask50+4,edx ; 5F
  2052. add ecx,eax ; 5G
  2053. add ecx,001010101H ; 5H
  2054. shr ecx,1 ; 5I
  2055. add bl,bl ; 6A
  2056. sbb eax,eax ; 6B
  2057. and edx,ecx ; 5J
  2058. mov P50+4,edx ; 5K
  2059. mov ecx,[esi+PITCH*6] ; 6C
  2060. and eax,edi ; 6D
  2061. mov edx,[esi+PITCH*6+1] ; 6E
  2062. mov Mask60+4,eax ; 6F
  2063. add ecx,edx ; 6G
  2064. add ecx,001010101H ; 6H
  2065. add esi,4 ; Move to right 4 columns early; line 7 loads compensate with -4.
  2066. shr ecx,1 ; 6I
  2067. add bl,bl ; 7A
  2068. sbb edx,edx ; 7B
  2069. and eax,ecx ; 6J
  2070. mov P60+4,eax ; 6K
  2071. mov ecx,[esi+PITCH*7-4] ; 7C
  2072. and edx,edi ; 7D
  2073. mov eax,[esi+PITCH*7+1-4] ; 7E
  2074. mov Mask70+4,edx ; 7F
  2075. add ecx,eax ; 7G
  2076. add ecx,001010101H ; 7H
  2077. mov bl,bh ; Reload line mask.
  2078. shr ecx,1 ; 7I
  2079. and edx,ecx ; 7J
  2080. mov P70+4,edx ; 7K
  2081. mov edi,ebp ; Second pass uses the columns 4-7 mask.
  2082. mov edx,BlkActionDescrAddr
  2083. mov ecx,4
  2084. and ecx,esp
  2085. je @b ; Loop once more while bit 2 of esp is clear.
  2086. mov edi,[edx-8].T_Blk.BlkOffset
  2087. xor eax,eax
  2088. mov al,[edx-8].T_Blk.BestVMVf
  2089. jmp BiDiFuturePredDone
  ; BiDi future prediction, both HMVb and VMVb half pel.  One line of 4 columns
  ; per inner iteration: the horizontal pair sum of the current line and of the
  ; line below are each halved and then averaged together.  The inner loop runs
  ; down the lines of the left 4 columns, then esi is moved 4 columns right
  ; and back up PITCH*8, the column mask is switched to ebp, and the inner loop
  ; runs again.  Falls through into BiDiFuturePredDone.
  ; NOTE(review): esp is both the store cursor (advanced 8 per line) and the
  ; loop counter (bits 3-5 for lines, bit 2 for the column pass); this depends
  ; on the esp alignment set up outside this excerpt -- confirm.
  2090. InterpBoth_FuturePFrame:
  2091. add esi,edx ; Add HMVb contrib to block addr.
  2092. sub esp,68
  2093. ; esi -- Future P Frame block address.
  2094. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  2095. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  2096. ; bl -- Mask of lines that are in range.
  2097. ; Interpolate Future Prediction both Horizontally and Vertically.
  2098. @@:
  2099. add esp,8
  2100. mov eax,[esi] ; Fetch Future P00:P03.
  2101. mov ecx,001010101H ; Mask to extract halves.
  2102. mov edx,[esi+1] ; Fetch Future P01:P04.
  2103. add eax,edx ; <P04+P03 ...>.
  2104. mov edx,[esi+PITCH+1] ; Fetch Future P11:P14.
  2105. and ecx,eax ; <(P04+P03)&1 ...>.
  2106. add esi,PITCH ; Advance to next line.
  2107. xor eax,ecx ; <(P04+P03)/2*2 ...>.
  2108. add edx,ecx ; <P14+((P04+P03)&1) ...>.
  2109. shr eax,1 ; <(P04+P03)/2 ...>.
  2110. mov ecx,[esi] ; Fetch Future P10:P13.
  2111. add edx,ecx ; <P14+P13+((P04+P03)&1) ...>.
  2112. add eax,001010101H ; <(P04+P03)/2+1 ...>
  2113. shr edx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  2114. add bl,bl ; CF == 1 iff current line in range.
  2115. sbb ecx,ecx ; ecx == -1 if current line in range.
  2116. and edx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  2117. add eax,edx ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  2118. and ecx,edi ; In range among the 4 columns of this pass.
  2119. shr eax,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  2120. mov Mask00+60,ecx ; Stash Mask for use with past prediction.
  2121. and eax,ecx ; Select in-range pels from future pred (and clean).
  2122. test esp,000000038H
  2123. mov P00+60,eax ; Stash in-range pels.
  2124. jne @b
  2125. sub esi,PITCH*8-4 ; Move to right 4 columns.
  2126. mov edx,BlkActionDescrAddr
  2127. mov edi,ebp ; Second pass uses the columns 4-7 mask.
  2128. sub esp,60
  2129. mov ecx,4
  2130. mov bl,bh ; Reload line mask.
  2131. and ecx,esp
  2132. jne @b
  2133. add esp,60
  2134. xor eax,eax
  2135. mov edi,[edx-8].T_Blk.BlkOffset
  2136. mov al,[edx-8].T_Blk.BestVMVf
  ; BiDi path, part 2: combine with the past prediction and difference against
  ; the target block.
  ; On entry: eax = VMVf (zero extended), edi = BlkOffset of the block,
  ;           edx = flagged block address (hence the -8 displacement),
  ;           P00.. = masked future prediction, Mask00.. = bidi masks.
  ; The carry from "shr al,1" selects InterpVert_PastPFrame; the carry from
  ; "shr cl,1" selects InterpHorz_PastPFrame.  Only mov/lea sit between each
  ; shift and its jc, so the carry survives.
  ; This fall-through case handles integer-pel VMVf and HMVf, one line of
  ; 8 pels per iteration.
  ; NOTE(review): esp is the store cursor and loop counter (bits 3-5); depends
  ; on the esp alignment set up outside this excerpt -- confirm.
  2137. BiDiFuturePredDone:
  2138. shr al,1 ; CF == 1 iff VMVf is half pel.
  2139. mov esi,TargetFrameBaseAddress
  2140. mov cl,[edx-8].T_Blk.BestHMVf
  2141. mov edx,DistFromTargetToPastP
  2142. lea edi,[edi+esi] ; Target block address.
  2143. jc InterpVert_PastPFrame
  2144. shr cl,1 ; CF == 1 iff HMVf is half pel.
  2145. lea eax,[eax+eax*2-48*3] ; Mult integer pel VMVf by PITCH (x3 here, x128 by the shl below).
  2146. lea esi,[edi+edx-48] ; Addr 0-MV blk in Past P Frame.
  2147. jc InterpHorz_PastPFrame
  2148. shl eax,7
  2149. add esi,ecx ; Add HMVf contrib to block addr.
  2150. add esi,eax ; Add VMVf contrib to block addr.
  2151. sub esp,64
  2152. ; esi -- Past P Frame block address.
  2153. ; edi -- Target block address.
  ; Step tags: leading 0 = columns 0-3 stream, leading 4 = columns 4-7 stream.
  2154. @@:
  2155. mov eax,[esi] ; 0A Fetch past prediction.
  2156. mov ebx,Mask00+64 ; 0B Fetch bidi-prediction mask.
  2157. mov ecx,P00+64 ; 0C Fetch future pred for bidi predicted pels.
  2158. and ebx,eax ; 0D Extract past for bidi predicted pels.
  2159. mov edx,[esi+4] ; 4A
  2160. mov ebp,Mask04+64 ; 4B
  2161. lea eax,[ecx+eax*2] ; 0E (2*Past) or ((2*Past+Future) for each pel.
  2162. mov ecx,P04+64 ; 4C
  2163. sub eax,ebx ; 0F (2*Past) or (Past+Future) for each pel.
  2164. and ebp,edx ; 4D
  2165. shr eax,1 ; 0G (Past) or ((Past+Future)/2) (dirty).
  2166. lea edx,[ecx+edx*2] ; 4E
  2167. and eax,07F7F7F7FH ; 0H (Past) or ((Past+Future)/2) (clean).
  2168. sub edx,ebp ; 4F
  2169. shr edx,1 ; 4G
  2170. mov ebx,[edi] ; 0I Fetch target pels.
  2171. and edx,07F7F7F7FH ; 4H
  2172. mov ebp,[edi+4] ; 4I
  2173. sub ebx,eax ; 0J Compute correction.
  2174. sub ebp,edx ; 4J
  2175. add ebx,080808080H ; 0K Bias correction.
  2176. add ebp,080808080H ; 4K
  2177. mov P00+64,ebx ; 0L Store correction.
  2178. mov P04+64,ebp ; 4L
  2179. add esi,PITCH
  2180. add esp,8
  2181. test esp,000000038H
  2182. lea edi,[edi+PITCH]
  2183. jne @b
  2184. xor ebx,ebx
  2185. xor ecx,ecx
  2186. mov bl,P00 ; Fetch P0.
  2187. mov cl,P03 ; Fetch P3.
  2188. jmp DoForwardDCT
  ; BiDi past prediction, VMVf half pel.  Branches to InterpBoth_PastPFrame if
  ; HMVf is half pel too; otherwise averages each past line with the line below
  ; it, combines with the stored future prediction under the bidi mask, and
  ; stores the biased difference from the target in P00..  One line of 8 pels
  ; per iteration.
  ; NOTE(review): esp is the store cursor and loop counter (bits 3-5); depends
  ; on the esp alignment set up outside this excerpt -- confirm.
  2189. InterpVert_PastPFrame:
  2190. shr cl,1 ; CF == 1 iff HMVf is half pel.
  2191. lea eax,[eax+eax*2-48*3] ; Mult integer pel VMVf by PITCH (x3 here, x128 by the shl below).
  2192. lea esi,[edi+edx-48] ; Addr 0-MV blk in Past P Frame.
  2193. jc InterpBoth_PastPFrame
  2194. shl eax,7
  2195. add esi,ecx ; Add HMVf contrib to block addr.
  2196. add esi,eax ; Add VMVf contrib to block addr.
  2197. sub esp,64
  2198. ; esi -- Past P Frame block address.
  2199. ; edi -- Target block address.
  ; Step tags: leading 0 = columns 0-3 stream, leading 4 = columns 4-7 stream.
  2200. @@:
  2201. mov eax,[esi] ; 0A Fetch past prediction.
  2202. mov edx,[esi+4] ; 4A
  2203. add eax,[esi+PITCH] ; 0B Add past prediction with which to interpolate.
  2204. add edx,[esi+PITCH+4] ; 4B
  2205. add eax,001010101H ; 0C Add rounding.
  2206. add edx,001010101H ; 4C
  2207. shr eax,1 ; 0D Divide by two (dirty).
  2208. and edx,0FEFEFEFEH ; 4D Pre-clean low bit of each byte.
  2209. shr edx,1 ; 4E Divide by two (clean).
  2210. and eax,07F7F7F7FH ; 0E Clean.
  2211. mov ebx,Mask00+64 ; 0F Fetch bidi-prediction mask.
  2212. mov ecx,P00+64 ; 0G Fetch future pred for bidi predicted pels.
  2213. and ebx,eax ; 0H Extract past for bidi predicted pels.
  2214. mov ebp,Mask04+64 ; 4F
  2215. lea eax,[ecx+eax*2] ; 0I (2*Past) or ((2*Past+Future) for each pel.
  2216. mov ecx,P04+64 ; 4G
  2217. sub eax,ebx ; 0J (2*Past) or (Past+Future) for each pel.
  2218. and ebp,edx ; 4H
  2219. shr eax,1 ; 0K (Past) or ((Past+Future)/2) (dirty).
  2220. lea edx,[ecx+edx*2] ; 4I
  2221. and eax,07F7F7F7FH ; 0L (Past) or ((Past+Future)/2) (clean).
  2222. sub edx,ebp ; 4J
  2223. shr edx,1 ; 4K
  2224. mov ebx,[edi] ; 0M Fetch target pels.
  2225. and edx,07F7F7F7FH ; 4L
  2226. mov ebp,[edi+4] ; 4M
  2227. sub ebx,eax ; 0N Compute correction.
  2228. sub ebp,edx ; 4N
  2229. add ebx,080808080H ; 0O Bias correction.
  2230. add ebp,080808080H ; 4O
  2231. mov P00+64,ebx ; 0P Store correction.
  2232. mov P04+64,ebp ; 4P
  2233. add esi,PITCH
  2234. add esp,8
  2235. test esp,000000038H
  2236. lea edi,[edi+PITCH]
  2237. jne @b
  2238. xor ebx,ebx
  2239. xor ecx,ecx
  2240. mov bl,P00 ; Fetch P0.
  2241. mov cl,P03 ; Fetch P3.
  2242. jmp DoForwardDCT
  ; BiDi past prediction, HMVf half pel and VMVf integer pel.  Reached with
  ; eax = 3*(VMVf/2 - 48) and esi = address of the 0-MV block, both computed
  ; just before the jc in BiDiFuturePredDone.  Averages each past dword with
  ; the dword one byte to its right, combines with the stored future prediction
  ; under the bidi mask, and stores the biased difference from the target in
  ; P00..  One line of 8 pels per iteration.
  ; NOTE(review): esp is the store cursor and loop counter (bits 3-5); depends
  ; on the esp alignment set up outside this excerpt -- confirm.
  2243. InterpHorz_PastPFrame:
  2244. shl eax,7
  2245. add esi,ecx ; Add HMVf contrib to block addr.
  2246. add esi,eax ; Add VMVf contrib to block addr.
  2247. sub esp,64
  2248. ; esi -- Past P Frame block address.
  2249. ; edi -- Target block address.
  ; Step tags: leading 0 = columns 0-3 stream, leading 4 = columns 4-7 stream.
  2250. @@:
  2251. mov eax,[esi] ; 0A Fetch past prediction.
  2252. mov edx,[esi+4] ; 4A
  2253. add eax,[esi+1] ; 0B Add past prediction with which to interpolate.
  2254. add edx,[esi+5] ; 4B
  2255. add eax,001010101H ; 0C Add rounding.
  2256. add edx,001010101H ; 4C
  2257. shr eax,1 ; 0D Divide by two (dirty).
  2258. and edx,0FEFEFEFEH ; 4D Pre-clean low bit of each byte.
  2259. shr edx,1 ; 4E Divide by two (clean).
  2260. and eax,07F7F7F7FH ; 0E Clean.
  2261. mov ebx,Mask00+64 ; 0F Fetch bidi-prediction mask.
  2262. mov ecx,P00+64 ; 0G Fetch future pred for bidi predicted pels.
  2263. and ebx,eax ; 0H Extract past for bidi predicted pels.
  2264. mov ebp,Mask04+64 ; 4F
  2265. lea eax,[ecx+eax*2] ; 0I (2*Past) or ((2*Past+Future) for each pel.
  2266. mov ecx,P04+64 ; 4G
  2267. sub eax,ebx ; 0J (2*Past) or (Past+Future) for each pel.
  2268. and ebp,edx ; 4H
  2269. shr eax,1 ; 0K (Past) or ((Past+Future)/2) (dirty).
  2270. lea edx,[ecx+edx*2] ; 4I
  2271. and eax,07F7F7F7FH ; 0L (Past) or ((Past+Future)/2) (clean).
  2272. sub edx,ebp ; 4J
  2273. shr edx,1 ; 4K
  2274. mov ebx,[edi] ; 0M Fetch target pels.
  2275. and edx,07F7F7F7FH ; 4L
  2276. mov ebp,[edi+4] ; 4M
  2277. sub ebx,eax ; 0N Compute correction.
  2278. sub ebp,edx ; 4N
  2279. add ebx,080808080H ; 0O Bias correction.
  2280. add ebp,080808080H ; 4O
  2281. mov P00+64,ebx ; 0P Store correction.
  2282. mov P04+64,ebp ; 4P
  2283. add esi,PITCH
  2284. add esp,8
  2285. test esp,000000038H
  2286. lea edi,[edi+PITCH]
  2287. jne @b
  2288. xor ebx,ebx
  2289. xor ecx,ecx
  2290. mov bl,P00 ; Fetch P0.
  2291. mov cl,P03 ; Fetch P3.
  2292. jmp DoForwardDCT
  ; BiDi past prediction, both HMVf and VMVf half pel.  Reached with
  ; eax = 3*(VMVf/2 - 48) and esi = address of the 0-MV block, both computed
  ; just before the jc in InterpVert_PastPFrame.  For each line the horizontal
  ; pair sums of the current line and of the line below are each halved and
  ; then averaged; the result is combined with the stored future prediction
  ; under the bidi mask and the biased difference from the target is stored in
  ; P00..  One line of 8 pels per iteration.
  ; NOTE(review): esp is the store cursor and loop counter (bits 3-5); depends
  ; on the esp alignment set up outside this excerpt -- confirm.
  2293. InterpBoth_PastPFrame:
  2294. shl eax,7
  2295. add esi,ecx ; Add HMVf contrib to block addr.
  2296. add esi,eax ; Add VMVf contrib to block addr.
  2297. sub esp,64
  2298. ; esi -- Past P Frame block address.
  2299. ; edi -- Target block address.
  ; Step tags: leading 0 = columns 0-3 stream, leading 4 = columns 4-7 stream.
  2300. @@:
  2301. mov eax,[esi+1] ; 0A <P04 P03 P02 P01> prediction pels.
  2302. mov ebx,001010101H ; 0B Mask for extraction of halves.
  2303. mov ebp,[esi+PITCH+1] ; 0C <P14 P13 P12 P11>.
  2304. mov ecx,[esi] ; 0D <P03 P02 P01 P00>.
  2305. add eax,ecx ; 0E <P04+P03 P03+P02 P02+P01 P01+P00>.
  2306. mov ecx,[esi+PITCH] ; 0F <P13 P12 P11 P10>.
  2307. and ebx,eax ; 0G <(P04+P03)&1 ...>.
  2308. and eax,0FEFEFEFEH ; 0H Pre-Clean
  2309. shr eax,1 ; 0I <(P04+P03)/2 ...>.
  2310. add ecx,ebp ; 0J <P14+P13 P13+P12 P12+P11 P11+P10>.
  2311. add eax,001010101H ; 0K <(P04+P03)/2+1 ...>.
  2312. add ecx,ebx ; 0L <P14+P13+((P04+P03)&1) ...>.
  2313. shr ecx,1 ; 0M <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  2314. mov edx,[esi+5] ; 4A
  2315. and ecx,07F7F7F7FH ; 0M <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  2316. mov ebx,001010101H ; 4B
  2317. add eax,ecx ; 0N <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  2318. mov ebp,[esi+PITCH+5] ; 4C
  2319. shr eax,1 ; 0O <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>
  2320. mov ecx,[esi+4] ; 4D
  2321. and eax,07F7F7F7FH ; 0P Interpolated prediction.
  2322. add edx,ecx ; 4E
  2323. mov ecx,[esi+PITCH+4] ; 4F
  2324. and ebx,edx ; 4G
  2325. and edx,0FEFEFEFEH ; 4H
  2326. add ecx,ebp ; 4J
  2327. shr edx,1 ; 4I
  2328. add ecx,ebx ; 4L
  2329. shr ecx,1 ; 4M
  2330. add edx,001010101H ; 4K
  2331. and ecx,07F7F7F7FH ; 4M
  2332. mov ebx,Mask00+64 ; 0Q Fetch bidi-prediction mask.
  2333. add edx,ecx ; 4N
  2334. mov ecx,P00+64 ; 0R Fetch future pred for bidi predicted pels.
  2335. shr edx,1 ; 4O
  2336. and ebx,eax ; 0S Extract past for bidi predicted pels.
  2337. and edx,07F7F7F7FH ; 4P
  2338. mov ebp,Mask04+64 ; 4Q
  2339. lea eax,[ecx+eax*2] ; 0T (2*Past) or ((2*Past+Future) for each pel.
  2340. mov ecx,P04+64 ; 4R
  2341. sub eax,ebx ; 0U (2*Past) or (Past+Future) for each pel.
  2342. and ebp,edx ; 4S
  2343. shr eax,1 ; 0V (Past) or ((Past+Future)/2) (dirty).
  2344. lea edx,[ecx+edx*2] ; 4T
  2345. and eax,07F7F7F7FH ; 0W (Past) or ((Past+Future)/2) (clean).
  2346. sub edx,ebp ; 4U
  2347. shr edx,1 ; 4V
  2348. mov ebx,[edi] ; 0X Fetch target pels.
  2349. and edx,07F7F7F7FH ; 4W
  2350. mov ebp,[edi+4] ; 4X
  2351. sub ebx,eax ; 0Y Compute correction.
  2352. sub ebp,edx ; 4Y
  2353. add ebx,080808080H ; 0Z Bias correction.
  2354. add ebp,080808080H ; 4Z
  2355. mov P00+64,ebx ; 0a Store correction.
  2356. mov P04+64,ebp ; 4a
  2357. add esi,PITCH
  2358. add esp,8
  2359. test esp,000000038H
  2360. lea edi,[edi+PITCH]
  2361. jne @b
  2362. xor ebx,ebx
  2363. xor ecx,ecx
  2364. mov bl,P00 ; Fetch P0.
  2365. mov cl,P03 ; Fetch P3.
  2366. jmp DoForwardDCT
  2367. ;; end of section of code not define when H261 defined
  2368. ENDIF
  ; Inter path dispatch.  eax = block address with the Inter flag (4) added,
  ; hence the -4 displacements; ebp = PITCH.
  ;   bit 3 of eax set        -> OBMCBlock (non-H261 builds)
  ;   low bit of horz MV set  -> InterpHorzOrBoth (non-H261 builds)
  ;   low bit of vert MV set  -> falls into the interpolation code that
  ;                              follows, with ebx = esi + PITCH
  ;   otherwise               -> NoInterp
  ; Leaves ecx = 080808080H, edx = 3*PITCH, ebx = esi + PITCH for the paths
  ; that continue past the first MV test.
  2369. InterOrOBMCBlock:
  2370. mov esi,TargetFrameBaseAddress
  2371. mov edi,[eax-4].T_Blk.BlkOffset ; Compute Addr of Target block.
  2372. IFNDEF H261
  2373. ;; H261 does not execute the OBMC code so it is included only when H261 is not defined
  2374. ;;
  2375. test eax,8
  2376. jne OBMCBlock
  2377. ENDIF
  2378. add edi,esi
  2379. mov esi,[eax-4].T_Blk.PastRef ; Addr of PrevRef block.
  2380. mov eax,[eax-4].T_Blk.MVs ; al = Horz MV; ah = Vert MV
  2381. mov ecx,080808080H ; Bias value.
  2382. IFNDEF H261
  2383. ;; H261 does not execute Interp code so it is included only when H261 is not defined
  2384. ;;
  2385. test al,1 ; Horz MV half pel?
  2386. jne InterpHorzOrBoth
  2387. ENDIF
  2388. lea edx,[ebp+ebp*2] ; Pitch times 3.
  2389. lea ebx,[esi+ebp] ; Reference plus PITCH.
  2390. test ah,1 ; Vert MV half pel?
  2391. je NoInterp
  2392. IFNDEF H261
  2393. ;; H261 does not execute Interp code so it is included only when H261 is not defined
  2394. ;;
  ; Inter block with a half-pel MV in exactly one direction.  The two cases
  ; share this code: ebx = esi + PITCH on the vertical entry, esi + 1 on the
  ; horizontal entry.  Each pass averages 4 columns of lines 0, 2, 4, 6
  ; relative to the current pointers (sum + 001010101H, shifted right 1,
  ; cleaned) and stores the biased difference from the target.  Pointers move
  ; 4 columns right after each pass and to the start of the next line after
  ; every second pass, so four passes cover the block: even lines left, even
  ; lines right, odd lines left, odd lines right.
  ; NOTE(review): esp advances 4 per pass and is both store cursor and loop
  ; counter (bits 2 and 3); depends on the esp alignment set up outside this
  ; excerpt -- confirm.
  2395. InterpVert:
  2396. InterpHorz:
  2397. ; Register usage:
  2398. ; edi -- Address of target block.
  2399. ; esi -- Address of reference block.
  2400. ; ebx -- Address of reference plus either 1 or PITCH, for interpolation.
  2401. ; ebp, edx, ecx, eax -- Scratch.
  2402. sub esp,16
  2403. @@:
  2404. add esp,4
  2405. mov eax,[esi] ; 0A <P03 P02 P01 P00> prediction pels.
  2406. mov ecx,[ebx] ; 0B <P04 ...> or <P13 ...> prediction pels.
  2407. mov edx,[edi] ; 0C <C03 C02 C01 C00> current pels.
  2408. add edx,080808080H ; 0D Add bias.
  2409. mov ebp,[esi+PITCH*2] ; 2A
  2410. lea eax,[eax+ecx+001010101H]; 0E Sum of pred pels to interpolate.
  2411. mov ecx,[ebx+PITCH*2] ; 2B
  2412. shr eax,1 ; 0F Average of prediction pels (dirty).
  2413. and eax,07F7F7F7FH ; 0G Average of prediction pels (clean).
  2414. lea ebp,[ebp+ecx+001010101H]; 2E
  2415. sub edx,eax ; 0H Current - interpolated prediction, biased.
  2416. mov eax,[edi+PITCH*2] ; 2C
  2417. mov P00+12,edx ; 0I Save correction.
  2418. add eax,080808080H ; 2D
  2419. shr ebp,1 ; 2F
  2420. mov edx,[esi+PITCH*4] ; 4A
  2421. and ebp,07F7F7F7FH ; 2G
  2422. mov ecx,[ebx+PITCH*4] ; 4B
  2423. sub eax,ebp ; 2H
  2424. mov ebp,[edi+PITCH*4] ; 4C
  2425. mov P20+12,eax ; 2I
  2426. lea ecx,[ecx+edx+001010101H]; 4E
  2427. shr ecx,1 ; 4F
  2428. add ebp,080808080H ; 4D
  2429. and ecx,07F7F7F7FH ; 4G
  2430. mov eax,[esi+PITCH*6] ; 6A
  2431. sub ebp,ecx ; 4H
  2432. mov ecx,[ebx+PITCH*6] ; 6B
  2433. mov P40+12,ebp ; 4I
  2434. mov ebp,[edi+PITCH*6] ; 6C
  2435. lea ecx,[ecx+eax+001010101H]; 6E
  2436. add ebp,080808080H ; 6D
  2437. shr ecx,1 ; 6F
  2438. add esi,4 ; Move all three pointers 4 columns right.
  2439. and ecx,07F7F7F7FH ; 6G
  2440. add ebx,4
  2441. sub ebp,ecx ; 6H
  2442. add edi,4
  2443. test esp,4
  2444. mov P60+12,ebp ; 6I
  2445. je @b
  2446. add esi,PITCH-8 ; Back to column 0 of the next line.
  2447. add edi,PITCH-8
  2448. test esp,8
  2449. lea ebx,[ebx+PITCH-8]
  2450. jne @b
  2451. xor ebx,ebx
  2452. xor ecx,ecx
  2453. mov bl,P00 ; Fetch P0.
  2454. mov cl,P03 ; Fetch P3.
  2455. jmp DoForwardDCT
  ; Inter block with a half-pel horizontal MV.  If the vertical MV is integer
  ; pel, control goes to InterpHorz with ebx = esi + 1.  Otherwise InterpBoth
  ; handles 4 pels per iteration: the horizontal pair sums of the current line
  ; and of the line below are each halved and then averaged, and the biased
  ; difference from the target is stored.  Two iterations per line, then the
  ; pointers move to the next line.
  ; The constant 001010101H is derived from ecx (080808080H) as ecx*2+1, with
  ; the carry out of bit 31 discarded.
  ; NOTE(review): esp advances 4 per iteration and is both store cursor and
  ; loop counter (bit 2 for the column half, bits 3-5 for lines); depends on
  ; the esp alignment set up outside this excerpt -- confirm.
  2456. InterpHorzOrBoth:
  2457. lea ebx,[esi+1] ; Reference plus 1, for horizontal interpolation.
  2458. test ah,1 ; Vert MV half pel?
  2459. je InterpHorz
  2460. InterpBoth:
  2461. ; Register usage:
  2462. ; edi -- Address of target block.
  2463. ; esi -- Address of reference block.
  2464. ; ecx -- bias value 0x80808080, to make code size smaller.
  2465. ; ebp -- Pitch and scratch.
  2466. ; edx, ebx, eax -- Scratch.
  2467. sub esp,64
  2468. @@:
  2469. mov eax,[esi+1] ; <P04 P03 P02 P01> prediction pels.
  2470. lea edx,[ecx*2+1] ; Get 001010101H mask.
  2471. mov ebx,[esi] ; <P03 P02 P01 P00>.
  2472. add edi,4 ; Pre-increment target block pointer.
  2473. add eax,ebx ; <P04+P03 P03+P02 P02+P01 P01+P00>.
  2474. mov ebx,[esi+ebp*1+1] ; <P14 P13 P12 P11>.
  2475. and edx,eax ; <(P04+P03)&1 ...>.
  2476. mov ebp,[esi+ebp*1] ; <P13 P12 P11 P10>.
  2477. xor eax,edx ; Clear insignificant fractional bit in each byte.
  2478. add ebx,ebp ; <P14+P13 P13+P12 P12+P11 P11+P10>.
  2479. shr eax,1 ; <(P04+P03)/2 ...>.
  2480. add ebx,edx ; <P14+P13+((P04+P03)&1) ...>.
  2481. shr ebx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  2482. add esi,4 ; Advance reference block pointer.
  2483. and ebx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  2484. lea eax,[eax+ecx*2+1] ; <(P04+P03)/2+1 ...>.
  2485. add eax,ebx ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  2486. mov ebx,[edi-4] ; <C03 C02 C01 C00> current pels.
  2487. shr eax,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  2488. add ebx,ecx ; Add bias.
  2489. and eax,07F7F7F7FH ; Interpolated prediction.
  2490. add esp,4 ; Advance frame difference pointer.
  2491. sub ebx,eax ; Correction.
  2492. mov ebp,PITCH ; Reload Pitch.
  2493. test esp,4
  2494. mov P00+60,ebx ; Save correction.
  2495. je @b
  2496. lea esi,[esi+ebp-8] ; Back to column 0 of the next line.
  2497. xor ebx,ebx
  2498. test esp,000000038H
  2499. lea edi,[edi+ebp-8]
  2500. jne @b
  2501. mov bl,P00 ; Fetch P0.
  2502. xor ecx,ecx
  2503. mov cl,P03 ; Fetch P3.
  2504. jmp DoForwardDCT
  ; OBMC path.  eax carries both flag bits (4 and 8), hence the -12
  ; displacements; esi = TargetFrameBaseAddress and ebp = PITCH on entry.
  ; Sets up the same registers NoInterp expects and falls into it.
  2505. OBMCBlock: ; Do OBMC frame differencing. OBMC prediction computed above.
  2506. mov ecx,080808080H ; Bias value.
  2507. mov edi,[eax-12].T_Blk.BlkOffset ; Compute Addr of Target block.
  2508. add edi,esi
  2509. mov esi,[eax-12].T_Blk.PastRef ; Addr of PrevRef block.
  2510. lea edx,[ebp+ebp*2] ; Pitch times 3.
  2511. lea ebx,[esi+ebp]
  2512. ;; end of section of code not included when H261 defined
  2513. ENDIF
  ; Integer-pel inter (and OBMC) frame differencing: correction = current -
  ; prediction + 080808080H for all 8 lines, 4 columns per pass, two passes.
  ; On some lines the bias is subtracted from the prediction instead of added
  ; to the difference; the 32-bit result is the same either way.
  ; Falls through into DoForwardDCT with P0 in ebx and P3 in ecx.
  ; NOTE(review): esp bit 2 is the pass flag; depends on the esp alignment set
  ; up outside this excerpt -- confirm.
  2514. NoInterp:
  2515. ; Register usage:
  2516. ; edi -- Address of target block.
  2517. ; esi -- Address of reference block.
  2518. ; ebp -- Pitch.
  2519. ; edx -- Pitch times 3.
  2520. ; ecx -- bias value 0x80808080, to make code size smaller.
  2521. ; ebx, eax -- Scratch.
  2522. @@:
  2523. xor esp,4 ; 1st time: Back off to cache line;
  2524. mov eax,[edi] ; 0A <C3 C2 C1 C0> current pels.
  2525. add eax,ecx ; 0C Add bias.
  2526. mov ebx,[esi] ; 0B <P3 P2 P1 P0> prediction pels.
  2527. sub eax,ebx ; 0D <Cn-Pn> Current - pred, biased.
  2528. mov ebx,[esi+ebp*1] ; 1B
  2529. mov P00+4,eax ; 0E Save <Corr3 Corr2 Corr1 Corr0>
  2530. mov eax,[edi+ebp*1] ; 1A
  2531. sub eax,ebx ; 1D
  2532. mov ebx,[esi+ebp*2] ; 2B
  2533. add eax,ecx ; 1C
  2534. sub ebx,ecx ; 2C
  2535. mov P10+4,eax ; 1E
  2536. mov eax,[edi+ebp*2] ; 2A
  2537. sub eax,ebx ; 2D
  2538. mov ebx,[esi+ebp*4] ; 4B
  2539. mov P20+4,eax ; 2E
  2540. mov eax,[edi+ebp*4] ; 4A
  2541. sub eax,ebx ; 4D
  2542. mov ebx,[esi+edx*1] ; 3B
  2543. add eax,ecx ; 4C
  2544. sub ebx,ecx ; 3C
  2545. mov P40+4,eax ; 4E
  2546. mov eax,[edi+edx*1] ; 3A
  2547. sub eax,ebx ; 3D
  2548. mov ebx,[esi+edx*2] ; 6B
  2549. mov P30+4,eax ; 3E
  2550. lea esi,[esi+ebp+4] ; Advance to line 1.
  2551. mov eax,[edi+edx*2] ; 6A
  2552. lea edi,[edi+ebp+4] ; Advance to line 1.
  2553. sub eax,ebx ; 6D
  2554. mov ebx,[esi+ebp*4-4] ; 5B
  2555. add eax,ecx ; 6C
  2556. sub ebx,ecx ; 5C
  2557. mov P60+4,eax ; 6E
  2558. mov eax,[edi+ebp*4-4] ; 5A
  2559. sub eax,ebx ; 5D
  2560. mov ebx,[esi+edx*2-4] ; 7B
  2561. mov P50+4,eax ; 5E
  2562. mov eax,[edi+edx*2-4] ; 7A
  2563. sub eax,ebx ; 7D
  2564. sub edi,ebp ; Back off to line 0.
  2565. add eax,ecx ; 7C
  2566. sub esi,ebp ; Back off to line 0.
  2567. test esp,4 ; Do twice.
  2568. mov P70+4,eax ; 7E
  2569. je @b
  2570. xor ecx,ecx
  2571. xor ebx,ebx
  2572. mov bl,P00 ; Fetch P0.
  2573. mov cl,P03 ; Fetch P3.
  2574. DoForwardDCT:
  2575. ;=============================================================================
  2576. ;
  2577. ; This section does the Forward Discrete Cosine Transform. It performs a DCT
  2578. ; on a 8*8 block of pels or pel differences. The row transforms are done
  2579. ; first using a table lookup method. Then the columns are done, using
  2580. ; computation.
  2581. ;
  2582. ;
  2583. ; Each intermediate and coefficient is a short. There are four fractional
  2584. ; bits. All coefficients except an intrablock's DC are biased by 08000H.
  2585. ; Perform row transforms.
  2586. ;
  2587. ; Register usage:
  2588. ; ebp - Accumulator for contributions to intermediates I0 (hi) and I2 (lo).
  2589. ; edi - Accumulator for contributions to intermediates I1 (hi) and I3 (lo).
  2590. ; esi - Accumulator for contributions to intermediates I4 (hi) and I6 (lo).
  2591. ; edx - Accumulator for contributions to intermediates I7 (hi) and I5 (lo).
  2592. ; ecx - Pel or pel difference.
  2593. ; ebx - Pel or pel difference.
  2594. ; eax - Place in which to fetch a pel's contribution to two intermediates.
  2595. mov esi,PD P80000_P4545F [ebx*8] ; P0's contribution to I4|I6.
  2596. mov eax,PD P80000_N4545F [ecx*8] ; P3's contribution to I4|I6.
  2597. mov edx,PD P2350B_P6491A [ebx*8] ; P0's contribution to I7|I5.
  2598. mov edi,PD NB18A8_P96831 [ecx*8] ; P3's contribution to I7|I5.
  2599. lea esi,[esi+eax+40004000H] ; P0, P3 contribs to I4|I6, biased.
  2600. mov eax,PD P80000_NA73D7 [ecx*8] ; P3's contribution to I0|I2.
  2601. lea edx,[edx+edi+40004000H] ; P0, P3 contribs to I7|I5, biased.
  2602. mov ebp,PD P80000_PA73D7 [ebx*8] ; P0's contribution to I0|I2.
  2603. mov edi,PD P2350B_N6491A [ecx*8] ; P3's contribution to I1|I3.
  2604. mov cl,P01 ; Fetch P1.
  2605. lea ebp,[ebp+eax+40004000H] ; P0, P3 contribs to I0|I2, biased.
  2606. mov eax,PD NB18A8_N96831 [ebx*8] ; P0's contribution to I1|I3.
  2607. sub edi,eax ; P0, P3 contribs to I1|I3, unbiased.
  2608. mov eax,PD P80000_P4545F [ecx*8] ; P1's contribution to I0|I2.
  2609. add ebp,eax ; P0, P1, P3 contribs to I0|I2.
  2610. mov eax,PD N96831_P2350B [ecx*8] ; P1's contribution to I1|I3.
  2611. sub edi,eax ; P0, P1, P3 contribs to I1|I3, unbiased.
  2612. mov eax,PD P80000_PA73D7 [ecx*8] ; P1's contribution to I4|I6.
  2613. sub esi,eax ; P0, P1, P3 contribs to I4|I6.
  2614. mov bl,P02 ; Fetch P2.
  2615. mov eax,PD P6491A_PB18A8 [ecx*8] ; P1's contribution to I7|I5.
  2616. mov cl,P04 ; Fetch P4.
  2617. sub edx,eax ; P0, P1, P3 contribs to I7|I5.
  2618. mov eax,PD P80000_N4545F [ebx*8] ; P2's contribution to I0|I2.
  2619. add ebp,eax ; P0-P3 contribs to I0|I2.
  2620. mov eax,PD P6491A_NB18A8 [ebx*8] ; P2's contribution to I1|I3.
  2621. add edi,eax ; P0-P3 contribs to I1|I3, unbiased.
  2622. mov eax,PD P80000_NA73D7 [ebx*8] ; P2's contribution to I4|I6.
  2623. sub esi,eax ; P0-P3 contribs to I4|I6.
  2624. mov eax,PD N96831_N2350B [ebx*8] ; P2's contribution to I7|I5.
  2625. sub edx,eax ; P0-P3 contribs to I7|I5.
  2626. mov eax,PD P80000_NA73D7 [ecx*8] ; P4's contribution to I0|I2.
  2627. add ebp,eax ; P0-P4 contribs to I0|I2.
  2628. mov eax,PD P2350B_N6491A [ecx*8] ; P4's contribution to I1|I3.
  2629. sub edi,eax ; P0-P4 contribs to I1|I3, unbiased.
  2630. mov eax,PD P80000_N4545F [ecx*8] ; P4's contribution to I4|I6.
  2631. add esi,eax ; P0-P4 contribs to I4|I6.
  2632. mov bl,P05 ; Fetch P5.
  2633. mov eax,PD NB18A8_P96831 [ecx*8] ; P4's contribution to I7|I5.
  2634. mov cl,P06 ; Fetch P6.
  2635. sub edx,eax ; P0-P4 contribs to I7|I5.
  2636. mov eax,PD P80000_N4545F [ebx*8] ; P5's contribution to I0|I2.
  2637. add ebp,eax ; P0-P5 contribs to I0|I2.
  2638. mov eax,PD P6491A_NB18A8 [ebx*8] ; P5's contribution to I1|I3.
  2639. sub edi,eax ; P0-P5 contribs to I1|I3, unbiased.
  2640. mov eax,PD P80000_NA73D7 [ebx*8] ; P5's contribution to I4|I6.
  2641. sub esi,eax ; P0-P5 contribs to I4|I6.
  2642. mov eax,PD N96831_N2350B [ebx*8] ; P5's contribution to I7|I5.
  2643. add edx,eax ; P0-P5 contribs to I7|I5.
  2644. mov eax,PD P80000_P4545F [ecx*8] ; P6's contribution to I0|I2.
  2645. add ebp,eax ; P0-P6 contribs to I0|I2.
  2646. mov eax,PD N96831_P2350B [ecx*8] ; P6's contribution to I1|I3.
  2647. add edi,eax ; P0-P6 contribs to I1|I3, unbiased.
  2648. mov eax,PD P80000_PA73D7 [ecx*8] ; P6's contribution to I4|I6.
  2649. sub esi,eax ; P0-P6 contribs to I4|I6.
  2650. mov bl,P07 ; Fetch P7.
  2651. mov eax,PD P6491A_PB18A8 [ecx*8] ; P6's contribution to I7|I5.
  2652. mov cl,P13 ; Fetch P3 of line 1.
  2653. add edx,eax ; P0-P6 contribs to I7|I5.
  2654. mov eax,PD P80000_PA73D7 [ebx*8] ; P7's contribution to I0|I2.
  2655. add ebp,eax ; P0-P7 contribs to I0|I2.
  2656. mov eax,PD P80000_P4545F [ebx*8] ; P7's contribution to I4|I6.
  2657. add esi,eax ; P0-P7 contribs to I4|I6.
  2658. mov eax,PD NB18A8_N96831 [ebx*8] ; P7's contribution to I1|I3.
  2659. mov I00I02,ebp ; Store I0|I2 for line 0.
  2660. mov I04I06,esi ; Store I4|I6 for line 0.
  2661. lea edi,[edi+eax+40004000H] ; P0-P7 contribs to I1|I3, biased.
  2662. mov eax,PD P2350B_P6491A [ebx*8] ; P7's contribution to I7|I5.
  2663. sub edx,eax ; P0-P7 contribs to I7|I5.
  2664. mov bl,P10 ; Fetch P0 of line 1.
  2665. mov I01I03,edi ; Store I1|I3 for line 0.
  2666. mov I07I05,edx ; Store I7|I5 for line 0.
  2667. mov esi,PD P80000_P4545F [ebx*8]
  2668. mov eax,PD P80000_N4545F [ecx*8]
  2669. mov edx,PD P2350B_P6491A [ebx*8]
  2670. mov edi,PD NB18A8_P96831 [ecx*8]
  2671. lea esi,[esi+eax+40004000H]
  2672. mov eax,PD P80000_NA73D7 [ecx*8]
  2673. lea edx,[edx+edi+40004000H]
  2674. mov ebp,PD P80000_PA73D7 [ebx*8]
  2675. mov edi,PD P2350B_N6491A [ecx*8]
  2676. mov cl,P11
  2677. lea ebp,[ebp+eax+40004000H]
  2678. mov eax,PD NB18A8_N96831 [ebx*8]
  2679. sub edi,eax
  2680. mov eax,PD P80000_P4545F [ecx*8]
  2681. add ebp,eax
  2682. mov eax,PD N96831_P2350B [ecx*8]
  2683. sub edi,eax
  2684. mov eax,PD P80000_PA73D7 [ecx*8]
  2685. sub esi,eax
  2686. mov bl,P12
  2687. mov eax,PD P6491A_PB18A8 [ecx*8]
  2688. mov cl,P14
  2689. sub edx,eax
  2690. mov eax,PD P80000_N4545F [ebx*8]
  2691. add ebp,eax
  2692. mov eax,PD P6491A_NB18A8 [ebx*8]
  2693. add edi,eax
  2694. mov eax,PD P80000_NA73D7 [ebx*8]
  2695. sub esi,eax
  2696. mov eax,PD N96831_N2350B [ebx*8]
  2697. sub edx,eax
  2698. mov eax,PD P80000_NA73D7 [ecx*8]
  2699. add ebp,eax
  2700. mov eax,PD P2350B_N6491A [ecx*8]
  2701. sub edi,eax
  2702. mov eax,PD P80000_N4545F [ecx*8]
  2703. add esi,eax
  2704. mov bl,P15
  2705. mov eax,PD NB18A8_P96831 [ecx*8]
  2706. mov cl,P16
  2707. sub edx,eax
  2708. mov eax,PD P80000_N4545F [ebx*8]
  2709. add ebp,eax
  2710. mov eax,PD P6491A_NB18A8 [ebx*8]
  2711. sub edi,eax
  2712. mov eax,PD P80000_NA73D7 [ebx*8]
  2713. sub esi,eax
  2714. mov eax,PD N96831_N2350B [ebx*8]
  2715. add edx,eax
  2716. mov eax,PD P80000_P4545F [ecx*8]
  2717. add ebp,eax
  2718. mov eax,PD N96831_P2350B [ecx*8]
  2719. add edi,eax
  2720. mov eax,PD P80000_PA73D7 [ecx*8]
  2721. sub esi,eax
  2722. mov bl,P17
  2723. mov eax,PD P6491A_PB18A8 [ecx*8]
  2724. mov cl,P23
  2725. add edx,eax
  2726. mov eax,PD P80000_PA73D7 [ebx*8]
  2727. add ebp,eax
  2728. mov eax,PD P80000_P4545F [ebx*8]
  2729. add esi,eax
  2730. mov eax,PD NB18A8_N96831 [ebx*8]
  2731. mov I10I12,ebp
  2732. mov I14I16,esi
  2733. lea edi,[edi+eax+40004000H]
  2734. mov eax,PD P2350B_P6491A [ebx*8]
  2735. sub edx,eax
  2736. mov bl,P20
  2737. mov I11I13,edi
  2738. mov I17I15,edx
  2739. mov esi,PD P80000_P4545F [ebx*8]
  2740. mov eax,PD P80000_N4545F [ecx*8]
  2741. mov edx,PD P2350B_P6491A [ebx*8]
  2742. mov edi,PD NB18A8_P96831 [ecx*8]
  2743. lea esi,[esi+eax+40004000H]
  2744. mov eax,PD P80000_NA73D7 [ecx*8]
  2745. lea edx,[edx+edi+40004000H]
  2746. mov ebp,PD P80000_PA73D7 [ebx*8]
  2747. mov edi,PD P2350B_N6491A [ecx*8]
  2748. mov cl,P21
  2749. lea ebp,[ebp+eax+40004000H]
  2750. mov eax,PD NB18A8_N96831 [ebx*8]
  2751. sub edi,eax
  2752. mov eax,PD P80000_P4545F [ecx*8]
  2753. add ebp,eax
  2754. mov eax,PD N96831_P2350B [ecx*8]
  2755. sub edi,eax
  2756. mov eax,PD P80000_PA73D7 [ecx*8]
  2757. sub esi,eax
  2758. mov bl,P22
  2759. mov eax,PD P6491A_PB18A8 [ecx*8]
  2760. mov cl,P24
  2761. sub edx,eax
  2762. mov eax,PD P80000_N4545F [ebx*8]
  2763. add ebp,eax
  2764. mov eax,PD P6491A_NB18A8 [ebx*8]
  2765. add edi,eax
  2766. mov eax,PD P80000_NA73D7 [ebx*8]
  2767. sub esi,eax
  2768. mov eax,PD N96831_N2350B [ebx*8]
  2769. sub edx,eax
  2770. mov eax,PD P80000_NA73D7 [ecx*8]
  2771. add ebp,eax
  2772. mov eax,PD P2350B_N6491A [ecx*8]
  2773. sub edi,eax
  2774. mov eax,PD P80000_N4545F [ecx*8]
  2775. add esi,eax
  2776. mov bl,P25
  2777. mov eax,PD NB18A8_P96831 [ecx*8]
  2778. mov cl,P26
  2779. sub edx,eax
  2780. mov eax,PD P80000_N4545F [ebx*8]
  2781. add ebp,eax
  2782. mov eax,PD P6491A_NB18A8 [ebx*8]
  2783. sub edi,eax
  2784. mov eax,PD P80000_NA73D7 [ebx*8]
  2785. sub esi,eax
  2786. mov eax,PD N96831_N2350B [ebx*8]
  2787. add edx,eax
  2788. mov eax,PD P80000_P4545F [ecx*8]
  2789. add ebp,eax
  2790. mov eax,PD N96831_P2350B [ecx*8]
  2791. add edi,eax
  2792. mov eax,PD P80000_PA73D7 [ecx*8]
  2793. sub esi,eax
  2794. mov bl,P27
  2795. mov eax,PD P6491A_PB18A8 [ecx*8]
  2796. mov cl,P33
  2797. add edx,eax
  2798. mov eax,PD P80000_PA73D7 [ebx*8]
  2799. add ebp,eax
  2800. mov eax,PD P80000_P4545F [ebx*8]
  2801. add esi,eax
  2802. mov eax,PD NB18A8_N96831 [ebx*8]
  2803. mov I20I22,ebp
  2804. mov I24I26,esi
  2805. lea edi,[edi+eax+40004000H]
  2806. mov eax,PD P2350B_P6491A [ebx*8]
  2807. sub edx,eax
  2808. mov bl,P30
  2809. mov I21I23,edi
  2810. mov I27I25,edx
  2811. mov esi,PD P80000_P4545F [ebx*8]
  2812. mov eax,PD P80000_N4545F [ecx*8]
  2813. mov edx,PD P2350B_P6491A [ebx*8]
  2814. mov edi,PD NB18A8_P96831 [ecx*8]
  2815. lea esi,[esi+eax+40004000H]
  2816. mov eax,PD P80000_NA73D7 [ecx*8]
  2817. lea edx,[edx+edi+40004000H]
  2818. mov ebp,PD P80000_PA73D7 [ebx*8]
  2819. mov edi,PD P2350B_N6491A [ecx*8]
  2820. mov cl,P31
  2821. lea ebp,[ebp+eax+40004000H]
  2822. mov eax,PD NB18A8_N96831 [ebx*8]
  2823. sub edi,eax
  2824. mov eax,PD P80000_P4545F [ecx*8]
  2825. add ebp,eax
  2826. mov eax,PD N96831_P2350B [ecx*8]
  2827. sub edi,eax
  2828. mov eax,PD P80000_PA73D7 [ecx*8]
  2829. sub esi,eax
  2830. mov bl,P32
  2831. mov eax,PD P6491A_PB18A8 [ecx*8]
  2832. mov cl,P34
  2833. sub edx,eax
  2834. mov eax,PD P80000_N4545F [ebx*8]
  2835. add ebp,eax
  2836. mov eax,PD P6491A_NB18A8 [ebx*8]
  2837. add edi,eax
  2838. mov eax,PD P80000_NA73D7 [ebx*8]
  2839. sub esi,eax
  2840. mov eax,PD N96831_N2350B [ebx*8]
  2841. sub edx,eax
  2842. mov eax,PD P80000_NA73D7 [ecx*8]
  2843. add ebp,eax
  2844. mov eax,PD P2350B_N6491A [ecx*8]
  2845. sub edi,eax
  2846. mov eax,PD P80000_N4545F [ecx*8]
  2847. add esi,eax
  2848. mov bl,P35
  2849. mov eax,PD NB18A8_P96831 [ecx*8]
  2850. mov cl,P36
  2851. sub edx,eax
  2852. mov eax,PD P80000_N4545F [ebx*8]
  2853. add ebp,eax
  2854. mov eax,PD P6491A_NB18A8 [ebx*8]
  2855. sub edi,eax
  2856. mov eax,PD P80000_NA73D7 [ebx*8]
  2857. sub esi,eax
  2858. mov eax,PD N96831_N2350B [ebx*8]
  2859. add edx,eax
  2860. mov eax,PD P80000_P4545F [ecx*8]
  2861. add ebp,eax
  2862. mov eax,PD N96831_P2350B [ecx*8]
  2863. add edi,eax
  2864. mov eax,PD P80000_PA73D7 [ecx*8]
  2865. sub esi,eax
  2866. mov bl,P37
  2867. mov eax,PD P6491A_PB18A8 [ecx*8]
  2868. mov cl,P43
  2869. add edx,eax
  2870. mov eax,PD P80000_PA73D7 [ebx*8]
  2871. add ebp,eax
  2872. mov eax,PD P80000_P4545F [ebx*8]
  2873. add esi,eax
  2874. mov eax,PD NB18A8_N96831 [ebx*8]
  2875. mov I30I32,ebp
  2876. mov I34I36,esi
  2877. lea edi,[edi+eax+40004000H]
  2878. mov eax,PD P2350B_P6491A [ebx*8]
  2879. sub edx,eax
  2880. mov bl,P40
  2881. mov I31I33,edi
  2882. mov I37I35,edx
  2883. mov esi,PD P80000_P4545F [ebx*8]
  2884. mov eax,PD P80000_N4545F [ecx*8]
  2885. mov edx,PD P2350B_P6491A [ebx*8]
  2886. mov edi,PD NB18A8_P96831 [ecx*8]
  2887. add esi,eax
  2888. mov eax,PD P80000_NA73D7 [ecx*8]
  2889. add edx,edi
  2890. mov ebp,PD P80000_PA73D7 [ebx*8]
  2891. mov edi,PD P2350B_N6491A [ecx*8]
  2892. mov cl,P41
  2893. add ebp,eax
  2894. mov eax,PD NB18A8_N96831 [ebx*8]
  2895. sub edi,eax
  2896. mov eax,PD P80000_P4545F [ecx*8]
  2897. add ebp,eax
  2898. mov eax,PD N96831_P2350B [ecx*8]
  2899. sub edi,eax
  2900. mov eax,PD P80000_PA73D7 [ecx*8]
  2901. sub esi,eax
  2902. mov bl,P42
  2903. mov eax,PD P6491A_PB18A8 [ecx*8]
  2904. mov cl,P44
  2905. sub edx,eax
  2906. mov eax,PD P80000_N4545F [ebx*8]
  2907. add ebp,eax
  2908. mov eax,PD P6491A_NB18A8 [ebx*8]
  2909. add edi,eax
  2910. mov eax,PD P80000_NA73D7 [ebx*8]
  2911. sub esi,eax
  2912. mov eax,PD N96831_N2350B [ebx*8]
  2913. sub edx,eax
  2914. mov eax,PD P80000_NA73D7 [ecx*8]
  2915. add ebp,eax
  2916. mov eax,PD P2350B_N6491A [ecx*8]
  2917. sub edi,eax
  2918. mov eax,PD P80000_N4545F [ecx*8]
  2919. add esi,eax
  2920. mov bl,P45
  2921. mov eax,PD NB18A8_P96831 [ecx*8]
  2922. mov cl,P46
  2923. sub edx,eax
  2924. mov eax,PD P80000_N4545F [ebx*8]
  2925. add ebp,eax
  2926. mov eax,PD P6491A_NB18A8 [ebx*8]
  2927. sub edi,eax
  2928. mov eax,PD P80000_NA73D7 [ebx*8]
  2929. sub esi,eax
  2930. mov eax,PD N96831_N2350B [ebx*8]
  2931. add edx,eax
  2932. mov eax,PD P80000_P4545F [ecx*8]
  2933. add ebp,eax
  2934. mov eax,PD N96831_P2350B [ecx*8]
  2935. add edi,eax
  2936. mov eax,PD P80000_PA73D7 [ecx*8]
  2937. sub esi,eax
  2938. mov bl,P47
  2939. mov eax,PD P6491A_PB18A8 [ecx*8]
  2940. mov cl,P53
  2941. add edx,eax
  2942. mov eax,PD P80000_PA73D7 [ebx*8]
  2943. add ebp,eax
  2944. mov eax,PD P80000_P4545F [ebx*8]
  2945. add esi,eax
  2946. mov eax,PD NB18A8_N96831 [ebx*8]
  2947. mov I40I42,ebp
  2948. mov I44I46,esi
  2949. add edi,eax
  2950. mov eax,PD P2350B_P6491A [ebx*8]
  2951. sub edx,eax
  2952. mov bl,P50
  2953. mov I41I43,edi
  2954. mov I47I45,edx
  2955. mov esi,PD P80000_P4545F [ebx*8]
  2956. mov eax,PD P80000_N4545F [ecx*8]
  2957. mov edx,PD P2350B_P6491A [ebx*8]
  2958. mov edi,PD NB18A8_P96831 [ecx*8]
  2959. add esi,eax
  2960. mov eax,PD P80000_NA73D7 [ecx*8]
  2961. add edx,edi
  2962. mov ebp,PD P80000_PA73D7 [ebx*8]
  2963. mov edi,PD P2350B_N6491A [ecx*8]
  2964. mov cl,P51
  2965. add ebp,eax
  2966. mov eax,PD NB18A8_N96831 [ebx*8]
  2967. sub edi,eax
  2968. mov eax,PD P80000_P4545F [ecx*8]
  2969. add ebp,eax
  2970. mov eax,PD N96831_P2350B [ecx*8]
  2971. sub edi,eax
  2972. mov eax,PD P80000_PA73D7 [ecx*8]
  2973. sub esi,eax
  2974. mov bl,P52
  2975. mov eax,PD P6491A_PB18A8 [ecx*8]
  2976. mov cl,P54
  2977. sub edx,eax
  2978. mov eax,PD P80000_N4545F [ebx*8]
  2979. add ebp,eax
  2980. mov eax,PD P6491A_NB18A8 [ebx*8]
  2981. add edi,eax
  2982. mov eax,PD P80000_NA73D7 [ebx*8]
  2983. sub esi,eax
  2984. mov eax,PD N96831_N2350B [ebx*8]
  2985. sub edx,eax
  2986. mov eax,PD P80000_NA73D7 [ecx*8]
  2987. add ebp,eax
  2988. mov eax,PD P2350B_N6491A [ecx*8]
  2989. sub edi,eax
  2990. mov eax,PD P80000_N4545F [ecx*8]
  2991. add esi,eax
  2992. mov bl,P55
  2993. mov eax,PD NB18A8_P96831 [ecx*8]
  2994. mov cl,P56
  2995. sub edx,eax
  2996. mov eax,PD P80000_N4545F [ebx*8]
  2997. add ebp,eax
  2998. mov eax,PD P6491A_NB18A8 [ebx*8]
  2999. sub edi,eax
  3000. mov eax,PD P80000_NA73D7 [ebx*8]
  3001. sub esi,eax
  3002. mov eax,PD N96831_N2350B [ebx*8]
  3003. add edx,eax
  3004. mov eax,PD P80000_P4545F [ecx*8]
  3005. add ebp,eax
  3006. mov eax,PD N96831_P2350B [ecx*8]
  3007. add edi,eax
  3008. mov eax,PD P80000_PA73D7 [ecx*8]
  3009. sub esi,eax
  3010. mov bl,P57
  3011. mov eax,PD P6491A_PB18A8 [ecx*8]
  3012. mov cl,P63
  3013. add edx,eax
  3014. mov eax,PD P80000_PA73D7 [ebx*8]
  3015. add ebp,eax
  3016. mov eax,PD P80000_P4545F [ebx*8]
  3017. add esi,eax
  3018. mov eax,PD NB18A8_N96831 [ebx*8]
  3019. mov I50I52,ebp
  3020. mov I54I56,esi
  3021. add edi,eax
  3022. mov eax,PD P2350B_P6491A [ebx*8]
  3023. sub edx,eax
  3024. mov bl,P60
  3025. mov I51I53,edi
  3026. mov I57I55,edx
  3027. mov esi,PD P80000_P4545F [ebx*8]
  3028. mov eax,PD P80000_N4545F [ecx*8]
  3029. mov edx,PD P2350B_P6491A [ebx*8]
  3030. mov edi,PD NB18A8_P96831 [ecx*8]
  3031. add esi,eax
  3032. mov eax,PD P80000_NA73D7 [ecx*8]
  3033. add edx,edi
  3034. mov ebp,PD P80000_PA73D7 [ebx*8]
  3035. mov edi,PD P2350B_N6491A [ecx*8]
  3036. mov cl,P61
  3037. add ebp,eax
  3038. mov eax,PD NB18A8_N96831 [ebx*8]
  3039. sub edi,eax
  3040. mov eax,PD P80000_P4545F [ecx*8]
  3041. add ebp,eax
  3042. mov eax,PD N96831_P2350B [ecx*8]
  3043. sub edi,eax
  3044. mov eax,PD P80000_PA73D7 [ecx*8]
  3045. sub esi,eax
  3046. mov bl,P62
  3047. mov eax,PD P6491A_PB18A8 [ecx*8]
  3048. mov cl,P64
  3049. sub edx,eax
  3050. mov eax,PD P80000_N4545F [ebx*8]
  3051. add ebp,eax
  3052. mov eax,PD P6491A_NB18A8 [ebx*8]
  3053. add edi,eax
  3054. mov eax,PD P80000_NA73D7 [ebx*8]
  3055. sub esi,eax
  3056. mov eax,PD N96831_N2350B [ebx*8]
  3057. sub edx,eax
  3058. mov eax,PD P80000_NA73D7 [ecx*8]
  3059. add ebp,eax
  3060. mov eax,PD P2350B_N6491A [ecx*8]
  3061. sub edi,eax
  3062. mov eax,PD P80000_N4545F [ecx*8]
  3063. add esi,eax
  3064. mov bl,P65
  3065. mov eax,PD NB18A8_P96831 [ecx*8]
  3066. mov cl,P66
  3067. sub edx,eax
  3068. mov eax,PD P80000_N4545F [ebx*8]
  3069. add ebp,eax
  3070. mov eax,PD P6491A_NB18A8 [ebx*8]
  3071. sub edi,eax
  3072. mov eax,PD P80000_NA73D7 [ebx*8]
  3073. sub esi,eax
  3074. mov eax,PD N96831_N2350B [ebx*8]
  3075. add edx,eax
  3076. mov eax,PD P80000_P4545F [ecx*8]
  3077. add ebp,eax
  3078. mov eax,PD N96831_P2350B [ecx*8]
  3079. add edi,eax
  3080. mov eax,PD P80000_PA73D7 [ecx*8]
  3081. sub esi,eax
  3082. mov bl,P67
  3083. mov eax,PD P6491A_PB18A8 [ecx*8]
  3084. mov cl,P73
  3085. add edx,eax
  3086. mov eax,PD P80000_PA73D7 [ebx*8]
  3087. add ebp,eax
  3088. mov eax,PD P80000_P4545F [ebx*8]
  3089. add esi,eax
  3090. mov eax,PD NB18A8_N96831 [ebx*8]
  3091. mov I60I62,ebp
  3092. mov I64I66,esi
  3093. add edi,eax
  3094. mov eax,PD P2350B_P6491A [ebx*8]
  3095. sub edx,eax
  3096. mov bl,P70
  3097. mov I61I63,edi
  3098. mov I67I65,edx
  3099. mov esi,PD P80000_P4545F [ebx*8]
  3100. mov eax,PD P80000_N4545F [ecx*8]
  3101. mov edx,PD P2350B_P6491A [ebx*8]
  3102. mov edi,PD NB18A8_P96831 [ecx*8]
  3103. add esi,eax
  3104. mov eax,PD P80000_NA73D7 [ecx*8]
  3105. add edx,edi
  3106. mov ebp,PD P80000_PA73D7 [ebx*8]
  3107. mov edi,PD P2350B_N6491A [ecx*8]
  3108. mov cl,P71
  3109. add ebp,eax
  3110. mov eax,PD NB18A8_N96831 [ebx*8]
  3111. sub edi,eax
  3112. mov eax,PD P80000_P4545F [ecx*8]
  3113. add ebp,eax
  3114. mov eax,PD N96831_P2350B [ecx*8]
  3115. sub edi,eax
  3116. mov eax,PD P80000_PA73D7 [ecx*8]
  3117. sub esi,eax
  3118. mov bl,P72
  3119. mov eax,PD P6491A_PB18A8 [ecx*8]
  3120. mov cl,P74
  3121. sub edx,eax
  3122. mov eax,PD P80000_N4545F [ebx*8]
  3123. add ebp,eax
  3124. mov eax,PD P6491A_NB18A8 [ebx*8]
  3125. add edi,eax
  3126. mov eax,PD P80000_NA73D7 [ebx*8]
  3127. sub esi,eax
  3128. mov eax,PD N96831_N2350B [ebx*8]
  3129. sub edx,eax
  3130. mov eax,PD P80000_NA73D7 [ecx*8]
  3131. add ebp,eax
  3132. mov eax,PD P2350B_N6491A [ecx*8]
  3133. sub edi,eax
  3134. mov eax,PD P80000_N4545F [ecx*8]
  3135. add esi,eax
  3136. mov bl,P75
  3137. mov eax,PD NB18A8_P96831 [ecx*8]
  3138. mov cl,P76
  3139. sub edx,eax
  3140. mov eax,PD P80000_N4545F [ebx*8]
  3141. add ebp,eax
  3142. mov eax,PD P6491A_NB18A8 [ebx*8]
  3143. sub edi,eax
  3144. mov eax,PD P80000_NA73D7 [ebx*8]
  3145. sub esi,eax
  3146. mov eax,PD N96831_N2350B [ebx*8]
  3147. add edx,eax
  3148. mov eax,PD P80000_P4545F [ecx*8]
  3149. add ebp,eax
  3150. mov eax,PD N96831_P2350B [ecx*8]
  3151. add edi,eax
  3152. mov eax,PD P80000_PA73D7 [ecx*8]
  3153. sub esi,eax
  3154. mov bl,P77
  3155. mov eax,PD P6491A_PB18A8 [ecx*8]
  3156. mov ecx,I00I02 ; Fetch I0 (upper_lim <skew>) = 2000 4000
  3157. ; ; (lower_lim is -upper_limit)
  3158. add edx,eax
  3159. mov eax,PD P80000_PA73D7 [ebx*8]
  3160. add ebp,eax ; I70I72, aka I7. 2000 0000
  3161. mov eax,PD P80000_P4545F [ebx*8]
  3162. add esi,eax
  3163. mov eax,PD NB18A8_N96831 [ebx*8]
  3164. mov I74I76,esi
  3165. mov esi,I30I32 ; Fetch I3 2000 4000
  3166. add edi,eax
  3167. mov eax,I40I42 ; Fetch I4 2000 0000
  3168. sub esi,eax ; I3 - I4 4000 4000
  3169. sub ecx,ebp ; I0 - I7 4000 4000
  3170. shr ecx,1 ; R7 = (I0-I7)/2 (dirty) 2000 2000
  3171. and esi,0FFFEFFFFH ; pre-clean R4
  3172. shr esi,1 ; R4 = (I3-I4)/2 (clean) 2000 2000
  3173. and ecx,0FFFF7FFFH ; R7 = (I0-I7)/2 (clean) 2000 2000
  3174. mov ebx,PD P2350B_P6491A [ebx*8]
  3175. mov I71I73,edi
  3176. sub edx,ebx
  3177. lea ebx,[ecx+ecx*2] ; 3R7 6000 6000
  3178. mov I77I75,edx
  3179. lea edi,[esi+esi*2] ; 3R4 6000 6000
  3180. ; eax: I4 2000 0000
  3181. ; ebx: 3R7 6000 6000
  3182. ; ecx: R7 2000 2000
  3183. ; edx: available
  3184. ; esi: R4 2000 2000
  3185. ; edi: 3R4 6000 6000
  3186. ; ebp: I7 2000 0000
  3187. lea ebp,[ebp+ecx+40004000H] ; R0 = (I0+I7)/2 2000 6000
  3188. add eax,esi ; R3 = (I3+I4)/2 2000 2000
  3189. shr ecx,1 ; R7/2 (dirty) 1000 1000
  3190. and esi,0FFFEFFFFH ; pre-clean
  3191. shr esi,1 ; R4/2 (clean) 1000 1000
  3192. and ecx,0FFFF7FFFH ; clean
  3193. add ebx,ecx ; 7R7/2 7000 7000
  3194. add edi,esi ; 7R4/2 7000 7000
  3195. shr ebx,6 ; 7R7/128 (dirty) 01C0 01C0
  3196. and edi,0FFC0FFFFH ; pre-clean
  3197. shr edi,6 ; 7R4/128 (clean) 01C0 01C0
  3198. and ebx,0FFFF03FFH ; clean
  3199. add ebx,ecx ; 71R7/128 11C0 11C0
  3200. add edi,esi ; 71R4/128 11C0 11C0
  3201. lea edx,[eax+ebp-40004000H] ; S0 = R0 + R3 4000 4000
  3202. sub ebp,eax ; S3 = R0 - R3 4000 4000
  3203. lea ecx,[ebx+ebx*2+6E406E40H] ; 213R7/128 3540 A380
  3204. lea esi,[edi+edi*2+27402740H] ; 213R4/128 3540 5C80
  3205. shr ecx,1 ; 213R7/256 (dirty) 1AA0 51C0
  3206. and esi,0FFFEFFFFH ; pre-clean
  3207. shr esi,1 ; 213R4/256 (clean) 1AA0 2E40
  3208. and ecx,0FFFF7FFFH ; clean
  3209. sub ecx,edi ; S7 = (213R7 - 142R4)/256 2C60 4000
  3210. mov S0,edx ; Free register for work.
  3211. mov S3,ebp ; Free register for work.
  3212. lea esi,[esi+ebx+80008000H] ; S4 = (142R7 + 213R4)/256 2C60 C000
  3213. mov S7,ecx ; Free register for work.
  3214. mov eax,I10I12 ; Fetch I1 2000 4000
  3215. mov S4,esi ; Free register for work.
  3216. ; mem: S4 2C60 C000
  3217. ; mem: S7 2C60 4000
  3218. ; mem: S0 4000 4000
  3219. ; mem: S3 4000 4000
  3220. mov ebx,I20I22 ; Fetch I2 2000 4000
  3221. mov ecx,I50I52 ; Fetch I5 2000 0000
  3222. mov edx,I60I62 ; Fetch I6 2000 0000
  3223. sub eax,edx ; I1 - I6 4000 4000
  3224. sub ebx,ecx ; I2 - I5 4000 4000
  3225. shr eax,1 ; R6 = (I1-I6)/2 (dirty) 2000 2000
  3226. and ebx,0FFFEFFFFH ; pre-clean R5
  3227. shr ebx,1 ; R5 = (I2-I5)/2 (clean) 2000 2000
  3228. and eax,0FFFF7FFFH ; R6 = (I1-I6)/2 (clean) 2000 2000
  3229. ; eax: R6 2000 2000
  3230. ; ebx: R5 2000 2000
  3231. ; ecx: I5 2000 0000
  3232. ; edx: I6 2000 0000
  3233. ; mem: S4 2C60 C000
  3234. ; mem: S7 2C60 4000
  3235. ; mem: S0 4000 4000
  3236. ; mem: S3 4000 4000
  3237. mov esi,ebx ; R5 2000 2000
  3238. mov edi,eax ; R6 2000 2000
  3239. shr esi,6 ; R5/64 0080 0080
  3240. and edi,0FFC0FFFFH ; pre-clean
  3241. shr edi,6 ; R6/64 0080 0080
  3242. and esi,0FFFF03FFH ; clean
  3243. lea edx,[eax+edx+20002000H] ; R1 = (I1+I6)/2 2000 4000
  3244. lea ecx,[ecx+ebx-20002000H] ; R2 = (I2+I5)/2 2000 0000
  3245. lea ebp,[ebx+ebx*2] ; 3R5 6000 6000
  3246. sub ebx,esi ; 63R5/64 1F80 1F80
  3247. shr ebp,4 ; 3R5/16 (dirty) 0600 0600
  3248. lea esi,[eax+eax*2] ; 3R6 6000 6000
  3249. sub eax,edi ; 63R6/64 1F80 1F80
  3250. mov edi,ebx ; 63R5/64 1F80 1F80
  3251. shr edi,7 ; 63R5/8192 (dirty) 003F 003F
  3252. and ebp,0FFFF0FFFH ; clean
  3253. shr esi,4 ; 3R6/16 (dirty) 0600 0600
  3254. and edi,0FFFF01FFH ; clean
  3255. and esi,0FFFF0FFFH ; clean
  3256. sub edx,ecx ; S2 = R1 - R2 4000 4000
  3257. lea edi,[edi+ebp-46BF46BFH] ; 1599R5/8192 063F -4080
  3258. mov ebp,eax ; 63R6/64 1F80 1F80
  3259. shr ebp,7 ; 63R6/8192 (dirty) 003F 003F
  3260. sub eax,edi ; S6 = 8064R6/8192 - 1599R5/8192 25BF 6000
  3261. and ebp,0FFFF01FFH ; clean
  3262. lea ecx,[edx+ecx*2-80008000H] ; S1 = R1 + R2 4000 -4000
  3263. add ebp,esi ; 1599R6/8192 063F 063F
  3264. mov esi,S0 ; Reload S0 4000 4000
  3265. mov edi,CoeffStream ; Fetch addr at which to place blk of coeffs.
  3266. sub esi,ecx ; C4 = T1 = S0 - S1 8000 8000
  3267. lea ebx,[ebx+ebp-45BF45BFH] ; S5 = 8064R5/8192 + 1599R6/8192 25BF -2000
  3268. mov ebp,S4 ; Reload S4 2C60 C000
  3269. ; eax: S6 25BF 6000
  3270. ; ebx: S5 25BF -2000
  3271. ; ecx: S1 4000 -4000
  3272. ; edx: S2 4000 4000
  3273. ; esi: C4 8000 8000
  3274. ; edi: Destination pointer.
  3275. ; ebp: S4 2C60 C000
  3276. ; mem: S7 2C60 4000
  3277. ; mem: S3 4000 4000
  3278. sub ebp,eax ; T6 = S4 - S6 521F 6000
  3279. mov PD [edi+C40C42],esi ; Store coeffs C40 and C42.
  3280. lea ecx,[esi+ecx*2+80008000H] ; C0 = T0 = S0 + S1 8000 8000
  3281. mov esi,S7 ; Reload S7 2C60 4000
  3282. sub esi,ebx ; T5 = S7 - S5 521F 6000
  3283. lea eax,[ebp+eax*2-0C000C000H] ; T4 = S4 + S6 521F 6000
  3284. mov PD [edi+C00C02],ecx ; Store coeffs C00 and C02.
  3285. mov ecx,ebp ; T6 521F 6000
  3286. shr ebp,2 ; T6/4 (dirty) 1487 1800
  3287. lea ebx,[esi+ebx*2+0C000C000H] ; T7 = S7 + S5 521F E000
  3288. ; eax: T4 521F 6000
  3289. ; ebx: T7 521F E000
  3290. ; ecx: T6 521F 6000
  3291. ; edx: S2 4000 4000
  3292. ; esi: T5 521F 6000
  3293. ; edi: Destination pointer.
  3294. ; ebp: T6/4 (dirty) 1487 1800
  3295. ; mem: S3 4000 4000
  3296. ; done: C0, C4
  3297. and ebp,0FFFF3FFFH ; T6/4 (clean) 1487 1800
  3298. sub ebx,eax ; C7 = T7 - T4 <7642> 8000
  3299. add ecx,ebp ; 5T6/4 66A6 7800
  3300. mov PD [edi+C70C72],ebx ; Store coeffs C70 and C72.
  3301. mov ebp,ecx ; 5T6/4 66A6 7800
  3302. and ecx,0FFF8FFFFH ; pre-clean
  3303. shr ecx,3 ; 5T6/32 (clean) 0CD4 0F00
  3304. lea eax,[ebx+eax*2-0C000C000H] ; C1 = T7 + T4 <7642> 8000
  3305. mov ebx,esi ; T5 521F 6000
  3306. and esi,0FFFCFFFFH ; pre-clean
  3307. shr esi,2 ; T5/4 (clean) 1487 1800
  3308. lea ecx,[ecx+ebp-07000700H] ; C5 = 45T6/32 737A 8000
  3309. mov PD [edi+C50C52],ecx ; Store coeffs C50 and C52.
  3310. add esi,ebx ; 5T5/4 66A6 7800
  3311. mov ebx,esi ; 5T5/4 66A6 7800
  3312. and esi,0FFF8FFFFH ; pre-clean
  3313. shr esi,3 ; 5T5/32 (clean) 0CD4 0F00
  3314. mov ebp,S3 ; Reload S3 4000 4000
  3315. mov ecx,edx ; S2 4000 4000
  3316. lea esi,[esi+ebx-07000700H] ; C3 = 45T5/32 737A 8000
  3317. mov ebx,ebp ; S3 4000 4000
  3318. ;
  3319. ; eax: C1 521E 8000
  3320. ; ebx: S3 4000 4000
  3321. ; ecx: S2 4000 4000
  3322. ; edx: S2 4000 4000
  3323. ; esi: C3 737A 8000
  3324. ; edi: Destination pointer.
  3325. ; ebp: S3 4000 4000
  3326. ; done: C0, C4, C5, C7
  3327. shr ebp,2 ; S3/4 (dirty) 1000 1000
  3328. and ecx,0FFFCFFFFH ; pre-clean
  3329. shr ecx,2 ; S2/4 (clean) 1000 1000
  3330. and ebp,0FFFF3FFFH ; S3/4 (clean) 1000 1000
  3331. mov PD [edi+C10C12],eax ; Store coeffs C10 and C12.
  3332. mov PD [edi+C30C32],esi ; Store coeffs C30 and C32.
  3333. lea eax,[edx+ecx] ; 5S2/4 5000 5000
  3334. lea esi,[ebx+ebp] ; 5S3/4 5000 5000
  3335. shr ebp,2 ; S3/16 (dirty) 0400 0400
  3336. and ecx,0FFFCFFFFH ; pre-clean
  3337. shr ecx,2 ; S2/16 (clean) 0400 0400
  3338. and ebp,0FFFF3FFFH ; S3/16 (clean) 0400 0400
  3339. add ecx,eax ; 21S2/16 5400 5400
  3340. add ebp,esi ; 21S3/16 5400 5400
  3341. shr eax,5 ; 5S2/128 (dirty) 0280 0280
  3342. and esi,0FFE0FFFFH ; pre-clean
  3343. shr esi,5 ; 5S3/128 (clean) 0280 0280
  3344. and eax,0FFFF07FFH ; 5S2/128 (clean) 0280 0280
  3345. shr edx,1 ; S2/2 (dirty) 2000 2000
  3346. and ebx,0FFFEFFFFH ; pre-clean
  3347. shr ebx,1 ; S3/2 (clean) 2000 2000
  3348. and edx,0FFFF7FFFH ; S2/2 (clean) 2000 2000
  3349. sub ebx,ecx ; (64S3 - 168S2) / 128 7400 -3400
  3350. add eax,ebp ; (5S2 + 168S3) / 128 5680 5680
  3351. mov ecx,I01I03
  3352. mov ebp,I71I73
  3353. lea ebx,[ebx+esi+0B180B180H] ; C6 = (69S3 - 168S2) / 128 7680 8000
  3354. lea edx,[eax+edx+009800980H] ; C2 = (69S2 + 168S3) / 128 7680 8000
  3355. mov esi,I31I33
  3356. mov eax,I41I43
  3357. sub esi,eax
  3358. sub ecx,ebp
  3359. shr ecx,1
  3360. and esi,0FFFEFFFFH
  3361. shr esi,1
  3362. and ecx,0FFFF7FFFH
  3363. mov PD [edi+C60C62],ebx
  3364. mov PD [edi+C20C22],edx
  3365. lea ebx,[ecx+ecx*2]
  3366. lea edi,[esi+esi*2]
  3367. lea ebp,[ebp+ecx+40004000H]
  3368. add eax,esi
  3369. shr ecx,1
  3370. and esi,0FFFEFFFFH
  3371. shr esi,1
  3372. and ecx,0FFFF7FFFH
  3373. add ebx,ecx
  3374. add edi,esi
  3375. shr ebx,6
  3376. and edi,0FFC0FFFFH
  3377. shr edi,6
  3378. and ebx,0FFFF03FFH
  3379. add ebx,ecx
  3380. add edi,esi
  3381. lea edx,[eax+ebp-40004000H]
  3382. sub ebp,eax
  3383. lea ecx,[ebx+ebx*2+6E406E40H]
  3384. lea esi,[edi+edi*2+27402740H]
  3385. shr ecx,1
  3386. and esi,0FFFEFFFFH
  3387. shr esi,1
  3388. and ecx,0FFFF7FFFH
  3389. sub ecx,edi
  3390. mov S0,edx
  3391. mov S3,ebp
  3392. lea esi,[esi+ebx+80008000H]
  3393. mov S7,ecx
  3394. mov eax,I11I13
  3395. mov S4,esi
  3396. mov ebx,I21I23
  3397. mov ecx,I51I53
  3398. mov edx,I61I63
  3399. sub eax,edx
  3400. sub ebx,ecx
  3401. shr eax,1
  3402. and ebx,0FFFEFFFFH
  3403. shr ebx,1
  3404. and eax,0FFFF7FFFH
  3405. mov esi,ebx
  3406. mov edi,eax
  3407. shr esi,6
  3408. and edi,0FFC0FFFFH
  3409. shr edi,6
  3410. and esi,0FFFF03FFH
  3411. lea edx,[eax+edx+20002000H]
  3412. lea ecx,[ecx+ebx-20002000H]
  3413. lea ebp,[ebx+ebx*2]
  3414. sub ebx,esi
  3415. shr ebp,4
  3416. lea esi,[eax+eax*2]
  3417. sub eax,edi
  3418. mov edi,ebx
  3419. shr edi,7
  3420. and ebp,0FFFF0FFFH
  3421. shr esi,4
  3422. and edi,0FFFF01FFH
  3423. and esi,0FFFF0FFFH
  3424. sub edx,ecx
  3425. lea edi,[edi+ebp-46BF46BFH]
  3426. mov ebp,eax
  3427. shr ebp,7
  3428. sub eax,edi
  3429. and ebp,0FFFF01FFH
  3430. lea ecx,[edx+ecx*2-80008000H]
  3431. add ebp,esi
  3432. mov esi,S0
  3433. mov edi,CoeffStream
  3434. sub esi,ecx
  3435. lea ebx,[ebx+ebp-45BF45BFH]
  3436. mov ebp,S4
  3437. sub ebp,eax
  3438. mov PD [edi+C41C43],esi
  3439. lea ecx,[esi+ecx*2+80008000H]
  3440. mov esi,S7
  3441. sub esi,ebx
  3442. lea eax,[ebp+eax*2-0C000C000H]
  3443. mov PD [edi+C01C03],ecx
  3444. mov ecx,ebp
  3445. shr ebp,2
  3446. lea ebx,[esi+ebx*2+0C000C000H]
  3447. and ebp,0FFFF3FFFH
  3448. sub ebx,eax
  3449. add ecx,ebp
  3450. mov PD [edi+C71C73],ebx
  3451. mov ebp,ecx
  3452. and ecx,0FFF8FFFFH
  3453. shr ecx,3
  3454. lea eax,[ebx+eax*2-0C000C000H]
  3455. mov ebx,esi
  3456. and esi,0FFFCFFFFH
  3457. shr esi,2
  3458. lea ecx,[ecx+ebp-07000700H]
  3459. mov PD [edi+C51C53],ecx
  3460. add esi,ebx
  3461. mov ebx,esi
  3462. and esi,0FFF8FFFFH
  3463. shr esi,3
  3464. mov ebp,S3
  3465. mov ecx,edx
  3466. lea esi,[esi+ebx-07000700H]
  3467. mov ebx,ebp
  3468. ;
  3469. shr ebp,2
  3470. and ecx,0FFFCFFFFH
  3471. shr ecx,2
  3472. and ebp,0FFFF3FFFH
  3473. mov PD [edi+C11C13],eax
  3474. mov PD [edi+C31C33],esi
  3475. lea eax,[edx+ecx]
  3476. lea esi,[ebx+ebp]
  3477. shr ebp,2
  3478. and ecx,0FFFCFFFFH
  3479. shr ecx,2
  3480. and ebp,0FFFF3FFFH
  3481. add ecx,eax
  3482. add ebp,esi
  3483. shr eax,5
  3484. and esi,0FFE0FFFFH
  3485. shr esi,5
  3486. and eax,0FFFF07FFH
  3487. shr edx,1
  3488. and ebx,0FFFEFFFFH
  3489. shr ebx,1
  3490. and edx,0FFFF7FFFH
  3491. sub ebx,ecx
  3492. add eax,ebp
  3493. mov ecx,I04I06
  3494. mov ebp,I74I76
  3495. lea ebx,[ebx+esi+0B180B180H]
  3496. lea edx,[eax+edx+009800980H]
  3497. mov esi,I34I36
  3498. mov eax,I44I46
  3499. sub esi,eax
  3500. sub ecx,ebp
  3501. shr ecx,1
  3502. and esi,0FFFEFFFFH
  3503. shr esi,1
  3504. and ecx,0FFFF7FFFH
  3505. mov PD [edi+C61C63],ebx
  3506. mov PD [edi+C21C23],edx
  3507. lea ebx,[ecx+ecx*2]
  3508. lea edi,[esi+esi*2]
  3509. lea ebp,[ebp+ecx+40004000H]
  3510. add eax,esi
  3511. shr ecx,1
  3512. and esi,0FFFEFFFFH
  3513. shr esi,1
  3514. and ecx,0FFFF7FFFH
  3515. add ebx,ecx
  3516. add edi,esi
  3517. shr ebx,6
  3518. and edi,0FFC0FFFFH
  3519. shr edi,6
  3520. and ebx,0FFFF03FFH
  3521. add ebx,ecx
  3522. add edi,esi
  3523. lea edx,[eax+ebp-40004000H]
  3524. sub ebp,eax
  3525. lea ecx,[ebx+ebx*2+6E406E40H]
  3526. lea esi,[edi+edi*2+27402740H]
  3527. shr ecx,1
  3528. and esi,0FFFEFFFFH
  3529. shr esi,1
  3530. and ecx,0FFFF7FFFH
  3531. sub ecx,edi
  3532. mov S0,edx
  3533. mov S3,ebp
  3534. lea esi,[esi+ebx+80008000H]
  3535. mov S7,ecx
  3536. mov eax,I14I16
  3537. mov S4,esi
  3538. mov ebx,I24I26
  3539. mov ecx,I54I56
  3540. mov edx,I64I66
  3541. sub eax,edx
  3542. sub ebx,ecx
  3543. shr eax,1
  3544. and ebx,0FFFEFFFFH
  3545. shr ebx,1
  3546. and eax,0FFFF7FFFH
  3547. mov esi,ebx
  3548. mov edi,eax
  3549. shr esi,6
  3550. and edi,0FFC0FFFFH
  3551. shr edi,6
  3552. and esi,0FFFF03FFH
  3553. lea edx,[eax+edx+20002000H]
  3554. lea ecx,[ecx+ebx-20002000H]
  3555. lea ebp,[ebx+ebx*2]
  3556. sub ebx,esi
  3557. shr ebp,4
  3558. lea esi,[eax+eax*2]
  3559. sub eax,edi
  3560. mov edi,ebx
  3561. shr edi,7
  3562. and ebp,0FFFF0FFFH
  3563. shr esi,4
  3564. and edi,0FFFF01FFH
  3565. and esi,0FFFF0FFFH
  3566. sub edx,ecx
  3567. lea edi,[edi+ebp-46BF46BFH]
  3568. mov ebp,eax
  3569. shr ebp,7
  3570. sub eax,edi
  3571. and ebp,0FFFF01FFH
  3572. lea ecx,[edx+ecx*2-80008000H]
  3573. add ebp,esi
  3574. mov esi,S0
  3575. mov edi,CoeffStream
  3576. sub esi,ecx
  3577. lea ebx,[ebx+ebp-45BF45BFH]
  3578. mov ebp,S4
  3579. sub ebp,eax
  3580. mov PD [edi+C44C46],esi
  3581. lea ecx,[esi+ecx*2+80008000H]
  3582. mov esi,S7
  3583. sub esi,ebx
  3584. lea eax,[ebp+eax*2-0C000C000H]
  3585. mov PD [edi+C04C06],ecx
  3586. mov ecx,ebp
  3587. shr ebp,2
  3588. lea ebx,[esi+ebx*2+0C000C000H]
  3589. and ebp,0FFFF3FFFH
  3590. sub ebx,eax
  3591. add ecx,ebp
  3592. mov PD [edi+C74C76],ebx
  3593. mov ebp,ecx
  3594. and ecx,0FFF8FFFFH
  3595. shr ecx,3
  3596. lea eax,[ebx+eax*2-0C000C000H]
  3597. mov ebx,esi
  3598. and esi,0FFFCFFFFH
  3599. shr esi,2
  3600. lea ecx,[ecx+ebp-07000700H]
  3601. mov PD [edi+C54C56],ecx
  3602. add esi,ebx
  3603. mov ebx,esi
  3604. and esi,0FFF8FFFFH
  3605. shr esi,3
  3606. mov ebp,S3
  3607. mov ecx,edx
  3608. lea esi,[esi+ebx-07000700H]
  3609. mov ebx,ebp
  3610. ;
  3611. shr ebp,2
  3612. and ecx,0FFFCFFFFH
  3613. shr ecx,2
  3614. and ebp,0FFFF3FFFH
  3615. mov PD [edi+C14C16],eax
  3616. mov PD [edi+C34C36],esi
  3617. lea eax,[edx+ecx]
  3618. lea esi,[ebx+ebp]
  3619. shr ebp,2
  3620. and ecx,0FFFCFFFFH
  3621. shr ecx,2
  3622. and ebp,0FFFF3FFFH
  3623. add ecx,eax
  3624. add ebp,esi
  3625. shr eax,5
  3626. and esi,0FFE0FFFFH
  3627. shr esi,5
  3628. and eax,0FFFF07FFH
  3629. shr edx,1
  3630. and ebx,0FFFEFFFFH
  3631. shr ebx,1
  3632. and edx,0FFFF7FFFH
  3633. sub ebx,ecx
  3634. add eax,ebp
  3635. mov ecx,I07I05
  3636. mov ebp,I77I75
  3637. lea ebx,[ebx+esi+0B180B180H]
  3638. lea edx,[eax+edx+009800980H]
  3639. mov esi,I37I35
  3640. mov eax,I47I45
  3641. sub esi,eax
  3642. sub ecx,ebp
  3643. shr ecx,1
  3644. and esi,0FFFEFFFFH
  3645. shr esi,1
  3646. and ecx,0FFFF7FFFH
  3647. mov PD [edi+C64C66],ebx
  3648. mov PD [edi+C24C26],edx
  3649. lea ebx,[ecx+ecx*2]
  3650. lea edi,[esi+esi*2]
  3651. lea ebp,[ebp+ecx+40004000H]
  3652. add eax,esi
  3653. shr ecx,1
  3654. and esi,0FFFEFFFFH
  3655. shr esi,1
  3656. and ecx,0FFFF7FFFH
  3657. add ebx,ecx
  3658. add edi,esi
  3659. shr ebx,6
  3660. and edi,0FFC0FFFFH
  3661. shr edi,6
  3662. and ebx,0FFFF03FFH
  3663. add ebx,ecx
  3664. add edi,esi
  3665. lea edx,[eax+ebp-40004000H]
  3666. sub ebp,eax
  3667. lea ecx,[ebx+ebx*2+6E406E40H]
  3668. lea esi,[edi+edi*2+27402740H]
  3669. shr ecx,1
  3670. and esi,0FFFEFFFFH
  3671. shr esi,1
  3672. and ecx,0FFFF7FFFH
  3673. sub ecx,edi
  3674. mov S0,edx
  3675. mov S3,ebp
  3676. lea esi,[esi+ebx+80008000H]
  3677. mov S7,ecx
  3678. mov eax,I17I15
  3679. mov S4,esi
  3680. mov ebx,I27I25
  3681. mov ecx,I57I55
  3682. mov edx,I67I65
  3683. sub eax,edx
  3684. sub ebx,ecx
  3685. shr eax,1
  3686. and ebx,0FFFEFFFFH
  3687. shr ebx,1
  3688. and eax,0FFFF7FFFH
  3689. mov esi,ebx
  3690. mov edi,eax
  3691. shr esi,6
  3692. and edi,0FFC0FFFFH
  3693. shr edi,6
  3694. and esi,0FFFF03FFH
  3695. lea edx,[eax+edx+20002000H]
  3696. lea ecx,[ecx+ebx-20002000H]
  3697. lea ebp,[ebx+ebx*2]
  3698. sub ebx,esi
  3699. shr ebp,4
  3700. lea esi,[eax+eax*2]
  3701. sub eax,edi
  3702. mov edi,ebx
  3703. shr edi,7
  3704. and ebp,0FFFF0FFFH
  3705. shr esi,4
  3706. and edi,0FFFF01FFH
  3707. and esi,0FFFF0FFFH
  3708. sub edx,ecx
  3709. lea edi,[edi+ebp-46BF46BFH]
  3710. mov ebp,eax
  3711. shr ebp,7
  3712. sub eax,edi
  3713. and ebp,0FFFF01FFH
  3714. lea ecx,[edx+ecx*2-80008000H]
  3715. add ebp,esi
  3716. mov esi,S0
  3717. mov edi,CoeffStream
  3718. sub esi,ecx
  3719. lea ebx,[ebx+ebp-45BF45BFH]
  3720. mov ebp,S4
  3721. sub ebp,eax
  3722. mov PD [edi+C47C45],esi
  3723. lea ecx,[esi+ecx*2+80008000H]
  3724. mov esi,S7
  3725. sub esi,ebx
  3726. lea eax,[ebp+eax*2-0C000C000H]
  3727. mov PD [edi+C07C05],ecx
  3728. mov ecx,ebp
  3729. shr ebp,2
  3730. lea ebx,[esi+ebx*2+0C000C000H]
  3731. and ebp,0FFFF3FFFH
  3732. sub ebx,eax
  3733. add ecx,ebp
  3734. mov PD [edi+C77C75],ebx
  3735. mov ebp,ecx
  3736. and ecx,0FFF8FFFFH
  3737. shr ecx,3
  3738. lea eax,[ebx+eax*2-0C000C000H]
  3739. mov ebx,esi
  3740. and esi,0FFFCFFFFH
  3741. shr esi,2
  3742. lea ecx,[ecx+ebp-07000700H]
  3743. mov PD [edi+C57C55],ecx
  3744. add esi,ebx
  3745. mov ebx,esi
  3746. and esi,0FFF8FFFFH
  3747. shr esi,3
  3748. mov ebp,S3
  3749. mov ecx,edx
  3750. lea esi,[esi+ebx-07000700H]
  3751. mov ebx,ebp
  3752. ;
  3753. shr ebp,2
  3754. and ecx,0FFFCFFFFH
  3755. shr ecx,2
  3756. and ebp,0FFFF3FFFH
  3757. mov PD [edi+C17C15],eax
  3758. mov PD [edi+C37C35],esi
  3759. lea eax,[edx+ecx]
  3760. lea esi,[ebx+ebp]
  3761. shr ebp,2
  3762. and ecx,0FFFCFFFFH
  3763. shr ecx,2
  3764. and ebp,0FFFF3FFFH
  3765. add ecx,eax
  3766. add ebp,esi
  3767. shr eax,5
  3768. and esi,0FFE0FFFFH
  3769. shr esi,5
  3770. and eax,0FFFF07FFH
  3771. shr edx,1
  3772. and ebx,0FFFEFFFFH
  3773. shr ebx,1
  3774. and edx,0FFFF7FFFH
  3775. sub ebx,ecx
  3776. add eax,ebp
  3777. mov ecx,CoeffStreamStart
  3778. lea ebp,[edi-SIZEOF T_CoeffBlk] ; Advance cursor for block action stream.
  3779. lea ebx,[ebx+esi+0B180B180H]
  3780. lea edx,[eax+edx+009800980H]
  3781. mov PD [edi+C67C65],ebx
  3782. mov PD [edi+C27C25],edx
  3783. ; Forward Slant Transform is done
  3784. cmp ebp,ecx
  3785. mov edi,ebp
  3786. mov CoeffStream,edi
  3787. jae NextBlock ; Process next block.
  3788. Done:
  3789. mov esp,StashESP
  3790. pop ebx
  3791. pop ebp
  3792. pop edi
  3793. pop esi
  3794. rturn
  3795. FORWARDDCT endp
  3796. END