Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4142 lines
147 KiB

  1. ;/* *************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;** *************************************************************************
  13. ;*/
  14. ;////////////////////////////////////////////////////////////////////////////
  15. ;//
  16. ;// $Header: R:\h26x\h26x\src\enc\ex5fdct.asv 1.5 14 May 1996 12:18:50 BNICKERS $
  17. ;// $Log: R:\h26x\h26x\src\enc\ex5fdct.asv $
  18. ;//
  19. ;// Rev 1.5 14 May 1996 12:18:50 BNICKERS
  20. ;// Initial debugging of MMx B-Frame ME.
  21. ;//
  22. ;// Rev 1.4 11 Apr 1996 16:02:06 AKASAI
  23. ;// Updated H261 encoder to new interface and macroblock action stream
  24. ;// data structure in e3mbad.inc for FORWARDDCT. Files updated together
  25. ;// e1enc.cpp, e1enc.h, ex5fdct.asm, e3mbad.inc.
  26. ;//
  27. ;// Added IFNDEF H261 in ex5fdct so that code used only in H263 is
  28. ;// not assembled for H261.
  29. ;//
  30. ;// Rev 1.3 24 Jan 1996 13:21:28 BNICKERS
  31. ;// Implement OBMC
  32. ;//
  33. ;// Rev 1.1 27 Dec 1995 15:32:42 RMCKENZX
  34. ;// Added copyright notice
  35. ;//
  36. ;////////////////////////////////////////////////////////////////////////////
  37. ;
  38. ; e35fdct -- This function performs a Forward Discrete Cosine Transform for H263, on a stream of macroblocks comprised
  39. ; of 8*8 blocks of pels or pel diffs. This version is tuned for the Pentium Microprocessor.
  40. ;
  41. ; Arguments:
  42. ;
  43. ; MBlockActionStream (Input)
  44. ;
  45. ; A stream of MacroBlock Action Descriptors. Each descriptor indicates which blocks of a macroblock are non-empty
  46. ; and thus need to be transformed. There are from 0 to 12 non-empty blocks in each macroblock.
  47. ;
  48. ; Processing commences with the macroblock described by the first descriptor in the stream (regardless of whether
  49. ; its End-Of-Stream bit is set). Processing continues up to but not including the next descriptor that has the
  50. ; End-Of-Stream bit set.
  51. ;
  52. ; This function requires each descriptor in the MBlockActionStream to be 16-byte aligned. Moreover, each of the
  53. ; T_Blk elements in the descriptor must also be 16-byte aligned, and ordered as they are now. (Note that I am
  54. ; talking about the address of these pointer variables, not the alignment of the data they point to.)
  55. ;
  56. ; Best performance will be attained when 8*8 blocks are (or usually are) DWORD aligned. MMx implementations will
  57. ; probably prefer 8-byte alignment.
  58. ;
  59. ; The complete format of the MacroBlock Action Descriptors is provided in e3mbad.inc.
  60. ;
  61. ; TargetFrameBaseAddress -- Address of upper left viewable pel in the target Y plane. When doing B frames, this
  62. ; is the Target B Frame Base Address.
  63. ;
  64. ; PreviousFrameBaseAddress -- Address of the reconstructed previous frame. This really isn't needed for P-frame
  65. ; processing, since the address of each block's prediction was recorded by
  66. ; MotionEstimation. It's only used by B-frame processing.
  67. ;
  68. ; FutureFrameBaseAddress -- Address of the reconstructed future (a.k.a. current) P-frame. Only used when processing
  69. ; B frames.
  70. ;
  71. ; CoeffStream (Output)
  72. ;
  73. ; A stream of storage blocks which receive the DCT output coefficient
  74. ; blocks for each non-empty block described in the MBlockActionStream.
  75. ; Each coefficient block is 128 bytes. The stream must be large enough
  76. ; to hold all the output coefficient blocks.
  77. ;
  78. ; Best performance will be attained by assuring the storage is 32-byte
  79. ; aligned. Best performance will be attained by using the output before
  80. ; the data cache gets changed by other data. Consuming the coefficient
  81. ; blocks in forward order is best, since they are defined in reverse
  82. ; order (and thus the first blocks are most likely to be in cache).
  83. ;
  84. ; The complete format of the coefficient blocks is provided in e3dctc.inc.
  85. ;
  86. ; IsBFrame (Input)
  87. ;
  88. ; 0 (False) if doing Key or P frame. 1 (True) if doing B frame.
  89. OPTION PROLOGUE:None ; No assembler-generated prologue: FORWARDDCT pushes esi/edi/ebp/ebx and builds its own aligned frame by hand.
  90. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; Procedure epilogues expand this macro; it is not defined in the visible part of this file (presumably supplied by an include).
  91. OPTION M510 ; Assemble in MASM 5.10 compatibility mode.
  92. include e3inst.inc ; Encoder instance data
  93. include e3mbad.inc ; MacroBlock Action Descriptor struct layout
  94. include e3dctc.inc ; DCT Coefficient block layout
  95. .xlist
  96. include memmodel.inc
  97. .list
  98. .DATA
  99. InitTbl MACRO WeightHi,WeightLo,TableLabel ; Emit a 256-entry lookup table at TableLabel. Each entry is two DWORDs (8 bytes); entry i is built for CNT = i-128.
  100. TableLabel LABEL DWORD
  101. CNT = -128
  102. REPEAT 128
  103. DWORD ((WeightHi*CNT-08000H)/010000H*010000H)+((WeightLo*CNT-08000H)/010000H) ;; CNT = -128..-1. First DWORD: Hi product scaled to a multiple of 010000H, plus Lo product divided by 010000H; each product is biased by -08000H before the divide. NOTE(review): the bias sign follows CNT, not the product, yet some tables are built with negative weights - confirm that is intended.
  104. DWORD ((WeightHi*CNT-08000H)/010000H*010000H)-((WeightLo*CNT-08000H)/010000H) ;; Second DWORD (entry offset +4): same Hi term, minus the Lo term.
  105. CNT = CNT + 1
  106. ENDM
  107. REPEAT 128
  108. DWORD ((WeightHi*CNT+08000H)/010000H*010000H)+((WeightLo*CNT+08000H)/010000H) ;; CNT = 0..127. Same layout as above, but each product is biased by +08000H before the divide.
  109. DWORD ((WeightHi*CNT+08000H)/010000H*010000H)-((WeightLo*CNT+08000H)/010000H) ;; Second DWORD (entry offset +4): Hi term minus Lo term.
  110. CNT = CNT + 1
  111. ENDM
  112. ENDM
  113. InitTbl 080000H,04545FH,P80000_P4545F
  114. P80000_N4545F = P80000_P4545F + 4 ; Alias for the second DWORD of each 8-byte entry, i.e. the (Hi term - Lo term) variant of the table above. Each InitTbl table is 256 entries * 8 bytes = 2048 bytes.
  115. InitTbl 080000H,0A73D7H,P80000_PA73D7
  116. P80000_NA73D7 = P80000_PA73D7 + 4 ; Second-DWORD alias: Hi term minus Lo term.
  117. BYTE 680 DUP (?) ; Uninitialized padding. To assure that tables interleave nicely in cache.
  118. InitTbl 02350BH, 06491AH,P2350B_P6491A
  119. P2350B_N6491A = P2350B_P6491A + 4 ; Second-DWORD alias: Hi term minus Lo term.
  120. InitTbl -0B18A8H,-096831H,NB18A8_N96831
  121. NB18A8_P96831 = NB18A8_N96831 + 4 ; Second-DWORD alias: the Lo weight is negative here, so subtracting its term gives the "P" (positive) Lo variant.
  122. BYTE 680 DUP (?) ; Uninitialized padding. To assure that tables interleave nicely in cache.
  123. InitTbl -096831H, 02350BH,N96831_P2350B
  124. N96831_N2350B = N96831_P2350B + 4 ; Second-DWORD alias: Hi term minus Lo term.
  125. InitTbl 06491AH, 0B18A8H,P6491A_PB18A8
  126. P6491A_NB18A8 = P6491A_PB18A8 + 4 ; Second-DWORD alias: Hi term minus Lo term.
  127. ColsDefined DD 000000000H,000000000H,07F7F7F7FH,07F7F7F7FH ; Byte offsets 0-15. Every byte of this 68-byte table is 000H or 07FH. NOTE(review): presumably per-column masks, selected by the byte offsets (multiples of 4, 0..60) stored in the ...ColsDef byte tables that follow; the consuming code is not in view - confirm.
  128. DD 000000000H,07F7F7F00H,07F7F7F7FH,00000007FH ; Byte offsets 16-31.
  129. DD 000000000H,07F7F0000H,07F7F7F7FH,000007F7FH ; Byte offsets 32-47.
  130. DD 000000000H,07F000000H,07F7F7F7FH,0007F7F7FH,000000000H ; Byte offsets 48-67.
  131. ; Column lookup bytes, one per half-unit step. Comment columns are headed Right / Left / Chroma; each label below sits on the entry whose own column reads 0, so the three labels index one shared array. Values are multiples of 4 in 0..60 (presumably byte offsets into ColsDefined - confirm against the consuming code).
  132. DB 0 ; -22.0
  133. DB 0 ; -21.5
  134. DB 0 ; -21.0
  135. DB 0 ; -20.5
  136. DB 0 ; -20.0
  137. DB 0 ; -19.5
  138. DB 0 ; -19.0
  139. DB 0 ; -18.5
  140. DB 0 ; -18.0
  141. DB 0 ; -17.5
  142. DB 0 ; -17.0
  143. DB 0 ; -16.5
  144. DB 0 ; -16.0
  145. DB 0 ; -15.5
  146. DB 0 ; -15.0
  147. DB 0 ; -14.5
  148. DB 0 ; -22.0 -14.0
  149. DB 0 ; -21.5 -13.5
  150. DB 0 ; -21.0 -13.0
  151. DB 0 ; -20.5 -12.5
  152. DB 0 ; -20.0 -12.0
  153. DB 0 ; -19.5 -11.5
  154. DB 0 ; -19.0 -11.0
  155. DB 0 ; -18.5 -10.5
  156. DB 0 ; -18.0 -10.0
  157. DB 0 ; -17.5 -9.5
  158. DB 0 ; -17.0 -9.0
  159. DB 0 ; -16.5 -8.5
  160. DB 0 ; -16.0 -8.0
  161. DB 0 ; -15.5 -7.5
  162. DB 48 ; -15.0 -7.0
  163. DB 48 ; -14.5 -6.5
  164. DB 32 ; -14.0 -6.0
  165. DB 32 ; -13.5 -5.5
  166. DB 16 ; -13.0 -5.0
  167. DB 16 ; -12.5 -4.5
  168. DB 4 ; -12.0 -4.0
  169. DB 4 ; -11.5 -3.5
  170. DB 52 ; -11.0 -3.0
  171. DB 52 ; -10.5 -2.5
  172. DB 36 ; -10.0 -2.0
  173. DB 36 ; -9.5 -1.5
  174. DB 20 ; -9.0 -1.0
  175. DB 20 ; -8.5 -.5
  176. LeftYBlkColsDef DB 8 ; -8.0 0 (entry where the Left column reads 0)
  177. DB 8 ; -7.5 .5
  178. DB 8 ; -7.0 1.0
  179. DB 8 ; -6.5 1.5
  180. DB 8 ; -6.0 2.0
  181. DB 8 ; -5.5 2.5
  182. DB 8 ; -5.0 3.0
  183. DB 8 ; -4.5 3.5
  184. DB 8 ; -4.0 4.0
  185. DB 8 ; -3.5 4.5
  186. DB 8 ; -3.0 5.0
  187. DB 8 ; -2.5 5.5
  188. DB 8 ; -2.0 6.0
  189. DB 8 ; -1.5 6.5
  190. DB 8 ; -1.0 7.0
  191. DB 8 ; -.5 7.5
  192. RightYBlkColsDef DB 8 ; 0 8.0 (entry where the Right column reads 0)
  193. DB 56 ; .5 8.5
  194. DB 56 ; 1.0 9.0
  195. DB 40 ; 1.5 9.5
  196. DB 40 ; 2.0 10.0
  197. DB 24 ; 2.5 10.5
  198. DB 24 ; 3.0 11.0
  199. DB 12 ; 3.5 11.5
  200. DB 12 ; 4.0 12.0
  201. DB 60 ; 4.5 12.5
  202. DB 60 ; 5.0 13.0
  203. DB 44 ; 5.5 13.5
  204. DB 44 ; 6.0 14.0
  205. DB 28 ; 6.5 14.5
  206. DB 28 ; 7.0 15.0
  207. DB 0 ; 7.5 15.5
  208. DB 0 ; 8.0 16.0
  209. DB 0 ; 8.5 16.5
  210. DB 0 ; 9.0 17.0
  211. DB 0 ; 9.5 17.5
  212. DB 0 ; 10.0 18.0
  213. DB 0 ; 10.5 18.5
  214. DB 0 ; 11.0 19.0
  215. DB 0 ; 11.5 19.5
  216. DB 0 ; 12.0 20.0
  217. DB 0 ; 12.5 20.5
  218. DB 0 ; 13.0 21.0
  219. DB 0 ; 13.5 21.5
  220. DB 0 ; 14.0 22.0
  221. DB 0 ; 14.5
  222. DB 0 ; 15.0
  223. DB 0 ; 15.5
  224. DB 0 ; 16.0
  225. DB 0 ; 16.5
  226. DB 0 ; 17.0
  227. DB 0 ; 17.5
  228. DB 0 ; 18.0
  229. DB 0 ; 18.5 -11.0
  230. DB 0 ; 19.0 -10.5
  231. DB 0 ; 19.5 -10.0
  232. DB 0 ; 20.0 -9.5
  233. DB 0 ; 20.5 -9.0
  234. DB 0 ; 21.0 -8.5
  235. DB 0 ; 21.5 -8.0
  236. DB 0 ; 22.0 -7.5
  237. DB 48 ; -7.0
  238. DB 48 ; -6.5
  239. DB 32 ; -6.0
  240. DB 32 ; -5.5
  241. DB 16 ; -5.0
  242. DB 16 ; -4.5
  243. DB 4 ; -4.0
  244. DB 4 ; -3.5
  245. DB 52 ; -3.0
  246. DB 52 ; -2.5
  247. DB 36 ; -2.0
  248. DB 36 ; -1.5
  249. DB 20 ; -1.0
  250. DB 20 ; -.5
  251. ChromaColsDef DB 8 ; 0 (entry where the Chroma column reads 0)
  252. DB 56 ; .5
  253. DB 56 ; 1.0
  254. DB 40 ; 1.5
  255. DB 40 ; 2.0
  256. DB 24 ; 2.5
  257. DB 24 ; 3.0
  258. DB 12 ; 3.5
  259. DB 12 ; 4.0
  260. DB 60 ; 4.5
  261. DB 60 ; 5.0
  262. DB 44 ; 5.5
  263. DB 44 ; 6.0
  264. DB 28 ; 6.5
  265. DB 28 ; 7.0
  266. DB 0 ; 7.5
  267. DB 0 ; 8.0
  268. DB 0 ; 8.5
  269. DB 0 ; 9.0
  270. DB 0 ; 9.5
  271. DB 0 ; 10.0
  272. DB 0 ; 10.5
  273. DB 0 ; 11.0
  274. ; Line lookup bytes, one per half-unit step. Comment columns are headed Lower / Upper / Chroma; each label below sits on the entry whose own column reads 0, so the three labels index one shared array. Values are 8-bit masks from 000H to 0FFH (presumably one bit per block line - confirm against the consuming code).
  275. DB 000H ; -22.0
  276. DB 000H ; -21.5
  277. DB 000H ; -21.0
  278. DB 000H ; -20.5
  279. DB 000H ; -20.0
  280. DB 000H ; -19.5
  281. DB 000H ; -19.0
  282. DB 000H ; -18.5
  283. DB 000H ; -18.0
  284. DB 000H ; -17.5
  285. DB 000H ; -17.0
  286. DB 000H ; -16.5
  287. DB 000H ; -16.0
  288. DB 000H ; -15.5
  289. DB 000H ; -15.0
  290. DB 000H ; -14.5
  291. DB 000H ; -22.0 -14.0
  292. DB 000H ; -21.5 -13.5
  293. DB 000H ; -21.0 -13.0
  294. DB 000H ; -20.5 -12.5
  295. DB 000H ; -20.0 -12.0
  296. DB 000H ; -19.5 -11.5
  297. DB 000H ; -19.0 -11.0
  298. DB 000H ; -18.5 -10.5
  299. DB 000H ; -18.0 -10.0
  300. DB 000H ; -17.5 -9.5
  301. DB 000H ; -17.0 -9.0
  302. DB 000H ; -16.5 -8.5
  303. DB 000H ; -16.0 -8.0
  304. DB 000H ; -15.5 -7.5
  305. DB 001H ; -15.0 -7.0
  306. DB 001H ; -14.5 -6.5
  307. DB 003H ; -14.0 -6.0
  308. DB 003H ; -13.5 -5.5
  309. DB 007H ; -13.0 -5.0
  310. DB 007H ; -12.5 -4.5
  311. DB 00FH ; -12.0 -4.0
  312. DB 00FH ; -11.5 -3.5
  313. DB 01FH ; -11.0 -3.0
  314. DB 01FH ; -10.5 -2.5
  315. DB 03FH ; -10.0 -2.0
  316. DB 03FH ; -9.5 -1.5
  317. DB 07FH ; -9.0 -1.0
  318. DB 07FH ; -8.5 -.5
  319. UpperYBlkLinesDef DB 0FFH ; -8.0 0 (entry where the Upper column reads 0)
  320. DB 0FFH ; -7.5 .5
  321. DB 0FFH ; -7.0 1.0
  322. DB 0FFH ; -6.5 1.5
  323. DB 0FFH ; -6.0 2.0
  324. DB 0FFH ; -5.5 2.5
  325. DB 0FFH ; -5.0 3.0
  326. DB 0FFH ; -4.5 3.5
  327. DB 0FFH ; -4.0 4.0
  328. DB 0FFH ; -3.5 4.5
  329. DB 0FFH ; -3.0 5.0
  330. DB 0FFH ; -2.5 5.5
  331. DB 0FFH ; -2.0 6.0
  332. DB 0FFH ; -1.5 6.5
  333. DB 0FFH ; -1.0 7.0
  334. DB 0FFH ; -.5 7.5
  335. LowerYBlkLinesDef DB 0FFH ; 0 8.0 (entry where the Lower column reads 0)
  336. DB 0FEH ; .5 8.5
  337. DB 0FEH ; 1.0 9.0
  338. DB 0FCH ; 1.5 9.5
  339. DB 0FCH ; 2.0 10.0
  340. DB 0F8H ; 2.5 10.5
  341. DB 0F8H ; 3.0 11.0
  342. DB 0F0H ; 3.5 11.5
  343. DB 0F0H ; 4.0 12.0
  344. DB 0E0H ; 4.5 12.5
  345. DB 0E0H ; 5.0 13.0
  346. DB 0C0H ; 5.5 13.5
  347. DB 0C0H ; 6.0 14.0
  348. DB 080H ; 6.5 14.5
  349. DB 080H ; 7.0 15.0
  350. DB 000H ; 7.5 15.5
  351. DB 000H ; 8.0 16.0
  352. DB 000H ; 8.5 16.5
  353. DB 000H ; 9.0 17.0
  354. DB 000H ; 9.5 17.5
  355. DB 000H ; 10.0 18.0
  356. DB 000H ; 10.5 18.5
  357. DB 000H ; 11.0 19.0
  358. DB 000H ; 11.5 19.5
  359. DB 000H ; 12.0 20.0
  360. DB 000H ; 12.5 20.5
  361. DB 000H ; 13.0 21.0
  362. DB 000H ; 13.5 21.5
  363. DB 000H ; 14.0 22.0
  364. DB 000H ; 14.5
  365. DB 000H ; 15.0
  366. DB 000H ; 15.5
  367. DB 000H ; 16.0
  368. DB 000H ; 16.5
  369. DB 000H ; 17.0
  370. DB 000H ; 17.5
  371. DB 000H ; 18.0
  372. DB 000H ; 18.5 -11.0
  373. DB 000H ; 19.0 -10.5
  374. DB 000H ; 19.5 -10.0
  375. DB 000H ; 20.0 -9.5
  376. DB 000H ; 20.5 -9.0
  377. DB 000H ; 21.0 -8.5
  378. DB 000H ; 21.5 -8.0
  379. DB 000H ; 22.0 -7.5
  380. DB 001H ; -7.0
  381. DB 001H ; -6.5
  382. DB 003H ; -6.0
  383. DB 003H ; -5.5
  384. DB 007H ; -5.0
  385. DB 007H ; -4.5
  386. DB 00FH ; -4.0
  387. DB 00FH ; -3.5
  388. DB 01FH ; -3.0
  389. DB 01FH ; -2.5
  390. DB 03FH ; -2.0
  391. DB 03FH ; -1.5
  392. DB 07FH ; -1.0
  393. DB 07FH ; -.5
  394. ChromaLinesDef DB 0FFH ; 0 (entry where the Chroma column reads 0)
  395. DB 0FEH ; .5
  396. DB 0FEH ; 1.0
  397. DB 0FCH ; 1.5
  398. DB 0FCH ; 2.0
  399. DB 0F8H ; 2.5
  400. DB 0F8H ; 3.0
  401. DB 0F0H ; 3.5
  402. DB 0F0H ; 4.0
  403. DB 0E0H ; 4.5
  404. DB 0E0H ; 5.0
  405. DB 0C0H ; 5.5
  406. DB 0C0H ; 6.0
  407. DB 080H ; 6.5
  408. DB 080H ; 7.0
  409. DB 000H ; 7.5
  410. DB 000H ; 8.0
  411. DB 000H ; 8.5
  412. DB 000H ; 9.0
  413. DB 000H ; 9.5
  414. DB 000H ; 10.0
  415. DB 000H ; 10.5
  416. DB 000H ; 11.0
  417. .CODE
  418. ;ASSUME cs : FLAT
  419. ;ASSUME ds : FLAT
  420. ;ASSUME es : FLAT
  421. ;ASSUME fs : FLAT
  422. ;ASSUME gs : FLAT
  423. ;ASSUME ss : FLAT
  424. FORWARDDCT proc C AMBlockActionStream: DWORD,
  425. ATargetFrameBaseAddress: DWORD, APreviousFrameBaseAddress: DWORD,
  426. AFutureFrameBaseAddress: DWORD, ACoeffStream: DWORD, AIsBFrame: DWORD,
  427. AIsAdvancedPrediction: DWORD, AIsPOfPBPair: DWORD, AScratchBlocks: DWORD,
  428. ANumMBlksInGOB: DWORD
  429. LocalFrameSize = 196
  430. RegisterStorageSize = 16
  431. ; Arguments:
  432. MBlockActionStream = RegisterStorageSize + 4
  433. TargetFrameBaseAddress_arg = RegisterStorageSize + 8
  434. PreviousFrameBaseAddress_arg = RegisterStorageSize + 12
  435. FutureFrameBaseAddress_arg = RegisterStorageSize + 16
  436. CoeffStream_arg = RegisterStorageSize + 20
  437. IsBFrame = RegisterStorageSize + 24
  438. IsAdvancedPrediction = RegisterStorageSize + 28
  439. IsPOfPBPair = RegisterStorageSize + 32
  440. ScratchBlocks = RegisterStorageSize + 36
  441. NumMBlksInGOB = RegisterStorageSize + 40
  442. EndOfArgList = RegisterStorageSize + 44
  443. ; Locals (on local stack frame)
  444. P00 EQU [esp+ 8] ; Biased Pels or Biased Pel Differences
  445. P01 EQU [esp+ 9]
  446. P02 EQU [esp+ 10]
  447. P03 EQU [esp+ 11]
  448. P04 EQU [esp+ 12]
  449. P05 EQU [esp+ 13]
  450. P06 EQU [esp+ 14]
  451. P07 EQU [esp+ 15]
  452. P10 EQU [esp+ 16]
  453. P11 EQU [esp+ 17]
  454. P12 EQU [esp+ 18]
  455. P13 EQU [esp+ 19]
  456. P14 EQU [esp+ 20]
  457. P15 EQU [esp+ 21]
  458. P16 EQU [esp+ 22]
  459. P17 EQU [esp+ 23]
  460. P20 EQU [esp+ 24]
  461. P21 EQU [esp+ 25]
  462. P22 EQU [esp+ 26]
  463. P23 EQU [esp+ 27]
  464. P24 EQU [esp+ 28]
  465. P25 EQU [esp+ 29]
  466. P26 EQU [esp+ 30]
  467. P27 EQU [esp+ 31]
  468. P30 EQU [esp+ 32]
  469. P31 EQU [esp+ 33]
  470. P32 EQU [esp+ 34]
  471. P33 EQU [esp+ 35]
  472. P34 EQU [esp+ 36]
  473. P35 EQU [esp+ 37]
  474. P36 EQU [esp+ 38]
  475. P37 EQU [esp+ 39]
  476. P40 EQU [esp+ 40]
  477. P41 EQU [esp+ 41]
  478. P42 EQU [esp+ 42]
  479. P43 EQU [esp+ 43]
  480. P44 EQU [esp+ 44]
  481. P45 EQU [esp+ 45]
  482. P46 EQU [esp+ 46]
  483. P47 EQU [esp+ 47]
  484. P50 EQU [esp+ 48]
  485. P51 EQU [esp+ 49]
  486. P52 EQU [esp+ 50]
  487. P53 EQU [esp+ 51]
  488. P54 EQU [esp+ 52]
  489. P55 EQU [esp+ 53]
  490. P56 EQU [esp+ 54]
  491. P57 EQU [esp+ 55]
  492. P60 EQU [esp+ 56]
  493. P61 EQU [esp+ 57]
  494. P62 EQU [esp+ 58]
  495. P63 EQU [esp+ 59]
  496. P64 EQU [esp+ 60]
  497. P65 EQU [esp+ 61]
  498. P66 EQU [esp+ 62]
  499. P67 EQU [esp+ 63]
  500. P70 EQU [esp+ 64]
  501. P71 EQU [esp+ 65]
  502. P72 EQU [esp+ 66]
  503. P73 EQU [esp+ 67]
  504. P74 EQU [esp+ 68]
  505. P75 EQU [esp+ 69]
  506. P76 EQU [esp+ 70]
  507. P77 EQU [esp+ 71]
  508. I00I02 EQU P00 ; Intermed for row 0, columns 0 and 2.
  509. I01I03 EQU P04 ; Share storage with pels.
  510. I04I06 EQU [esp+ 72]
  511. Mask00 EQU [esp+ 72]
  512. I07I05 EQU [esp+ 76]
  513. Mask04 EQU [esp+ 76]
  514. I10I12 EQU P10
  515. I11I13 EQU P14
  516. I14I16 EQU [esp+ 80]
  517. Mask10 EQU [esp+ 80]
  518. I17I15 EQU [esp+ 84]
  519. Mask14 EQU [esp+ 84]
  520. I20I22 EQU P20
  521. I21I23 EQU P24
  522. I24I26 EQU [esp+ 88]
  523. Mask20 EQU [esp+ 88]
  524. I27I25 EQU [esp+ 92]
  525. Mask24 EQU [esp+ 92]
  526. I30I32 EQU P30
  527. I31I33 EQU P34
  528. I34I36 EQU [esp+ 96]
  529. Mask30 EQU [esp+ 96]
  530. I37I35 EQU [esp+100]
  531. Mask34 EQU [esp+100]
  532. I40I42 EQU P40
  533. I41I43 EQU P44
  534. I44I46 EQU [esp+104]
  535. Mask40 EQU [esp+104]
  536. I47I45 EQU [esp+108]
  537. Mask44 EQU [esp+108]
  538. I50I52 EQU P50
  539. I51I53 EQU P54
  540. I54I56 EQU [esp+112]
  541. Mask50 EQU [esp+112]
  542. I57I55 EQU [esp+116]
  543. Mask54 EQU [esp+116]
  544. I60I62 EQU P60
  545. I61I63 EQU P64
  546. I64I66 EQU [esp+120]
  547. Mask60 EQU [esp+120]
  548. I67I65 EQU [esp+124]
  549. Mask64 EQU [esp+124]
  550. I70I72 EQU P70
  551. I71I73 EQU P74
  552. I74I76 EQU [esp+128]
  553. Mask70 EQU [esp+128]
  554. I77I75 EQU [esp+132]
  555. Mask74 EQU [esp+132]
  556. S4 EQU I10I12 ; Temp storage, shared.
  557. S7 EQU I00I02 ; Temp storage, shared.
  558. S3 EQU I30I32 ; Temp storage, shared.
  559. S0 EQU I40I42 ; Temp storage, shared.
  560. CoeffStreamStart EQU [esp+ 0]
  561. CoeffStream EQU [esp+ 4]
  562. BlkActionDescrAddr EQU [esp+136]
  563. FutureFrameBaseAddress EQU [esp+140]
  564. DistFromTargetToPastP EQU [esp+144]
  565. TargetFrameBaseAddress EQU [esp+148]
  566. PredictionsBaseAddress EQU [esp+152]
  567. IsPlainPFrame EQU [esp+156]
  568. PreviousFrameBaseAddress EQU [esp+160]
  569. DistToBlockToLeft EQU [esp+164]
  570. DistToBlockAbove EQU [esp+168]
  571. DistToBlockToRight EQU [esp+172]
  572. DistToBlockBelow EQU [esp+176]
  573. DistFromBlk1ToBlk3Above EQU [esp+180]
  574. MBActionCursor EQU [esp+184]
  575. CentralRefAddrAndInterps EQU [esp+188]
  576. StashESP EQU [esp+192]
  577. push esi
  578. push edi
  579. push ebp
  580. push ebx
  581. mov ebx,esp
  582. sub esp,LocalFrameSize+4
  583. mov edi,[ebx+CoeffStream_arg] ; Get address of storage for coeffs.
  584. and esp,0FFFFFFC0H ; Get 64-byte aligned.
  585. xor ebp,ebp
  586. add esp,4 ; esp at cache line plus 4.
  587. mov esi,[ebx+MBlockActionStream] ; Get address of MB action stream.
  588. mov StashESP,ebx
  589. mov edx,[ebx+TargetFrameBaseAddress_arg]
  590. mov TargetFrameBaseAddress,edx
  591. mov eax,[ebx+PreviousFrameBaseAddress_arg]
  592. mov PreviousFrameBaseAddress,eax
  593. sub eax,edx
  594. mov ecx,[ebx+FutureFrameBaseAddress_arg]
  595. mov FutureFrameBaseAddress,ecx
  596. mov DistFromTargetToPastP,eax
  597. mov CoeffStreamStart,edi
  598. xor eax,eax
  599. xor ecx,ecx
  600. IFNDEF H261
  601. ;; H261 does not execute the OBMC code so it is included only when H261 is not defined
  602. ;;
  603. cmp ebp,[ebx+IsBFrame]
  604. mov edx,PITCH
  605. jne NextBMacroBlock
  606. cmp ebp,[ebx+IsAdvancedPrediction]
  607. je NextMacroBlock
  608. mov eax,[ebx+ScratchBlocks] ; We must do OBMC.
  609. mov ecx,[esi].BlkY1.BlkOffset
  610. sub eax,ecx
  611. mov ebp,[ebx+IsPOfPBPair]
  612. xor ebp,1
  613. mov PredictionsBaseAddress,eax
  614. mov IsPlainPFrame,ebp
  615. mov ebp,[ebx+NumMBlksInGOB]
  616. imul ebp,-SIZEOF T_MacroBlockActionDescr
  617. add ebp,2*SIZEOF T_Blk
  618. mov DistFromBlk1ToBlk3Above,ebp
  619. ;===============================================================================
  620. ;===============================================================================
  621. ; First pass builds block action stream from macroblock action stream.
  622. ;===============================================================================
  623. ;===============================================================================
  624. ; esi -- MacroBlockActionStream cursor
  625. ; edi -- BlockActionStream cursor
  626. ; edx -- Address of a block to do
  627. ; bl -- BlockType;
  628. ; MB edge condition: 1 off if left edge | 2: right | 4: top | 8: bottom
  629. ; eax -- Coded block pattern for P block;
  630. ; (Block_number - 1) * SIZEOF T_Blk
  631. NextMacroBlock_OBMC:
  632. mov bl,PB [esi].BlockType
  633. mov al,PB [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  634. ; Bit 4 set for non-empty U blk.
  635. ; Bit 5 set for non-empty V blk.
  636. ; Bit 6 clear except at stream end.
  637. ; Bit 7 clear. Unused.
  638. and bl,IsINTRA
  639. jne MBIsIntraCoded_OBMC
  640. lea edx,[esi].BlkY1+12 ; Addr of block addr (plus 12).
  641. test al,1 ; Check if block 1 empty.
  642. mov [edi].BlockAddr,edx ; Store address of block address.
  643. je Block1DescrBuilt
  644. mov al,[esi].MBEdgeType
  645. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  646. shl eax,31
  647. mov ecx,-SIZEOF T_MacroBlockActionDescr + SIZEOF T_Blk
  648. sar eax,31
  649. mov CoeffStream,edi ; Stash block descriptor ptr.
  650. and ecx,eax ; Blk to left is blk 2 of mb to the left, or off edge.
  651. mov al,[esi].MBEdgeType
  652. shl eax,29
  653. mov DistToBlockToLeft,ecx
  654. sar eax,31
  655. mov ecx,DistFromBlk1ToBlk3Above
  656. and ecx,eax ; Blk above is in macroblock above, or off upper edge.
  657. mov eax,SIZEOF T_Blk ; Blk to right is blk 2 of current macroblock.
  658. mov DistToBlockAbove,ecx
  659. mov ecx,2*SIZEOF T_Blk; Blk below is blk 3 of current macroblock.
  660. mov DistToBlockToRight,eax
  661. mov DistToBlockBelow,ecx
  662. mov ebp,T_MacroBlockActionDescr.BlkY1
  663. jmp BuildOBMCPrediction
  664. Block1DescrBuilt:
  665. test al,2 ; Check if block 2 empty.
  666. lea edx,[esi].BlkY2+12 ; Addr of block addr (plus 12).
  667. mov [edi].BlockAddr,edx ; Store address of block address.
  668. je Block2DescrBuilt
  669. mov al,[esi].MBEdgeType
  670. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  671. shl eax,30
  672. mov ecx,SIZEOF T_MacroBlockActionDescr - SIZEOF T_Blk
  673. sar eax,31
  674. mov CoeffStream,edi ; Stash block descriptor ptr.
  675. and ecx,eax ; Blk to right is blk 1 of mb to right, or off edge.
  676. mov al,[esi].MBEdgeType
  677. shl eax,29
  678. mov DistToBlockToRight,ecx
  679. sar eax,31
  680. mov ecx,DistFromBlk1ToBlk3Above
  681. and ecx,eax ; Blk above is in macroblock above, or off upper edge.
  682. mov eax,-SIZEOF T_Blk ; Blk to left is blk 1 of current macroblock.
  683. mov DistToBlockAbove,ecx
  684. mov ecx,2*SIZEOF T_Blk; Blk below is blk 4 of current macroblock.
  685. mov DistToBlockToLeft,eax
  686. mov DistToBlockBelow,ecx
  687. mov ebp,T_MacroBlockActionDescr.BlkY2
  688. jmp BuildOBMCPrediction
  689. Block1or2DescrBuilt:
  690. mov al,PB [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  691. mov edi,CoeffStream ; Restore block descriptor ptr.
  692. jl Block1DescrBuilt
  693. Block2DescrBuilt:
  694. test al,4 ; Check if block 3 empty.
  695. lea edx,[esi].BlkY3+12 ; Addr of block addr (plus 12).
  696. mov [edi].BlockAddr,edx ; Store address of block address.
  697. je Block3DescrBuilt
  698. mov al,[esi].MBEdgeType
  699. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  700. shl eax,31
  701. mov ecx,-SIZEOF T_MacroBlockActionDescr + SIZEOF T_Blk
  702. sar eax,31
  703. mov CoeffStream,edi ; Stash block descriptor ptr.
  704. and eax,ecx ; Blk to left is blk 4 of mb to the left, or off edge.
  705. mov ecx,-2*SIZEOF T_Blk ; Blk above is blk 1 of current mb.
  706. mov DistToBlockToLeft,eax
  707. mov eax,SIZEOF T_Blk ; Blk to right is blk 4 of current macroblock.
  708. mov DistToBlockAbove,ecx
  709. xor ecx,ecx ; Blk below is current block.
  710. mov DistToBlockToRight,eax
  711. mov DistToBlockBelow,ecx
  712. mov ebp,T_MacroBlockActionDescr.BlkY3
  713. jmp BuildOBMCPrediction
  714. Block3DescrBuilt:
  715. test al,8 ; Check if block 4 empty.
  716. lea edx,[esi].BlkY4+12 ; Addr of block addr (plus 12).
  717. mov [edi].BlockAddr,edx ; Store address of block address.
  718. je Block4DescrBuilt
  719. mov al,[esi].MBEdgeType
  720. add edi,T_CoeffBlk ; Advance block descriptor ptr.
  721. shl eax,30
  722. mov ecx,SIZEOF T_MacroBlockActionDescr - SIZEOF T_Blk
  723. sar eax,31
  724. mov CoeffStream,edi ; Stash block descriptor ptr.
  725. and eax,ecx ; Blk to right is blk 3 of mb to right, or off edge.
  726. mov ecx,-2*SIZEOF T_Blk ; Blk above is blk 2 of current mb.
  727. mov DistToBlockToRight,eax
  728. mov eax,-SIZEOF T_Blk ; Blk to left is blk 3 of current macroblock.
  729. mov DistToBlockAbove,ecx
  730. xor ecx,ecx ; Blk below is current block.
  731. mov DistToBlockToLeft,eax
  732. mov DistToBlockBelow,ecx
  733. mov ebp,T_MacroBlockActionDescr.BlkY4
  734. BuildOBMCPrediction:
  735. ; esi -- MacroBlockActionStream cursor
  736. ; ebp -- T_MacroBlockActionDescr.BlkYN
  737. ; edi -- Address at which to put prediction block
  738. mov edi,PredictionsBaseAddress
  739. mov eax,[esi+ebp*1].T_Blk.BlkOffset; BlkOffset
  740. add edi,eax ; Compute addr at which to put OBMC pred.
  741. mov eax,[esi+ebp*1].T_Blk.MVs ; al = horz MV; ah = vert MV.
  742. test eax,1
  743. mov edx,[esi+ebp*1].T_Blk.PastRef ; Fetch address for ref block.
  744. mov MBActionCursor,esi
  745. jne HorzInterpInCentralPred
  746. mov [esi+ebp*1].T_Blk.PastRef,edi ; Update address for ref block.
  747. test eax,0100H
  748. mov ecx,PITCH
  749. jne VertInterpInCentralPred
  750. ; No half pel interpolation for central point required. Just copy it.
  751. @@:
  752. mov eax,[edx+0]
  753. mov ebx,[edx+4]
  754. mov [edi+ 0],eax
  755. mov [edi+ 4],ebx
  756. mov [edi+ 8],eax
  757. mov [edi+12],ebx
  758. mov [edi+28],eax
  759. mov [edi+32],ebx
  760. add edx,PITCH
  761. add edi,PITCH
  762. add ebp,020000000H
  763. jnc @b
  764. sub edi,PITCH*8
  765. sub edx,PITCH*8-080000000H ; Address of ref, xor 10 in high 2 bits.
  766. jmp CentralPredGottenForOBMC
  767. HorzInterpInCentralPred:
  768. mov [esi+ebp*1].T_Blk.PastRef,edi ; Update address for ref block.
  769. test eax,0100H
  770. mov ecx,1
  771. jne BothInterpInCentralPred
  772. VertInterpInCentralPred:
  773. @@:
  774. mov eax,[edx+0]
  775. mov ebx,[edx+4]
  776. add eax,[edx+ecx+0]
  777. add ebx,[edx+ecx+4]
  778. add eax,001010101H
  779. add ebx,001010101H
  780. shr eax,1
  781. and ebx,0FEFEFEFEH
  782. shr ebx,1
  783. and eax,07F7F7F7FH
  784. mov [edi+ 0],eax
  785. mov [edi+ 4],ebx
  786. mov [edi+ 8],eax
  787. mov [edi+12],ebx
  788. mov [edi+28],eax
  789. mov [edi+32],ebx
  790. add edx,PITCH
  791. add edi,PITCH
  792. add ebp,020000000H
  793. jnc @b
  794. sub edi,PITCH*8
  795. sub edx,PITCH*8
  796. shl ecx,30
  797. xor edx,ecx ; Address of ref, xor 00 in high 2 bits if vertically
  798. ; ; interpolated; xor 01 if horizontally interpolated.
  799. jmp CentralPredGottenForOBMC
  800. BothInterpInCentralPred:
  801. @@:
  802. mov eax,[edx+1] ; <P04 P03 P02 P01> prediction pels.
  803. mov esi,001010101H ; Get 001010101H mask.
  804. mov ebx,[edx] ; <P03 P02 P01 P00>.
  805. add edi,4 ; Pre-increment OBMC prediction block pointer.
  806. mov ecx,[edx+PITCH+1] ; <P14 P13 P12 P11>.
  807. add eax,ebx ; <P04+P03 P03+P02 P02+P01 P01+P00>.
  808. mov ebx,[edx+PITCH] ; <P13 P12 P11 P10>.
  809. and esi,eax ; <(P04+P03)&1 ...>.
  810. shr eax,1 ; <(P04+P03)/2 ...> (dirty).
  811. add ebx,ecx ; <P14+P13 P13+P12 P12+P11 P11+P10>.
  812. and eax,07F7F7F7FH ; <(P04+P03)/2 ...> (clean).
  813. add ebx,esi ; <P14+P13+((P04+P03)&1) ...>.
  814. shr ebx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  815. add edx,4 ; Advance reference block pointer.
  816. and ebx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  817. add eax,001010101H ; <(P04+P03)/2+1 ...>.
  818. add ebx,eax ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  819. mov eax,4
  820. shr ebx,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  821. mov esi,MBActionCursor ; Speculatively restore esi.
  822. and ebx,07F7F7F7FH ; Interpolated prediction.
  823. and eax,edi
  824. mov [edi-4],ebx
  825. mov [edi+8-4],ebx
  826. mov [edi+28-4],ebx
  827. jne @b
  828. add edi,PITCH-8 ; Advance to next line of block.
  829. add edx,PITCH-8 ; Advance to next line of block.
  830. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  831. jnc @b
  832. sub edx,PITCH*8
  833. xor edx,0C0000000H ; Address of ref, xor 11 in high 2 bits.
  834. sub edi,PITCH*8
  835. CentralPredGottenForOBMC:
  836. ; At this point, the central contribution to OBMC prediction is in its scratch
  837. ; block, whose address has been written to PastRef in the block action descr.
  838. ;
  839. ; esi -- MacroBlockActionStream cursor
  840. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  841. ; edi -- Address at which to put prediction block
  842. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  843. ; 00 -- If central ref was interpolated vertically.
  844. ; 01 -- If central ref was interpolated horizontally.
  845. ; 10 -- If central ref was not interpolated.
  846. ; 11 -- If central ref was interpolated both ways.
  847. ; eax -- Offset to block descriptor for block to left.
  848. mov eax,DistToBlockToLeft
  849. lea ebx,[esi+ebp]
  850. add ebx,eax ; Address of block descriptor for block to the left.
  851. mov ecx,-SIZEOF T_MacroBlockActionDescr
  852. and ecx,ebx ; Address of macroblock descr for block to the left.
  853. mov ah,IsPlainPFrame ; 0 if P of PB; 1 if run-of-the-mill P frame.
  854. mov ebx,[ebx].T_Blk.MVs
  855. mov CentralRefAddrAndInterps,edx ; Stash function of ref addr and interps.
  856. mov al,[ecx].BlockType ; Bottom bit set if left neighbor is INTRA.
  857. mov cl,bh
  858. and al,ah ; 0 if PB frame or if not INTRA
  859. jne LeftPredGottenForOBMC ; Jump if INTRA in plain P frame. (Use central)
  860. shl ebx,24 ; Get horz MV in [24:31].
  861. mov eax,[esi+ebp*1].T_Blk.BlkOffset
  862. sar ecx,1 ; CF==1 if interp vertically.
  863. jc InterpVertForTheLeftContrib
  864. shl ecx,25
  865. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  866. jc InterpHorzForTheLeftContrib
  867. IF PITCH-384
  868. **** Magic leaks out if pitch not equal to 384
  869. ENDIF
  870. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to affect mult by 384)
  871. add eax,ebx ; Start accumulating left ref addr in eax.
  872. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  873. mov ebx,PreviousFrameBaseAddress
  874. add eax,ebx ; Continue to accumulate left ref addr in eax.
  875. xor edx,080000000H ; Hi 2 bits of central ref same as this ref if
  876. ; ; central ref also was not interpolated.
  877. add ecx,eax ; Finish accumulating left ref addr in ecx.
  878. cmp ecx,edx ; Is central ref the same?
  879. je LeftPredGottenForOBMC
  880. mov ebx,[ecx+PITCH*0]
  881. mov [edi+PITCH*0+8],ebx
  882. mov ebx,[ecx+PITCH*1]
  883. mov [edi+PITCH*1+8],ebx
  884. mov ebx,[ecx+PITCH*2]
  885. mov [edi+PITCH*2+8],ebx
  886. mov ebx,[ecx+PITCH*3]
  887. mov [edi+PITCH*3+8],ebx
  888. mov ebx,[ecx+PITCH*4]
  889. mov [edi+PITCH*4+8],ebx
  890. mov ebx,[ecx+PITCH*5]
  891. mov [edi+PITCH*5+8],ebx
  892. mov ebx,[ecx+PITCH*6]
  893. mov [edi+PITCH*6+8],ebx
  894. mov ebx,[ecx+PITCH*7]
  895. mov [edi+PITCH*7+8],ebx
  896. jmp LeftPredGottenForOBMC
  897. InterpVertForTheLeftContrib:
  898. shl ecx,25
  899. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  900. jc InterpBothForTheLeftContrib
  901. IF PITCH-384
  902. **** Magic leaks out if pitch not equal to 384
  903. ENDIF
  904. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to affect mult by 384)
  905. add eax,ebx ; Start accumulating left ref addr in eax.
  906. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  907. mov ebx,PreviousFrameBaseAddress
  908. add ebx,eax ; Continue to accumulate left ref addr in eax.
  909. ; ; Hi 2 bits of central ref same as this ref if
  910. ; ; central ref also interpolated vertically.
  911. add ecx,ebx ; Finish accumulating left ref addr in ecx.
  912. mov ebx,PITCH
  913. cmp ecx,edx ; Is central ref the same?
  914. je LeftPredGottenForOBMC
  915. DoInterpHorzForTheLeftContrib:
  916. @@:
  917. mov eax,[ecx+0]
  918. add edi,PITCH
  919. mov edx,[ecx+ebx+0]
  920. add eax,001010101H
  921. add eax,edx
  922. add ecx,PITCH
  923. shr eax,1
  924. ;
  925. and eax,07F7F7F7FH
  926. add ebp,020000000H
  927. mov [edi+ 8-PITCH],eax
  928. jnc @b
  929. sub edi,PITCH*8
  930. jmp LeftPredGottenForOBMC
  931. InterpBothForTheLeftContrib:
  932. IF PITCH-384
  933. **** Magic leaks out if pitch not equal to 384
  934. ENDIF
  935. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to affect mult by 384)
  936. add eax,ebx ; Start accumulating left ref addr in eax.
  937. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  938. mov ebx,PreviousFrameBaseAddress
  939. add eax,ebx ; Continue to accumulate left ref addr in eax.
  940. xor edx,0C0000000H ; Hi 2 bits of central ref same as this ref if
  941. ; ; central ref also interpolated both ways.
  942. add ecx,eax ; Finish accumulating left ref addr in ecx.
  943. cmp ecx,edx ; Is central ref the same?
  944. je LeftPredGottenForOBMC
  945. @@:
  946. mov eax,[ecx+1] ; <P04 P03 P02 P01> prediction pels.
  947. mov esi,001010101H ; Get 001010101H mask.
  948. mov ebx,[ecx] ; <P03 P02 P01 P00>.
  949. add edi,PITCH ; Pre-increment OBMC prediction block pointer.
  950. mov edx,[ecx+PITCH+1] ; <P14 P13 P12 P11>.
  951. add eax,ebx ; <P04+P03 P03+P02 P02+P01 P01+P00>.
  952. mov ebx,[ecx+PITCH] ; <P13 P12 P11 P10>.
  953. and esi,eax ; <(P04+P03)&1 ...>.
  954. shr eax,1 ; <(P04+P03)/2 ...> (dirty).
  955. add ebx,edx ; <P14+P13 P13+P12 P12+P11 P11+P10>.
  956. and eax,07F7F7F7FH ; <(P04+P03)/2 ...> (clean).
  957. add ebx,esi ; <P14+P13+((P04+P03)&1) ...>.
  958. shr ebx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  959. add ecx,PITCH ; Advance reference block pointer.
  960. and ebx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  961. add eax,001010101H ; <(P04+P03)/2+1 ...>.
  962. add ebx,eax ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  963. shr ebx,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  964. mov esi,MBActionCursor ; Speculatively restore esi.
  965. and ebx,07F7F7F7FH ; Interpolated prediction.
  966. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  967. mov [edi+8-PITCH],ebx
  968. jnc @b
  969. sub edi,PITCH*8
  970. jmp LeftPredGottenForOBMC
  971. InterpHorzForTheLeftContrib:
  972. IF PITCH-384
  973. **** Magic leaks out if pitch not equal to 384
  974. ENDIF
  975. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to affect mult by 384)
  976. add eax,ebx ; Start accumulating left ref addr in eax.
  977. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  978. mov ebx,PreviousFrameBaseAddress
  979. add eax,ebx ; Continue to accumulate left ref addr in eax.
  980. xor edx,040000000H ; Hi 2 bits of central ref same as this ref if
  981. ; ; central ref also interpolated horizontally.
  982. add ecx,eax ; Finish accumulating left ref addr in ecx.
  983. mov ebx,1
  984. cmp ecx,edx ; Is central ref the same?
  985. jne DoInterpHorzForTheLeftContrib
  986. LeftPredGottenForOBMC:
  987. ; At this point, the left contribution to OBMC prediction is in its scratch
  988. ; half block. Now do the right contribution: fetch this block's reference
  989. ; using the right neighbor's MVs; pels 4-7 of 8 rows go to [edi+12].
  990. ; esi -- MacroBlockActionStream cursor
  991. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  992. ; edi -- Address at which to put prediction block
  993. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  994. ; 00 -- If central ref was interpolated vertically.
  995. ; 01 -- If central ref was interpolated horizontally.
  996. ; 10 -- If central ref was not interpolated.
  997. ; 11 -- If central ref was interpolated both ways.
  998. ; eax -- Offset to block descriptor for block to right.
  999. mov eax,DistToBlockToRight ; Offset from this block's descr to right neighbor's.
  1000. lea ebx,[esi+ebp] ; This block's position in the MB action stream.
  1001. add ebx,eax ; ebx -> right neighbor's block descriptor.
  1002. mov ecx,-SIZEOF T_MacroBlockActionDescr ; Mask to round down to MB descr start
  1003. and ecx,ebx ; (presumably descrs are size-aligned -- confirm).
  1004. mov ah,IsPlainPFrame
  1005. mov ebx,[ebx].T_Blk.MVs ; Neighbor's MVs: horz in bl, vert in bh.
  1006. mov edx,CentralRefAddrAndInterps ; Reload function of ref addr and interps.
  1007. mov al,[ecx].BlockType ; BlockType of MB containing the neighbor.
  1008. mov cl,bh ; Vert MV to cl.
  1009. and al,ah
  1010. jne RightPredGottenForOBMC ; BlockType & IsPlainPFrame nonzero: skip fetch.
  1011. shl ebx,24 ; Get horz MV in [24:31].
  1012. mov eax,[esi+ebp*1].T_Blk.BlkOffset ; This block's offset within the frame.
  1013. sar ecx,1 ; CF==1 if interp vertically.
  1014. jc InterpVertForTheRightContrib
  1015. shl ecx,25 ; Integer-pel vert MV to top of ecx.
  1016. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1017. jc InterpHorzForTheRightContrib
  1018. IF PITCH-384
  1019. **** Magic leaks out if pitch not equal to 384
  1020. ENDIF
  1021. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1022. add eax,ebx ; Start accumulating right ref addr in eax.
  1023. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1024. mov ebx,PreviousFrameBaseAddress
  1025. add eax,ebx ; Continue to accumulate right ref addr in eax.
  1026. xor edx,080000000H ; Now equals ref addr iff central ref was not interpolated.
  1027. add ecx,eax ; Finish accumulating right ref addr in ecx.
  1028. cmp ecx,edx ; Is central ref the same?
  1029. je RightPredGottenForOBMC ; Yes: skip the fetch.
  1030. mov ebx,[ecx+PITCH*0+4] ; Copy pels 4-7 of rows 0-7 of the right ref
  1031. mov [edi+PITCH*0+12],ebx ; into the right-contribution scratch column.
  1032. mov ebx,[ecx+PITCH*1+4]
  1033. mov [edi+PITCH*1+12],ebx
  1034. mov ebx,[ecx+PITCH*2+4]
  1035. mov [edi+PITCH*2+12],ebx
  1036. mov ebx,[ecx+PITCH*3+4]
  1037. mov [edi+PITCH*3+12],ebx
  1038. mov ebx,[ecx+PITCH*4+4]
  1039. mov [edi+PITCH*4+12],ebx
  1040. mov ebx,[ecx+PITCH*5+4]
  1041. mov [edi+PITCH*5+12],ebx
  1042. mov ebx,[ecx+PITCH*6+4]
  1043. mov [edi+PITCH*6+12],ebx
  1044. mov ebx,[ecx+PITCH*7+4]
  1045. mov [edi+PITCH*7+12],ebx
  1046. jmp RightPredGottenForOBMC
  1047. InterpVertForTheRightContrib:
  1048. shl ecx,25 ; Integer-pel vert MV to top of ecx.
  1049. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1050. jc InterpBothForTheRightContrib
  1051. IF PITCH-384
  1052. **** Magic leaks out if pitch not equal to 384
  1053. ENDIF
  1054. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1055. add eax,ebx ; Start accumulating right ref addr in eax.
  1056. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1057. mov ebx,PreviousFrameBaseAddress
  1058. add ebx,eax ; Continue to accumulate right ref addr (in ebx here).
  1059. add ecx,ebx ; Finish accumulating right ref addr in ecx.
  1060. mov ebx,PITCH ; Second tap is one row down (vertical interp).
  1061. cmp ecx,edx ; Is central ref the same? (No xor: 00 means vertical.)
  1062. je RightPredGottenForOBMC ; Yes: skip the fetch.
  1063. DoInterpHorzForTheRightContrib: ; Shared by horz and vert; ebx = tap distance.
  1064. @@:
  1065. mov eax,[ecx+4] ; Pels 4-7 of this row, first tap.
  1066. add edi,PITCH ; Pre-increment scratch pointer.
  1067. mov edx,[ecx+ebx+4] ; Second tap (ebx is 1 or PITCH).
  1068. add eax,001010101H ; Rounding: +1 in each byte.
  1069. add eax,edx ; Sum of taps plus 1, per byte.
  1070. add ecx,PITCH ; Advance reference pointer.
  1071. shr eax,1 ; Halve each byte (dirty).
  1072. ;
  1073. and eax,07F7F7F7FH ; Clear bit shifted in from the byte above (clean).
  1074. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  1075. mov [edi+12-PITCH],eax ; Store interpolated pels 4-7 to right scratch.
  1076. jnc @b
  1077. sub edi,PITCH*8 ; Restore scratch pointer.
  1078. jmp RightPredGottenForOBMC
  1079. InterpBothForTheRightContrib:
  1080. IF PITCH-384
  1081. **** Magic leaks out if pitch not equal to 384
  1082. ENDIF
  1083. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1084. add eax,ebx ; Start accumulating right ref addr in eax.
  1085. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1086. mov ebx,PreviousFrameBaseAddress
  1087. add eax,ebx ; Continue to accumulate right ref addr in eax.
  1088. xor edx,0C0000000H ; Now equals ref addr iff central ref interpolated both ways.
  1089. add ecx,eax ; Finish accumulating right ref addr in ecx.
  1090. cmp ecx,edx ; Is central ref the same?
  1091. je RightPredGottenForOBMC ; Yes: skip the fetch.
  1092. @@:
  1093. mov eax,[ecx+5] ; <P08 P07 P06 P05> prediction pels.
  1094. mov esi,001010101H ; Get 001010101H mask.
  1095. mov ebx,[ecx+4] ; <P07 P06 P05 P04>.
  1096. add edi,PITCH ; Pre-increment OBMC prediction block pointer.
  1097. mov edx,[ecx+PITCH+5] ; <P18 P17 P16 P15>.
  1098. add eax,ebx ; Horizontal pair sums, upper row.
  1099. mov ebx,[ecx+PITCH+4] ; <P17 P16 P15 P14>.
  1100. and esi,eax ; Low bit of each upper-row sum.
  1101. shr eax,1 ; Upper-row sums halved (dirty).
  1102. add ebx,edx ; Horizontal pair sums, lower row.
  1103. and eax,07F7F7F7FH ; Upper-row sums halved (clean).
  1104. add ebx,esi ; Fold upper-row low bits into lower-row sums.
  1105. shr ebx,1 ; Lower-row halved (dirty).
  1106. add ecx,PITCH ; Advance reference block pointer.
  1107. and ebx,07F7F7F7FH ; Lower-row halved (clean).
  1108. add eax,001010101H ; Rounding: +1 in each byte.
  1109. add ebx,eax ; Upper half + lower half + 1.
  1110. shr ebx,1 ; Halve again (dirty).
  1111. mov esi,MBActionCursor ; Speculatively restore esi.
  1112. and ebx,07F7F7F7FH ; Interpolated prediction.
  1113. add ebp,020000000H ; Iterate 8 times. Quit when carry flag gets set.
  1114. mov [edi+12-PITCH],ebx ; Store interpolated pels 4-7 to right scratch.
  1115. jnc @b
  1116. sub edi,PITCH*8 ; Restore scratch pointer.
  1117. jmp RightPredGottenForOBMC
  1118. InterpHorzForTheRightContrib:
  1119. IF PITCH-384
  1120. **** Magic leaks out if pitch not equal to 384
  1121. ENDIF
  1122. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1123. add eax,ebx ; Start accumulating right ref addr in eax.
  1124. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1125. mov ebx,PreviousFrameBaseAddress
  1126. add eax,ebx ; Continue to accumulate right ref addr in eax.
  1127. xor edx,040000000H ; Now equals ref addr iff central ref interpolated horizontally.
  1128. add ecx,eax ; Finish accumulating right ref addr in ecx.
  1129. mov ebx,1 ; Second tap is one pel to the right (horizontal interp).
  1130. cmp ecx,edx ; Is central ref the same?
  1131. jne DoInterpHorzForTheRightContrib ; No: interpolate. Yes: fall through.
  1132. RightPredGottenForOBMC:
  1133. ; At this point, the left and right contributions to OBMC prediction are in
  1134. ; their scratch half blocks. Now do the contribution for the block above:
  1135. ; rows 0-3 (8 pels each) of this block's ref via the above MVs go to [edi+28].
  1136. ; esi -- MacroBlockActionStream cursor
  1137. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  1138. ; edi -- Address at which to put prediction block
  1139. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  1140. ; 00 -- If central ref was interpolated vertically.
  1141. ; 01 -- If central ref was interpolated horizontally.
  1142. ; 10 -- If central ref was not interpolated.
  1143. ; 11 -- If central ref was interpolated both ways.
  1144. ; eax -- Offset to block descriptor for block above.
  1145. mov eax,DistToBlockAbove ; Offset from this block's descr to above neighbor's.
  1146. lea ebx,[esi+ebp] ; This block's position in the MB action stream.
  1147. add ebx,eax ; ebx -> above neighbor's block descriptor.
  1148. mov ecx,-SIZEOF T_MacroBlockActionDescr ; Mask to round down to MB descr start
  1149. and ecx,ebx ; (presumably descrs are size-aligned -- confirm).
  1150. mov ah,IsPlainPFrame
  1151. mov ebx,[ebx].T_Blk.MVs ; Neighbor's MVs: horz in bl, vert in bh.
  1152. mov edx,CentralRefAddrAndInterps ; Reload function of ref addr and interps.
  1153. mov al,[ecx].BlockType ; BlockType of MB containing the neighbor.
  1154. mov cl,bh ; Vert MV to cl.
  1155. and al,ah
  1156. jne AbovePredGottenForOBMC ; BlockType & IsPlainPFrame nonzero: skip fetch.
  1157. shl ebx,24 ; Get horz MV in [24:31].
  1158. mov eax,[esi+ebp*1].T_Blk.BlkOffset ; This block's offset within the frame.
  1159. sar ecx,1 ; CF==1 if interp vertically.
  1160. jc InterpVertForTheAboveContrib
  1161. shl ecx,25 ; Integer-pel vert MV to top of ecx.
  1162. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1163. jc InterpHorzForTheAboveContrib
  1164. IF PITCH-384
  1165. **** Magic leaks out if pitch not equal to 384
  1166. ENDIF
  1167. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1168. add eax,ebx ; Start accumulating above ref addr in eax.
  1169. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1170. mov ebx,PreviousFrameBaseAddress
  1171. add eax,ebx ; Continue to accumulate above ref addr in eax.
  1172. xor edx,080000000H ; Now equals ref addr iff central ref was not interpolated.
  1173. add ecx,eax ; Finish accumulating above ref addr in ecx.
  1174. cmp ecx,edx ; Is central ref the same?
  1175. je AbovePredGottenForOBMC ; Yes: skip the fetch.
  1176. mov edx,[ecx+PITCH*0+0] ; Copy all 8 pels of rows 0-3 of the above ref
  1177. mov ebx,[ecx+PITCH*0+4] ; into the above/below scratch at +28 and +32.
  1178. mov [edi+PITCH*0+28],edx
  1179. mov [edi+PITCH*0+32],ebx
  1180. mov edx,[ecx+PITCH*1+0]
  1181. mov ebx,[ecx+PITCH*1+4]
  1182. mov [edi+PITCH*1+32],ebx
  1183. mov [edi+PITCH*1+28],edx
  1184. mov edx,[ecx+PITCH*2+0]
  1185. mov ebx,[ecx+PITCH*2+4]
  1186. mov [edi+PITCH*2+28],edx
  1187. mov [edi+PITCH*2+32],ebx
  1188. mov edx,[ecx+PITCH*3+0]
  1189. mov ebx,[ecx+PITCH*3+4]
  1190. mov [edi+PITCH*3+32],ebx
  1191. mov [edi+PITCH*3+28],edx
  1192. jmp AbovePredGottenForOBMC
  1193. InterpVertForTheAboveContrib:
  1194. shl ecx,25 ; Integer-pel vert MV to top of ecx.
  1195. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1196. jc InterpBothForTheAboveContrib
  1197. IF PITCH-384
  1198. **** Magic leaks out if pitch not equal to 384
  1199. ENDIF
  1200. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1201. add eax,ebx ; Start accumulating above ref addr in eax.
  1202. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1203. mov ebx,PreviousFrameBaseAddress
  1204. add ebx,eax ; Continue to accumulate above ref addr (in ebx here).
  1205. add ecx,ebx ; Finish accumulating above ref addr in ecx.
  1206. mov ebx,PITCH ; Second tap is one row down (vertical interp).
  1207. cmp ecx,edx ; Is central ref the same? (No xor: 00 means vertical.)
  1208. je AbovePredGottenForOBMC ; Yes: skip the fetch.
  1209. DoInterpHorzForTheAboveContrib: ; Shared by horz and vert; ebx = tap distance.
  1210. @@:
  1211. mov eax,[ecx+0] ; Pels 0-3 of this row, first tap.
  1212. mov edx,[ecx+4] ; Pels 4-7 of this row, first tap.
  1213. add eax,[ecx+ebx+0] ; Add second tap (ebx is 1 or PITCH).
  1214. add edx,[ecx+ebx+4]
  1215. add eax,001010101H ; Rounding: +1 in each byte.
  1216. add edx,001010101H
  1217. shr eax,1 ; Halve each byte (dirty).
  1218. and edx,0FEFEFEFEH ; Pre-clear low bits so the shift below is clean.
  1219. shr edx,1 ; Halve each byte (clean).
  1220. and eax,07F7F7F7FH ; Clear bit shifted in from the byte above (clean).
  1221. mov [edi+28],eax ; Store pels 0-3 to above scratch.
  1222. mov [edi+32],edx ; Store pels 4-7 to above scratch.
  1223. add ecx,PITCH ; Advance reference pointer.
  1224. add edi,PITCH ; Advance scratch pointer.
  1225. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1226. jnc @b
  1227. sub edi,PITCH*4 ; Restore scratch pointer.
  1228. jmp AbovePredGottenForOBMC
  1229. InterpBothForTheAboveContrib:
  1230. IF PITCH-384
  1231. **** Magic leaks out if pitch not equal to 384
  1232. ENDIF
  1233. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1234. add eax,ebx ; Start accumulating above ref addr in eax.
  1235. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1236. mov ebx,PreviousFrameBaseAddress
  1237. add eax,ebx ; Continue to accumulate above ref addr in eax.
  1238. xor edx,0C0000000H ; Now equals ref addr iff central ref interpolated both ways.
  1239. add ecx,eax ; Finish accumulating above ref addr in ecx.
  1240. cmp ecx,edx ; Is central ref the same?
  1241. je AbovePredGottenForOBMC ; Yes: skip the fetch.
  1242. @@:
  1243. mov eax,[ecx+1] ; Four pels one to the right, upper row.
  1244. mov esi,001010101H ; Get 001010101H mask.
  1245. mov ebx,[ecx] ; Four pels, upper row.
  1246. add edi,4 ; Pre-increment scratch pointer by one dword.
  1247. mov edx,[ecx+PITCH+1] ; Four pels one to the right, lower row.
  1248. add eax,ebx ; Horizontal pair sums, upper row.
  1249. mov ebx,[ecx+PITCH] ; Four pels, lower row.
  1250. and esi,eax ; Low bit of each upper-row sum.
  1251. shr eax,1 ; Upper-row sums halved (dirty).
  1252. add ebx,edx ; Horizontal pair sums, lower row.
  1253. and eax,07F7F7F7FH ; Upper-row sums halved (clean).
  1254. add ebx,esi ; Fold upper-row low bits into lower-row sums.
  1255. shr ebx,1 ; Lower-row halved (dirty).
  1256. add ecx,4 ; Advance reference pointer by one dword.
  1257. and ebx,07F7F7F7FH ; Lower-row halved (clean).
  1258. add eax,001010101H ; Rounding: +1 in each byte.
  1259. add ebx,eax ; Upper half + lower half + 1.
  1260. mov eax,4 ; Bit tested to tell 1st dword of row from 2nd.
  1261. shr ebx,1 ; Halve again (dirty).
  1262. mov esi,MBActionCursor ; Speculatively restore esi.
  1263. and ebx,07F7F7F7FH ; Interpolated prediction.
  1264. and eax,edi ; ZF==0 after 1st dword (presumes edi 8-aligned at entry -- confirm).
  1265. mov [edi+28-4],ebx ; Store dword to above scratch (+28, then +32).
  1266. jne @b ; Do second dword of this row.
  1267. add edi,PITCH-8 ; Scratch pointer to start of next row.
  1268. add ecx,PITCH-8 ; Reference pointer to start of next row.
  1269. add ebp,040000000H ; Iterate 4 rows. Quit when carry flag gets set.
  1270. jnc @b
  1271. sub edi,PITCH*4 ; Restore scratch pointer.
  1272. jmp AbovePredGottenForOBMC
  1273. InterpHorzForTheAboveContrib:
  1274. IF PITCH-384
  1275. **** Magic leaks out if pitch not equal to 384
  1276. ENDIF
  1277. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1278. add eax,ebx ; Start accumulating above ref addr in eax.
  1279. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1280. mov ebx,PreviousFrameBaseAddress
  1281. add eax,ebx ; Continue to accumulate above ref addr in eax.
  1282. xor edx,040000000H ; Now equals ref addr iff central ref interpolated horizontally.
  1283. add ecx,eax ; Finish accumulating above ref addr in ecx.
  1284. mov ebx,1 ; Second tap is one pel to the right (horizontal interp).
  1285. cmp ecx,edx ; Is central ref the same?
  1286. jne DoInterpHorzForTheAboveContrib ; No: interpolate. Yes: fall through.
  1287. AbovePredGottenForOBMC:
  1288. ; At this point, the left, right, and above contributions to OBMC prediction
  1289. ; are in their scratch half blocks. Now do contribution for the block below:
  1290. ; rows 4-7 (8 pels each) of this block's ref via the below MVs go to [edi+28].
  1291. ; esi -- MacroBlockActionStream cursor
  1292. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  1293. ; edi -- Address at which to put prediction block
  1294. ; edx -- Address of central reference. High 2 bits xor'ed as follows:
  1295. ; 00 -- If central ref was interpolated vertically.
  1296. ; 01 -- If central ref was interpolated horizontally.
  1297. ; 10 -- If central ref was not interpolated.
  1298. ; 11 -- If central ref was interpolated both ways.
  1299. ; eax -- Offset to block descriptor for block below.
  1300. mov eax,DistToBlockBelow ; Offset from this block's descr to below neighbor's.
  1301. lea ebx,[esi+ebp] ; This block's position in the MB action stream.
  1302. add ebx,eax ; ebx -> below neighbor's block descriptor.
  1303. mov ecx,-SIZEOF T_MacroBlockActionDescr ; Mask to round down to MB descr start
  1304. and ecx,ebx ; (presumably descrs are size-aligned -- confirm).
  1305. mov ah,IsPlainPFrame
  1306. mov ebx,[ebx].T_Blk.MVs ; Neighbor's MVs: horz in bl, vert in bh.
  1307. mov edx,CentralRefAddrAndInterps ; Reload function of ref addr and interps.
  1308. mov al,[ecx].BlockType ; BlockType of MB containing the neighbor.
  1309. mov cl,bh ; Vert MV to cl.
  1310. and al,ah
  1311. jne BelowPredGottenForOBMC ; BlockType & IsPlainPFrame nonzero: skip fetch.
  1312. shl ebx,24 ; Get horz MV in [24:31].
  1313. mov eax,[esi+ebp*1].T_Blk.BlkOffset ; This block's offset within the frame.
  1314. sar ecx,1 ; CF==1 if interp vertically.
  1315. jc InterpVertForTheBelowContrib
  1316. shl ecx,25 ; Integer-pel vert MV to top of ecx.
  1317. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1318. jc InterpHorzForTheBelowContrib
  1319. IF PITCH-384
  1320. **** Magic leaks out if pitch not equal to 384
  1321. ENDIF
  1322. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1323. add eax,ebx ; Start accumulating below ref addr in eax.
  1324. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1325. mov ebx,PreviousFrameBaseAddress
  1326. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1327. xor edx,080000000H ; Now equals ref addr iff central ref was not interpolated.
  1328. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1329. cmp ecx,edx ; Is central ref the same?
  1330. je BelowPredGottenForOBMC ; Yes: skip the fetch.
  1331. mov edx,[ecx+PITCH*4+0] ; Copy all 8 pels of rows 4-7 of the below ref
  1332. mov ebx,[ecx+PITCH*4+4] ; into the above/below scratch at +28 and +32.
  1333. mov [edi+PITCH*4+28],edx
  1334. mov [edi+PITCH*4+32],ebx
  1335. mov edx,[ecx+PITCH*5+0]
  1336. mov ebx,[ecx+PITCH*5+4]
  1337. mov [edi+PITCH*5+32],ebx
  1338. mov [edi+PITCH*5+28],edx
  1339. mov edx,[ecx+PITCH*6+0]
  1340. mov ebx,[ecx+PITCH*6+4]
  1341. mov [edi+PITCH*6+28],edx
  1342. mov [edi+PITCH*6+32],ebx
  1343. mov edx,[ecx+PITCH*7+0]
  1344. mov ebx,[ecx+PITCH*7+4]
  1345. mov [edi+PITCH*7+32],ebx
  1346. mov [edi+PITCH*7+28],edx
  1347. jmp BelowPredGottenForOBMC
  1348. InterpVertForTheBelowContrib:
  1349. shl ecx,25 ; Integer-pel vert MV to top of ecx.
  1350. sar ebx,25 ; Sign extend horz MV. CF==1 if interp horizontally.
  1351. jc InterpBothForTheBelowContrib
  1352. IF PITCH-384
  1353. **** Magic leaks out if pitch not equal to 384
  1354. ENDIF
  1355. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1356. add eax,ebx ; Start accumulating below ref addr in eax.
  1357. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1358. mov ebx,PreviousFrameBaseAddress
  1359. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1360. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1361. mov ebx,PITCH ; Second tap is one row down (vertical interp).
  1362. cmp ecx,edx ; Is central ref the same? (No xor: 00 means vertical.)
  1363. je BelowPredGottenForOBMC ; Yes: skip the fetch.
  1364. DoInterpHorzForTheBelowContrib: ; Shared by horz and vert; ebx = tap distance.
  1365. @@:
  1366. mov eax,[ecx+PITCH*4+0] ; Pels 0-3 of row 4+i, first tap.
  1367. mov edx,[ecx+PITCH*4+4] ; Pels 4-7 of row 4+i, first tap.
  1368. add eax,[ecx+ebx+PITCH*4+0] ; Add second tap (ebx is 1 or PITCH).
  1369. add edx,[ecx+ebx+PITCH*4+4]
  1370. add eax,001010101H ; Rounding: +1 in each byte.
  1371. add edx,001010101H
  1372. shr eax,1 ; Halve each byte (dirty).
  1373. and edx,0FEFEFEFEH ; Pre-clear low bits so the shift below is clean.
  1374. shr edx,1 ; Halve each byte (clean).
  1375. and eax,07F7F7F7FH ; Clear bit shifted in from the byte above (clean).
  1376. mov [edi+PITCH*4+28],eax ; Store pels 0-3 to below scratch.
  1377. mov [edi+PITCH*4+32],edx ; Store pels 4-7 to below scratch.
  1378. add ecx,PITCH ; Advance reference pointer.
  1379. add edi,PITCH ; Advance scratch pointer.
  1380. add ebp,040000000H ; Iterate 4 times. Quit when carry flag gets set.
  1381. jnc @b
  1382. sub edi,PITCH*4 ; Restore scratch pointer.
  1383. jmp BelowPredGottenForOBMC
  1384. InterpBothForTheBelowContrib:
  1385. IF PITCH-384
  1386. **** Magic leaks out if pitch not equal to 384
  1387. ENDIF
  1388. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1389. add eax,ebx ; Start accumulating below ref addr in eax.
  1390. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1391. mov ebx,PreviousFrameBaseAddress
  1392. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1393. xor edx,0C0000000H ; Now equals ref addr iff central ref interpolated both ways.
  1394. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1395. cmp ecx,edx ; Is central ref the same?
  1396. je BelowPredGottenForOBMC ; Yes: skip the fetch.
  1397. @@:
  1398. mov eax,[ecx+PITCH*4+1] ; Four pels one to the right, upper row.
  1399. mov esi,001010101H ; Get 001010101H mask.
  1400. mov ebx,[ecx+PITCH*4] ; Four pels, upper row.
  1401. add edi,4 ; Pre-increment scratch pointer by one dword.
  1402. mov edx,[ecx+PITCH*5+1] ; Four pels one to the right, lower row.
  1403. add eax,ebx ; Horizontal pair sums, upper row.
  1404. mov ebx,[ecx+PITCH*5] ; Four pels, lower row.
  1405. and esi,eax ; Low bit of each upper-row sum.
  1406. shr eax,1 ; Upper-row sums halved (dirty).
  1407. add ebx,edx ; Horizontal pair sums, lower row.
  1408. and eax,07F7F7F7FH ; Upper-row sums halved (clean).
  1409. add ebx,esi ; Fold upper-row low bits into lower-row sums.
  1410. shr ebx,1 ; Lower-row halved (dirty).
  1411. add ecx,4 ; Advance reference pointer by one dword.
  1412. and ebx,07F7F7F7FH ; Lower-row halved (clean).
  1413. add eax,001010101H ; Rounding: +1 in each byte.
  1414. add ebx,eax ; Upper half + lower half + 1.
  1415. mov eax,4 ; Bit tested to tell 1st dword of row from 2nd.
  1416. shr ebx,1 ; Halve again (dirty).
  1417. mov esi,MBActionCursor ; Speculatively restore esi.
  1418. and ebx,07F7F7F7FH ; Interpolated prediction.
  1419. and eax,edi ; ZF==0 after 1st dword (presumes edi 8-aligned at entry -- confirm).
  1420. mov [edi+PITCH*4+28-4],ebx ; Store dword to below scratch (+28, then +32).
  1421. jne @b ; Do second dword of this row.
  1422. add edi,PITCH-8 ; Scratch pointer to start of next row.
  1423. add ecx,PITCH-8 ; Reference pointer to start of next row.
  1424. add ebp,040000000H ; Iterate 4 rows. Quit when carry flag gets set.
  1425. jnc @b
  1426. sub edi,PITCH*4 ; Restore scratch pointer.
  1427. jmp BelowPredGottenForOBMC
  1428. InterpHorzForTheBelowContrib:
  1429. IF PITCH-384
  1430. **** Magic leaks out if pitch not equal to 384
  1431. ENDIF
  1432. lea ecx,[ecx+ecx*2] ; Multiply vert by 3 (to effect mult by 384)
  1433. add eax,ebx ; Start accumulating below ref addr in eax.
  1434. sar ecx,18 ; Sign extend vert MV. It's now linearized.
  1435. mov ebx,PreviousFrameBaseAddress
  1436. add eax,ebx ; Continue to accumulate below ref addr in eax.
  1437. xor edx,040000000H ; Now equals ref addr iff central ref interpolated horizontally.
  1438. add ecx,eax ; Finish accumulating below ref addr in ecx.
  1439. mov ebx,1 ; Second tap is one pel to the right (horizontal interp).
  1440. cmp ecx,edx ; Is central ref the same?
  1441. jne DoInterpHorzForTheBelowContrib ; No: interpolate. Yes: fall through.
  1442. BelowPredGottenForOBMC:
  1443. ; At this point all the contributions to OBMC prediction are in their scratch
  1444. ; half blocks. Now combine them to get the OBMC prediction. Three loops:
  1445. ; lines 0 and 7, then lines 1 and 6, then lines 2 thru 5.
  1446. ; ebp -- (Block_number - 1) * SIZEOF T_Blk
  1447. ; edi -- Address at which to put prediction block
  1448. @@:
  1449. mov eax,[edi+4] ; <C07 C06 C05 C04> or <C77 C76 C75 C74>
  1450. mov ebx,[edi+12] ; <R07 R06 R05 R04> or <R77 R76 R75 R74>
  1451. mov ecx,[edi+32] ; <A07 A06 A05 A04> or <B77 B76 B75 B74>
  1452. mov esi,[edi] ; <C03 C02 C01 C00> or <C73 C72 C71 C70>
  1453. lea edx,[eax+ebx] ; <junk C6+R6 C5+R5 C4+R4>
  1454. and ebx,0FF000000H ; <R7 __ __ __>
  1455. shr edx,1 ; <junk (C6+R6)/2 (C5+R5)/2 (C4+R4)/2> dirty
  1456. add ecx,ebx ; <A7+R7 A6 A5 A4>
  1457. and edx,0007F7F7FH ; <__ (C6+R6)/2 (C5+R5)/2 (C4+R4)/2> clean
  1458. mov ebx,[edi+8] ; <L03 L02 L01 L00> or <L73 L72 L71 L70>
  1459. add edx,ecx ; <(2A7+2R7)/2 (2A6+C5+R5)/2 ...>
  1460. add edi,PITCH*7 ; Move from line 0 to 7 (or 7 to 14)
  1461. shr edx,1 ; <(2A7+2R7)/4 (2A6+C5+R5)/4 ...> dirty
  1462. add ebx,esi ; <C3+L3 C2+L2 C1+L1 junk>
  1463. shr ebx,1 ; <(C3+L3)/2 (C2+L2)/2 (C1+L1)/2 junk> dirty
  1464. and edx,07F7F7F7FH ; <(2A7+2R7)/4 (2A6+C5+R5)/4 ...> clean
  1465. and ebx,07F7F7F7FH ; <(C3+L3)/2 (C2+L2)/2 (C1+L1)/2 junk> clean
  1466. mov ecx,[edi+28-PITCH*7] ; <A03 A02 A01 A00> or <B73 B72 B71 B70>
  1467. lea eax,[eax+edx+001010101H]; <(2A7+4C7+2R7+4)/4 (2A6+5C5+R5+4)/4 ...>
  1468. mov bl,[edi+8-PITCH*7] ; <(C3+L3)/2 (C2+L2)/2 (C1+L1)/2 L0>
  1469. shr eax,1 ; <(2A7+4C7+2R7+4)/8 (2A6+5C5+R5+4)/8 ...> dirty
  1470. add ebx,ecx ; <... (2A1+C1+L1)/2 (2A0+2L0)/2>
  1471. shr ebx,1 ; <... (2A1+C1+L1)/4 (2A0+2L0)/4> dirty
  1472. and eax,07F7F7F7FH ; <(2A7+4C7+2R7+4)/8 (2A6+5C5+R5+4)/8 ...> clean
  1473. and ebx,07F7F7F7FH ; <... (2A1+C1+L1)/4 (2A0+2L0)/4> clean
  1474. add esi,001010101H ; <C3+1 C2+1 C1+1 C0+1>
  1475. add ebx,esi ; <... (2A1+5C1+L1+4)/4 (2A0+4C0+2L0+4)/4>
  1476. mov [edi+4-PITCH*7],eax ; Store OBMC pred for pels 4-7 of line 0 or 7.
  1477. shr ebx,1 ; <... (2A1+5C1+L1+4)/8 (2A0+4C0+2L0+4)/8> dirty
  1478. lea esi,[edi-PITCH*13] ; Speculatively advance to line 1.
  1479. and ebx,07F7F7F7FH ; <... (2A1+5C1+L1+4)/8 (2A0+4C0+2L0+4)/8> clean
  1480. add ebp,080000000H ; Iterate twice (lines 0 and 7). Quit on carry.
  1481. mov [edi-PITCH*7],ebx ; Store OBMC pred for pels 0-3 of line 0 or 7.
  1482. jnc @b
  1483. @@:
  1484. mov edx,[esi+28] ; <A13 A12 A11 A10> or <B63 B62 B61 B60>
  1485. mov eax,[esi+8] ; <L13 L12 L11 L10> or <L63 L62 L61 L60>
  1486. mov ecx,[esi+32] ; <A17 A16 A15 A14> or <B67 B66 B65 B64>
  1487. mov ebx,[esi+12] ; <R17 R16 R15 R14> or <R67 R66 R65 R64>
  1488. mov edi,[esi] ; <C13 C12 C11 C10> or <C63 C62 C61 C60>
  1489. add esi,PITCH*5 ; Move from line 1 to 6 (or 6 to 11)
  1490. xchg dx,ax ; edx: <A3 A2 L1 L0> eax: <L3 L2 A1 A0>
  1491. xchg cx,bx ; ecx: <A7 A6 R5 R4> ebx: <R7 R6 A5 A4>
  1492. add eax,edi ; <C3+L3 C2+L2 C1+A1 C0+A0>
  1493. mov edi,[esi+4-PITCH*5] ; <C17 C16 C15 C14> or <C67 C66 C65 C64>
  1494. shr eax,1 ; <(C3+L3)/2 (C2+L2)/2 (C1+A1)/2 (C0+A0)/2>dirty
  1495. add ecx,edi ; <C7+A7 C6+A6 C5+R5 C4+R4>
  1496. shr ecx,1 ; <(C7+A7)/2 (C6+A6)/2 (C5+R5)/2 (C4+R4)/2>dirty
  1497. and eax,07F7F7F7FH ; <(C3+L3)/2 (C2+L2)/2 (C1+A1)/2 (C0+A0)/2>clean
  1498. add eax,edx ; <(C3+L3+2A3)/2 ... (C1+2L1+A1)/2 ...>
  1499. and ecx,07F7F7F7FH ; <(C7+A7)/2 (C6+A6)/2 (C5+R5)/2 (C4+R4)/2>clean
  1500. shr eax,1 ; <(C3+L3+2A3)/4 ... (C1+2L1+A1)/4 ...> dirty
  1501. add ecx,ebx ; <(C7+2R7+A7)/2 ... (C5+R5+2A5)/2 ...>
  1502. mov ebx,[esi-PITCH*5] ; <C13 C12 C11 C10> or <C63 C62 C61 C60>
  1503. and eax,07F7F7F7FH ; <(C3+L3+2A3)/4 ... (C1+2L1+A1)/4 ...> clean
  1504. shr ecx,1 ; <(C7+2R7+A7)/4 ... (C5+R5+2A5)/4 ...> dirty
  1505. add edi,001010101H ; <C7+1 C6+1 C5+1 C4+1>
  1506. and ecx,07F7F7F7FH ; <(C7+2R7+A7)/4 ... (C5+R5+2A5)/4 ...> clean
  1507. lea eax,[eax+ebx+001010101H]; <(5C3+L3+2A3+4)/4 ... (5C1+2L1+A1)/4 ...>
  1508. shr eax,1 ; <(5C3+L3+2A3+4)/8 ... (5C1+2L1+A1)/8 ...>dirty
  1509. add ecx,edi ; <(5C7+2R7+A7+4)/4 ... (5C5+R5+2A5)/4 ...>
  1510. shr ecx,1 ; <(5C7+2R7+A7+4)/8 ... (5C5+R5+2A5)/8 ...>dirty
  1511. and eax,07F7F7F7FH ; <(5C3+L3+2A3+4)/8 ... (5C1+2L1+A1)/8 ...>clean
  1512. and ecx,07F7F7F7FH ; <(5C7+2R7+A7+4)/8 ... (5C5+R5+2A5)/8 ...>clean
  1513. mov [esi-PITCH*5],eax ; Store OBMC pred for pels 0-3 of line 1 or 6.
  1514. mov [esi+4-PITCH*5],ecx ; Store OBMC pred for pels 4-7 of line 1 or 6.
  1515. lea edi,[esi-PITCH*9] ; Speculatively advance to line 2.
  1516. add ebp,080000000H ; Iterate twice (lines 1 and 6). Quit on carry.
  1517. jnc @b
  1518. @@:
  1519. mov eax,[edi+4] ; <C27 C26 C25 C24> ... <C57 C56 C55 C54>
  1520. mov ebx,[edi+12] ; <R27 R26 R25 R24> ... <R57 R56 R55 R54>
  1521. add bl,al ; <R7 R6 R5 C4+R4>
  1522. mov ecx,[edi] ; <C23 C22 C21 C20> ... <C53 C52 C51 C50>
  1523. shr bl,1 ; <R7 R6 R5 (C4+R4)/2>
  1524. mov edx,[edi+8] ; <L23 L22 L21 L20> ... <L53 L52 L51 L50>
  1525. add bh,ah ; <R7 R6 C5+R5 (C4+R4)/2>
  1526. add edx,ecx ; <C3+L3 C2+L2 junk junk>
  1527. shr bh,1 ; <2R7/2 2R6/2 (C5+R5)/2 (C4+R4)/2>
  1528. mov esi,[edi+32] ; <A27 A26 A25 A24> ... <B57 B56 B55 B54>
  1529. shr edx,1 ; <(C3+L3)/2 (C2+L2)/2 junk junk> dirty
  1530. add esi,eax ; <C7+A7 C6+A6 C5+A5 C4+A4>
  1531. shr esi,1 ; <(C7+A7)/2 (C6+A6)/2 (C5+A5)/2 (C4+A4)/2>dirty
  1532. and edx,07F7F7F7FH ; <(C3+L3)/2 (C2+L2)/2 junk junk> clean
  1533. and esi,07F7F7F7FH ; <(C7+A7)/2 (C6+A6)/2 (C5+A5)/2 (C4+A4)/2>clean
  1534. mov dl,[edi+8] ; <(C3+L3)/2 (C2+L2)/2 junk 2L0/2>
  1535. add esi,ebx ; <(C7+2R7+A7)/2 ... (2C5+R5+A5)/2 ...>
  1536. mov ebx,[edi+28] ; <A23 A22 A21 A20> ... <B53 B52 B51 B50>
  1537. shr esi,1 ; <(C7+2R7+A7)/4 ... (2C5+R5+A5)/4 ...> dirty
  1538. add ebx,ecx ; <C3+A3 C2+A2 C1+A1 C0+A0>
  1539. shr ebx,1 ; <(C3+A3)/2 (C2+A2)/2 (C1+A1)/2 (C0+A0)/2>dirty
  1540. and esi,07F7F7F7FH ; <(C7+2R7+A7)/4 ... (2C5+R5+A5)/4 ...> clean
  1541. and ebx,07F7F7F7FH ; <(C3+A3)/2 (C2+A2)/2 (C1+A1)/2 (C0+A0)/2>clean
  1542. mov dh,[edi+9] ; <(C3+L3)/2 (C2+L2)/2 2L1/2 2L0/2>
  1543. add ebx,edx ; <(2C3+L3+A3)/2 ... (C1+2L1+A1)/2 ...>
  1544. lea eax,[eax+esi+001010101H]; <(5C7+2R7+A7+4)/4 ... (6C5+R5+A5+4)/4 ...>
  1545. shr ebx,1 ; <(2C3+L3+A3)/4 ... (C1+2L1+A1)/4 ...> dirty
  1546. add ecx,001010101H ; <C3+1 C2+1 C1+1 C0+1>
  1547. shr eax,1 ; <(5C7+2R7+A7+4)/8 ... (6C5+R5+A5+4)/8...>dirty
  1548. and ebx,07F7F7F7FH ; <(2C3+L3+A3)/4 ... (C1+2L1+A1)/4 ...> clean
  1549. add ebx,ecx ; <(6C3+L3+A3+4)/4 ... (5C1+2L1+A1+4)/4 ...>
  1550. and eax,07F7F7F7FH ; <(5C7+2R7+A7+4)/8 ... (6C5+R5+A5+4)/8...>clean
  1551. shr ebx,1 ; <(6C3+L3+A3+4)/8 ... (5C1+2L1+A1+4)/8...>dirty
  1552. mov [edi+4],eax ; Store OBMC pred for pels 4-7 of line 2 thru 5.
  1553. and ebx,07F7F7F7FH ; <(6C3+L3+A3+4)/8 ... (5C1+2L1+A1+4)/8...>clean
  1554. mov [edi],ebx ; Store OBMC pred for pels 0-3 of line 2 thru 5.
  1555. add edi,PITCH ; Advance to next line.
  1556. add ebp,040000000H ; Iterate 4 times (lines 2 thru 5). Quit on carry.
  1557. jnc @b
  1558. mov esi,MBActionCursor ; Restore MB action stream cursor (esi was scratch).
  1559. cmp ebp,T_MacroBlockActionDescr.BlkY2 ; Dispatch on which luma block was just done.
  1560. jle Block1or2DescrBuilt
  1561. mov al,PB [esi].CodedBlocks ; Reload coded block pattern.
  1562. mov edi,CoeffStream ; Restore block descriptor ptr.
  1563. cmp ebp,T_MacroBlockActionDescr.BlkY3
  1564. je Block3DescrBuilt ; Otherwise fall through: block 4 was just done.
  1565. Block4DescrBuilt: ; Luma done; record U and V blocks, then go to next MB.
  1566. shr al,5 ; Check if block 5 (U) empty.
  1567. lea edx,[esi].BlkU+4 ; Addr of block addr (plus 4).
  1568. sbb ebp,ebp ; -1 iff block not empty.
  1569. mov [edi].BlockAddr,edx ; Store address of block address.
  1570. shr al,1 ; Check if block 6 (V) empty.
  1571. lea edx,[esi].BlkV+4 ; Addr of block addr (plus 4).
  1572. sbb ebx,ebx ; -1 iff block not empty.
  1573. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1574. and ebx,T_CoeffBlk ; 0 iff block empty, else inc.
  1575. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next macroblock descriptor.
  1576. mov [edi+ebp*1].BlockAddr,edx ; Store V addr in next slot (same slot if U empty).
  1577. add edi,ebp ; Inc block descr ptr if blk non-empty.
  1578. add edi,ebx ; Inc block descr ptr if blk non-empty.
  1579. xor ebp,ebp ; Clear ebp for the next macroblock.
  1580. and al,1 ; Are we at end-of-stream? (Original bit 6, after 6 shifts.)
  1581. je NextMacroBlock_OBMC
  1582. sub edi,SIZEOF T_CoeffBlk ; Back up to the last slot actually filled.
  1583. jmp BlockActionStreamBuilt
  1584. ;; partial end of section only defined when H261 not defined.
  1585. ENDIF
  1586. BuildBlockActionDescr MACRO BlockNumber,AddrOffset ; Record one block in the block action stream.
  1587. shr al,1 ; Check if block empty.
  1588. lea edi,[edi+ebp] ; Adjust BlockActionDescr cursor by previous block's increment.
  1589. sbb ebp,ebp ; -1 iff block not empty.
  1590. lea edx,[esi].Blk[BlockNumber*SIZEOF T_Blk]+AddrOffset ; Addr of block addr; callers pass AddrOffset 4 for inter, 0 for intra.
  1591. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1592. mov [edi].BlockAddr,edx ; Store address of block address (overwritten by next block if this one is empty).
  1593. ENDM
  1594. IFNDEF H261
  1595. ;; more code only used when H261 not defined
  1596. MBIsIntraCoded_OBMC: ; Record all 6 blocks of an intra MB (OBMC first pass).
  1597. shr al,1 ; Same as BuildBlockActionDescr macro, except don't inc edi.
  1598. sbb ebp,ebp ; -1 iff block 1 not empty.
  1599. lea edx,[esi].BlkY1 ; Addr of block addr (no offset added: intra).
  1600. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1601. mov [edi].BlockAddr,edx ; Store address of block address.
  1602. BuildBlockActionDescr 1,0 ; If blk 2 non-empty, record BAD to do as intra.
  1603. BuildBlockActionDescr 2,0 ; blk 3
  1604. BuildBlockActionDescr 3,0 ; blk 4
  1605. BuildBlockActionDescr 4,0 ; blk 5
  1606. BuildBlockActionDescr 5,0 ; blk 6
  1607. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1608. add edi,ebp ; Apply increment for blk 6 now (not deferred to next MB).
  1609. test al,1 ; Are we at end-of-stream?
  1610. je NextMacroBlock_OBMC
  1611. sub edi,SIZEOF T_CoeffBlk ; Back up to the last slot actually filled.
  1612. jmp BlockActionStreamBuilt
  1613. ;; end of section only defined when H261 not defined.
  1614. ENDIF
  1615. ;===============================================================================
  1616. ;===============================================================================
  1617. ; First pass builds block action stream from macroblock action stream.
  1618. ;===============================================================================
  1619. ;===============================================================================
  1620. ; esi -- MacroBlockActionStream cursor
  1621. ; edi -- BlockActionStream cursor
  1622. ; ebp -- Increment for BlockActionStream cursor
  1623. ; edx -- Address of a block to do
  1624. ; al -- Coded block pattern for I or P block
  1625. ; bl -- BlockType
  1626. NextMacroBlock: ; First-pass loop: one iteration per macroblock descriptor.
  1627. mov bl,PB [esi].BlockType
  1628. mov al,PB [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  1629. ; Bit 4 set for non-empty U blk.
  1630. ; Bit 5 set for non-empty V blk.
  1631. ; Bit 6 clear except at stream end.
  1632. ; Bit 7 clear. Unused.
  1633. and bl,IsINTRA ; Nonzero iff macroblock is intra coded.
  1634. jne MBIsIntraCoded
  1635. BuildBlockActionDescr 0,4 ; If blk 1 non-empty, record BAD to do as inter.
  1636. BuildBlockActionDescr 1,4 ; blk 2
  1637. BuildBlockActionDescr 2,4 ; blk 3
  1638. BuildBlockActionDescr 3,4 ; blk 4
  1639. BuildBlockActionDescr 4,4 ; blk 5
  1640. BuildBlockActionDescr 5,4 ; blk 6
  1641. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1642. and al,1 ; Are we at end-of-stream? (Original bit 6, after 6 shifts.)
  1643. je NextMacroBlock ; No: ebp carries blk 6's increment into the next MB.
  1644. add edi,ebp ; Apply increment for the last block,
  1645. sub edi,SIZEOF T_CoeffBlk ; then back up to the last slot actually filled.
  1646. jmp BlockActionStreamBuilt
  1647. MBIsIntraCoded: ; Record all 6 blocks of an intra MB (address offset 0).
  1648. BuildBlockActionDescr 0,0 ; If blk 1 non-empty, record BAD to do as intra.
  1649. BuildBlockActionDescr 1,0 ; blk 2
  1650. BuildBlockActionDescr 2,0 ; blk 3
  1651. BuildBlockActionDescr 3,0 ; blk 4
  1652. BuildBlockActionDescr 4,0 ; blk 5
  1653. BuildBlockActionDescr 5,0 ; blk 6
  1654. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1655. and al,1 ; Are we at end-of-stream? (Original bit 6, after 6 shifts.)
  1656. je NextMacroBlock ; No: ebp carries blk 6's increment into the next MB.
  1657. add edi,ebp ; Apply increment for the last block,
  1658. sub edi,SIZEOF T_CoeffBlk ; then back up to the last slot actually filled.
  1659. jmp BlockActionStreamBuilt
  1660. NextBMacroBlock: ; First-pass loop for B-frame blocks: one iteration per macroblock.
  1661. ; esi -- MacroBlockActionStream cursor
  1662. ; edi -- BlockActionStream cursor
  1663. ; ebp -- Increment for BlockActionStream cursor
  1664. ; edx -- Address of a block to do
  1665. ; cl -- Used to compute defined columns mask case.
  1666. ; bh -- Coded block pattern for B block
  1667. ; bl -- Coded block pattern for I or P block
  1668. ; al -- Used to compute defined rows mask.
  1669. BuildBBlockActionDescr MACRO BlkNum,LinesDefFutureFrame,ColsDefFutureFrame ; Record one B block.
  1670. shr bh,1 ; Check if block empty.
  1671. mov cl,[esi].Blk[BlkNum*SIZEOF T_Blk].BestHMVb ; HMVb for block.
  1672. lea edi,[edi+ebp] ; Adjust BlockActionDescr.
  1673. mov al,[esi].Blk[BlkNum*SIZEOF T_Blk].BestVMVb ; VMVb for block.
  1674. sbb ebp,ebp ; -1 iff block not empty.
  1675. mov cl,ColsDefFutureFrame[ecx-96] ; Case of columns to do bidi. (Presumes ecx[8:31]==0 -- confirm.)
  1676. and ebp,T_CoeffBlk ; 0 iff block empty, else inc.
  1677. mov al,LinesDefFutureFrame[eax-96] ; Mask for lines to do bidi. (Presumes eax[8:31]==0 -- confirm.)
  1678. mov [edi].LinesDefined,al ; Stash it.
  1679. mov edx,ColsDefined[ecx] ; Column mask for pels 0-3, selected by case in cl.
  1680. mov [edi].Cols03Defined,edx ; Stash it.
  1681. mov edx,ColsDefined[ecx+4] ; Column mask for pels 4-7.
  1682. mov [edi].Cols47Defined,edx ; Stash it.
  1683. lea edx,[esi].Blk[BlkNum*SIZEOF T_Blk]+8 ; Addr of block addr (plus 8, tested later as the BiDi flag).
  1684. mov [edi].BlockAddr,edx ; Store address of blk address.
  1685. ENDM
  1686. mov ebx,PD [esi].CodedBlocks ; Bits 0- 3 set for non-empty Y blks.
  1687. ; Bit 4 set for non-empty U blk.
  1688. ; Bit 5 set for non-empty V blk.
  1689. ; Bit 6 clear except at stream end.
  1690. ; Bit 7 clear. Unused.
  1691. ; Bits 8-13 like bits 0-5, but for B frame.
  1692. ; Bit 14-15 clear. Unused.
  1693. BuildBBlockActionDescr 0, UpperYBlkLinesDef, LeftYBlkColsDef
  1694. BuildBBlockActionDescr 1, UpperYBlkLinesDef, RightYBlkColsDef
  1695. BuildBBlockActionDescr 2, LowerYBlkLinesDef, LeftYBlkColsDef
  1696. BuildBBlockActionDescr 3, LowerYBlkLinesDef, RightYBlkColsDef
  1697. BuildBBlockActionDescr 4, ChromaLinesDef, ChromaColsDef
  1698. BuildBBlockActionDescr 5, ChromaLinesDef, ChromaColsDef
  1699. add esi,SIZEOF T_MacroBlockActionDescr ; Move to next descriptor
  1700. and bl,040H ; Are we at end-of-stream? (bl is unshifted here, so test bit 6.)
  1701. je NextBMacroBlock ; No: ebp carries blk 6's increment into the next MB.
  1702. add edi,ebp ; Apply increment for the last block,
  1703. sub edi,SIZEOF T_CoeffBlk ; then back up to the last slot actually filled.
  1704. BlockActionStreamBuilt: ; First pass done; edi -> last block action descr built.
  1705. mov CoeffStream,edi ; Stash address of last block of coeffs.
  1706. NextBlock: ; Second-pass entry for the block descr at edi.
  1707. ;===============================================================================
  1708. ;===============================================================================
  1709. ; Second pass performs frame differencing of Inters and Forward DCT.
  1710. ;===============================================================================
  1711. ;===============================================================================
  1712. mov eax,[edi].BlockAddr ; Fetch address of block to do
  1713. mov ebp,PITCH
  1714. test eax,4 ; Is it an Inter block. (First pass added 4 to inter addrs.)
  1715. jne InterOrOBMCBlock ; Jump if doing inter block.
  1716. mov edx,[eax].T_Blk.BlkOffset ; BlkOffset if INTRA; BestMVs if BiDi.
  1717. mov ecx,TargetFrameBaseAddress
  1718. add ecx,edx ; Target block address if INTRA
  1719. mov esi,[eax-8].T_Blk.BlkOffset ; BlkOffset if BiDi (eax carries +8 in that case).
  1720. IFNDEF H261
  1721. ;; H261 does not execute the BiDi code so it is included only when H261 is not defined
  1722. ;;
  1723. test eax,8 ; Is it a BiDi block? (First pass added 8 to BiDi addrs.)
  1724. jne BiDiBlock ; Jump if doing BiDi block.
  1725. ENDIF
  1726. IntraBlock: ; Copy the 8*8 target block into P00..P74, then go do the FDCT.
  1727. ; Register usage:
  1728. ; ecx,edi -- Address of block.
  1729. ; ebp -- Pitch.
  1730. ; ebx, eax -- Scratch.
  1731. mov ebx,[ecx] ; Row 0, pels 0-3 (kept in ebx for P0 below).
  1732. mov eax,[ecx+4] ; Row 0, pels 4-7.
  1733. mov P00,ebx
  1734. mov P04,eax
  1735. mov eax,[ecx+ebp*1] ; Row 1.
  1736. mov edx,[ecx+ebp*1+4]
  1737. lea edi,[ecx+PITCH*5] ; edi -> row 5.
  1738. lea ecx,[ecx+ebp*2] ; ecx -> row 2.
  1739. mov P10,eax
  1740. mov P14,edx
  1741. mov eax,[ecx] ; Row 2.
  1742. mov edx,[ecx+4]
  1743. mov P20,eax
  1744. mov P24,edx
  1745. mov eax,[ecx+ebp*1] ; Row 3.
  1746. mov edx,[ecx+ebp*1+4]
  1747. mov P30,eax
  1748. mov P34,edx
  1749. mov eax,[ecx+ebp*2] ; Row 4.
  1750. mov edx,[ecx+ebp*2+4]
  1751. mov P40,eax
  1752. mov P44,edx
  1753. mov eax,[edi] ; Row 5.
  1754. mov edx,[edi+4]
  1755. mov P50,eax
  1756. mov P54,edx
  1757. mov eax,[edi+ebp*1] ; Row 6.
  1758. mov edx,[edi+ebp*1+4]
  1759. mov P60,eax
  1760. mov P64,edx
  1761. mov eax,[edi+ebp*2] ; Row 7.
  1762. mov edx,[edi+ebp*2+4]
  1763. mov P74,edx
  1764. xor ecx,ecx ; Clear ecx so cl load below yields a zero-extended pel.
  1765. and ebx,00000007FH ; Fetch P0.
  1766. mov cl,P03 ; Fetch P3.
  1767. mov P70,eax
  1768. jmp DoForwardDCT
  1769. IFNDEF H261
  1770. ;; H261 does not execute the BiDi code so it is included only when H261 is not defined
  1771. ;;
  1772. BiDiBlock: ; Fetch future-frame prediction for a BiDi block into P, and range masks into Mask.
  1773. mov BlkActionDescrAddr,eax ; Save tagged descriptor address; reloaded after the loop.
  1774. mov ebp,FutureFrameBaseAddress
  1775. shr edx,25 ; Extract VMVb. CF == 1 iff VMVb is half pel.
  1776. mov bl,[edi].LinesDefined ; Mask of lines that are in range.
  1777. lea esi,[esi+ebp-48] ; Addr 0-MV blk in Future P Frame.
  1778. mov ebp,[edi].Cols47Defined ; Mask for columns 4-7.
  1779. IF PITCH-384
  1780. **** Magic leaks out if pitch not equal to 384
  1781. ENDIF
  1782. lea ecx,[edx+edx*2-48*3] ; Mult integer pel VMVb by PITCH (times 3 here, times 128 by shl below).
  1783. mov edi,[edi].Cols03Defined ; Mask for columns 0-3.
  1784. mov dl,[eax-8].T_Blk.BestHMVb ; Fetch HMVb.
  1785. jc InterpVert_FuturePFrame ; CF is still from shr edx,25; mov and lea leave flags alone.
  1786. shl ecx,7 ; Complete the multiply by PITCH (3*128 == 384).
  1787. shr dl,1 ; CF == 1 iff HMVb is half pel.
  1788. mov bh,bl ; Keep copy of line mask for the second column pass.
  1789. lea esi,[esi+ecx] ; Add VMVb contrib to block addr.
  1790. jc InterpHorz_FuturePFrame
  1791. add esi,edx ; Add HMVb contrib to block addr.
  1792. ; esi -- Future P Frame block address.
  1793. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  1794. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  1795. ; bl -- Mask of lines that are in range.
  1796. @@: ; Body runs twice: columns 0-3 first, then columns 4-7.
  1797. xor esp,4 ; Toggle esp bit 2; the +4 biased locals below then address the other 4 columns.
  1798. add bl,bl ; 0A CF == 1 iff line 0 in range.
  1799. sbb eax,eax ; 0B eax == -1 if line 0 in range.
  1800. mov ecx,[esi] ; 0C Fetch Future P00:P03.
  1801. and eax,edi ; 0D In range among P00,P01,P02,P03.
  1802. add bl,bl ; 1A
  1803. sbb edx,edx ; 1B
  1804. mov Mask00+4,eax ; 0E Stash Mask for use with past pred.
  1805. and eax,ecx ; 0F Select in-range pels.
  1806. mov ecx,[esi+PITCH*1] ; 1C
  1807. mov P00+4,eax ; 0G Stash in-range pels.
  1808. and edx,edi ; 1D
  1809. mov Mask10+4,edx ; 1E
  1810. add bl,bl ; 2A
  1811. sbb eax,eax ; 2B
  1812. and edx,ecx ; 1F
  1813. mov P10+4,edx ; 1G
  1814. mov ecx,[esi+PITCH*2] ; 2C
  1815. and eax,edi ; 2D
  1816. add bl,bl ; 3A
  1817. sbb edx,edx ; 3B
  1818. mov Mask20+4,eax ; 2E
  1819. and eax,ecx ; 2F
  1820. mov ecx,[esi+PITCH*3] ; 3C
  1821. mov P20+4,eax ; 2G
  1822. and edx,edi ; 3D
  1823. mov Mask30+4,edx ; 3E
  1824. add bl,bl ; 4A
  1825. sbb eax,eax ; 4B
  1826. and edx,ecx ; 3F
  1827. mov P30+4,edx ; 3G
  1828. mov ecx,[esi+PITCH*4] ; 4C
  1829. and eax,edi ; 4D
  1830. add bl,bl ; 5A
  1831. sbb edx,edx ; 5B
  1832. mov Mask40+4,eax ; 4E
  1833. and eax,ecx ; 4F
  1834. mov ecx,[esi+PITCH*5] ; 5C
  1835. mov P40+4,eax ; 4G
  1836. and edx,edi ; 5D
  1837. mov Mask50+4,edx ; 5E
  1838. add bl,bl ; 6A
  1839. sbb eax,eax ; 6B
  1840. and edx,ecx ; 5F
  1841. mov P50+4,edx ; 5G
  1842. mov ecx,[esi+PITCH*6] ; 6C
  1843. and eax,edi ; 6D
  1844. add bl,bl ; 7A
  1845. sbb edx,edx ; 7B
  1846. mov Mask60+4,eax ; 6E
  1847. and eax,ecx ; 6F
  1848. mov ecx,[esi+PITCH*7] ; 7C
  1849. mov P60+4,eax ; 6G
  1850. and edx,edi ; 7D
  1851. mov Mask70+4,edx ; 7E
  1852. and edx,ecx ; 7F
  1853. mov P70+4,edx ; 7G
  1854. mov edi,ebp ; Use the columns 4-7 mask for the second pass.
  1855. mov edx,BlkActionDescrAddr ; Reload tagged descriptor address.
  1856. add esi,4 ; Move to right 4 columns.
  1857. mov ecx,4
  1858. mov bl,bh ; Restore line mask.
  1859. and ecx,esp ; esp bit 2 is clear after the first pass, set after the second.
  1860. je @b ; Go do the second pass.
  1861. mov edi,[edx-8].T_Blk.BlkOffset ; Offset of target block.
  1862. xor eax,eax
  1863. mov al,[edx-8].T_Blk.BestVMVf ; Fetch VMVf.
  1864. jmp BiDiFuturePredDone
  1865. InterpVert_FuturePFrame: ; Future prediction when VMVb is half pel (vertical interpolation).
  1866. shl ecx,7 ; Complete the multiply of integer pel VMVb by PITCH.
  1867. shr dl,1 ; CF == 1 iff HMVb is half pel.
  1868. mov bh,bl ; Keep copy of line mask for the second column pass.
  1869. lea esi,[esi+ecx] ; Add VMVb contrib to block addr.
  1870. jc InterpBoth_FuturePFrame ; HMVb is half pel as well.
  1871. add esi,edx ; Add HMVb contrib to block addr.
  1872. ; esi -- Future P Frame block address.
  1873. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  1874. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  1875. ; bl -- Mask of lines that are in range.
  1876. ; Interpolate Future Prediction Vertically.
  1877. @@: ; Body runs twice: columns 0-3 first, then columns 4-7.
  1878. xor esp,4 ; Toggle esp bit 2; the +4 biased locals below then address the other 4 columns.
  1879. add bl,bl ; 0A CF == 1 iff line 0 in range.
  1880. sbb eax,eax ; 0B eax == -1 if line 0 in range.
  1881. mov ecx,[esi] ; 0C Fetch Future P00:P03.
  1882. and eax,edi ; 0D In range among P00,P01,P02,P03.
  1883. mov edx,[esi+PITCH*1] ; 0E Fetch Future P10:P13.
  1884. mov Mask00+4,eax ; 0F Stash Mask for use with past pred.
  1885. add ecx,edx ; 0G Add P00:P03 and P10:P13.
  1886. add ecx,001010101H ; 0H Add rounding.
  1887. shr ecx,1 ; 0I Interpolate (divide by 2).
  1888. add bl,bl ; 1A
  1889. sbb edx,edx ; 1B
  1890. and eax,ecx ; 0J Select in-range pels (and clean).
  1891. mov P00+4,eax ; 0K Stash in-range pels.
  1892. mov ecx,[esi+PITCH*1] ; 1C
  1893. and edx,edi ; 1D
  1894. mov eax,[esi+PITCH*2] ; 1E
  1895. mov Mask10+4,edx ; 1F
  1896. add ecx,eax ; 1G
  1897. add ecx,001010101H ; 1H
  1898. shr ecx,1 ; 1I
  1899. add bl,bl ; 2A
  1900. sbb eax,eax ; 2B
  1901. and edx,ecx ; 1J
  1902. mov P10+4,edx ; 1K
  1903. mov ecx,[esi+PITCH*2] ; 2C
  1904. and eax,edi ; 2D
  1905. mov edx,[esi+PITCH*3] ; 2E
  1906. mov Mask20+4,eax ; 2F
  1907. add ecx,edx ; 2G
  1908. add ecx,001010101H ; 2H
  1909. shr ecx,1 ; 2I
  1910. add bl,bl ; 3A
  1911. sbb edx,edx ; 3B
  1912. and eax,ecx ; 2J
  1913. mov P20+4,eax ; 2K
  1914. mov ecx,[esi+PITCH*3] ; 3C
  1915. and edx,edi ; 3D
  1916. mov eax,[esi+PITCH*4] ; 3E
  1917. mov Mask30+4,edx ; 3F
  1918. add ecx,eax ; 3G
  1919. add ecx,001010101H ; 3H
  1920. shr ecx,1 ; 3I
  1921. add bl,bl ; 4A
  1922. sbb eax,eax ; 4B
  1923. and edx,ecx ; 3J
  1924. mov P30+4,edx ; 3K
  1925. mov ecx,[esi+PITCH*4] ; 4C
  1926. and eax,edi ; 4D
  1927. mov edx,[esi+PITCH*5] ; 4E
  1928. mov Mask40+4,eax ; 4F
  1929. add ecx,edx ; 4G
  1930. add ecx,001010101H ; 4H
  1931. shr ecx,1 ; 4I
  1932. add bl,bl ; 5A
  1933. sbb edx,edx ; 5B
  1934. and eax,ecx ; 4J
  1935. mov P40+4,eax ; 4K
  1936. mov ecx,[esi+PITCH*5] ; 5C
  1937. and edx,edi ; 5D
  1938. mov eax,[esi+PITCH*6] ; 5E
  1939. mov Mask50+4,edx ; 5F
  1940. add ecx,eax ; 5G
  1941. add ecx,001010101H ; 5H
  1942. shr ecx,1 ; 5I
  1943. add bl,bl ; 6A
  1944. sbb eax,eax ; 6B
  1945. and edx,ecx ; 5J
  1946. mov P50+4,edx ; 5K
  1947. mov ecx,[esi+PITCH*6] ; 6C
  1948. and eax,edi ; 6D
  1949. mov edx,[esi+PITCH*7] ; 6E
  1950. mov Mask60+4,eax ; 6F
  1951. add ecx,edx ; 6G
  1952. add ecx,001010101H ; 6H
  1953. add esi,4 ; Move to right 4 columns; the line 7 fetches below compensate with -4.
  1954. shr ecx,1 ; 6I
  1955. add bl,bl ; 7A
  1956. sbb edx,edx ; 7B
  1957. and eax,ecx ; 6J
  1958. mov P60+4,eax ; 6K
  1959. mov ecx,[esi+PITCH*7-4] ; 7C
  1960. and edx,edi ; 7D
  1961. mov eax,[esi+PITCH*8-4] ; 7E Line below the block, needed to interpolate line 7.
  1962. mov Mask70+4,edx ; 7F
  1963. add ecx,eax ; 7G
  1964. add ecx,001010101H ; 7H
  1965. mov bl,bh ; Restore line mask.
  1966. shr ecx,1 ; 7I
  1967. and edx,ecx ; 7J
  1968. mov P70+4,edx ; 7K
  1969. mov edi,ebp ; Use the columns 4-7 mask for the second pass.
  1970. mov edx,BlkActionDescrAddr ; Reload tagged descriptor address.
  1971. mov ecx,4
  1972. and ecx,esp ; esp bit 2 is clear after the first pass, set after the second.
  1973. je @b ; Go do the second pass.
  1974. mov edi,[edx-8].T_Blk.BlkOffset ; Offset of target block.
  1975. xor eax,eax
  1976. mov al,[edx-8].T_Blk.BestVMVf ; Fetch VMVf.
  1977. jmp BiDiFuturePredDone
  1978. InterpHorz_FuturePFrame: ; Future prediction when only HMVb is half pel (horizontal interpolation).
  1979. ; esi -- Future P Frame block address.
  1980. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  1981. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  1982. ; bl -- Mask of lines that are in range.
  1983. ; Interpolate Future Prediction Horizontally.
  1984. add esi,edx ; Add HMVb contrib to block addr.
  1985. @@: ; Body runs twice: columns 0-3 first, then columns 4-7.
  1986. xor esp,4 ; Toggle esp bit 2; the +4 biased locals below then address the other 4 columns.
  1987. add bl,bl ; 0A CF == 1 iff line 0 in range.
  1988. sbb eax,eax ; 0B eax == -1 if line 0 in range.
  1989. mov ecx,[esi] ; 0C Fetch Future P00:P03.
  1990. and eax,edi ; 0D In range among P00,P01,P02,P03.
  1991. mov edx,[esi+1] ; 0E Fetch Future P01:P04.
  1992. mov Mask00+4,eax ; 0F Stash Mask for use with past pred.
  1993. add ecx,edx ; 0G Add P00:P03 and P01:P04.
  1994. add ecx,001010101H ; 0H Add rounding.
  1995. shr ecx,1 ; 0I Interpolate (divide by 2).
  1996. add bl,bl ; 1A
  1997. sbb edx,edx ; 1B
  1998. and eax,ecx ; 0J Select in-range pels (and clean).
  1999. mov P00+4,eax ; 0K Stash in-range pels.
  2000. mov ecx,[esi+PITCH*1] ; 1C
  2001. and edx,edi ; 1D
  2002. mov eax,[esi+PITCH*1+1] ; 1E
  2003. mov Mask10+4,edx ; 1F
  2004. add ecx,eax ; 1G
  2005. add ecx,001010101H ; 1H
  2006. shr ecx,1 ; 1I
  2007. add bl,bl ; 2A
  2008. sbb eax,eax ; 2B
  2009. and edx,ecx ; 1J
  2010. mov P10+4,edx ; 1K
  2011. mov ecx,[esi+PITCH*2] ; 2C
  2012. and eax,edi ; 2D
  2013. mov edx,[esi+PITCH*2+1] ; 2E
  2014. mov Mask20+4,eax ; 2F
  2015. add ecx,edx ; 2G
  2016. add ecx,001010101H ; 2H
  2017. shr ecx,1 ; 2I
  2018. add bl,bl ; 3A
  2019. sbb edx,edx ; 3B
  2020. and eax,ecx ; 2J
  2021. mov P20+4,eax ; 2K
  2022. mov ecx,[esi+PITCH*3] ; 3C
  2023. and edx,edi ; 3D
  2024. mov eax,[esi+PITCH*3+1] ; 3E
  2025. mov Mask30+4,edx ; 3F
  2026. add ecx,eax ; 3G
  2027. add ecx,001010101H ; 3H
  2028. shr ecx,1 ; 3I
  2029. add bl,bl ; 4A
  2030. sbb eax,eax ; 4B
  2031. and edx,ecx ; 3J
  2032. mov P30+4,edx ; 3K
  2033. mov ecx,[esi+PITCH*4] ; 4C
  2034. and eax,edi ; 4D
  2035. mov edx,[esi+PITCH*4+1] ; 4E
  2036. mov Mask40+4,eax ; 4F
  2037. add ecx,edx ; 4G
  2038. add ecx,001010101H ; 4H
  2039. shr ecx,1 ; 4I
  2040. add bl,bl ; 5A
  2041. sbb edx,edx ; 5B
  2042. and eax,ecx ; 4J
  2043. mov P40+4,eax ; 4K
  2044. mov ecx,[esi+PITCH*5] ; 5C
  2045. and edx,edi ; 5D
  2046. mov eax,[esi+PITCH*5+1] ; 5E
  2047. mov Mask50+4,edx ; 5F
  2048. add ecx,eax ; 5G
  2049. add ecx,001010101H ; 5H
  2050. shr ecx,1 ; 5I
  2051. add bl,bl ; 6A
  2052. sbb eax,eax ; 6B
  2053. and edx,ecx ; 5J
  2054. mov P50+4,edx ; 5K
  2055. mov ecx,[esi+PITCH*6] ; 6C
  2056. and eax,edi ; 6D
  2057. mov edx,[esi+PITCH*6+1] ; 6E
  2058. mov Mask60+4,eax ; 6F
  2059. add ecx,edx ; 6G
  2060. add ecx,001010101H ; 6H
  2061. add esi,4 ; Move to right 4 columns; the line 7 fetches below compensate with -4.
  2062. shr ecx,1 ; 6I
  2063. add bl,bl ; 7A
  2064. sbb edx,edx ; 7B
  2065. and eax,ecx ; 6J
  2066. mov P60+4,eax ; 6K
  2067. mov ecx,[esi+PITCH*7-4] ; 7C
  2068. and edx,edi ; 7D
  2069. mov eax,[esi+PITCH*7+1-4] ; 7E
  2070. mov Mask70+4,edx ; 7F
  2071. add ecx,eax ; 7G
  2072. add ecx,001010101H ; 7H
  2073. mov bl,bh ; Restore line mask.
  2074. shr ecx,1 ; 7I
  2075. and edx,ecx ; 7J
  2076. mov P70+4,edx ; 7K
  2077. mov edi,ebp ; Use the columns 4-7 mask for the second pass.
  2078. mov edx,BlkActionDescrAddr ; Reload tagged descriptor address.
  2079. mov ecx,4
  2080. and ecx,esp ; esp bit 2 is clear after the first pass, set after the second.
  2081. je @b ; Go do the second pass.
  2082. mov edi,[edx-8].T_Blk.BlkOffset ; Offset of target block.
  2083. xor eax,eax
  2084. mov al,[edx-8].T_Blk.BestVMVf ; Fetch VMVf.
  2085. jmp BiDiFuturePredDone
  2086. InterpBoth_FuturePFrame: ; Future prediction when both VMVb and HMVb are half pel.
  2087. add esi,edx ; Add HMVb contrib to block addr.
  2088. sub esp,68 ; Bias esp; the loop adds 8 per line, so P00+60 and Mask00+60 step one line each time.
  2089. ; esi -- Future P Frame block address.
  2090. ; edi -- Mask to apply to columns 0-3 of block to select columns in range.
  2091. ; ebp -- Mask to apply to columns 4-7 of block to select columns in range.
  2092. ; bl -- Mask of lines that are in range.
  2093. ; Interpolate Future Prediction both Horizontally and Vertically.
  2094. @@: ; One iteration per line of 4 pels; the outer loop runs this for columns 0-3, then 4-7.
  2095. add esp,8 ; Advance to next line of P and Mask.
  2096. mov eax,[esi] ; Fetch Future P00:P03.
  2097. mov ecx,001010101H ; Mask to extract halves.
  2098. mov edx,[esi+1] ; Fetch Future P01:P04.
  2099. add eax,edx ; <P04+P03 ...>.
  2100. mov edx,[esi+PITCH+1] ; Fetch Future P11:P14.
  2101. and ecx,eax ; <(P04+P03)&1 ...>.
  2102. add esi,PITCH ; Advance to next line.
  2103. xor eax,ecx ; <(P04+P03)/2*2 ...>.
  2104. add edx,ecx ; <P14+((P04+P03)&1) ...>.
  2105. shr eax,1 ; <(P04+P03)/2 ...>.
  2106. mov ecx,[esi] ; Fetch Future P10:P13.
  2107. add edx,ecx ; <P14+P13+((P04+P03)&1) ...>.
  2108. add eax,001010101H ; <(P04+P03)/2+1 ...>
  2109. shr edx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  2110. add bl,bl ; CF == 1 iff line 0 in range.
  2111. sbb ecx,ecx ; ecx == -1 if line 0 in range.
  2112. and edx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  2113. add eax,edx ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  2114. and ecx,edi ; In range among P00,P01,P02,P03.
  2115. shr eax,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  2116. mov Mask00+60,ecx ; Stash Mask for use with past prediction.
  2117. and eax,ecx ; Select in-range pels from future pred (and clean).
  2118. test esp,000000038H ; Line loop ends when bits 3-5 of esp wrap to zero.
  2119. mov P00+60,eax ; Stash in-range pels.
  2120. jne @b
  2121. sub esi,PITCH*8-4 ; Move to right 4 columns.
  2122. mov edx,BlkActionDescrAddr ; Reload tagged descriptor address.
  2123. mov edi,ebp ; Use the columns 4-7 mask for the second pass.
  2124. sub esp,60 ; Back up 8 lines less 4 bytes, so the second pass addresses columns 4-7.
  2125. mov ecx,4
  2126. mov bl,bh ; Restore line mask.
  2127. and ecx,esp ; esp bit 2 is set after the first pass, clear after the second.
  2128. jne @b ; Go do the second pass.
  2129. add esp,60 ; Undo the last rebias; esp is back at its value on entry.
  2130. xor eax,eax
  2131. mov edi,[edx-8].T_Blk.BlkOffset ; Offset of target block.
  2132. mov al,[edx-8].T_Blk.BestVMVf ; Fetch VMVf.
  2133. BiDiFuturePredDone: ; Combine past prediction with masked future prediction and difference against target.
  2134. shr al,1 ; CF == 1 iff VMVf is half pel.
  2135. mov esi,TargetFrameBaseAddress
  2136. mov cl,[edx-8].T_Blk.BestHMVf ; Fetch HMVf.
  2137. mov edx,DistFromTargetToPastP
  2138. lea edi,[edi+esi] ; Target block address (lea leaves CF intact).
  2139. jc InterpVert_PastPFrame
  2140. shr cl,1 ; CF == 1 iff HMVf is half pel.
  2141. lea eax,[eax+eax*2-48*3] ; Mult integer pel VMVf by PITCH.
  2142. lea esi,[edi+edx-48] ; Addr 0-MV blk in Past P Frame.
  2143. jc InterpHorz_PastPFrame
  2144. shl eax,7 ; Complete the multiply by PITCH.
  2145. add esi,ecx ; Add HMVf contrib to block addr.
  2146. add esi,eax ; Add VMVf contrib to block addr.
  2147. sub esp,64 ; Bias esp; the loop adds 8 per line, so P00+64 and Mask00+64 step one line each time.
  2148. ; esi -- Past P Frame block address.
  2149. ; edi -- Target block address.
  2150. @@: ; One iteration per line of 8 pels; no interpolation of the past prediction.
  2151. mov eax,[esi] ; 0A Fetch past prediction.
  2152. mov ebx,Mask00+64 ; 0B Fetch bidi-prediction mask.
  2153. mov ecx,P00+64 ; 0C Fetch future pred for bidi predicted pels.
  2154. and ebx,eax ; 0D Extract past for bidi predicted pels.
  2155. mov edx,[esi+4] ; 4A
  2156. mov ebp,Mask04+64 ; 4B
  2157. lea eax,[ecx+eax*2] ; 0E (2*Past) or ((2*Past+Future) for each pel.
  2158. mov ecx,P04+64 ; 4C
  2159. sub eax,ebx ; 0F (2*Past) or (Past+Future) for each pel.
  2160. and ebp,edx ; 4D
  2161. shr eax,1 ; 0G (Past) or ((Past+Future)/2) (dirty).
  2162. lea edx,[ecx+edx*2] ; 4E
  2163. and eax,07F7F7F7FH ; 0H (Past) or ((Past+Future)/2) (clean).
  2164. sub edx,ebp ; 4F
  2165. shr edx,1 ; 4G
  2166. mov ebx,[edi] ; 0I Fetch target pels.
  2167. and edx,07F7F7F7FH ; 4H
  2168. mov ebp,[edi+4] ; 4I
  2169. sub ebx,eax ; 0J Compute correction.
  2170. sub ebp,edx ; 4J
  2171. add ebx,080808080H ; 0K Bias correction.
  2172. add ebp,080808080H ; 4K
  2173. mov P00+64,ebx ; 0L Store correction.
  2174. mov P04+64,ebp ; 4L
  2175. add esi,PITCH ; Advance past prediction to next line.
  2176. add esp,8 ; Advance to next line of P and Mask.
  2177. test esp,000000038H ; Loop ends when bits 3-5 of esp wrap to zero.
  2178. lea edi,[edi+PITCH] ; Advance target to next line (lea keeps the flags from test).
  2179. jne @b
  2180. xor ebx,ebx ; Clear ebx and ecx; bl and cl become table indices in DoForwardDCT.
  2181. xor ecx,ecx
  2182. mov bl,P00 ; Fetch P0.
  2183. mov cl,P03 ; Fetch P3.
  2184. jmp DoForwardDCT
  2185. InterpVert_PastPFrame: ; BiDi differencing when VMVf is half pel (past prediction interpolated vertically).
  2186. shr cl,1 ; CF == 1 iff HMVf is half pel.
  2187. lea eax,[eax+eax*2-48*3] ; Mult integer pel VMVf by PITCH.
  2188. lea esi,[edi+edx-48] ; Addr 0-MV blk in Past P Frame.
  2189. jc InterpBoth_PastPFrame ; HMVf is half pel as well.
  2190. shl eax,7 ; Complete the multiply by PITCH.
  2191. add esi,ecx ; Add HMVf contrib to block addr.
  2192. add esi,eax ; Add VMVf contrib to block addr.
  2193. sub esp,64 ; Bias esp; the loop adds 8 per line, so P00+64 and Mask00+64 step one line each time.
  2194. ; esi -- Past P Frame block address.
  2195. ; edi -- Target block address.
  2196. @@: ; One iteration per line of 8 pels.
  2197. mov eax,[esi] ; 0A Fetch past prediction.
  2198. mov edx,[esi+4] ; 4A
  2199. add eax,[esi+PITCH] ; 0B Add past prediction with which to interpolate.
  2200. add edx,[esi+PITCH+4] ; 4B
  2201. add eax,001010101H ; 0C Add rounding.
  2202. add edx,001010101H ; 4C
  2203. shr eax,1 ; 0D Divide by two (dirty).
  2204. and edx,0FEFEFEFEH ; 4D Pre-clean low bit of each byte.
  2205. shr edx,1 ; 4E Divide by two (clean).
  2206. and eax,07F7F7F7FH ; 0E Clean.
  2207. mov ebx,Mask00+64 ; 0F Fetch bidi-prediction mask.
  2208. mov ecx,P00+64 ; 0G Fetch future pred for bidi predicted pels.
  2209. and ebx,eax ; 0H Extract past for bidi predicted pels.
  2210. mov ebp,Mask04+64 ; 4F
  2211. lea eax,[ecx+eax*2] ; 0I (2*Past) or ((2*Past+Future) for each pel.
  2212. mov ecx,P04+64 ; 4G
  2213. sub eax,ebx ; 0J (2*Past) or (Past+Future) for each pel.
  2214. and ebp,edx ; 4H
  2215. shr eax,1 ; 0K (Past) or ((Past+Future)/2) (dirty).
  2216. lea edx,[ecx+edx*2] ; 4I
  2217. and eax,07F7F7F7FH ; 0L (Past) or ((Past+Future)/2) (clean).
  2218. sub edx,ebp ; 4J
  2219. shr edx,1 ; 4K
  2220. mov ebx,[edi] ; 0M Fetch target pels.
  2221. and edx,07F7F7F7FH ; 4L
  2222. mov ebp,[edi+4] ; 4M
  2223. sub ebx,eax ; 0N Compute correction.
  2224. sub ebp,edx ; 4N
  2225. add ebx,080808080H ; 0O Bias correction.
  2226. add ebp,080808080H ; 4O
  2227. mov P00+64,ebx ; 0P Store correction.
  2228. mov P04+64,ebp ; 4P
  2229. add esi,PITCH ; Advance past prediction to next line.
  2230. add esp,8 ; Advance to next line of P and Mask.
  2231. test esp,000000038H ; Loop ends when bits 3-5 of esp wrap to zero.
  2232. lea edi,[edi+PITCH] ; Advance target to next line (lea keeps the flags from test).
  2233. jne @b
  2234. xor ebx,ebx ; Clear ebx and ecx; bl and cl become table indices in DoForwardDCT.
  2235. xor ecx,ecx
  2236. mov bl,P00 ; Fetch P0.
  2237. mov cl,P03 ; Fetch P3.
  2238. jmp DoForwardDCT
  2239. InterpHorz_PastPFrame: ; BiDi differencing when only HMVf is half pel (past prediction interpolated horizontally).
  2240. shl eax,7 ; Complete the multiply of integer pel VMVf by PITCH.
  2241. add esi,ecx ; Add HMVf contrib to block addr.
  2242. add esi,eax ; Add VMVf contrib to block addr.
  2243. sub esp,64 ; Bias esp; the loop adds 8 per line, so P00+64 and Mask00+64 step one line each time.
  2244. ; esi -- Past P Frame block address.
  2245. ; edi -- Target block address.
  2246. @@: ; One iteration per line of 8 pels.
  2247. mov eax,[esi] ; 0A Fetch past prediction.
  2248. mov edx,[esi+4] ; 4A
  2249. add eax,[esi+1] ; 0B Add past prediction with which to interpolate.
  2250. add edx,[esi+5] ; 4B
  2251. add eax,001010101H ; 0C Add rounding.
  2252. add edx,001010101H ; 4C
  2253. shr eax,1 ; 0D Divide by two (dirty).
  2254. and edx,0FEFEFEFEH ; 4D Pre-clean low bit of each byte.
  2255. shr edx,1 ; 4E Divide by two (clean).
  2256. and eax,07F7F7F7FH ; 0E Clean.
  2257. mov ebx,Mask00+64 ; 0F Fetch bidi-prediction mask.
  2258. mov ecx,P00+64 ; 0G Fetch future pred for bidi predicted pels.
  2259. and ebx,eax ; 0H Extract past for bidi predicted pels.
  2260. mov ebp,Mask04+64 ; 4F
  2261. lea eax,[ecx+eax*2] ; 0I (2*Past) or ((2*Past+Future) for each pel.
  2262. mov ecx,P04+64 ; 4G
  2263. sub eax,ebx ; 0J (2*Past) or (Past+Future) for each pel.
  2264. and ebp,edx ; 4H
  2265. shr eax,1 ; 0K (Past) or ((Past+Future)/2) (dirty).
  2266. lea edx,[ecx+edx*2] ; 4I
  2267. and eax,07F7F7F7FH ; 0L (Past) or ((Past+Future)/2) (clean).
  2268. sub edx,ebp ; 4J
  2269. shr edx,1 ; 4K
  2270. mov ebx,[edi] ; 0M Fetch target pels.
  2271. and edx,07F7F7F7FH ; 4L
  2272. mov ebp,[edi+4] ; 4M
  2273. sub ebx,eax ; 0N Compute correction.
  2274. sub ebp,edx ; 4N
  2275. add ebx,080808080H ; 0O Bias correction.
  2276. add ebp,080808080H ; 4O
  2277. mov P00+64,ebx ; 0P Store correction.
  2278. mov P04+64,ebp ; 4P
  2279. add esi,PITCH ; Advance past prediction to next line.
  2280. add esp,8 ; Advance to next line of P and Mask.
  2281. test esp,000000038H ; Loop ends when bits 3-5 of esp wrap to zero.
  2282. lea edi,[edi+PITCH] ; Advance target to next line (lea keeps the flags from test).
  2283. jne @b
  2284. xor ebx,ebx ; Clear ebx and ecx; bl and cl become table indices in DoForwardDCT.
  2285. xor ecx,ecx
  2286. mov bl,P00 ; Fetch P0.
  2287. mov cl,P03 ; Fetch P3.
  2288. jmp DoForwardDCT
  2289. InterpBoth_PastPFrame: ; BiDi differencing when both VMVf and HMVf are half pel.
  2290. shl eax,7 ; Complete the multiply of integer pel VMVf by PITCH.
  2291. add esi,ecx ; Add HMVf contrib to block addr.
  2292. add esi,eax ; Add VMVf contrib to block addr.
  2293. sub esp,64 ; Bias esp; the loop adds 8 per line, so P00+64 and Mask00+64 step one line each time.
  2294. ; esi -- Past P Frame block address.
  2295. ; edi -- Target block address.
  2296. @@: ; One iteration per line of 8 pels.
  2297. mov eax,[esi+1] ; 0A <P04 P03 P02 P01> prediction pels.
  2298. mov ebx,001010101H ; 0B Mask for extraction of halves.
  2299. mov ebp,[esi+PITCH+1] ; 0C <P14 P13 P12 P11>.
  2300. mov ecx,[esi] ; 0D <P03 P02 P01 P00>.
  2301. add eax,ecx ; 0E <P04+P03 P03+P02 P02+P01 P01+P00>.
  2302. mov ecx,[esi+PITCH] ; 0F <P13 P12 P11 P10>.
  2303. and ebx,eax ; 0G <(P04+P03)&1 ...>.
  2304. and eax,0FEFEFEFEH ; 0H Pre-Clean
  2305. shr eax,1 ; 0I <(P04+P03)/2 ...>.
  2306. add ecx,ebp ; 0J <P14+P13 P13+P12 P12+P11 P11+P10>.
  2307. add eax,001010101H ; 0K <(P04+P03)/2+1 ...>.
  2308. add ecx,ebx ; 0L <P14+P13+((P04+P03)&1) ...>.
  2309. shr ecx,1 ; 0M <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  2310. mov edx,[esi+5] ; 4A
  2311. and ecx,07F7F7F7FH ; 0M <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  2312. mov ebx,001010101H ; 4B
  2313. add eax,ecx ; 0N <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  2314. mov ebp,[esi+PITCH+5] ; 4C
  2315. shr eax,1 ; 0O <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>
  2316. mov ecx,[esi+4] ; 4D
  2317. and eax,07F7F7F7FH ; 0P Interpolated prediction.
  2318. add edx,ecx ; 4E
  2319. mov ecx,[esi+PITCH+4] ; 4F
  2320. and ebx,edx ; 4G
  2321. and edx,0FEFEFEFEH ; 4H
  2322. add ecx,ebp ; 4J
  2323. shr edx,1 ; 4I
  2324. add ecx,ebx ; 4L
  2325. shr ecx,1 ; 4M
  2326. add edx,001010101H ; 4K
  2327. and ecx,07F7F7F7FH ; 4M
  2328. mov ebx,Mask00+64 ; 0Q Fetch bidi-prediction mask.
  2329. add edx,ecx ; 4N
  2330. mov ecx,P00+64 ; 0R Fetch future pred for bidi predicted pels.
  2331. shr edx,1 ; 4O
  2332. and ebx,eax ; 0S Extract past for bidi predicted pels.
  2333. and edx,07F7F7F7FH ; 4P
  2334. mov ebp,Mask04+64 ; 4Q
  2335. lea eax,[ecx+eax*2] ; 0T (2*Past) or ((2*Past+Future) for each pel.
  2336. mov ecx,P04+64 ; 4R
  2337. sub eax,ebx ; 0U (2*Past) or (Past+Future) for each pel.
  2338. and ebp,edx ; 4S
  2339. shr eax,1 ; 0V (Past) or ((Past+Future)/2) (dirty).
  2340. lea edx,[ecx+edx*2] ; 4T
  2341. and eax,07F7F7F7FH ; 0W (Past) or ((Past+Future)/2) (clean).
  2342. sub edx,ebp ; 4U
  2343. shr edx,1 ; 4V
  2344. mov ebx,[edi] ; 0X Fetch target pels.
  2345. and edx,07F7F7F7FH ; 4W
  2346. mov ebp,[edi+4] ; 4X
  2347. sub ebx,eax ; 0Y Compute correction.
  2348. sub ebp,edx ; 4Y
  2349. add ebx,080808080H ; 0Z Bias correction.
  2350. add ebp,080808080H ; 4Z
  2351. mov P00+64,ebx ; 0a Store correction.
  2352. mov P04+64,ebp ; 4a
  2353. add esi,PITCH ; Advance past prediction to next line.
  2354. add esp,8 ; Advance to next line of P and Mask.
  2355. test esp,000000038H ; Loop ends when bits 3-5 of esp wrap to zero.
  2356. lea edi,[edi+PITCH] ; Advance target to next line (lea keeps the flags from test).
  2357. jne @b
  2358. xor ebx,ebx ; Clear ebx and ecx; bl and cl become table indices in DoForwardDCT.
  2359. xor ecx,ecx
  2360. mov bl,P00 ; Fetch P0.
  2361. mov cl,P03 ; Fetch P3.
  2362. jmp DoForwardDCT
  2363. ;; end of section of code not define when H261 defined
  2364. ENDIF
  2365. InterOrOBMCBlock: ; Inter block: pick the differencing routine from the OBMC tag and half pel MV bits.
  2366. mov esi,TargetFrameBaseAddress
  2367. mov edi,[eax-4].T_Blk.BlkOffset ; Compute Addr of Target block.
  2368. IFNDEF H261
  2369. ;; H261 does not execute the OBMC code so it is included only when H261 is not defined
  2370. ;;
  2371. test eax,8 ; Is it an OBMC block?
  2372. jne OBMCBlock ; Jump if doing OBMC block.
  2373. ENDIF
  2374. add edi,esi ; Address of target block.
  2375. mov esi,[eax-4].T_Blk.PastRef ; Addr of PrevRef block.
  2376. mov eax,[eax-4].T_Blk.MVs ; al = Horz MV; ah = Vert MV
  2377. mov ecx,080808080H ; Bias value applied to each correction byte.
  2378. IFNDEF H261
  2379. ;; H261 does not execute Interp code so it is included only when H261 is not defined
  2380. ;;
  2381. test al,1 ; Is Horz MV half pel?
  2382. jne InterpHorzOrBoth
  2383. ENDIF
  2384. lea edx,[ebp+ebp*2] ; Pitch times 3, for NoInterp.
  2385. lea ebx,[esi+ebp] ; Reference plus PITCH, for InterpVert.
  2386. test ah,1 ; Is Vert MV half pel?
  2387. je NoInterp
  2388. IFNDEF H261
  2389. ;; H261 does not execute Interp code so it is included only when H261 is not defined
  2390. ;;
  2391. InterpVert: ; Shared by vertical and horizontal interpolation; they differ only in ebx on entry.
  2392. InterpHorz:
  2393. ; Register usage:
  2394. ; edi -- Address of target block.
  2395. ; esi -- Address of reference block.
  2396. ; ebx -- Address of reference plus either 1 or PITCH, for interpolation.
  2397. ; ebp, edx, ecx, eax -- Scratch.
  2398. sub esp,16 ; Bias esp; the loop adds 4 per pass, so the +12 biased locals step 4 bytes each time.
  2399. @@: ; Four passes; each does 4 columns of lines 0,2,4,6, the last two after a one-line advance.
  2400. add esp,4 ; Advance frame difference pointer.
  2401. mov eax,[esi] ; 0A <P03 P02 P01 P00> prediction pels.
  2402. mov ecx,[ebx] ; 0B <P04 ...> or <P13 ...> prediction pels.
  2403. mov edx,[edi] ; 0C <C03 C02 C01 C00> current pels.
  2404. add edx,080808080H ; 0D Add bias.
  2405. mov ebp,[esi+PITCH*2] ; 2A
  2406. lea eax,[eax+ecx+001010101H]; 0E Sum of pred pels to interpolate.
  2407. mov ecx,[ebx+PITCH*2] ; 2B
  2408. shr eax,1 ; 0F Average of prediction pels (dirty).
  2409. and eax,07F7F7F7FH ; 0G Average of prediction pels (clean).
  2410. lea ebp,[ebp+ecx+001010101H]; 2E
  2411. sub edx,eax ; 0H Current - interpolated prediction, biased.
  2412. mov eax,[edi+PITCH*2] ; 2C
  2413. mov P00+12,edx ; 0I Save correction.
  2414. add eax,080808080H ; 2D
  2415. shr ebp,1 ; 2F
  2416. mov edx,[esi+PITCH*4] ; 4A
  2417. and ebp,07F7F7F7FH ; 2G
  2418. mov ecx,[ebx+PITCH*4] ; 4B
  2419. sub eax,ebp ; 2H
  2420. mov ebp,[edi+PITCH*4] ; 4C
  2421. mov P20+12,eax ; 2I
  2422. lea ecx,[ecx+edx+001010101H]; 4E
  2423. shr ecx,1 ; 4F
  2424. add ebp,080808080H ; 4D
  2425. and ecx,07F7F7F7FH ; 4G
  2426. mov eax,[esi+PITCH*6] ; 6A
  2427. sub ebp,ecx ; 4H
  2428. mov ecx,[ebx+PITCH*6] ; 6B
  2429. mov P40+12,ebp ; 4I
  2430. mov ebp,[edi+PITCH*6] ; 6C
  2431. lea ecx,[ecx+eax+001010101H]; 6E
  2432. add ebp,080808080H ; 6D
  2433. shr ecx,1 ; 6F
  2434. add esi,4 ; Move reference to right 4 columns.
  2435. and ecx,07F7F7F7FH ; 6G
  2436. add ebx,4 ; Move interpolation partner to right 4 columns.
  2437. sub ebp,ecx ; 6H
  2438. add edi,4 ; Move target to right 4 columns.
  2439. test esp,4 ; esp bit 2 clear: the right 4 columns of these lines are still to do.
  2440. mov P60+12,ebp ; 6I
  2441. je @b
  2442. add esi,PITCH-8 ; Back to column 0, down one line.
  2443. add edi,PITCH-8
  2444. test esp,8 ; esp bit 3 set: the second group of four lines is still to do.
  2445. lea ebx,[ebx+PITCH-8] ; lea keeps the flags from test.
  2446. jne @b
  2447. xor ebx,ebx ; Clear ebx and ecx; bl and cl become table indices in DoForwardDCT.
  2448. xor ecx,ecx
  2449. mov bl,P00 ; Fetch P0.
  2450. mov cl,P03 ; Fetch P3.
  2451. jmp DoForwardDCT
  2452. InterpHorzOrBoth: ; Horz MV is half pel; decide between horizontal only and both directions.
  2453. lea ebx,[esi+1] ; Reference plus 1, for InterpHorz.
  2454. test ah,1 ; Is Vert MV half pel too?
  2455. je InterpHorz
  2456. InterpBoth: ; Inter differencing with the prediction interpolated in both directions.
  2457. ; Register usage:
  2458. ; edi -- Address of target block.
  2459. ; esi -- Address of reference block.
  2460. ; ecx -- bias value 0x80808080, to make code size smaller.
  2461. ; ebp -- Pitch and scratch.
  2462. ; edx, ebx, eax -- Scratch.
  2463. sub esp,64 ; Bias esp; the loop adds 4 per pass, so P00+60 steps 4 bytes each time.
  2464. @@: ; One iteration per 4 pels: left then right half of each line.
  2465. mov eax,[esi+1] ; <P04 P03 P02 P01> prediction pels.
  2466. lea edx,[ecx*2+1] ; Get 001010101H mask.
  2467. mov ebx,[esi] ; <P03 P02 P01 P00>.
  2468. add edi,4 ; Pre-increment target block pointer.
  2469. add eax,ebx ; <P04+P03 P03+P02 P02+P01 P01+P00>.
  2470. mov ebx,[esi+ebp*1+1] ; <P14 P13 P12 P11>.
  2471. and edx,eax ; <(P04+P03)&1 ...>.
  2472. mov ebp,[esi+ebp*1] ; <P13 P12 P11 P10>.
  2473. xor eax,edx ; Clear insignificant fractional bit in each byte.
  2474. add ebx,ebp ; <P14+P13 P13+P12 P12+P11 P11+P10>.
  2475. shr eax,1 ; <(P04+P03)/2 ...>.
  2476. add ebx,edx ; <P14+P13+((P04+P03)&1) ...>.
  2477. shr ebx,1 ; <(P14+P13+((P04+P03)&1))/2 ...> (dirty).
  2478. add esi,4 ; Advance reference block pointer.
  2479. and ebx,07F7F7F7FH ; <(P14+P13+((P04+P03)&1))/2 ...> (clean).
  2480. lea eax,[eax+ecx*2+1] ; <(P04+P03)/2+1 ...>.
  2481. add eax,ebx ; <(P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2 ...>.
  2482. mov ebx,[edi-4] ; <C03 C02 C01 C00> current pels.
  2483. shr eax,1 ; <((P04+P03)/2+1+(P14+P13+((P04+P03)&1))/2)/2 ...>.
  2484. add ebx,ecx ; Add bias.
  2485. and eax,07F7F7F7FH ; Interpolated prediction.
  2486. add esp,4 ; Advance frame difference pointer.
  2487. sub ebx,eax ; Correction.
  2488. mov ebp,PITCH ; Reload Pitch.
  2489. test esp,4 ; esp bit 2 clear: the right half of this line is still to do.
  2490. mov P00+60,ebx ; Save correction.
  2491. je @b
  2492. lea esi,[esi+ebp-8] ; Back to column 0, down one line.
  2493. xor ebx,ebx ; Clear ebx; bl becomes a table index in DoForwardDCT.
  2494. test esp,000000038H ; Loop ends when bits 3-5 of esp wrap to zero.
  2495. lea edi,[edi+ebp-8] ; lea keeps the flags from test.
  2496. jne @b
  2497. mov bl,P00 ; Fetch P0.
  2498. xor ecx,ecx
  2499. mov cl,P03 ; Fetch P3.
  2500. jmp DoForwardDCT
  2501. OBMCBlock: ; Do OBMC frame differencing. OBMC prediction computed above.
  2502. mov ecx,080808080H ; Bias value applied to each correction byte.
  2503. mov edi,[eax-12].T_Blk.BlkOffset ; Compute Addr of Target block (the -12 removes the inter and OBMC tag bits).
  2504. add edi,esi ; esi holds TargetFrameBaseAddress on arrival here.
  2505. mov esi,[eax-12].T_Blk.PastRef ; Addr of PrevRef block.
  2506. lea edx,[ebp+ebp*2] ; Pitch times 3, for NoInterp.
  2507. lea ebx,[esi+ebp] ; Reference plus PITCH (NoInterp overwrites ebx before using it).
  2508. ;; end of section of code not included when H261 defined
  2509. ENDIF
  2510. NoInterp: ; Integer pel differencing: correction = current - prediction + bias.
  2511. ; Register usage:
  2512. ; edi -- Address of target block.
  2513. ; esi -- Address of reference block.
  2514. ; ebp -- Pitch.
  2515. ; edx -- Pitch times 3.
  2516. ; ecx -- bias value 0x80808080, to make code size smaller.
  2517. ; ebx, eax -- Scratch.
  2518. @@: ; Body runs twice: columns 0-3 first, then columns 4-7.
  2519. xor esp,4 ; 1st time: Back off to cache line;
  2520. mov eax,[edi] ; 0A <C3 C2 C1 C0> current pels.
  2521. add eax,ecx ; 0C Add bias.
  2522. mov ebx,[esi] ; 0B <P3 P2 P1 P0> prediction pels.
  2523. sub eax,ebx ; 0D <Cn-Pn> Current - pred, biased.
  2524. mov ebx,[esi+ebp*1] ; 1B
  2525. mov P00+4,eax ; 0E Save <Corr3 Corr2 Corr1 Corr0>
  2526. mov eax,[edi+ebp*1] ; 1A
  2527. sub eax,ebx ; 1D
  2528. mov ebx,[esi+ebp*2] ; 2B
  2529. add eax,ecx ; 1C
  2530. sub ebx,ecx ; 2C Bias folded into the prediction; same result as adding it to the difference.
  2531. mov P10+4,eax ; 1E
  2532. mov eax,[edi+ebp*2] ; 2A
  2533. sub eax,ebx ; 2D
  2534. mov ebx,[esi+ebp*4] ; 4B
  2535. mov P20+4,eax ; 2E
  2536. mov eax,[edi+ebp*4] ; 4A
  2537. sub eax,ebx ; 4D
  2538. mov ebx,[esi+edx*1] ; 3B
  2539. add eax,ecx ; 4C
  2540. sub ebx,ecx ; 3C
  2541. mov P40+4,eax ; 4E
  2542. mov eax,[edi+edx*1] ; 3A
  2543. sub eax,ebx ; 3D
  2544. mov ebx,[esi+edx*2] ; 6B
  2545. mov P30+4,eax ; 3E
  2546. lea esi,[esi+ebp+4] ; Advance to line 1 (and right 4 columns; the -4 below compensates).
  2547. mov eax,[edi+edx*2] ; 6A
  2548. lea edi,[edi+ebp+4] ; Advance to line 1 (and right 4 columns; the -4 below compensates).
  2549. sub eax,ebx ; 6D
  2550. mov ebx,[esi+ebp*4-4] ; 5B
  2551. add eax,ecx ; 6C
  2552. sub ebx,ecx ; 5C
  2553. mov P60+4,eax ; 6E
  2554. mov eax,[edi+ebp*4-4] ; 5A
  2555. sub eax,ebx ; 5D
  2556. mov ebx,[esi+edx*2-4] ; 7B
  2557. mov P50+4,eax ; 5E
  2558. mov eax,[edi+edx*2-4] ; 7A
  2559. sub eax,ebx ; 7D
  2560. sub edi,ebp ; Back off to line 0.
  2561. add eax,ecx ; 7C
  2562. sub esi,ebp ; Back off to line 0.
  2563. test esp,4 ; Do twice.
  2564. mov P70+4,eax ; 7E
  2565. je @b
  2566. xor ecx,ecx ; Clear ecx and ebx; cl and bl become table indices in DoForwardDCT.
  2567. xor ebx,ebx
  2568. mov bl,P00 ; Fetch P0.
  2569. mov cl,P03 ; Fetch P3.
  2570. DoForwardDCT:
  2571. ;=============================================================================
  2572. ;
  2573. ; This section does the Forward Discrete Cosine Transform. It performs a DCT
  2574. ; on a 8*8 block of pels or pel differences. The row transforms are done
  2575. ; first using a table lookup method. Then the columns are done, using
  2576. ; computation.
  2577. ;
  2578. ;
  2579. ; Each intermediate and coefficient is a short. There are four fractional
  2580. ; bits. All coefficients except an intrablock's DC are biased by 08000H.
  2581. ; Perform row transforms.
  2582. ;
  2583. ; Register usage:
  2584. ; ebp - Accumulator for contributions to intermediates I0 (hi) and I2 (lo).
  2585. ; edi - Accumulator for contributions to intermediates I1 (hi) and I3 (lo).
  2586. ; esi - Accumulator for contributions to intermediates I4 (hi) and I6 (lo).
  2587. ; edx - Accumulator for contributions to intermediates I7 (hi) and I5 (lo).
  2588. ; ecx - Pel or pel difference, used as a table index (P3 when a line starts).
  2589. ; ebx - Pel or pel difference, used as a table index (P0 when a line starts).
  2590. ; eax - Place in which to fetch a pel's contribution to two intermediates.
  2591. mov esi,PD P80000_P4545F [ebx*8] ; P0's contribution to I4|I6.
  2592. mov eax,PD P80000_N4545F [ecx*8] ; P3's contribution to I4|I6.
  2593. mov edx,PD P2350B_P6491A [ebx*8] ; P0's contribution to I7|I5.
  2594. mov edi,PD NB18A8_P96831 [ecx*8] ; P3's contribution to I7|I5.
  2595. lea esi,[esi+eax+40004000H] ; P0, P3 contribs to I4|I6, biased.
  2596. mov eax,PD P80000_NA73D7 [ecx*8] ; P3's contribution to I0|I2.
  2597. lea edx,[edx+edi+40004000H] ; P0, P3 contribs to I7|I5, biased.
  2598. mov ebp,PD P80000_PA73D7 [ebx*8] ; P0's contribution to I0|I2.
  2599. mov edi,PD P2350B_N6491A [ecx*8] ; P3's contribution to I1|I3.
  2600. mov cl,P01 ; Fetch P1.
  2601. lea ebp,[ebp+eax+40004000H] ; P0, P3 contribs to I0|I2, biased.
  2602. mov eax,PD NB18A8_N96831 [ebx*8] ; P0's contribution to I1|I3.
  2603. sub edi,eax ; P0, P3 contribs to I1|I3, unbiased.
  2604. mov eax,PD P80000_P4545F [ecx*8] ; P1's contribution to I0|I2.
  2605. add ebp,eax ; P0, P1, P3 contribs to I0|I2.
  2606. mov eax,PD N96831_P2350B [ecx*8] ; P1's contribution to I1|I3.
  2607. sub edi,eax ; P0, P1, P3 contribs to I1|I3, unbiased.
  2608. mov eax,PD P80000_PA73D7 [ecx*8] ; P1's contribution to I4|I6.
  2609. sub esi,eax ; P0, P1, P3 contribs to I4|I6.
  2610. mov bl,P02 ; Fetch P2.
  2611. mov eax,PD P6491A_PB18A8 [ecx*8] ; P1's contribution to I7|I5.
  2612. mov cl,P04 ; Fetch P4.
  2613. sub edx,eax ; P0, P1, P3 contribs to I7|I5.
  2614. mov eax,PD P80000_N4545F [ebx*8] ; P2's contribution to I0|I2.
  2615. add ebp,eax ; P0-P3 contribs to I0|I2.
  2616. mov eax,PD P6491A_NB18A8 [ebx*8] ; P2's contribution to I1|I3.
  2617. add edi,eax ; P0-P3 contribs to I1|I3, unbiased.
  2618. mov eax,PD P80000_NA73D7 [ebx*8] ; P2's contribution to I4|I6.
  2619. sub esi,eax ; P0-P3 contribs to I4|I6.
  2620. mov eax,PD N96831_N2350B [ebx*8] ; P2's contribution to I7|I5.
  2621. sub edx,eax ; P0-P3 contribs to I7|I5.
  2622. mov eax,PD P80000_NA73D7 [ecx*8] ; P4's contribution to I0|I2.
  2623. add ebp,eax ; P0-P4 contribs to I0|I2.
  2624. mov eax,PD P2350B_N6491A [ecx*8] ; P4's contribution to I1|I3.
  2625. sub edi,eax ; P0-P4 contribs to I1|I3, unbiased.
  2626. mov eax,PD P80000_N4545F [ecx*8] ; P4's contribution to I4|I6.
  2627. add esi,eax ; P0-P4 contribs to I4|I6.
  2628. mov bl,P05 ; Fetch P5.
  2629. mov eax,PD NB18A8_P96831 [ecx*8] ; P4's contribution to I7|I5.
  2630. mov cl,P06 ; Fetch P6.
  2631. sub edx,eax ; P0-P4 contribs to I7|I5.
  2632. mov eax,PD P80000_N4545F [ebx*8] ; P5's contribution to I0|I2.
  2633. add ebp,eax ; P0-P5 contribs to I0|I2.
  2634. mov eax,PD P6491A_NB18A8 [ebx*8] ; P5's contribution to I1|I3.
  2635. sub edi,eax ; P0-P5 contribs to I1|I3, unbiased.
  2636. mov eax,PD P80000_NA73D7 [ebx*8] ; P5's contribution to I4|I6.
  2637. sub esi,eax ; P0-P5 contribs to I4|I6.
  2638. mov eax,PD N96831_N2350B [ebx*8] ; P5's contribution to I7|I5.
  2639. add edx,eax ; P0-P5 contribs to I7|I5.
  2640. mov eax,PD P80000_P4545F [ecx*8] ; P6's contribution to I0|I2.
  2641. add ebp,eax ; P0-P6 contribs to I0|I2.
  2642. mov eax,PD N96831_P2350B [ecx*8] ; P6's contribution to I1|I3.
  2643. add edi,eax ; P0-P6 contribs to I1|I3, unbiased.
  2644. mov eax,PD P80000_PA73D7 [ecx*8] ; P6's contribution to I4|I6.
  2645. sub esi,eax ; P0-P6 contribs to I4|I6.
  2646. mov bl,P07 ; Fetch P7.
  2647. mov eax,PD P6491A_PB18A8 [ecx*8] ; P6's contribution to I7|I5.
  2648. mov cl,P13 ; Fetch P3 of line 1 (prefetch for the next line's transform).
  2649. add edx,eax ; P0-P6 contribs to I7|I5.
  2650. mov eax,PD P80000_PA73D7 [ebx*8] ; P7's contribution to I0|I2.
  2651. add ebp,eax ; P0-P7 contribs to I0|I2.
  2652. mov eax,PD P80000_P4545F [ebx*8] ; P7's contribution to I4|I6.
  2653. add esi,eax ; P0-P7 contribs to I4|I6.
  2654. mov eax,PD NB18A8_N96831 [ebx*8] ; P7's contribution to I1|I3.
  2655. mov I00I02,ebp ; Store I0|I2 for line 0.
  2656. mov I04I06,esi ; Store I4|I6 for line 0.
  2657. lea edi,[edi+eax+40004000H] ; P0-P7 contribs to I1|I3, biased.
  2658. mov eax,PD P2350B_P6491A [ebx*8] ; P7's contribution to I7|I5.
  2659. sub edx,eax ; P0-P7 contribs to I7|I5.
  2660. mov bl,P10 ; Fetch P0 of line 1 (prefetch for the next line's transform).
  2661. mov I01I03,edi ; Store I1|I3 for line 0.
  2662. mov I07I05,edx ; Store I7|I5 for line 0.
  2663. mov esi,PD P80000_P4545F [ebx*8]
  2664. mov eax,PD P80000_N4545F [ecx*8]
  2665. mov edx,PD P2350B_P6491A [ebx*8]
  2666. mov edi,PD NB18A8_P96831 [ecx*8]
  2667. lea esi,[esi+eax+40004000H]
  2668. mov eax,PD P80000_NA73D7 [ecx*8]
  2669. lea edx,[edx+edi+40004000H]
  2670. mov ebp,PD P80000_PA73D7 [ebx*8]
  2671. mov edi,PD P2350B_N6491A [ecx*8]
  2672. mov cl,P11
  2673. lea ebp,[ebp+eax+40004000H]
  2674. mov eax,PD NB18A8_N96831 [ebx*8]
  2675. sub edi,eax
  2676. mov eax,PD P80000_P4545F [ecx*8]
  2677. add ebp,eax
  2678. mov eax,PD N96831_P2350B [ecx*8]
  2679. sub edi,eax
  2680. mov eax,PD P80000_PA73D7 [ecx*8]
  2681. sub esi,eax
  2682. mov bl,P12
  2683. mov eax,PD P6491A_PB18A8 [ecx*8]
  2684. mov cl,P14
  2685. sub edx,eax
  2686. mov eax,PD P80000_N4545F [ebx*8]
  2687. add ebp,eax
  2688. mov eax,PD P6491A_NB18A8 [ebx*8]
  2689. add edi,eax
  2690. mov eax,PD P80000_NA73D7 [ebx*8]
  2691. sub esi,eax
  2692. mov eax,PD N96831_N2350B [ebx*8]
  2693. sub edx,eax
  2694. mov eax,PD P80000_NA73D7 [ecx*8]
  2695. add ebp,eax
  2696. mov eax,PD P2350B_N6491A [ecx*8]
  2697. sub edi,eax
  2698. mov eax,PD P80000_N4545F [ecx*8]
  2699. add esi,eax
  2700. mov bl,P15
  2701. mov eax,PD NB18A8_P96831 [ecx*8]
  2702. mov cl,P16
  2703. sub edx,eax
  2704. mov eax,PD P80000_N4545F [ebx*8]
  2705. add ebp,eax
  2706. mov eax,PD P6491A_NB18A8 [ebx*8]
  2707. sub edi,eax
  2708. mov eax,PD P80000_NA73D7 [ebx*8]
  2709. sub esi,eax
  2710. mov eax,PD N96831_N2350B [ebx*8]
  2711. add edx,eax
  2712. mov eax,PD P80000_P4545F [ecx*8]
  2713. add ebp,eax
  2714. mov eax,PD N96831_P2350B [ecx*8]
  2715. add edi,eax
  2716. mov eax,PD P80000_PA73D7 [ecx*8]
  2717. sub esi,eax
  2718. mov bl,P17
  2719. mov eax,PD P6491A_PB18A8 [ecx*8]
  2720. mov cl,P23
  2721. add edx,eax
  2722. mov eax,PD P80000_PA73D7 [ebx*8]
  2723. add ebp,eax
  2724. mov eax,PD P80000_P4545F [ebx*8]
  2725. add esi,eax
  2726. mov eax,PD NB18A8_N96831 [ebx*8]
  2727. mov I10I12,ebp
  2728. mov I14I16,esi
  2729. lea edi,[edi+eax+40004000H]
  2730. mov eax,PD P2350B_P6491A [ebx*8]
  2731. sub edx,eax
  2732. mov bl,P20
  2733. mov I11I13,edi
  2734. mov I17I15,edx
  2735. mov esi,PD P80000_P4545F [ebx*8]
  2736. mov eax,PD P80000_N4545F [ecx*8]
  2737. mov edx,PD P2350B_P6491A [ebx*8]
  2738. mov edi,PD NB18A8_P96831 [ecx*8]
  2739. lea esi,[esi+eax+40004000H]
  2740. mov eax,PD P80000_NA73D7 [ecx*8]
  2741. lea edx,[edx+edi+40004000H]
  2742. mov ebp,PD P80000_PA73D7 [ebx*8]
  2743. mov edi,PD P2350B_N6491A [ecx*8]
  2744. mov cl,P21
  2745. lea ebp,[ebp+eax+40004000H]
  2746. mov eax,PD NB18A8_N96831 [ebx*8]
  2747. sub edi,eax
  2748. mov eax,PD P80000_P4545F [ecx*8]
  2749. add ebp,eax
  2750. mov eax,PD N96831_P2350B [ecx*8]
  2751. sub edi,eax
  2752. mov eax,PD P80000_PA73D7 [ecx*8]
  2753. sub esi,eax
  2754. mov bl,P22
  2755. mov eax,PD P6491A_PB18A8 [ecx*8]
  2756. mov cl,P24
  2757. sub edx,eax
  2758. mov eax,PD P80000_N4545F [ebx*8]
  2759. add ebp,eax
  2760. mov eax,PD P6491A_NB18A8 [ebx*8]
  2761. add edi,eax
  2762. mov eax,PD P80000_NA73D7 [ebx*8]
  2763. sub esi,eax
  2764. mov eax,PD N96831_N2350B [ebx*8]
  2765. sub edx,eax
  2766. mov eax,PD P80000_NA73D7 [ecx*8]
  2767. add ebp,eax
  2768. mov eax,PD P2350B_N6491A [ecx*8]
  2769. sub edi,eax
  2770. mov eax,PD P80000_N4545F [ecx*8]
  2771. add esi,eax
  2772. mov bl,P25
  2773. mov eax,PD NB18A8_P96831 [ecx*8]
  2774. mov cl,P26
  2775. sub edx,eax
  2776. mov eax,PD P80000_N4545F [ebx*8]
  2777. add ebp,eax
  2778. mov eax,PD P6491A_NB18A8 [ebx*8]
  2779. sub edi,eax
  2780. mov eax,PD P80000_NA73D7 [ebx*8]
  2781. sub esi,eax
  2782. mov eax,PD N96831_N2350B [ebx*8]
  2783. add edx,eax
  2784. mov eax,PD P80000_P4545F [ecx*8]
  2785. add ebp,eax
  2786. mov eax,PD N96831_P2350B [ecx*8]
  2787. add edi,eax
  2788. mov eax,PD P80000_PA73D7 [ecx*8]
  2789. sub esi,eax
  2790. mov bl,P27
  2791. mov eax,PD P6491A_PB18A8 [ecx*8]
  2792. mov cl,P33
  2793. add edx,eax
  2794. mov eax,PD P80000_PA73D7 [ebx*8]
  2795. add ebp,eax
  2796. mov eax,PD P80000_P4545F [ebx*8]
  2797. add esi,eax
  2798. mov eax,PD NB18A8_N96831 [ebx*8]
  2799. mov I20I22,ebp
  2800. mov I24I26,esi
  2801. lea edi,[edi+eax+40004000H]
  2802. mov eax,PD P2350B_P6491A [ebx*8]
  2803. sub edx,eax
  2804. mov bl,P30
  2805. mov I21I23,edi
  2806. mov I27I25,edx
  2807. mov esi,PD P80000_P4545F [ebx*8]
  2808. mov eax,PD P80000_N4545F [ecx*8]
  2809. mov edx,PD P2350B_P6491A [ebx*8]
  2810. mov edi,PD NB18A8_P96831 [ecx*8]
  2811. lea esi,[esi+eax+40004000H]
  2812. mov eax,PD P80000_NA73D7 [ecx*8]
  2813. lea edx,[edx+edi+40004000H]
  2814. mov ebp,PD P80000_PA73D7 [ebx*8]
  2815. mov edi,PD P2350B_N6491A [ecx*8]
  2816. mov cl,P31
  2817. lea ebp,[ebp+eax+40004000H]
  2818. mov eax,PD NB18A8_N96831 [ebx*8]
  2819. sub edi,eax
  2820. mov eax,PD P80000_P4545F [ecx*8]
  2821. add ebp,eax
  2822. mov eax,PD N96831_P2350B [ecx*8]
  2823. sub edi,eax
  2824. mov eax,PD P80000_PA73D7 [ecx*8]
  2825. sub esi,eax
  2826. mov bl,P32
  2827. mov eax,PD P6491A_PB18A8 [ecx*8]
  2828. mov cl,P34
  2829. sub edx,eax
  2830. mov eax,PD P80000_N4545F [ebx*8]
  2831. add ebp,eax
  2832. mov eax,PD P6491A_NB18A8 [ebx*8]
  2833. add edi,eax
  2834. mov eax,PD P80000_NA73D7 [ebx*8]
  2835. sub esi,eax
  2836. mov eax,PD N96831_N2350B [ebx*8]
  2837. sub edx,eax
  2838. mov eax,PD P80000_NA73D7 [ecx*8]
  2839. add ebp,eax
  2840. mov eax,PD P2350B_N6491A [ecx*8]
  2841. sub edi,eax
  2842. mov eax,PD P80000_N4545F [ecx*8]
  2843. add esi,eax
  2844. mov bl,P35
  2845. mov eax,PD NB18A8_P96831 [ecx*8]
  2846. mov cl,P36
  2847. sub edx,eax
  2848. mov eax,PD P80000_N4545F [ebx*8]
  2849. add ebp,eax
  2850. mov eax,PD P6491A_NB18A8 [ebx*8]
  2851. sub edi,eax
  2852. mov eax,PD P80000_NA73D7 [ebx*8]
  2853. sub esi,eax
  2854. mov eax,PD N96831_N2350B [ebx*8]
  2855. add edx,eax
  2856. mov eax,PD P80000_P4545F [ecx*8]
  2857. add ebp,eax
  2858. mov eax,PD N96831_P2350B [ecx*8]
  2859. add edi,eax
  2860. mov eax,PD P80000_PA73D7 [ecx*8]
  2861. sub esi,eax
  2862. mov bl,P37
  2863. mov eax,PD P6491A_PB18A8 [ecx*8]
  2864. mov cl,P43
  2865. add edx,eax
  2866. mov eax,PD P80000_PA73D7 [ebx*8]
  2867. add ebp,eax
  2868. mov eax,PD P80000_P4545F [ebx*8]
  2869. add esi,eax
  2870. mov eax,PD NB18A8_N96831 [ebx*8]
  2871. mov I30I32,ebp
  2872. mov I34I36,esi
  2873. lea edi,[edi+eax+40004000H]
  2874. mov eax,PD P2350B_P6491A [ebx*8]
  2875. sub edx,eax
  2876. mov bl,P40
  2877. mov I31I33,edi
  2878. mov I37I35,edx
  2879. mov esi,PD P80000_P4545F [ebx*8]
  2880. mov eax,PD P80000_N4545F [ecx*8]
  2881. mov edx,PD P2350B_P6491A [ebx*8]
  2882. mov edi,PD NB18A8_P96831 [ecx*8]
  2883. add esi,eax
  2884. mov eax,PD P80000_NA73D7 [ecx*8]
  2885. add edx,edi
  2886. mov ebp,PD P80000_PA73D7 [ebx*8]
  2887. mov edi,PD P2350B_N6491A [ecx*8]
  2888. mov cl,P41
  2889. add ebp,eax
  2890. mov eax,PD NB18A8_N96831 [ebx*8]
  2891. sub edi,eax
  2892. mov eax,PD P80000_P4545F [ecx*8]
  2893. add ebp,eax
  2894. mov eax,PD N96831_P2350B [ecx*8]
  2895. sub edi,eax
  2896. mov eax,PD P80000_PA73D7 [ecx*8]
  2897. sub esi,eax
  2898. mov bl,P42
  2899. mov eax,PD P6491A_PB18A8 [ecx*8]
  2900. mov cl,P44
  2901. sub edx,eax
  2902. mov eax,PD P80000_N4545F [ebx*8]
  2903. add ebp,eax
  2904. mov eax,PD P6491A_NB18A8 [ebx*8]
  2905. add edi,eax
  2906. mov eax,PD P80000_NA73D7 [ebx*8]
  2907. sub esi,eax
  2908. mov eax,PD N96831_N2350B [ebx*8]
  2909. sub edx,eax
  2910. mov eax,PD P80000_NA73D7 [ecx*8]
  2911. add ebp,eax
  2912. mov eax,PD P2350B_N6491A [ecx*8]
  2913. sub edi,eax
  2914. mov eax,PD P80000_N4545F [ecx*8]
  2915. add esi,eax
  2916. mov bl,P45
  2917. mov eax,PD NB18A8_P96831 [ecx*8]
  2918. mov cl,P46
  2919. sub edx,eax
  2920. mov eax,PD P80000_N4545F [ebx*8]
  2921. add ebp,eax
  2922. mov eax,PD P6491A_NB18A8 [ebx*8]
  2923. sub edi,eax
  2924. mov eax,PD P80000_NA73D7 [ebx*8]
  2925. sub esi,eax
  2926. mov eax,PD N96831_N2350B [ebx*8]
  2927. add edx,eax
  2928. mov eax,PD P80000_P4545F [ecx*8]
  2929. add ebp,eax
  2930. mov eax,PD N96831_P2350B [ecx*8]
  2931. add edi,eax
  2932. mov eax,PD P80000_PA73D7 [ecx*8]
  2933. sub esi,eax
  2934. mov bl,P47
  2935. mov eax,PD P6491A_PB18A8 [ecx*8]
  2936. mov cl,P53
  2937. add edx,eax
  2938. mov eax,PD P80000_PA73D7 [ebx*8]
  2939. add ebp,eax
  2940. mov eax,PD P80000_P4545F [ebx*8]
  2941. add esi,eax
  2942. mov eax,PD NB18A8_N96831 [ebx*8]
  2943. mov I40I42,ebp
  2944. mov I44I46,esi
  2945. add edi,eax
  2946. mov eax,PD P2350B_P6491A [ebx*8]
  2947. sub edx,eax
  2948. mov bl,P50
  2949. mov I41I43,edi
  2950. mov I47I45,edx
  2951. mov esi,PD P80000_P4545F [ebx*8]
  2952. mov eax,PD P80000_N4545F [ecx*8]
  2953. mov edx,PD P2350B_P6491A [ebx*8]
  2954. mov edi,PD NB18A8_P96831 [ecx*8]
  2955. add esi,eax
  2956. mov eax,PD P80000_NA73D7 [ecx*8]
  2957. add edx,edi
  2958. mov ebp,PD P80000_PA73D7 [ebx*8]
  2959. mov edi,PD P2350B_N6491A [ecx*8]
  2960. mov cl,P51
  2961. add ebp,eax
  2962. mov eax,PD NB18A8_N96831 [ebx*8]
  2963. sub edi,eax
  2964. mov eax,PD P80000_P4545F [ecx*8]
  2965. add ebp,eax
  2966. mov eax,PD N96831_P2350B [ecx*8]
  2967. sub edi,eax
  2968. mov eax,PD P80000_PA73D7 [ecx*8]
  2969. sub esi,eax
  2970. mov bl,P52
  2971. mov eax,PD P6491A_PB18A8 [ecx*8]
  2972. mov cl,P54
  2973. sub edx,eax
  2974. mov eax,PD P80000_N4545F [ebx*8]
  2975. add ebp,eax
  2976. mov eax,PD P6491A_NB18A8 [ebx*8]
  2977. add edi,eax
  2978. mov eax,PD P80000_NA73D7 [ebx*8]
  2979. sub esi,eax
  2980. mov eax,PD N96831_N2350B [ebx*8]
  2981. sub edx,eax
  2982. mov eax,PD P80000_NA73D7 [ecx*8]
  2983. add ebp,eax
  2984. mov eax,PD P2350B_N6491A [ecx*8]
  2985. sub edi,eax
  2986. mov eax,PD P80000_N4545F [ecx*8]
  2987. add esi,eax
  2988. mov bl,P55
  2989. mov eax,PD NB18A8_P96831 [ecx*8]
  2990. mov cl,P56
  2991. sub edx,eax
  2992. mov eax,PD P80000_N4545F [ebx*8]
  2993. add ebp,eax
  2994. mov eax,PD P6491A_NB18A8 [ebx*8]
  2995. sub edi,eax
  2996. mov eax,PD P80000_NA73D7 [ebx*8]
  2997. sub esi,eax
  2998. mov eax,PD N96831_N2350B [ebx*8]
  2999. add edx,eax
  3000. mov eax,PD P80000_P4545F [ecx*8]
  3001. add ebp,eax
  3002. mov eax,PD N96831_P2350B [ecx*8]
  3003. add edi,eax
  3004. mov eax,PD P80000_PA73D7 [ecx*8]
  3005. sub esi,eax
  3006. mov bl,P57
  3007. mov eax,PD P6491A_PB18A8 [ecx*8]
  3008. mov cl,P63
  3009. add edx,eax
  3010. mov eax,PD P80000_PA73D7 [ebx*8]
  3011. add ebp,eax
  3012. mov eax,PD P80000_P4545F [ebx*8]
  3013. add esi,eax
  3014. mov eax,PD NB18A8_N96831 [ebx*8]
  3015. mov I50I52,ebp
  3016. mov I54I56,esi
  3017. add edi,eax
  3018. mov eax,PD P2350B_P6491A [ebx*8]
  3019. sub edx,eax
  3020. mov bl,P60
  3021. mov I51I53,edi
  3022. mov I57I55,edx
  3023. mov esi,PD P80000_P4545F [ebx*8]
  3024. mov eax,PD P80000_N4545F [ecx*8]
  3025. mov edx,PD P2350B_P6491A [ebx*8]
  3026. mov edi,PD NB18A8_P96831 [ecx*8]
  3027. add esi,eax
  3028. mov eax,PD P80000_NA73D7 [ecx*8]
  3029. add edx,edi
  3030. mov ebp,PD P80000_PA73D7 [ebx*8]
  3031. mov edi,PD P2350B_N6491A [ecx*8]
  3032. mov cl,P61
  3033. add ebp,eax
  3034. mov eax,PD NB18A8_N96831 [ebx*8]
  3035. sub edi,eax
  3036. mov eax,PD P80000_P4545F [ecx*8]
  3037. add ebp,eax
  3038. mov eax,PD N96831_P2350B [ecx*8]
  3039. sub edi,eax
  3040. mov eax,PD P80000_PA73D7 [ecx*8]
  3041. sub esi,eax
  3042. mov bl,P62
  3043. mov eax,PD P6491A_PB18A8 [ecx*8]
  3044. mov cl,P64
  3045. sub edx,eax
  3046. mov eax,PD P80000_N4545F [ebx*8]
  3047. add ebp,eax
  3048. mov eax,PD P6491A_NB18A8 [ebx*8]
  3049. add edi,eax
  3050. mov eax,PD P80000_NA73D7 [ebx*8]
  3051. sub esi,eax
  3052. mov eax,PD N96831_N2350B [ebx*8]
  3053. sub edx,eax
  3054. mov eax,PD P80000_NA73D7 [ecx*8]
  3055. add ebp,eax
  3056. mov eax,PD P2350B_N6491A [ecx*8]
  3057. sub edi,eax
  3058. mov eax,PD P80000_N4545F [ecx*8]
  3059. add esi,eax
  3060. mov bl,P65
  3061. mov eax,PD NB18A8_P96831 [ecx*8]
  3062. mov cl,P66
  3063. sub edx,eax
  3064. mov eax,PD P80000_N4545F [ebx*8]
  3065. add ebp,eax
  3066. mov eax,PD P6491A_NB18A8 [ebx*8]
  3067. sub edi,eax
  3068. mov eax,PD P80000_NA73D7 [ebx*8]
  3069. sub esi,eax
  3070. mov eax,PD N96831_N2350B [ebx*8]
  3071. add edx,eax
  3072. mov eax,PD P80000_P4545F [ecx*8]
  3073. add ebp,eax
  3074. mov eax,PD N96831_P2350B [ecx*8]
  3075. add edi,eax
  3076. mov eax,PD P80000_PA73D7 [ecx*8]
  3077. sub esi,eax
  3078. mov bl,P67
  3079. mov eax,PD P6491A_PB18A8 [ecx*8]
  3080. mov cl,P73
  3081. add edx,eax
  3082. mov eax,PD P80000_PA73D7 [ebx*8]
  3083. add ebp,eax
  3084. mov eax,PD P80000_P4545F [ebx*8]
  3085. add esi,eax
  3086. mov eax,PD NB18A8_N96831 [ebx*8]
  3087. mov I60I62,ebp
  3088. mov I64I66,esi
  3089. add edi,eax
  3090. mov eax,PD P2350B_P6491A [ebx*8]
  3091. sub edx,eax
  3092. mov bl,P70
  3093. mov I61I63,edi
  3094. mov I67I65,edx
  3095. mov esi,PD P80000_P4545F [ebx*8]
  3096. mov eax,PD P80000_N4545F [ecx*8]
  3097. mov edx,PD P2350B_P6491A [ebx*8]
  3098. mov edi,PD NB18A8_P96831 [ecx*8]
  3099. add esi,eax
  3100. mov eax,PD P80000_NA73D7 [ecx*8]
  3101. add edx,edi
  3102. mov ebp,PD P80000_PA73D7 [ebx*8]
  3103. mov edi,PD P2350B_N6491A [ecx*8]
  3104. mov cl,P71
  3105. add ebp,eax
  3106. mov eax,PD NB18A8_N96831 [ebx*8]
  3107. sub edi,eax
  3108. mov eax,PD P80000_P4545F [ecx*8]
  3109. add ebp,eax
  3110. mov eax,PD N96831_P2350B [ecx*8]
  3111. sub edi,eax
  3112. mov eax,PD P80000_PA73D7 [ecx*8]
  3113. sub esi,eax
  3114. mov bl,P72
  3115. mov eax,PD P6491A_PB18A8 [ecx*8]
  3116. mov cl,P74
  3117. sub edx,eax
  3118. mov eax,PD P80000_N4545F [ebx*8]
  3119. add ebp,eax
  3120. mov eax,PD P6491A_NB18A8 [ebx*8]
  3121. add edi,eax
  3122. mov eax,PD P80000_NA73D7 [ebx*8]
  3123. sub esi,eax
  3124. mov eax,PD N96831_N2350B [ebx*8]
  3125. sub edx,eax
  3126. mov eax,PD P80000_NA73D7 [ecx*8]
  3127. add ebp,eax
  3128. mov eax,PD P2350B_N6491A [ecx*8]
  3129. sub edi,eax
  3130. mov eax,PD P80000_N4545F [ecx*8]
  3131. add esi,eax
  3132. mov bl,P75
  3133. mov eax,PD NB18A8_P96831 [ecx*8]
  3134. mov cl,P76
  3135. sub edx,eax
  3136. mov eax,PD P80000_N4545F [ebx*8]
  3137. add ebp,eax
  3138. mov eax,PD P6491A_NB18A8 [ebx*8]
  3139. sub edi,eax
  3140. mov eax,PD P80000_NA73D7 [ebx*8]
  3141. sub esi,eax
  3142. mov eax,PD N96831_N2350B [ebx*8]
  3143. add edx,eax
  3144. mov eax,PD P80000_P4545F [ecx*8]
  3145. add ebp,eax
  3146. mov eax,PD N96831_P2350B [ecx*8]
  3147. add edi,eax
  3148. mov eax,PD P80000_PA73D7 [ecx*8]
  3149. sub esi,eax
  3150. mov bl,P77
  3151. mov eax,PD P6491A_PB18A8 [ecx*8]
  3152. mov ecx,I00I02 ; Fetch I0 (upper_lim <skew>) = 2000 4000
  3153. ; ; (lower_lim is -upper_limit)
  3154. add edx,eax
  3155. mov eax,PD P80000_PA73D7 [ebx*8]
  3156. add ebp,eax ; I70I72, aka I7. 2000 0000
  3157. mov eax,PD P80000_P4545F [ebx*8]
  3158. add esi,eax
  3159. mov eax,PD NB18A8_N96831 [ebx*8]
  3160. mov I74I76,esi
  3161. mov esi,I30I32 ; Fetch I3 2000 4000
  3162. add edi,eax
  3163. mov eax,I40I42 ; Fetch I4 2000 0000
  3164. sub esi,eax ; I3 - I4 4000 4000
  3165. sub ecx,ebp ; I0 - I7 4000 4000
  3166. shr ecx,1 ; R7 = (I0-I7)/2 (dirty) 2000 2000
  3167. and esi,0FFFEFFFFH ; pre-clean R4
  3168. shr esi,1 ; R4 = (I3-I4)/2 (clean) 2000 2000
  3169. and ecx,0FFFF7FFFH ; R7 = (I0-I7)/2 (clean) 2000 2000
  3170. mov ebx,PD P2350B_P6491A [ebx*8]
  3171. mov I71I73,edi
  3172. sub edx,ebx
  3173. lea ebx,[ecx+ecx*2] ; 3R7 6000 6000
  3174. mov I77I75,edx
  3175. lea edi,[esi+esi*2] ; 3R4 6000 6000
  3176. ; eax: I4 2000 0000
  3177. ; ebx: 3R7 6000 6000
  3178. ; ecx: R7 2000 2000
  3179. ; edx: available
  3180. ; esi: R4 2000 2000
  3181. ; edi: 3R4 6000 6000
  3182. ; ebp: I7 2000 0000
  3183. lea ebp,[ebp+ecx+40004000H] ; R0 = (I0+I7)/2 2000 6000
  3184. add eax,esi ; R3 = (I3+I4)/2 2000 2000
  3185. shr ecx,1 ; R7/2 (dirty) 1000 1000
  3186. and esi,0FFFEFFFFH ; pre-clean
  3187. shr esi,1 ; R4/2 (clean) 1000 1000
  3188. and ecx,0FFFF7FFFH ; clean
  3189. add ebx,ecx ; 7R7/2 7000 7000
  3190. add edi,esi ; 7R4/2 7000 7000
  3191. shr ebx,6 ; 7R7/128 (dirty) 01C0 01C0
  3192. and edi,0FFC0FFFFH ; pre-clean
  3193. shr edi,6 ; 7R4/128 (clean) 01C0 01C0
  3194. and ebx,0FFFF03FFH ; clean
  3195. add ebx,ecx ; 71R7/128 11C0 11C0
  3196. add edi,esi ; 71R4/128 11C0 11C0
  3197. lea edx,[eax+ebp-40004000H] ; S0 = R0 + R3 4000 4000
  3198. sub ebp,eax ; S3 = R0 - R3 4000 4000
  3199. lea ecx,[ebx+ebx*2+6E406E40H] ; 213R7/128 3540 A380
  3200. lea esi,[edi+edi*2+27402740H] ; 213R4/128 3540 5C80
  3201. shr ecx,1 ; 213R7/256 (dirty) 1AA0 51C0
  3202. and esi,0FFFEFFFFH ; pre-clean
  3203. shr esi,1 ; 213R4/256 (clean) 1AA0 2E40
  3204. and ecx,0FFFF7FFFH ; clean
  3205. sub ecx,edi ; S7 = (213R7 - 142R4)/256 2C60 4000
  3206. mov S0,edx ; Free register for work.
  3207. mov S3,ebp ; Free register for work.
  3208. lea esi,[esi+ebx+80008000H] ; S4 = (142R7 + 213R4)/256 2C60 C000
  3209. mov S7,ecx ; Free register for work.
  3210. mov eax,I10I12 ; Fetch I1 2000 4000
  3211. mov S4,esi ; Free register for work.
  3212. ; mem: S4 2C60 C000
  3213. ; mem: S7 2C60 4000
  3214. ; mem: S0 4000 4000
  3215. ; mem: S3 4000 4000
  3216. mov ebx,I20I22 ; Fetch I2 2000 4000
  3217. mov ecx,I50I52 ; Fetch I5 2000 0000
  3218. mov edx,I60I62 ; Fetch I6 2000 0000
  3219. sub eax,edx ; I1 - I6 4000 4000
  3220. sub ebx,ecx ; I2 - I5 4000 4000
  3221. shr eax,1 ; R6 = (I1-I6)/2 (dirty) 2000 2000
  3222. and ebx,0FFFEFFFFH ; pre-clean R5
  3223. shr ebx,1 ; R5 = (I2-I5)/2 (clean) 2000 2000
  3224. and eax,0FFFF7FFFH ; R6 = (I1-I6)/2 (clean) 2000 2000
  3225. ; eax: R6 2000 2000
  3226. ; ebx: R5 2000 2000
  3227. ; ecx: I5 2000 0000
  3228. ; edx: I6 2000 0000
  3229. ; mem: S4 2C60 C000
  3230. ; mem: S7 2C60 4000
  3231. ; mem: S0 4000 4000
  3232. ; mem: S3 4000 4000
  3233. mov esi,ebx ; R5 2000 2000
  3234. mov edi,eax ; R6 2000 2000
  3235. shr esi,6 ; R5/64 0080 0080
  3236. and edi,0FFC0FFFFH ; pre-clean
  3237. shr edi,6 ; R6/64 0080 0080
  3238. and esi,0FFFF03FFH ; clean
  3239. lea edx,[eax+edx+20002000H] ; R1 = (I1+I6)/2 2000 4000
  3240. lea ecx,[ecx+ebx-20002000H] ; R2 = (I2+I5)/2 2000 0000
  3241. lea ebp,[ebx+ebx*2] ; 3R5 6000 6000
  3242. sub ebx,esi ; 63R5/64 1F80 1F80
  3243. shr ebp,4 ; 3R5/16 (dirty) 0600 0600
  3244. lea esi,[eax+eax*2] ; 3R6 6000 6000
  3245. sub eax,edi ; 63R6/64 1F80 1F80
  3246. mov edi,ebx ; 63R5/64 1F80 1F80
  3247. shr edi,7 ; 63R5/8192 (dirty) 003F 003F
  3248. and ebp,0FFFF0FFFH ; clean
  3249. shr esi,4 ; 3R6/16 (dirty) 0600 0600
  3250. and edi,0FFFF01FFH ; clean
  3251. and esi,0FFFF0FFFH ; clean
  3252. sub edx,ecx ; S2 = R1 - R2 4000 4000
  3253. lea edi,[edi+ebp-46BF46BFH] ; 1599R5/8192 063F -4080
  3254. mov ebp,eax ; 63R6/64 1F80 1F80
  3255. shr ebp,7 ; 63R6/8192 (dirty) 003F 003F
  3256. sub eax,edi ; S6 = 8064R6/8192 - 1599R5/8192 25BF 6000
  3257. and ebp,0FFFF01FFH ; clean
  3258. lea ecx,[edx+ecx*2-80008000H] ; S1 = R1 + R2 4000 -4000
  3259. add ebp,esi ; 1599R6/8192 063F 063F
  3260. mov esi,S0 ; Reload S0 4000 4000
  3261. mov edi,CoeffStream ; Fetch addr at which to place blk of coeffs.
  3262. sub esi,ecx ; C4 = T1 = S0 - S1 8000 8000
  3263. lea ebx,[ebx+ebp-45BF45BFH] ; S5 = 8064R5/8192 + 1599R6/8192 25BF -2000
  3264. mov ebp,S4 ; Reload S4 2C60 C000
  3265. ; eax: S6 25BF 6000
  3266. ; ebx: S5 25BF -2000
  3267. ; ecx: S1 4000 -4000
  3268. ; edx: S2 4000 4000
  3269. ; esi: C4 8000 8000
  3270. ; edi: Destination pointer.
  3271. ; ebp: S4 2C60 C000
  3272. ; mem: S7 2C60 4000
  3273. ; mem: S3 4000 4000
  3274. sub ebp,eax ; T6 = S4 - S6 521F 6000
  3275. mov PD [edi+C40C42],esi ; Store coeffs C40 and C42.
  3276. lea ecx,[esi+ecx*2+80008000H] ; C0 = T0 = S0 + S1 8000 8000
  3277. mov esi,S7 ; Reload S7 2C60 4000
  3278. sub esi,ebx ; T5 = S7 - S5 521F 6000
  3279. lea eax,[ebp+eax*2-0C000C000H] ; T4 = S4 + S6 521F 6000
  3280. mov PD [edi+C00C02],ecx ; Store coeffs C00 and C02.
  3281. mov ecx,ebp ; T6 521F 6000
  3282. shr ebp,2 ; T6/4 (dirty) 1487 1800
  3283. lea ebx,[esi+ebx*2+0C000C000H] ; T7 = S7 + S5 521F E000
  3284. ; eax: T4 521F 6000
  3285. ; ebx: T7 521F 6000
  3286. ; ecx: T6 521F 6000
  3287. ; edx: S2 4000 4000
  3288. ; esi: T5 521F 6000
  3289. ; edi: Destination pointer.
  3290. ; ebp: T6/4 (dirty) 1487 1800
  3291. ; mem: S3 4000 4000
  3292. ; done: C0, C4
  3293. and ebp,0FFFF3FFFH ; T6/4 (clean) 1487 1800
  3294. sub ebx,eax ; C7 = T7 - T4 <7642> 8000
  3295. add ecx,ebp ; 5T6/4 66A6 7800
  3296. mov PD [edi+C70C72],ebx ; Store coeffs C70 and C72.
  3297. mov ebp,ecx ; 5T6/4 66A6 7800
  3298. and ecx,0FFF8FFFFH ; pre-clean
  3299. shr ecx,3 ; 5T6/32 (clean) 0CD4 0F00
  3300. lea eax,[ebx+eax*2-0C000C000H] ; C1 = T7 + T4 <7642> 8000
  3301. mov ebx,esi ; T5 521F 6000
  3302. and esi,0FFFCFFFFH ; pre-clean
  3303. shr esi,2 ; T5/4 (clean) 1487 1800
  3304. lea ecx,[ecx+ebp-07000700H] ; C5 = 45T6/32 737A 8000
  3305. mov PD [edi+C50C52],ecx ; Store coeffs C50 and C52.
  3306. add esi,ebx ; 5T5/4 66A6 7800
  3307. mov ebx,esi ; 5T5/4 66A6 7800
  3308. and esi,0FFF8FFFFH ; pre-clean
  3309. shr esi,3 ; 5T5/32 (clean) 0CD4 0F00
  3310. mov ebp,S3 ; Reload S3 4000 4000
  3311. mov ecx,edx ; S2 4000 4000
  3312. lea esi,[esi+ebx-07000700H] ; C3 = 45T5/32 737A 8000
  3313. mov ebx,ebp ; S3 4000 4000
  3314. ;
  3315. ; eax: C1 521E 8000
  3316. ; ebx: S3 4000 4000
  3317. ; ecx: S2 4000 4000
  3318. ; edx: S2 4000 4000
  3319. ; esi: C3 737A 8000
  3320. ; edi: Destination pointer.
  3321. ; ebp: S3 4000 4000
  3322. ; done: C0, C4, C5, C7
  3323. shr ebp,2 ; S3/4 (dirty) 1000 1000
  3324. and ecx,0FFFCFFFFH ; pre-clean
  3325. shr ecx,2 ; S2/4 (clean) 1000 1000
  3326. and ebp,0FFFF3FFFH ; S3/4 (clean) 1000 1000
  3327. mov PD [edi+C10C12],eax ; Store coeffs C10 and C12.
  3328. mov PD [edi+C30C32],esi ; Store coeffs C30 and C32.
  3329. lea eax,[edx+ecx] ; 5S2/4 5000 5000
  3330. lea esi,[ebx+ebp] ; 5S3/4 5000 5000
  3331. shr ebp,2 ; S3/16 (dirty) 0400 0400
  3332. and ecx,0FFFCFFFFH ; pre-clean
  3333. shr ecx,2 ; S2/16 (clean) 0400 0400
  3334. and ebp,0FFFF3FFFH ; S3/16 (clean) 0400 0400
  3335. add ecx,eax ; 21S2/16 5400 5400
  3336. add ebp,esi ; 21S3/16 5400 5400
  3337. shr eax,5 ; 5S2/128 (dirty) 0280 0280
  3338. and esi,0FFE0FFFFH ; pre-clean
  3339. shr esi,5 ; 5S3/128 (clean) 0280 0280
  3340. and eax,0FFFF07FFH ; 5S2/128 (clean) 0280 0280
  3341. shr edx,1 ; S2/2 (dirty) 2000 2000
  3342. and ebx,0FFFEFFFFH ; pre-clean
  3343. shr ebx,1 ; S3/2 (clean) 2000 2000
  3344. and edx,0FFFF7FFFH ; S2/2 (clean) 2000 2000
  3345. sub ebx,ecx ; (64S3 - 168S2) / 128 7400 -3400
  3346. add eax,ebp ; (5S2 + 168S3) / 128 5680 5680
  3347. mov ecx,I01I03
  3348. mov ebp,I71I73
  3349. lea ebx,[ebx+esi+0B180B180H] ; C6 = (69S3 - 168S2) / 128 7680 8000
  3350. lea edx,[eax+edx+009800980H] ; C2 = (69S2 + 168S3) / 128 7680 8000
  3351. mov esi,I31I33
  3352. mov eax,I41I43
  3353. sub esi,eax
  3354. sub ecx,ebp
  3355. shr ecx,1
  3356. and esi,0FFFEFFFFH
  3357. shr esi,1
  3358. and ecx,0FFFF7FFFH
  3359. mov PD [edi+C60C62],ebx
  3360. mov PD [edi+C20C22],edx
  3361. lea ebx,[ecx+ecx*2]
  3362. lea edi,[esi+esi*2]
  3363. lea ebp,[ebp+ecx+40004000H]
  3364. add eax,esi
  3365. shr ecx,1
  3366. and esi,0FFFEFFFFH
  3367. shr esi,1
  3368. and ecx,0FFFF7FFFH
  3369. add ebx,ecx
  3370. add edi,esi
  3371. shr ebx,6
  3372. and edi,0FFC0FFFFH
  3373. shr edi,6
  3374. and ebx,0FFFF03FFH
  3375. add ebx,ecx
  3376. add edi,esi
  3377. lea edx,[eax+ebp-40004000H]
  3378. sub ebp,eax
  3379. lea ecx,[ebx+ebx*2+6E406E40H]
  3380. lea esi,[edi+edi*2+27402740H]
  3381. shr ecx,1
  3382. and esi,0FFFEFFFFH
  3383. shr esi,1
  3384. and ecx,0FFFF7FFFH
  3385. sub ecx,edi
  3386. mov S0,edx
  3387. mov S3,ebp
  3388. lea esi,[esi+ebx+80008000H]
  3389. mov S7,ecx
  3390. mov eax,I11I13
  3391. mov S4,esi
  3392. mov ebx,I21I23
  3393. mov ecx,I51I53
  3394. mov edx,I61I63
  3395. sub eax,edx
  3396. sub ebx,ecx
  3397. shr eax,1
  3398. and ebx,0FFFEFFFFH
  3399. shr ebx,1
  3400. and eax,0FFFF7FFFH
  3401. mov esi,ebx
  3402. mov edi,eax
  3403. shr esi,6
  3404. and edi,0FFC0FFFFH
  3405. shr edi,6
  3406. and esi,0FFFF03FFH
  3407. lea edx,[eax+edx+20002000H]
  3408. lea ecx,[ecx+ebx-20002000H]
  3409. lea ebp,[ebx+ebx*2]
  3410. sub ebx,esi
  3411. shr ebp,4
  3412. lea esi,[eax+eax*2]
  3413. sub eax,edi
  3414. mov edi,ebx
  3415. shr edi,7
  3416. and ebp,0FFFF0FFFH
  3417. shr esi,4
  3418. and edi,0FFFF01FFH
  3419. and esi,0FFFF0FFFH
  3420. sub edx,ecx
  3421. lea edi,[edi+ebp-46BF46BFH]
  3422. mov ebp,eax
  3423. shr ebp,7
  3424. sub eax,edi
  3425. and ebp,0FFFF01FFH
  3426. lea ecx,[edx+ecx*2-80008000H]
  3427. add ebp,esi
  3428. mov esi,S0
  3429. mov edi,CoeffStream
  3430. sub esi,ecx
  3431. lea ebx,[ebx+ebp-45BF45BFH]
  3432. mov ebp,S4
  3433. sub ebp,eax
  3434. mov PD [edi+C41C43],esi
  3435. lea ecx,[esi+ecx*2+80008000H]
  3436. mov esi,S7
  3437. sub esi,ebx
  3438. lea eax,[ebp+eax*2-0C000C000H]
  3439. mov PD [edi+C01C03],ecx
  3440. mov ecx,ebp
  3441. shr ebp,2
  3442. lea ebx,[esi+ebx*2+0C000C000H]
  3443. and ebp,0FFFF3FFFH
  3444. sub ebx,eax
  3445. add ecx,ebp
  3446. mov PD [edi+C71C73],ebx
  3447. mov ebp,ecx
  3448. and ecx,0FFF8FFFFH
  3449. shr ecx,3
  3450. lea eax,[ebx+eax*2-0C000C000H]
  3451. mov ebx,esi
  3452. and esi,0FFFCFFFFH
  3453. shr esi,2
  3454. lea ecx,[ecx+ebp-07000700H]
  3455. mov PD [edi+C51C53],ecx
  3456. add esi,ebx
  3457. mov ebx,esi
  3458. and esi,0FFF8FFFFH
  3459. shr esi,3
  3460. mov ebp,S3
  3461. mov ecx,edx
  3462. lea esi,[esi+ebx-07000700H]
  3463. mov ebx,ebp
  3464. ;
  3465. shr ebp,2
  3466. and ecx,0FFFCFFFFH
  3467. shr ecx,2
  3468. and ebp,0FFFF3FFFH
  3469. mov PD [edi+C11C13],eax
  3470. mov PD [edi+C31C33],esi
  3471. lea eax,[edx+ecx]
  3472. lea esi,[ebx+ebp]
  3473. shr ebp,2
  3474. and ecx,0FFFCFFFFH
  3475. shr ecx,2
  3476. and ebp,0FFFF3FFFH
  3477. add ecx,eax
  3478. add ebp,esi
  3479. shr eax,5
  3480. and esi,0FFE0FFFFH
  3481. shr esi,5
  3482. and eax,0FFFF07FFH
  3483. shr edx,1
  3484. and ebx,0FFFEFFFFH
  3485. shr ebx,1
  3486. and edx,0FFFF7FFFH
  3487. sub ebx,ecx
  3488. add eax,ebp
  3489. mov ecx,I04I06
  3490. mov ebp,I74I76
  3491. lea ebx,[ebx+esi+0B180B180H]
  3492. lea edx,[eax+edx+009800980H]
  3493. mov esi,I34I36
  3494. mov eax,I44I46
  3495. sub esi,eax
  3496. sub ecx,ebp
  3497. shr ecx,1
  3498. and esi,0FFFEFFFFH
  3499. shr esi,1
  3500. and ecx,0FFFF7FFFH
  3501. mov PD [edi+C61C63],ebx
  3502. mov PD [edi+C21C23],edx
  3503. lea ebx,[ecx+ecx*2]
  3504. lea edi,[esi+esi*2]
  3505. lea ebp,[ebp+ecx+40004000H]
  3506. add eax,esi
  3507. shr ecx,1
  3508. and esi,0FFFEFFFFH
  3509. shr esi,1
  3510. and ecx,0FFFF7FFFH
  3511. add ebx,ecx
  3512. add edi,esi
  3513. shr ebx,6
  3514. and edi,0FFC0FFFFH
  3515. shr edi,6
  3516. and ebx,0FFFF03FFH
  3517. add ebx,ecx
  3518. add edi,esi
  3519. lea edx,[eax+ebp-40004000H]
  3520. sub ebp,eax
  3521. lea ecx,[ebx+ebx*2+6E406E40H]
  3522. lea esi,[edi+edi*2+27402740H]
  3523. shr ecx,1
  3524. and esi,0FFFEFFFFH
  3525. shr esi,1
  3526. and ecx,0FFFF7FFFH
  3527. sub ecx,edi
  3528. mov S0,edx
  3529. mov S3,ebp
  3530. lea esi,[esi+ebx+80008000H]
  3531. mov S7,ecx
  3532. mov eax,I14I16
  3533. mov S4,esi
  3534. mov ebx,I24I26
  3535. mov ecx,I54I56
  3536. mov edx,I64I66
  3537. sub eax,edx
  3538. sub ebx,ecx
  3539. shr eax,1
  3540. and ebx,0FFFEFFFFH
  3541. shr ebx,1
  3542. and eax,0FFFF7FFFH
  3543. mov esi,ebx
  3544. mov edi,eax
  3545. shr esi,6
  3546. and edi,0FFC0FFFFH
  3547. shr edi,6
  3548. and esi,0FFFF03FFH
  3549. lea edx,[eax+edx+20002000H]
  3550. lea ecx,[ecx+ebx-20002000H]
  3551. lea ebp,[ebx+ebx*2]
  3552. sub ebx,esi
  3553. shr ebp,4
  3554. lea esi,[eax+eax*2]
  3555. sub eax,edi
  3556. mov edi,ebx
  3557. shr edi,7
  3558. and ebp,0FFFF0FFFH
  3559. shr esi,4
  3560. and edi,0FFFF01FFH
  3561. and esi,0FFFF0FFFH
  3562. sub edx,ecx
  3563. lea edi,[edi+ebp-46BF46BFH]
  3564. mov ebp,eax
  3565. shr ebp,7
  3566. sub eax,edi
  3567. and ebp,0FFFF01FFH
  3568. lea ecx,[edx+ecx*2-80008000H]
  3569. add ebp,esi
  3570. mov esi,S0
  3571. mov edi,CoeffStream
  3572. sub esi,ecx
  3573. lea ebx,[ebx+ebp-45BF45BFH]
  3574. mov ebp,S4
  3575. sub ebp,eax
  3576. mov PD [edi+C44C46],esi
  3577. lea ecx,[esi+ecx*2+80008000H]
  3578. mov esi,S7
  3579. sub esi,ebx
  3580. lea eax,[ebp+eax*2-0C000C000H]
  3581. mov PD [edi+C04C06],ecx
  3582. mov ecx,ebp
  3583. shr ebp,2
  3584. lea ebx,[esi+ebx*2+0C000C000H]
  3585. and ebp,0FFFF3FFFH
  3586. sub ebx,eax
  3587. add ecx,ebp
  3588. mov PD [edi+C74C76],ebx
  3589. mov ebp,ecx
  3590. and ecx,0FFF8FFFFH
  3591. shr ecx,3
  3592. lea eax,[ebx+eax*2-0C000C000H]
  3593. mov ebx,esi
  3594. and esi,0FFFCFFFFH
  3595. shr esi,2
  3596. lea ecx,[ecx+ebp-07000700H]
  3597. mov PD [edi+C54C56],ecx
  3598. add esi,ebx
  3599. mov ebx,esi
  3600. and esi,0FFF8FFFFH
  3601. shr esi,3
  3602. mov ebp,S3
  3603. mov ecx,edx
  3604. lea esi,[esi+ebx-07000700H]
  3605. mov ebx,ebp
  3606. ;
  3607. shr ebp,2
  3608. and ecx,0FFFCFFFFH
  3609. shr ecx,2
  3610. and ebp,0FFFF3FFFH
  3611. mov PD [edi+C14C16],eax
  3612. mov PD [edi+C34C36],esi
  3613. lea eax,[edx+ecx]
  3614. lea esi,[ebx+ebp]
  3615. shr ebp,2
  3616. and ecx,0FFFCFFFFH
  3617. shr ecx,2
  3618. and ebp,0FFFF3FFFH
  3619. add ecx,eax
  3620. add ebp,esi
  3621. shr eax,5
  3622. and esi,0FFE0FFFFH
  3623. shr esi,5
  3624. and eax,0FFFF07FFH
  3625. shr edx,1
  3626. and ebx,0FFFEFFFFH
  3627. shr ebx,1
  3628. and edx,0FFFF7FFFH
  3629. sub ebx,ecx
  3630. add eax,ebp
  3631. mov ecx,I07I05
  3632. mov ebp,I77I75
  3633. lea ebx,[ebx+esi+0B180B180H]
  3634. lea edx,[eax+edx+009800980H]
  3635. mov esi,I37I35
  3636. mov eax,I47I45
  3637. sub esi,eax
  3638. sub ecx,ebp
  3639. shr ecx,1
  3640. and esi,0FFFEFFFFH
  3641. shr esi,1
  3642. and ecx,0FFFF7FFFH
  3643. mov PD [edi+C64C66],ebx
  3644. mov PD [edi+C24C26],edx
  3645. lea ebx,[ecx+ecx*2]
  3646. lea edi,[esi+esi*2]
  3647. lea ebp,[ebp+ecx+40004000H]
  3648. add eax,esi
  3649. shr ecx,1
  3650. and esi,0FFFEFFFFH
  3651. shr esi,1
  3652. and ecx,0FFFF7FFFH
  3653. add ebx,ecx
  3654. add edi,esi
  3655. shr ebx,6
  3656. and edi,0FFC0FFFFH
  3657. shr edi,6
  3658. and ebx,0FFFF03FFH
  3659. add ebx,ecx
  3660. add edi,esi
  3661. lea edx,[eax+ebp-40004000H]
  3662. sub ebp,eax
  3663. lea ecx,[ebx+ebx*2+6E406E40H]
  3664. lea esi,[edi+edi*2+27402740H]
  3665. shr ecx,1
  3666. and esi,0FFFEFFFFH
  3667. shr esi,1
  3668. and ecx,0FFFF7FFFH
  3669. sub ecx,edi
  3670. mov S0,edx
  3671. mov S3,ebp
  3672. lea esi,[esi+ebx+80008000H]
  3673. mov S7,ecx
  3674. mov eax,I17I15
  3675. mov S4,esi
  3676. mov ebx,I27I25
  3677. mov ecx,I57I55
  3678. mov edx,I67I65
  3679. sub eax,edx
  3680. sub ebx,ecx
  3681. shr eax,1
  3682. and ebx,0FFFEFFFFH
  3683. shr ebx,1
  3684. and eax,0FFFF7FFFH
  3685. mov esi,ebx
  3686. mov edi,eax
  3687. shr esi,6
  3688. and edi,0FFC0FFFFH
  3689. shr edi,6
  3690. and esi,0FFFF03FFH
  3691. lea edx,[eax+edx+20002000H]
  3692. lea ecx,[ecx+ebx-20002000H]
  3693. lea ebp,[ebx+ebx*2]
  3694. sub ebx,esi
  3695. shr ebp,4
  3696. lea esi,[eax+eax*2]
  3697. sub eax,edi
  3698. mov edi,ebx
  3699. shr edi,7
  3700. and ebp,0FFFF0FFFH
  3701. shr esi,4
  3702. and edi,0FFFF01FFH
  3703. and esi,0FFFF0FFFH
  3704. sub edx,ecx
  3705. lea edi,[edi+ebp-46BF46BFH]
  3706. mov ebp,eax
  3707. shr ebp,7
  3708. sub eax,edi
  3709. and ebp,0FFFF01FFH
  3710. lea ecx,[edx+ecx*2-80008000H]
  3711. add ebp,esi
  3712. mov esi,S0
  3713. mov edi,CoeffStream
  3714. sub esi,ecx
  3715. lea ebx,[ebx+ebp-45BF45BFH]
  3716. mov ebp,S4
  3717. sub ebp,eax
  3718. mov PD [edi+C47C45],esi
  3719. lea ecx,[esi+ecx*2+80008000H]
  3720. mov esi,S7
  3721. sub esi,ebx
  3722. lea eax,[ebp+eax*2-0C000C000H]
  3723. mov PD [edi+C07C05],ecx
  3724. mov ecx,ebp
  3725. shr ebp,2
  3726. lea ebx,[esi+ebx*2+0C000C000H]
  3727. and ebp,0FFFF3FFFH
  3728. sub ebx,eax
  3729. add ecx,ebp
  3730. mov PD [edi+C77C75],ebx
  3731. mov ebp,ecx
  3732. and ecx,0FFF8FFFFH
  3733. shr ecx,3
  3734. lea eax,[ebx+eax*2-0C000C000H]
  3735. mov ebx,esi
  3736. and esi,0FFFCFFFFH
  3737. shr esi,2
  3738. lea ecx,[ecx+ebp-07000700H]
  3739. mov PD [edi+C57C55],ecx
  3740. add esi,ebx
  3741. mov ebx,esi
  3742. and esi,0FFF8FFFFH
  3743. shr esi,3
  3744. mov ebp,S3
  3745. mov ecx,edx
  3746. lea esi,[esi+ebx-07000700H]
  3747. mov ebx,ebp
  3748. ;
  3749. shr ebp,2
  3750. and ecx,0FFFCFFFFH
  3751. shr ecx,2
  3752. and ebp,0FFFF3FFFH
  3753. mov PD [edi+C17C15],eax
  3754. mov PD [edi+C37C35],esi
  3755. lea eax,[edx+ecx]
  3756. lea esi,[ebx+ebp]
  3757. shr ebp,2
  3758. and ecx,0FFFCFFFFH
  3759. shr ecx,2
  3760. and ebp,0FFFF3FFFH
  3761. add ecx,eax
  3762. add ebp,esi
  3763. shr eax,5
  3764. and esi,0FFE0FFFFH
  3765. shr esi,5
  3766. and eax,0FFFF07FFH
  3767. shr edx,1
  3768. and ebx,0FFFEFFFFH
  3769. shr ebx,1
  3770. and edx,0FFFF7FFFH
  3771. sub ebx,ecx
  3772. add eax,ebp
  3773. mov ecx,CoeffStreamStart
  3774. lea ebp,[edi-SIZEOF T_CoeffBlk] ; Advance cursor for block action stream.
  3775. lea ebx,[ebx+esi+0B180B180H]
  3776. lea edx,[eax+edx+009800980H]
  3777. mov PD [edi+C67C65],ebx
  3778. mov PD [edi+C27C25],edx
  3779. ; Forward Slant Transform is done
  3780. cmp ebp,ecx
  3781. mov edi,ebp
  3782. mov CoeffStream,edi
  3783. jae NextBlock ; Process next block.
  3784. Done:
  3785. mov esp,StashESP
  3786. pop ebx
  3787. pop ebp
  3788. pop edi
  3789. pop esi
  3790. rturn
  3791. FORWARDDCT endp
  3792. END