Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

689 lines
21 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. ;//
  14. ;// $Header: S:\h26x\src\dec\cx51209.asv
  15. ;//
  16. ;// $Log: S:\h26x\src\dec\cx51209.asv
  17. ;//
  18. ;////////////////////////////////////////////////////////////////////////////
  19. ; cx1209 -- This function performs YUV12 to IF09 color conversion for H26x.
  20. ; IF09 consists of Y, V, U in 8-bit, planar format, plus a plane of
  21. ; 4-bit flags, each in 8 bits of storage, with each bit indicative
  22. ; of which dwords of Y are unchanged from the previous frame.
  23. ; IF09 is only applicable using DCI.
  24. ;
  25. ; This version is tuned for maximum performance on both the Pentium
  26. ; (r) microprocessor and the Pentium Pro (tm) microprocessor.
  27. ;
  28. ; Indentation of instructions indicates expected U/V pipe execution
  29. ; on Pentium (r) microprocessor; indented instructions are
  30. ; expected to execute in V-pipe, outdented instructions in U-pipe.
  31. ; Inside loops, blank lines delineate groups of 1, 2, or 3
  32. ; instructions that are expected to be decoded simultaneously
  33. ; on the Pentium Pro (tm) microprocessor.
  34. ;
  35. ; cx1209
  36. ; ^^^^^^
  37. ; ||||++----- Convert to IF09.
  38. ; ||++------- Convert from YUV12.
  39. ; |+--------- For both H261 and H263.
  40. ; +---------- Color convertor.
  41. ;-------------------------------------------------------------------------------
  42. OPTION PROLOGUE:None ; No assembler-generated prologue; the proc below pushes its own registers.
  43. OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro ; Epilogue is produced by this macro (not defined in this listing).
  44. include locals.inc
  45. include ccinst.inc
  46. include decconst.inc
  47. IFNDEF DSEGNAME ; Pick a default data segment name ...
  48. IFNDEF WIN32 ; ... only for non-WIN32 builds.
  49. DSEGNAME TEXTEQU <Data_cx1209>
  50. ENDIF
  51. ENDIF
  52. IFDEF WIN32
  53. .xlist ; Keep memmodel.inc out of the listing file.
  54. include memmodel.inc
  55. .list
  56. .DATA ; WIN32: use the simplified data segment directive.
  57. ELSE
  58. DSEGNAME SEGMENT WORD PUBLIC 'DATA' ; Non-WIN32: open the named data segment.
  59. ENDIF
  60. ; any data would go here
  61. IFNDEF WIN32
  62. DSEGNAME ENDS ; Close the (empty) named data segment.
  63. .xlist ; Keep memmodel.inc out of the listing file.
  64. include memmodel.inc
  65. .list
  66. ENDIF
  67. IFNDEF SEGNAME ; Pick a default code segment name ...
  68. IFNDEF WIN32 ; ... only for non-WIN32 builds.
  69. SEGNAME TEXTEQU <_CODE32>
  70. ENDIF
  71. ENDIF
  72. ifdef WIN32
  73. .CODE ; WIN32: simplified code segment, all segment registers assumed FLAT.
  74. ASSUME cs : FLAT
  75. ASSUME ds : FLAT
  76. ASSUME es : FLAT
  77. ASSUME fs : FLAT
  78. ASSUME gs : FLAT
  79. ASSUME ss : FLAT
  80. else
  81. SEGNAME SEGMENT PARA PUBLIC USE32 'CODE' ; Non-WIN32: explicit 32-bit code segment.
  82. ASSUME CS : SEGNAME
  83. ASSUME DS : Nothing ; Data segment registers are loaded explicitly inside the proc.
  84. ASSUME ES : Nothing
  85. ASSUME FS : Nothing
  86. ASSUME GS : Nothing
  87. endif
  88. PUBLIC YUV12ToIF09 ; Make the converter visible to the linker.
  89. YUV12ToIF09 proc DIST LANG AYPlane: DWORD,
  90. AVPlane: DWORD,
  91. AUPlane: DWORD,
  92. AFrameWidth: DWORD,
  93. AFrameHeight: DWORD,
  94. AYPitch: DWORD,
  95. AUVPitch: DWORD,
  96. AAspectAdjustmentCnt: DWORD,
  97. AColorConvertedFrame: DWORD,
  98. ADCIOffset: DWORD,
  99. ACCOffsetToLine0: DWORD,
  100. ACCOPitch: DWORD,
  101. ACCType: DWORD
      ;
      ; Overview (one pass of the outer loop handles one row of 4x4 luma blocks):
      ;   1. ChromaPrep/ChromaLoop: copy one line of V and of U to the output
      ;      chroma planes, doubling each packed dword (7-bit samples become
      ;      8-bit), and stash the doubled dwords in a chroma-prep area that
      ;      starts 1296 bytes before luma line 1 of the row.
      ;   2. YLoop: copy four luma lines to the output, doubled the same way.
      ;   3. BuildSkipDescrLoop: for every 4x4 block, form a signature of each
      ;      of its four luma dwords (6 bits per pel plus chroma bits), compare
      ;      it with the copy archived at YARCHIVEOFFSET, refresh the archive,
      ;      and write one flag byte to the skip plane.
      ;
      ; The Lreg/Sreg macros (Ledx, Sebx, ...) come from an include file; judging
      ; by their use here they load / store the named local.  PD, PW, PB, Ze,
      ; rturn, UOFFSET, VPITCH and YARCHIVEOFFSET are also defined outside this
      ; listing.
      ;
  102. ; void * YUV12ToIF09 (
  103. ; U8 * YPlane,
  104. ; U8 * VPlane,
  105. ; U8 * UPlane,
  106. ; UN FrameWidth,
  107. ; UN FrameHeight,
  108. ; UN YPitch,
  109. ; UN UVPitch,
  110. ; UN AspectAdjustmentCount,
  111. ; U8 * ColorConvertedFrame,
  112. ; U32 DCIOffset,
  113. ; U32 CCOffsetToLine0,
  114. ; IN CCOPitch,
  115. ; IN CCType)
  116. ;
  117. ; YPlane and VPlane are offsets relative to InstanceBase. In 16-bit Microsoft
  118. ; Windows (tm), space in this segment is used for local variables and tables.
  119. ; In 32-bit variants of Microsoft Windows (tm), the local variables are on
  120. ; the stack, while the tables are in the one and only data segment.
  121. ;
  122. ; CCOffsetToLine0 is relative to ColorConvertedFrame.
  123. ;
  124. IFDEF WIN32
  125. LocalFrameSize = 32 ; Eight dword locals, see below.
  126. RegisterStorageSize = 16 ; esi, edi, ebp, ebx pushed on entry.
  127. ; Arguments (offsets from esp once registers are pushed and the local frame
      ; is allocated; +0 is the return address).  The offsets follow the thirteen
      ; DWORD parameters declared on the proc line.  UPlane (+12), UVPitch (+28)
      ; and AspectAdjustmentCount (+32) are passed but not read by this routine,
      ; so they get no symbol here.
  128. YPlane = LocalFrameSize + RegisterStorageSize + 4
  129. VPlane = LocalFrameSize + RegisterStorageSize + 8
  130. FrameWidth = LocalFrameSize + RegisterStorageSize + 16
  131. FrameHeight = LocalFrameSize + RegisterStorageSize + 20
  132. YPitch = LocalFrameSize + RegisterStorageSize + 24
  133. ColorConvertedFrame = LocalFrameSize + RegisterStorageSize + 36
  134. DCIOffset = LocalFrameSize + RegisterStorageSize + 40
  135. CCOffsetToLine0 = LocalFrameSize + RegisterStorageSize + 44
  136. CCOPitch = LocalFrameSize + RegisterStorageSize + 48
  137. CCType = LocalFrameSize + RegisterStorageSize + 52
  138. EndOfArgList = LocalFrameSize + RegisterStorageSize + 56
  139. ; Locals (on local stack frame)
  140. CCOCursor = 0 ; Output cursor, Y plane.
  141. YLimit = 4 ; End of the input Y plane.
  142. CCOVCursor = 8 ; Output cursor, V plane.
  143. CCOUCursor = 12 ; Output cursor, U plane.
  144. CCOSkipCursor = 16 ; Output cursor, skip-flag plane.
  145. VLimit = 20 ; End of the current input V line.
  146. YLine1Limit = 24 ; End of luma line 1 of the current row of blocks.
  147. CCOUVPitch = 28 ; Output chroma pitch.
  148. LCL EQU <esp+>
  149. ELSE
  150. ; Arguments:
      ; NOTE(review): this 16-bit layout was left as found.  It does not list
      ; UPlane, UVPitch or AspectAdjustmentCount either; confirm it against the
      ; 16-bit caller before relying on it.
  151. RegisterStorageSize = 20 ; Put local variables on stack.
  152. InstanceBase_zero = RegisterStorageSize + 4
  153. InstanceBase_SegNum = RegisterStorageSize + 6
  154. YPlane_arg = RegisterStorageSize + 8
  155. VPlane_arg = RegisterStorageSize + 12
  156. FrameWidth_arg = RegisterStorageSize + 16
  157. FrameHeight_arg = RegisterStorageSize + 18
  158. YPitch_arg = RegisterStorageSize + 20
  159. ColorConvertedFrame = RegisterStorageSize + 22
  160. ColorConvertedFrame_SegNum = RegisterStorageSize + 24
  161. DCIOffset = RegisterStorageSize + 26
  162. CCOffsetToLine0 = RegisterStorageSize + 30
  163. CCOPitch_arg = RegisterStorageSize + 34
  164. EndOfArgList = RegisterStorageSize + 36
  165. ; Locals (in per-instance data segment)
  166. CCOCursor = LocalStorageCC + 0
  167. YLimit = LocalStorageCC + 4
  168. CCOVCursor = LocalStorageCC + 8
  169. CCOUCursor = LocalStorageCC + 12
  170. CCOSkipCursor = LocalStorageCC + 16
  171. VLimit = LocalStorageCC + 20
  172. YLine1Limit = LocalStorageCC + 24
  173. CCOUVPitch = LocalStorageCC + 28
  174. YPlane = LocalStorageCC + 32
  175. VPlane = LocalStorageCC + 36
  176. FrameWidth = LocalStorageCC + 40
  177. FrameHeight = LocalStorageCC + 44
  178. YPitch = LocalStorageCC + 48
  179. CCOPitch = LocalStorageCC + 52
  180. LCL EQU <>
  181. ENDIF
      ; Save the registers this routine uses; restored in reverse order at exit.
  182. push esi
  183. push edi
  184. push ebp
  185. push ebx
  186. IFDEF WIN32
  187. sub esp,LocalFrameSize ; Allocate the local frame.
  188. mov eax,PD [esp+ColorConvertedFrame]
  189. add eax,PD [esp+DCIOffset]
  190. add eax,PD [esp+CCOffsetToLine0]
  191. mov PD [esp+CCOCursor],eax ; CCOCursor = ColorConvertedFrame + DCIOffset + CCOffsetToLine0
  192. ELSE
  193. xor eax,eax ; Clear eax so its upper half is zero for the word loads below.
  194. mov eax,ds
  195. push eax ; Save caller's ds (the fifth saved dword).
  196. mov ebp,esp
  197. and ebp,00000FFFFH ; Use only the low 16 bits of the stack pointer.
  198. mov ds, PW [ebp+InstanceBase_SegNum]
  199. mov es, PW [ebp+ColorConvertedFrame_SegNum]
  200. mov ebx,PD [ebp+YPlane_arg] ; Make YPlane accessible
  201. mov ds:PD YPlane,ebx
  202. mov ebx,PD [ebp+VPlane_arg] ; Make VPlane accessible.
  203. mov ds:PD VPlane,ebx
  204. mov ax,PW [ebp+FrameWidth_arg] ; Make FrameWidth accessible
  205. mov ds:PD FrameWidth,eax
  206. mov ax,PW [ebp+FrameHeight_arg] ; Make FrameHeight accessible
  207. mov ds:PD FrameHeight,eax
  208. mov ax,PW [ebp+YPitch_arg] ; Make YPitch accessible
  209. mov ds:PD YPitch,eax
  210. mov ax,PW [ebp+ColorConvertedFrame] ; Init CCOCursor
  211. add eax,PD [ebp+DCIOffset] ; (CCOffsetToLine0 is not added on this path.)
  212. mov ds:PD CCOCursor,eax
  213. movsx ebx,PW [ebp+CCOPitch_arg] ; Make CCOPitch accessible
  214. mov ds:PD CCOPitch,ebx
  215. ENDIF
      ; Derive the output plane cursors (Y, then V, then U, then skip flags,
      ; laid out back to back) and the input limits.
  216. Ledx FrameHeight
  217. Lebx CCOPitch
  218. shr ebx,2 ; UV pitch for the output
  219. Lecx YPitch
  220. add ebx,3 ; Round the UV pitch up to a multiple of 4 (with the AND below).
  221. Lebp CCOPitch
  222. and ebx,0FFFFFFFCH
  223. Lesi YPlane ; Fetch cursor over luma plane.
  224. Sebx CCOUVPitch
  225. Leax CCOCursor
  226. imul ecx,edx ; ecx: size of Y input.
  227. imul ebp,edx ; ebp: was CCOPitch, now size of Y output.
  228. imul ebx,edx ; ebx: size of U/V output (times 4).
  229. add ecx,esi ; ecx: Ylimit
  230. add eax,ebp ; eax was CCOCursor, now CCOVCursor
  231. Secx YLimit
  232. Seax CCOVCursor
  233. sar ebx,2 ; ebx: UVsize of output
  234. Lecx FrameWidth ; ecx: Y frame width
  235. add esi,ecx ; esi: end of first input Y
  236. add eax,ebx ; eax: now CCOUCursor
  237. shr ecx,2 ; ecx: FrameWidth / 4, the number of V bytes read per line.
  238. Seax CCOUCursor
  239. Lebp VPlane ; ebp Vplane input
  240. Ledx YPitch
  241. lea esi,[edx+esi] ; End of Y line 1
  242. add ebp,ecx ; end of Vline
  243. Sesi YLine1Limit
  244. add eax,ebx ; CCO Skip Blocks
  245. Sebp VLimit ; End of the first input V line.
  246. Seax CCOSkipCursor
  247. ; Prepare the UV contribution to decide the skip blocks, and copy chroma
  248. ; planes at the same time.
  249. ;
  250. ; Register usage:
  251. ;
  252. ; esi: V plane input pointer
  253. ; edi: V output pointer, held as an offset from esi
  254. ; ebp: U output pointer, held as an offset from esi
  255. ; edx: chroma-prep cursor (YPlane + YPitch - 1296, 16 bytes per step)
  256. ; ecx: V limit
  257. ; ebx: Work area for U
  258. ; eax: Work area for V
  259. ChromaPrep:
  260. Ledi CCOVCursor
  261. Lebp CCOUCursor
  262. Ledx YPlane
  263. Leax YPitch
  264. Lesi VPlane
  265. Lecx VLimit
  266. sub edi,esi ; make edi offset to esi.
  267. sub ebp,esi ; make ebp offset to esi to save inc in the loop.
  268. lea edx,[eax+edx-1296] ; make edx point at place for chroma prep.
  269. mov eax,PD [esi] ; fetch four V
  270. add eax,eax ; Change to 8-bit. (Low bit undef, usually 0).
  271. ChromaLoop:
  272. mov Ze PD[esi+edi*1],eax ; Store four V.
  273. mov ebx,PD [esi+UOFFSET] ; fetch four U
  274. add esi,4
  275. mov PD [edx],eax ; Store four V to chroma-prep line in Y frame (offset 0 of the group).
  276. add edx,16 ; Advance chroma-prep cursor.
  277. add ebx,ebx ; Change to 8-bit. (Low bit undef, usually 0).
  278. mov Ze PD[esi+ebp*1-4],ebx ; Store four U.
  279. mov eax,PD [esi] ; fetch next four V.
  280. add eax,eax ; Change to 8-bit. (Low bit undef, usually 0).
  281. mov PD [edx-12],ebx ; Store four U to chroma-prep line in Y frame (offset 4 of the group).
  282. mov bl,Ze PB [esi+edi*1] ; Pre-load output cache line
  283. cmp esi,ecx
  284. mov bl,Ze PB [esi+ebp*1] ; Pre-load output cache line
  285. jb ChromaLoop ; Unsigned compare: loop until the V cursor reaches VLimit.
  286. ; update chroma pointers.
  287. add ecx,VPITCH
  288. Lebx CCOUVPitch
  289. Ledi CCOVCursor
  290. Lebp CCOUCursor
  291. Secx VLimit
  292. add edi,ebx ; update V output ptr to the next line
  293. Leax VPlane
  294. add ebp,ebx ; update U output ptr to the next line
  295. Sedi CCOVCursor
  296. add eax,VPITCH ; Advance the V input cursor by one input line.
  297. Sebp CCOUCursor
  298. Seax VPlane
  299. ; now do Luma a row of 4x4 blocks
  300. ;
  301. ; register usage:
  302. ;
  303. ; esi: Y cursor (end of the current input line)
  304. ; edi: CCOCursor (biased by +FrameWidth to match esi)
  305. ; ebp: counts down 4 lines of luma.
  306. ; ecx: negative byte index, runs from -FrameWidth up to 0.
  307. ; ebx: Y Pitch.
  308. ; eax: Work area.
  309. ; copy a row of 4x4 luma
  310. Lesi YPlane
  311. Lecx FrameWidth
  312. Ledi CCOCursor
  313. add esi,ecx
  314. neg ecx
  315. Lebx YPitch
  316. sub edi,ecx ; edi = CCOCursor + FrameWidth
  317. mov eax,PD[esi+ecx] ; Fetch 4 Y pels.
  318. add eax,eax ; Make them 8-bit. Low bit undef, but usually 0.
  319. mov ebp,4
  320. YLoop:
  321. mov Ze PD[edi+ecx],eax ; Store them to IF09 output, Y plane.
  322. mov eax,PD[esi+ecx+4] ; Fetch 4 Y pels (the last fetch of a line is discarded).
  323. add eax,eax ; Make them 8-bit. Low bit undef, but usually 0.
  324. add ecx,4 ; Advance induction variable.
  325. jl YLoop ; Signed test on the index: loop while it is still negative.
  326. YLoopDone:
  327. Lecx FrameWidth
  328. add esi,ebx ; Next input line.
  329. add edi,ecx ; Next output line: advances by FrameWidth, CCOPitch is not used here.
  330. neg ecx
  331. mov eax,PD[esi+ecx] ; Fetch 4 Y pels.
  332. add eax,eax ; Make them 8-bit. Low bit undef, but usually 0.
  333. dec ebp
  334. jne YLoop
  335. add edi,ecx ; Remove the +FrameWidth bias (ecx is -FrameWidth here).
  336. Sedi CCOCursor ; save the output ptr for next four lines
  337. ; Build the skip block mask
  338. ;
  339. ; Register usage:
  340. ;
  341. ; esi: Y ptr
  342. ; edi: Mask Ptr
  343. ; ebp: Y Pitch
  344. ; edx: mask
  345. ; ecx: Archive value
  346. ; ebx: UV contribution
  347. ; eax: Dword of Y pels
  348. ;
  349. ; Y starts with Line 1 of 4x4 blocks, since UV pattern has been saved
  350. ; relative to line 1.
      ;
      ; The loop body is unrolled four times.  Each copy handles one 4x4 block
      ; and uses the next byte (byte0..byte3) of the same chroma-prep V and U
      ; dwords; because esi has moved on by 4 each time, the displacement
      ; grows by 4 per copy.  Within a copy the lines are visited 3, 2, 1, 0:
      ;   signature = (Y dword AND 07E7E7E7EH) + UV bits
      ;   sub ecx,signature / add ecx,-1  sets CF iff archive != signature
      ;   sbb/adc edx,edx                 shifts that CF into the mask
      ; NOTE(review): after the final XOR, bits 0..2 flag lines 0..2 and every
      ; bit from 3 upward equals the line-3 flag, so the stored byte has bits
      ; 3..7 identical.  Confirm that readers of the skip plane expect this.
  351. Lesi YPlane
  352. Lebp YPitch
  353. Ledi CCOSkipCursor
  354. add esi,ebp ; esi point at line 1 of luma
  355. BuildSkipDescrLoop:
  356. mov ebx,PD [esi-1296] ; Fetch 4 chroma-prep V's; byte0 corresponds to this Y.
  357. mov eax,PD [esi-1292] ; Fetch 4 chroma-prep U's; byte0 corresponds to this Y.
  358. shl ebx,11 ; Position byte0 of the V dword.
  359. and eax,0000000FCH ; Extract top 6 bits of U byte0.
  360. and ebx,00007E000H ; Extract top 6 bits of V byte0.
  361. mov edx,PD [esi+ebp*2] ; Line 3 of luma first.
  362. and edx,07E7E7E7EH ; Use 6 bits of Y to save more xfer cycles.
  363. mov ecx,PD[esi+ebp*2+YARCHIVEOFFSET] ; Fetch archive for previous frame
  364. lea ebx,[ebx+eax*8] ; Build UV.
  365. mov eax,PD[esi+ebp*1] ; line 2 of luma.
  366. add edx,ebx ; combine Y with UV pattern
  367. and eax,07E7E7E7EH
  368. mov PD[esi+ebp*2+YARCHIVEOFFSET],edx ; save the current in the archive
  369. sub ecx,edx ; compare with the previous archive
  370. add ecx,-1 ; CF == 1 iff curr differs from prev
  371. lea eax,[eax+ebx] ; Line 2 signature (lea leaves CF intact).
  372. sbb edx,edx ; edx == -1 iff different, else 0.
  373. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 2.
  374. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  375. sub ecx,eax
  376. mov eax,PD[esi] ; line 1 of luma.
  377. sub esi,ebp ; Gain access to line 0.
  378. and eax,07E7E7E7EH
  379. add ecx,-1 ; CF == 1 iff line 2 differs.
  380. adc edx,edx ; edx[0] == 1 if different, else 0.
  381. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 1.
  382. add eax,ebx
  383. lea edi,[edi+1] ; Advance the mask pointer; the byte is written at [edi-1].
  384. sub ecx,eax
  385. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  386. mov eax,PD[esi] ; line 0 of luma.
  387. add ecx,-1 ; CF == 1 iff line 1 differs.
  388. adc edx,edx
  389. and eax,07E7E7E7EH
  390. mov ecx,PD[esi+YARCHIVEOFFSET] ; Archive for line 0.
  391. add eax,ebx
  392. sub ecx,eax
  393. Lebx YLine1Limit
  394. mov PD[esi+YARCHIVEOFFSET],eax
  395. add ecx,-1 ; CF == 1 iff line 0 differs.
  396. lea esi,[esi+ebp+4] ; jump to line 1 of next 4x4 block
  397. adc edx,edx
  398. xor edx,0FFFFFFFFH ; Invert: edx[0,1,2] == 1 if that line is unchanged; bits 3 and up follow line 3.
  399. cmp esi,ebx ; check the end of line 1 of Y
  400. mov Ze PB[edi-1],dl ; write to the skip block buffer
  401. je BuildSkipDescrLoopDone
  402. mov ebx,PD [esi-1300] ; Fetch 4 chroma-prep V's; byte1 corresponds to this Y.
  403. mov eax,PD [esi-1296] ; Fetch 4 chroma-prep U's; byte1 corresponds to this Y.
  404. shl ebx,11 ; Position byte1 of the V dword.
  405. and eax,00000FC00H ; Extract top 6 bits of U byte1.
  406. and ebx,007E00000H ; Extract top 6 bits of V byte1.
  407. mov edx,PD [esi+ebp*2] ; Line 3 of luma first.
  408. and edx,07E7E7E7EH ; Use 6 bits of Y to save more xfer cycles.
  409. mov ecx,PD[esi+ebp*2+YARCHIVEOFFSET] ; Fetch archive for previous frame
  410. lea ebx,[ebx+eax*8] ; Build UV.
  411. mov eax,PD[esi+ebp*1] ; line 2 of luma.
  412. add edx,ebx ; combine Y with UV pattern
  413. and eax,07E7E7E7EH
  414. mov PD[esi+ebp*2+YARCHIVEOFFSET],edx ; save the current in the archive
  415. sub ecx,edx ; compare with the previous archive
  416. add ecx,-1 ; CF == 1 iff curr differs from prev
  417. lea eax,[eax+ebx] ; Line 2 signature (lea leaves CF intact).
  418. sbb edx,edx ; edx == -1 iff different, else 0.
  419. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 2.
  420. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  421. sub ecx,eax
  422. mov eax,PD[esi] ; line 1 of luma.
  423. sub esi,ebp ; Gain access to line 0.
  424. and eax,07E7E7E7EH
  425. add ecx,-1 ; CF == 1 iff line 2 differs.
  426. adc edx,edx ; edx[0] == 1 if different, else 0.
  427. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 1.
  428. add eax,ebx
  429. lea edi,[edi+1] ; Advance the mask pointer; the byte is written at [edi-1].
  430. sub ecx,eax
  431. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  432. mov eax,PD[esi] ; line 0 of luma.
  433. add ecx,-1 ; CF == 1 iff line 1 differs.
  434. adc edx,edx
  435. and eax,07E7E7E7EH
  436. mov ecx,PD[esi+YARCHIVEOFFSET] ; Archive for line 0.
  437. add eax,ebx
  438. sub ecx,eax
  439. Lebx YLine1Limit
  440. mov PD[esi+YARCHIVEOFFSET],eax
  441. add ecx,-1 ; CF == 1 iff line 0 differs.
  442. lea esi,[esi+ebp+4] ; jump to line 1 of next 4x4 block
  443. adc edx,edx
  444. xor edx,0FFFFFFFFH ; Invert: edx[0,1,2] == 1 if that line is unchanged; bits 3 and up follow line 3.
  445. cmp esi,ebx ; check the end of line 1 of Y
  446. mov Ze PB[edi-1],dl ; write to the skip block buffer
  447. je BuildSkipDescrLoopDone
  448. mov ebx,PD [esi-1304] ; Fetch 4 chroma-prep V's; byte2 corresponds to this Y.
  449. mov eax,PD [esi-1300] ; Fetch 4 chroma-prep U's; byte2 corresponds to this Y.
  450. shr ebx,5 ; Position byte2 of the V dword.
  451. and eax,000FC0000H ; Extract top 6 bits of U byte2.
  452. and ebx,00007E000H ; Extract top 6 bits of V byte2.
  453. mov edx,PD [esi+ebp*2] ; Line 3 of luma first.
  454. and edx,07E7E7E7EH ; Use 6 bits of Y to save more xfer cycles.
  455. mov ecx,PD[esi+ebp*2+YARCHIVEOFFSET] ; Fetch archive for previous frame
  456. lea ebx,[ebx+eax*8] ; Build UV.
  457. mov eax,PD[esi+ebp*1] ; line 2 of luma.
  458. add edx,ebx ; combine Y with UV pattern
  459. and eax,07E7E7E7EH
  460. mov PD[esi+ebp*2+YARCHIVEOFFSET],edx ; save the current in the archive
  461. sub ecx,edx ; compare with the previous archive
  462. add ecx,-1 ; CF == 1 iff curr differs from prev
  463. lea eax,[eax+ebx] ; Line 2 signature (lea leaves CF intact).
  464. sbb edx,edx ; edx == -1 iff different, else 0.
  465. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 2.
  466. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  467. sub ecx,eax
  468. mov eax,PD[esi] ; line 1 of luma.
  469. sub esi,ebp ; Gain access to line 0.
  470. and eax,07E7E7E7EH
  471. add ecx,-1 ; CF == 1 iff line 2 differs.
  472. adc edx,edx ; edx[0] == 1 if different, else 0.
  473. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 1.
  474. add eax,ebx
  475. lea edi,[edi+1] ; Advance the mask pointer; the byte is written at [edi-1].
  476. sub ecx,eax
  477. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  478. mov eax,PD[esi] ; line 0 of luma.
  479. add ecx,-1 ; CF == 1 iff line 1 differs.
  480. adc edx,edx
  481. and eax,07E7E7E7EH
  482. mov ecx,PD[esi+YARCHIVEOFFSET] ; Archive for line 0.
  483. add eax,ebx
  484. sub ecx,eax
  485. Lebx YLine1Limit
  486. mov PD[esi+YARCHIVEOFFSET],eax
  487. add ecx,-1 ; CF == 1 iff line 0 differs.
  488. lea esi,[esi+ebp+4] ; jump to line 1 of next 4x4 block
  489. adc edx,edx
  490. xor edx,0FFFFFFFFH ; Invert: edx[0,1,2] == 1 if that line is unchanged; bits 3 and up follow line 3.
  491. cmp esi,ebx ; check the end of line 1 of Y
  492. mov Ze PB[edi-1],dl ; write to the skip block buffer
  493. je BuildSkipDescrLoopDone
  494. mov ebx,PD [esi-1308] ; Fetch 4 chroma-prep V's; byte3 corresponds to this Y.
  495. mov eax,PD [esi-1304] ; Fetch 4 chroma-prep U's; byte3 corresponds to this Y.
  496. shr ebx,5 ; Position byte3 of the V dword.
  497. mov edx,PD [esi+ebp*2] ; Line 3 of luma first.
  498. shr eax,26 ; Extract top 6 bits of U byte3.
  499. and ebx,007E00000H ; Extract top 6 bits of V byte3.
  500. and edx,07E7E7E7EH ; Use 6 bits of Y to save more xfer cycles.
  501. mov ecx,PD[esi+ebp*2+YARCHIVEOFFSET] ; Fetch archive for previous frame
  502. lea ebx,[ebx+eax*8] ; Build UV.
  503. mov eax,PD[esi+ebp*1] ; line 2 of luma.
  504. add edx,ebx ; combine Y with UV pattern
  505. and eax,07E7E7E7EH
  506. mov PD[esi+ebp*2+YARCHIVEOFFSET],edx ; save the current in the archive
  507. sub ecx,edx ; compare with the previous archive
  508. add ecx,-1 ; CF == 1 iff curr differs from prev
  509. lea eax,[eax+ebx] ; Line 2 signature (lea leaves CF intact).
  510. sbb edx,edx ; edx == -1 iff different, else 0.
  511. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 2.
  512. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  513. sub ecx,eax
  514. mov eax,PD[esi] ; line 1 of luma.
  515. sub esi,ebp ; Gain access to line 0.
  516. and eax,07E7E7E7EH
  517. add ecx,-1 ; CF == 1 iff line 2 differs.
  518. adc edx,edx ; edx[0] == 1 if different, else 0.
  519. mov ecx,PD[esi+ebp*1+YARCHIVEOFFSET] ; Archive for line 1.
  520. add eax,ebx
  521. lea edi,[edi+1] ; Advance the mask pointer; the byte is written at [edi-1].
  522. sub ecx,eax
  523. mov PD[esi+ebp*1+YARCHIVEOFFSET],eax
  524. mov eax,PD[esi] ; line 0 of luma.
  525. add ecx,-1 ; CF == 1 iff line 1 differs.
  526. adc edx,edx
  527. and eax,07E7E7E7EH
  528. mov ecx,PD[esi+YARCHIVEOFFSET] ; Archive for line 0.
  529. add eax,ebx
  530. sub ecx,eax
  531. Lebx YLine1Limit
  532. mov PD[esi+YARCHIVEOFFSET],eax
  533. add ecx,-1 ; CF == 1 iff line 0 differs.
  534. lea esi,[esi+ebp+4] ; jump to line 1 of next 4x4 block
  535. adc edx,edx
  536. xor edx,0FFFFFFFFH ; Invert: edx[0,1,2] == 1 if that line is unchanged; bits 3 and up follow line 3.
  537. cmp esi,ebx ; check the end of line 1 of Y
  538. mov Ze PB[edi-1],dl ; write to the skip block buffer
  539. jne BuildSkipDescrLoop
  540. BuildSkipDescrLoopDone:
  541. add edi,3 ; Round to next dword.
  542. lea ebx,[ebx+ebp*4] ; update YLine1Limit for next row of blocks
  543. and edi,0FFFFFFFCH
  544. Lesi YPlane
  545. Sedi CCOSkipCursor
  546. Sebx YLine1Limit
  547. lea esi,[esi+ebp*4] ; Advance the luma cursor by four lines.
  548. Leax YLimit
  549. Sesi YPlane
  550. cmp esi,eax
  551. jb ChromaPrep ; Pointers are unsigned: loop while the luma cursor is below YLimit.
  552. IFDEF WIN32
  553. add esp,LocalFrameSize ; Release the local frame.
  554. ELSE
  555. pop ebx
  556. mov ds,ebx ; Restore caller's ds.
  557. ENDIF
  558. pop ebx
  559. pop ebp
  560. pop edi
  561. pop esi
  562. rturn
  563. YUV12ToIF09 endp
  564. IFNDEF WIN32
  565. SEGNAME ENDS
  566. ENDIF
  567. END