;************************************************************************* ;** INTEL Corporation Proprietary Information ;** ;** This listing is supplied under the terms of a license ;** agreement with INTEL Corporation and may not be copied ;** nor disclosed except in accordance with the terms of ;** that agreement. ;** ;** Copyright (c) 1995 Intel Corporation. ;** All Rights Reserved. ;** ;************************************************************************* ;// ;// $Header: S:\h26x\src\dec\cx512242.asv ;// ;// $Log: S:\h26x\src\dec\cx512242.asv $ ;// ;// Rev 1.8 20 Mar 1996 10:57:22 bnickers ;// Fix numerous bugs. ;// ;// Rev 1.7 19 Mar 1996 11:50:22 bnickers ;// Fix error regarding commitment of pages to stack. ;// ;// Rev 1.6 18 Mar 1996 09:58:36 bnickers ;// Make color convertors non-destructive. ;// ;// Rev 1.5 05 Feb 1996 13:35:36 BNICKERS ;// Fix RGB16 color flash problem, by allowing different RGB16 formats at oce. ;// ;// Rev 1.4 22 Dec 1995 15:42:18 KMILLS ;// added new copyright notice ;// ;// Rev 1.3 30 Oct 1995 17:15:32 BNICKERS ;// Fix color shift in RGB24 color convertors. ;// ;// Rev 1.2 26 Oct 1995 17:49:36 CZHU ;// Fix a whole bunch of bugs. ;// ;// Rev 1.1 26 Oct 1995 09:46:22 BNICKERS ;// Reduce the number of blanks in the "proc" statement because the assembler ;// sometimes has problems with statements longer than 512 characters long. ;// ;// Rev 1.0 25 Oct 1995 17:59:28 BNICKERS ;// Initial revision. ;// ;//////////////////////////////////////////////////////////////////////////// ; ; +---------- Color convertor. ; |+--------- For both H261 and H263. ; ||+-------- Version for the Pentium(r) Microprocessor. ; |||++------ Convert from YUV12. ; |||||++---- Convert to RGB24. ; |||||||+--- Zoom by two. ; |||||||| ; cx512242 -- This function performs YUV12-to-RGB24 zoom-by-two color conversion ; for H26x. It is tuned for best performance on the Pentium(r) ; Microprocessor. It handles the format in which the low order ; byte is B, the second byte is G, and the high order byte is R. ; ; The YUV12 input is planar, 8 bits per pel. The Y plane may have ; a pitch of up to 768. It may have a width less than or equal ; to the pitch. It must be DWORD aligned, and preferably QWORD ; aligned. Pitch and Width must be a multiple of four. For best ; performance, Pitch should not be 4 more than a multiple of 32. ; Height may be any amount, but must be a multiple of two. The U ; and V planes may have a different pitch than the Y plane, subject ; to the same limitations. ; ; The color convertor is non-destructive; the input Y, U, and V ; planes will not be clobbered. OPTION PROLOGUE:None OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro include locals.inc include ccinst.inc include decconst.inc .xlist include memmodel.inc .list .DATA ; any data would go here .CODE ASSUME cs : FLAT ASSUME ds : FLAT ASSUME es : FLAT ASSUME fs : FLAT ASSUME gs : FLAT ASSUME ss : FLAT ; void FAR ASM_CALLTYPE YUV12ToRGB24ZoomBy2 (U8 * YPlane, ; U8 * VPlane, ; U8 * UPlane, ; UN FrameWidth, ; UN FrameHeight, ; UN YPitch, ; UN VPitch, ; UN AspectAdjustmentCount, ; U8 FAR * ColorConvertedFrame, ; U32 DCIOffset, ; U32 CCOffsetToLine0, ; IN CCOPitch, ; IN CCType) ; ; CCOffsetToLine0 is relative to ColorConvertedFrame. ; PUBLIC YUV12ToRGB24ZoomBy2 ; due to the need for the ebp reg, these parameter declarations aren't used, ; they are here so the assembler knows how many bytes to relieve from the stack YUV12ToRGB24ZoomBy2 proc DIST LANG AYPlane: DWORD, AVPlane: DWORD, AUPlane: DWORD, AFrameWidth: DWORD, AFrameHeight: DWORD, AYPitch: DWORD, AVPitch: DWORD, AAspectAdjustmentCnt: DWORD, AColorConvertedFrame: DWORD, ADCIOffset: DWORD, ACCOffsetToLine0: DWORD, ACCOPitch: DWORD, ACCType: DWORD LocalFrameSize = 64+768*8+32 RegisterStorageSize = 16 ; Arguments: YPlane_arg = RegisterStorageSize + 4 VPlane_arg = RegisterStorageSize + 8 UPlane_arg = RegisterStorageSize + 12 FrameWidth_arg = RegisterStorageSize + 16 FrameHeight = RegisterStorageSize + 20 YPitch_arg = RegisterStorageSize + 24 ChromaPitch_arg = RegisterStorageSize + 28 AspectAdjustmentCount_arg = RegisterStorageSize + 32 ColorConvertedFrame = RegisterStorageSize + 36 DCIOffset = RegisterStorageSize + 40 CCOffsetToLine0 = RegisterStorageSize + 44 CCOPitch = RegisterStorageSize + 48 CCType_arg = RegisterStorageSize + 52 EndOfArgList = RegisterStorageSize + 56 ; Locals (on local stack frame) CCOCursor EQU [esp+ 0] CCOSkipDistance EQU [esp+ 4] ChromaLineLen EQU [esp+ 8] YSkipDistance EQU [esp+12] YLimit EQU [esp+16] YCursor EQU [esp+20] VCursor EQU [esp+24] DistanceFromVToU EQU [esp+28] EndOfChromaLine EQU [esp+32] AspectCount EQU [esp+36] ChromaPitch EQU [esp+40] AspectAdjustmentCount EQU [esp+44] LineParity EQU [esp+48] LumaPitch EQU [esp+52] FrameWidth EQU [esp+56] StashESP EQU [esp+60] ChromaContribution EQU [esp+64] B0R0G0B0 EQU [esp+72] G1B1R0G0 EQU [esp+76] R1G1B1R1 EQU [esp+80] B2R2G2B2 EQU [esp+84] G3B3R2G2 EQU [esp+88] R3G3B3R3 EQU [esp+92] push esi push edi push ebp push ebx mov edi,esp sub esp,4096 mov eax,[esp] sub esp,LocalFrameSize-4096 and esp,0FFFFF000H mov eax,[esp] and esp,0FFFFE000H mov eax,[esp] sub esp,1000H mov eax,[esp] sub esp,1000H mov eax,[esp] add esp,2000H mov eax,[edi+YPitch_arg] mov ebx,[edi+ChromaPitch_arg] mov ecx,[edi+AspectAdjustmentCount_arg] mov edx,[edi+FrameWidth_arg] mov LumaPitch,eax mov ChromaPitch,ebx mov AspectAdjustmentCount,ecx mov AspectCount,ecx mov FrameWidth,edx mov ebx,[edi+VPlane_arg] mov ecx,[edi+UPlane_arg] mov eax,[edi+YPlane_arg] sub ecx,ebx mov DistanceFromVToU,ecx mov VCursor,ebx mov YCursor,eax mov eax,[edi+ColorConvertedFrame] add eax,[edi+DCIOffset] add eax,[edi+CCOffsetToLine0] mov CCOCursor,eax mov StashESP,edi mov edx,[edi+FrameHeight] mov ecx,LumaPitch imul edx,ecx mov ebx,FrameWidth mov eax,[edi+CCOPitch] sub ecx,ebx mov esi,YCursor ; Fetch cursor over luma plane. lea ebp,[ebx+ebx*4] add edx,esi add ebp,ebx mov YSkipDistance,ecx sub eax,ebp mov YLimit,edx shr ebx,1 mov CCOSkipDistance,eax mov ChromaLineLen,ebx mov ecx,AspectAdjustmentCount mov esi,VCursor mov AspectCount,ecx ; Register Usage: ; ; edi -- Y Line cursor. Chroma contribs go in lines above current Y line. ; esi -- Chroma Line cursor. ; ebp -- Y Pitch ; edx -- Distance from V pel to U pel. ; ecx -- V contribution to RGB; sum of U and V contributions. ; ebx -- U contribution to RGB. ; eax -- Alternately a U and a V pel. PrepareChromaLine: mov edi,ChromaLineLen xor eax,eax mov edx,DistanceFromVToU mov al,[esi] ; Fetch V. add edi,esi ; Compute EOL address. xor ecx,ecx mov ebp,PD V24Contrib[eax*8] ; ebp[ 0: 7] -- Zero ; ; ebp[ 8:15] -- V contrib to G. ; ; ebp[16:23] -- V contrib to R. ; ; ebp[24:31] -- Zero. mov cl,[esi+edx] ; Fetch U. mov EndOfChromaLine,edi xor ebx,ebx ; Keep pairing happy. mov ebx,PD U24Contrib[ecx*8] ; ebx[ 0: 7] -- U contrib to B. ; ; ebx[ 8:15] -- U contrib to G. ; ; ebx[16:23] -- Zero. mov cl,[esi+edx+1] ; Fetch next U. lea edi,ChromaContribution add ebp,ebx ; Chroma contributions to RGB. NextChromaPel: mov ebx,PD U24Contrib[ecx*8] ; See above. mov al,[esi+1] ; Fetch V. mov [edi],ebp ; Store contribs to use for even chroma pel. mov cl,[esi+edx+2] ; Fetch next U. mov ebp,PD V24Contrib[eax*8] ; See above. add edi,32 add ebp,ebx ; Chroma contributions to RGB. mov al,[esi+2] ; Fetch V. mov [edi-28],ebp ; Store contribs to use for odd chroma pel. mov ebx,PD U24Contrib[ecx*8] ; See above. mov ebp,PD V24Contrib[eax*8] ; See above. mov cl,[esi+edx+3] ; Fetch next U. add ebp,ebx ; Chroma contributions to RGB. add esi,2 ; Inc Chroma cursor. cmp esi,EndOfChromaLine jne NextChromaPel xor eax,eax mov esi,YCursor mov [edi+4],eax ; Store EOL indicator. mov LineParity,eax DoLine1: xor ebx,ebx xor ecx,ecx mov ebp,ChromaContribution ; Fetch preprocessed chroma contribs. xor edx,edx mov cl,[esi] ; Fetch Y0. mov bl,ChromaContribution+3 ; Fetch U contrib to B value. mov dl,ChromaContribution+2 ; Fetch UV contrib to G value. and ebp,0000001FFH ; Extract V contrib to R. mov edi,CCOCursor sub esp,6144 xor eax,eax ; Register Usage: ; ; esi -- Cursor over a line of the Y Plane. ; edi -- Cursor over the color conv output. ; ebp -- V contribution to R field of RGB value. ; edx -- UV contrib to G field; U contrib to B field of RGB value. ; ecx -- Y value (i.e. Y contribution to R, G, and B); ; ebx -- Construction of one and a third pels of RGB24. ; eax -- Construction of one and a third pels of RGB24. Next4YPelsLine0: mov ah,PB B24Value[ecx+ebx*2] ; Fetch Pel0 B. mov bh,PB R24Value[ecx+ebp*1] ; Fetch Pel0 R. mov bl,PB G24Value[ecx+edx] ; Fetch Pel0 G. -- -- R0 G0 mov cl,[esi+1] ; Fetch Y1. shl ebx,16 ; R0 G0 -- -- mov al,ah ; Copy Pel0 B. -- -- B0 B0 or eax,ebx ; R0 G0 B0 B0 mov bh,PB G24Value[ecx+edx] ; Fetch Pel1 G. R0 G0 G1 -- ror eax,8 ; First output: B0 R0 G0 B0 mov dl,ChromaContribution+6144+3 ; Refetch U contrib to B value. mov Ze [edi],eax ; Save B0R0G0B0. mov bl,PB B24Value[ecx+edx*2] ; Fetch Pel1 B. R0 G0 G1 B1 rol ebx,16 ; Second output: G1 B1 R0 G0 mov B0R0G0B0+6144,eax ; Stash for saving to second line. mov Ze [edi+4],ebx ; Save G1B1R0G0. mov G1B1R0G0+6144,ebx ; Stash for saving to second line. mov bl,PB R24Value[ecx+ebp*1] ; Fetch Pel1 R. G1 B1 -- R1 mov ebp,ChromaContribution+6144+4 ; Fetch preprocessed chroma contribs. mov bh,bl ; Copy Pel1 R. G1 B1 R1 R1 mov cl,[esi+2] ; Fetch Y2. ror ebx,8 ; Third output: R1 G1 B1 R1 and ebp,0000001FFH ; Extract V contrib to R. mov dl,ChromaContribution+6144+6 ; Fetch UV contrib to G value. xor eax,eax mov al,ChromaContribution+6144+7 ; Fetch U contrib to B value. mov R1G1B1R1+6144,ebx ; Stash for saving to second line. mov Ze [edi+8],ebx ; Save R1G1B1R1. xor ebx,ebx mov bh,PB B24Value[ecx+eax*2] ; Fetch Pel2 B. mov ah,PB R24Value[ecx+ebp*1] ; Fetch Pel2 R. mov al,PB G24Value[ecx+edx] ; Fetch Pel2 G. -- -- R2 G2 mov cl,[esi+3] ; Fetch Y3. shl eax,16 ; R2 G2 -- -- mov bl,bh ; Copy Pel2 B. -- -- B2 B2 or ebx,eax ; R2 G2 B2 B2 mov ah,PB G24Value[ecx+edx] ; Fetch Pel1 G. R2 G2 G3 -- ror ebx,8 ; Fourth output: B2 R2 G2 B2 mov dl,ChromaContribution+6144+7 ; Refetch U contrib to B value. mov Ze [edi+12],ebx ; Save B2R2G2B2. mov al,PB B24Value[ecx+edx*2] ; Fetch Pel3 B. R2 G2 G3 B3 rol eax,16 ; Fifth output: G3 B3 R2 G2 mov B2R2G2B2+6144,ebx ; Stash for saving to second line. mov Ze [edi+16],eax ; Save G3B3R2G2. mov G3B3R2G2+6144,eax ; Stash for saving to second line. mov al,PB R24Value[ecx+ebp*1] ; Fetch Pel3 R. G3 B3 -- R3 mov ebp,ChromaContribution+6144+32; Fetch preprocessed chroma contribs. mov ah,al ; Copy Pel3 R. G3 B3 R3 R3 mov cl,[esi+4] ; Fetch Y4. ror eax,8 ; Sixth output: R3 G3 B3 R3 xor ebx,ebx mov dl,ChromaContribution+6144+34 ; Fetch UV contrib to G value. and ebp,0000001FFH ; Extract U contrib to B. mov bl,ChromaContribution+6144+35 ; Fetch U contrib to B value. lea esi,[esi+4] ; Advance input cursor. mov Ze [edi+20],eax ; Save R3G3B3R3. mov R3G3B3R3+6144,eax ; Stash for saving to second line. mov eax,ebx lea esp,[esp+32] lea edi,[edi+24] ; Advance output cursor. jne Next4YPelsLine0 and esp,0FFFFE000H add esp,02000H mov ebx,CCOSkipDistance mov ebp,AspectCount add edi,ebx sub ebp,2 ; If count is non-zero, we keep the line. mov AspectCount,ebp lea ecx,B0R0G0B0 mov eax,FrameWidth jg Keep2ndLineOfLine0 add ebp,AspectAdjustmentCount mov AspectCount,ebp jmp Skip2ndLineOfLine0 Keep2ndLineOfLine0: Keep2ndLineOfLine0_Loop: mov ebp,[ecx] sub eax,4 mov Ze PD [edi],ebp mov ebp,[ecx+4] mov Ze PD [edi+4],ebp mov ebp,[ecx+8] mov Ze PD [edi+8],ebp mov ebp,[ecx+12] mov Ze PD [edi+12],ebp mov ebp,[ecx+16] mov Ze PD [edi+16],ebp mov ebp,[ecx+20] mov Ze PD [edi+20],ebp lea ecx,[ecx+32] lea edi,[edi+24] jne Keep2ndLineOfLine0_Loop add edi,ebx Skip2ndLineOfLine0: mov bl,LineParity add esi,YSkipDistance xor bl,1 mov CCOCursor,edi mov YCursor,esi mov LineParity,bl jne DoLine1 mov eax,esi mov esi,VCursor ; Inc VPlane cursor to next line. mov ebp,ChromaPitch mov ebx,YLimit ; Done with last line? add esi,ebp cmp eax,ebx mov VCursor,esi jb PrepareChromaLine Done: mov esp,StashESP pop ebx pop ebp pop edi pop esi rturn YUV12ToRGB24ZoomBy2 endp END