;*************************************************************************
;**    INTEL Corporation Proprietary Information
;**
;**    This listing is supplied under the terms of a license
;**    agreement with INTEL Corporation and may not be copied
;**    nor disclosed except in accordance with the terms of
;**    that agreement.
;**
;**    Copyright (c) 1995 Intel Corporation.
;**    All Rights Reserved.
;**
;*************************************************************************
;//
;// $Header:   S:\h26x\src\dec\cx51281.asv
;//
;// $Log:   S:\h26x\src\dec\cx51281.asv  $
;//
;//    Rev 1.6   18 Mar 1996 09:58:40   bnickers
;// Make color convertors non-destructive.
;//
;//    Rev 1.5   05 Feb 1996 13:35:38   BNICKERS
;// Fix RGB16 color flash problem, by allowing different RGB16 formats at once.
;//
;//    Rev 1.4   16 Jan 1996 11:23:06   BNICKERS
;// Fix starting point in output stream, so we don't start at line two and
;// write off the end of the output frame.
;//
;//    Rev 1.3   22 Dec 1995 15:43:28   KMILLS
;//
;// added new copyright notice
;//
;//    Rev 1.2   03 Nov 1995 11:49:40   BNICKERS
;// Support YUV12 to CLUT8 zoom and non-zoom color conversions.
;//
;//    Rev 1.1   26 Oct 1995 09:46:08   BNICKERS
;// Reduce the number of blanks in the "proc" statement because the assembler
;// sometimes has problems with statements longer than 512 characters.
;//
;//    Rev 1.0   25 Oct 1995 17:59:20   BNICKERS
;// Initial revision.
;//
;////////////////////////////////////////////////////////////////////////////
;
;    +---------- Color convertor.
;    |+--------- For both H261 and H263.
;    ||+-------- Version for the Pentium Microprocessor.
;    |||++------ Convert from YUV12.
;    |||||+----- Convert to CLUT8.
;    ||||||+---- Zoom by one, i.e. non-zoom.
;    |||||||
;    cx51281  -- This function performs YUV12 to CLUT8 color conversion for H26x.
;                It is tuned for best performance on the Pentium(r) Microprocessor.
;                It dithers among 9 chroma points and 26 luma points, mapping the
;                8 bit luma pels into the 26 luma points by clamping the ends and
;                stepping the luma by 8.
;
;                The color convertor is non-destructive;  the input Y, U, and V
;                planes will not be clobbered.

OPTION PROLOGUE:None
OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro

include locals.inc
include ccinst.inc
include decconst.inc

.xlist
include memmodel.inc
.list

.DATA

; any data would go here

.CODE

ASSUME cs : FLAT
ASSUME ds : FLAT
ASSUME es : FLAT
ASSUME fs : FLAT
ASSUME gs : FLAT
ASSUME ss : FLAT

;-----------------------------------------------------------------------------
; void FAR ASM_CALLTYPE YUV12ToCLUT8 (U8 * YPlane,
;                                     U8 * VPlane,
;                                     U8 * UPlane,
;                                     UN  FrameWidth,
;                                     UN  FrameHeight,
;                                     UN  YPitch,
;                                     UN  VPitch,
;                                     UN  AspectAdjustmentCount,
;                                     U8 * ColorConvertedFrame,
;                                     U32 DCIOffset,
;                                     U32 CCOffsetToLine0,
;                                     IN  CCOPitch,
;                                     IN  CCType)
;
; CCOffsetToLine0 is relative to ColorConvertedFrame.
;
; Overview of what the code below does:
;   * Output starts at ColorConvertedFrame + DCIOffset + CCOffsetToLine0 and
;     moves by CCOPitch after every line that is written.
;   * Each trip round the outer loop consumes four luma lines and two chroma
;     lines.  A chroma line is first expanded into the ChromaContribution
;     buffer; two luma lines are then combined with that buffer.
;   * Before each luma line AspectCount is decremented; when it reaches zero
;     it is reloaded from AspectAdjustmentCount and that line is not written.
;   * esi, edi, ebp and ebx are saved on entry and restored on exit.
;     CCType is never read.
;
; PD, PB, Ze and rturn are macros/equates supplied by the include files;
; UVDitherLine01, UVDitherLine23 and YDither are tables defined elsewhere.
;
; NOTE(review): the outer loop ends only when the luma cursor EQUALS
;   YPlane + FrameHeight*YPitch, so FrameHeight is presumably always a
;   multiple of four -- confirm with callers.
; NOTE(review): the chroma loops end only when the V cursor EQUALS the end of
;   the chroma line and they step two pels at a time, so FrameWidth/2 is
;   presumably always even -- confirm with callers.
;-----------------------------------------------------------------------------

PUBLIC  YUV12ToCLUT8

; Because ebp is needed as a general register, the parameter declarations on
; the proc statement are not used to reach the arguments; they exist so the
; assembler knows how many bytes to relieve from the stack.  The continuation
; lines are deliberately left unindented to keep the statement short.

YUV12ToCLUT8    proc DIST LANG AYPlane: DWORD,
AVPlane: DWORD,
AUPlane: DWORD,
AFrameWidth: DWORD,
AFrameHeight: DWORD,
AYPitch: DWORD,
AVPitch: DWORD,
AAspectAdjustmentCnt: DWORD,
AColorConvertedFrame: DWORD,
ADCIOffset: DWORD,
ACCOffsetToLine0: DWORD,
ACCOPitch: DWORD,
ACCType: DWORD

; Assembly-time constants.

ChromaContribBytes         =  768*2         ; Bytes set aside for ChromaContribution.
FrameAlignMask             =  0FFFFF800H    ; Clears the low 11 bits (2K boundary).
FrameAlignSpan             =  00800H        ; 2K, the span covered by that mask.
ChromaPelMask              =  0FCH          ; Keeps the upper six bits of a chroma pel.
PackedVOffset              =  00A0A0A0AH    ; Added to the tripled V pattern (0AH per byte).
HighPairMask               =  0FFFF0000H    ; Keeps bytes 3 and 2 of a dword.
LowPairMask                =  00000FFFFH    ; Keeps bytes 1 and 0 of a dword.

LocalFrameSize             =  64+ChromaContribBytes+4
RegisterStorageSize        =  16            ; Four registers pushed on entry.

; Arguments, as offsets from the value of esp right after the four pushes.
; (The first offset is RegisterStorageSize + 4, presumably to step over the
; return address.)

YPlane_arg                 =  RegisterStorageSize +  4
VPlane_arg                 =  RegisterStorageSize +  8
UPlane_arg                 =  RegisterStorageSize + 12
FrameWidth_arg             =  RegisterStorageSize + 16
FrameHeight                =  RegisterStorageSize + 20
YPitch_arg                 =  RegisterStorageSize + 24
ChromaPitch_arg            =  RegisterStorageSize + 28
AspectAdjustmentCount_arg  =  RegisterStorageSize + 32
ColorConvertedFrame        =  RegisterStorageSize + 36
DCIOffset                  =  RegisterStorageSize + 40
CCOffsetToLine0            =  RegisterStorageSize + 44
CCOPitch_arg               =  RegisterStorageSize + 48
CCType_arg                 =  RegisterStorageSize + 52
EndOfArgList               =  RegisterStorageSize + 56

; Locals, addressed from the aligned esp.

CCOCursor                  EQU  [esp+ 0]
ChromaLineLen              EQU  [esp+ 4]
YLimit                     EQU  [esp+ 8]
YCursor                    EQU  [esp+12]
VCursor                    EQU  [esp+16]
DistanceFromVToU           EQU  [esp+20]
EndOfChromaLine            EQU  [esp+24]
AspectCount                EQU  [esp+28]
FrameWidth                 EQU  [esp+32]
ChromaPitch                EQU  [esp+36]
AspectAdjustmentCount      EQU  [esp+40]
LumaPitch                  EQU  [esp+44]
CCOPitch                   EQU  [esp+48]
StashESP                   EQU  [esp+52]
ChromaContribution         EQU  [esp+64]

; ---- Entry: save registers, build the aligned local frame ------------------

        push    esi
        push    edi
        push    ebp
        push    ebx

        mov     edi,esp                         ; edi = base for reaching the arguments.
        sub     esp,LocalFrameSize
        and     esp,FrameAlignMask              ; Locals start on a 2K boundary.

        ; Copy the scalar arguments into the local frame.
        mov     eax,[edi+FrameWidth_arg]
        mov     ebx,[edi+ChromaPitch_arg]
        mov     ecx,[edi+AspectAdjustmentCount_arg]
        mov     edx,[edi+YPitch_arg]
        mov     esi,[edi+CCOPitch_arg]
        mov     FrameWidth,eax
        mov     ChromaPitch,ebx
        mov     AspectAdjustmentCount,ecx
        mov     AspectCount,ecx                 ; Countdown starts at the reload value.
        mov     LumaPitch,edx
        mov     CCOPitch,esi

        ; Plane cursors.  U is reached as V + (UPlane - VPlane).
        mov     ebx,[edi+VPlane_arg]
        mov     ecx,[edi+UPlane_arg]
        mov     eax,[edi+YPlane_arg]
        sub     ecx,ebx
        mov     DistanceFromVToU,ecx
        mov     VCursor,ebx
        mov     YCursor,eax

        ; Output cursor = ColorConvertedFrame + DCIOffset + CCOffsetToLine0.
        mov     eax,[edi+ColorConvertedFrame]
        add     eax,[edi+DCIOffset]
        add     eax,[edi+CCOffsetToLine0]
        mov     CCOCursor,eax
        mov     StashESP,edi                    ; Needed to unwind the frame at exit.

        ; YLimit = YPlane + FrameHeight*YPitch;  ChromaLineLen = FrameWidth/2.
        mov     edx,[edi+FrameHeight]
        mov     ecx,LumaPitch
        imul    edx,ecx
        mov     ebx,FrameWidth
        mov     esi,YCursor
        sar     ebx,1
        add     edx,esi
        mov     YLimit,edx
        mov     ChromaLineLen,ebx

; ============================================================================
; Outer loop: four luma lines (two chroma lines) per trip.
; ============================================================================

FourLineGroup:

; ---- Chroma pass A ---------------------------------------------------------
; Expand one line of V and U pels into ChromaContribution.
;
;   edi      -- cursor over the V line
;   ebp      -- distance from V to U, so [edi+ebp] addresses the U line
;   esi      -- cursor over ChromaContribution
;   ebx      -- V pel, masked to six bits, used as a byte offset into a table
;   ecx      -- U pel, likewise
;   eax, edx -- packed dither patterns being combined
;
; In this pass V indexes UVDitherLine01 and U indexes UVDitherLine23.  Two
; chroma pels are consumed and two dwords produced per iteration.

        mov     edi,VCursor
        mov     ebp,DistanceFromVToU
        lea     esi,ChromaContribution
        mov     eax,ChromaLineLen
        mov     edx,ChromaPitch
        add     eax,edi
        mov     EndOfChromaLine,eax             ; Address just past this V line.
        add     edx,edi
        mov     bl,[edi]                        ; First V pel.
        and     ebx,ChromaPelMask               ; Also clears the upper bytes of ebx.
        mov     cl,[edi+ebp*1]                  ; First U pel.
        and     ecx,ChromaPelMask               ; Also clears the upper bytes of ecx.
        mov     VCursor,edx                     ; Next chroma line, for the next pass.

ChromaPairA:

        mov     edx,PD UVDitherLine01[ebx]      ; Pattern for the current V pel.
        mov     bl,[edi+1]                      ; Next V pel.
        mov     eax,PD UVDitherLine23[ecx]      ; Pattern for the current U pel.
        mov     cl,[edi+ebp*1+1]                ; Next U pel.
        lea     edx,[edx+edx*2+PackedVOffset]   ; V pattern * 3 + constant.
        and     bl,ChromaPelMask
        add     eax,edx                         ; Combined U and V contribution.
        and     cl,ChromaPelMask
        mov     edx,PD UVDitherLine01[ebx]      ; Same again for the second pel.
        mov     [esi],eax                       ; First dword of this pair.
        mov     eax,PD UVDitherLine23[ecx]
        mov     bl,[edi+2]                      ; V pel for the next iteration.
        lea     edx,[edx+edx*2+PackedVOffset]
        mov     cl,[edi+ebp*1+2]                ; U pel for the next iteration.
        add     eax,edx
        mov     edx,EndOfChromaLine
        mov     [esi+4],eax                     ; Second dword of this pair.
        add     edi,2                           ; Two chroma pels consumed.
        and     bl,ChromaPelMask
        and     cl,ChromaPelMask
        add     esi,8                           ; Two dwords produced.
        sub     edx,edi                         ; Zero exactly at end of line.
        jne     ChromaPairA

; ---- Luma line 0 -----------------------------------------------------------
; The luma lines use esp itself as the cursor over ChromaContribution.  esp is
; lowered by ChromaContribBytes-8, after which ChromaContribution+ChromaContribBytes
; names the NEXT pair of chroma dwords; esp then advances by 8 per iteration.
; Named locals must not be touched until esp has been put back.
;
;   edi      -- output cursor minus esi
;   esi      -- cursor over the Y line
;   ebx, ecx -- Y pels, used as offsets into YDither
;   eax, edx -- chroma contribution / output pels under construction
;
; This line pairs the low half of the first chroma dword with the high half of
; the second.  The YDither displacements differ for each pel position and line.

        mov     [esi],edx                       ; edx is zero here: end-of-line marker.
        mov     edx,AspectCount
        mov     esi,YCursor
        dec     edx                             ; Sets ZF; the mov below keeps it.
        mov     AspectCount,edx
        jne     EmitLine0

        mov     edx,AspectAdjustmentCount       ; Countdown expired: reload it and
        mov     AspectCount,edx                 ; drop this line.
        jmp     Line0Done

EmitLine0:

        mov     edi,CCOCursor
        mov     eax,CCOPitch
        add     eax,edi
        mov     edx,ChromaContribution+4        ; Second chroma dword.
        mov     CCOCursor,eax                   ; Output cursor for the next line.
        sub     edi,esi                         ; So that [edi+esi] is the output address.
        mov     bl,[esi+3]                      ; Y pel 3.
        and     edx,HighPairMask                ; Keep its upper two bytes.
        mov     eax,ChromaContribution          ; First chroma dword.
        sub     esp,ChromaContribBytes-8        ; esp becomes the chroma cursor.
        and     eax,LowPairMask                 ; Keep its lower two bytes.
        mov     cl,[esi+2]                      ; Y pel 2.

Line0Quad:

        or      eax,edx                         ; Chroma for four pels.
        mov     dh,PB YDither[ebx+4]            ; Dithered pel 3.
        mov     dl,PB YDither[ecx+2]            ; Dithered pel 2.
        mov     bl,PB [esi+1]                   ; Y pel 1.
        shl     edx,16                          ; Pels 3,2 move to the upper half.
        mov     cl,PB [esi]                     ; Y pel 0.
        mov     dh,PB YDither[ebx+6]            ; Dithered pel 1.
        mov     bl,PB [esi+3+4]                 ; Pel 3 of the next group.
        mov     dl,PB YDither[ecx+0]            ; Dithered pel 0.
        mov     cl,PB [esi+2+4]                 ; Pel 2 of the next group.
        add     eax,edx                         ; Luma + chroma = four output pels.
        mov     edx,ChromaContribution+ChromaContribBytes+4
        mov     Ze [edi+esi],eax                ; Store four pels.
        mov     eax,ChromaContribution+ChromaContribBytes
        and     edx,HighPairMask
        add     esi,4
        add     esp,8                           ; Advance the chroma cursor.
        and     eax,LowPairMask                 ; Zero only at the end-of-line marker.
        jne     Line0Quad

        ; Put esp back on the 2K-aligned frame base.
        ; NOTE(review): this works only while esp stays below that base, i.e.
        ; fewer than 191 iterations (FrameWidth under 764) -- confirm.
        and     esp,FrameAlignMask
        add     esp,FrameAlignSpan

Line0Done:

; ---- Luma line 1 -----------------------------------------------------------
; Same scheme, reading the Y line one pitch further on ([esi+ebp]) and pairing
; the high half of the first chroma dword with the low half of the second.
; YCursor is advanced by two luma lines here whether or not the line is kept.

        mov     esi,YCursor
        mov     ebp,LumaPitch
        mov     edx,AspectCount
        mov     edi,CCOCursor
        lea     eax,[esi+ebp*2]                 ; Y cursor two lines further on.
        dec     edx                             ; Sets ZF; mov/lea below keep it.
        mov     YCursor,eax
        mov     eax,CCOPitch
        mov     AspectCount,edx
        jne     EmitLine1

        mov     edx,AspectAdjustmentCount
        mov     AspectCount,edx
        jmp     Line1Done

EmitLine1:

        add     eax,edi
        mov     edx,ChromaContribution+4        ; Second chroma dword.
        mov     CCOCursor,eax                   ; Output cursor for the next line.
        sub     edi,esi
        mov     bl,[esi+ebp*1+3]                ; Y pel 3 of this line.
        mov     eax,ChromaContribution          ; First chroma dword.
        shl     edx,16                          ; Its lower two bytes, moved up.
        sub     esp,ChromaContribBytes-8        ; esp becomes the chroma cursor.
        shr     eax,16                          ; Its upper two bytes, moved down.
        mov     cl,[esi+ebp*1+2]                ; Y pel 2 of this line.

Line1Quad:

        or      eax,edx
        mov     dh,PB YDither[ebx+6]            ; Dithered pel 3.
        mov     dl,PB YDither[ecx+0]            ; Dithered pel 2.
        mov     bl,PB [esi+ebp*1+1]             ; Y pel 1.
        shl     edx,16
        mov     cl,PB [esi+ebp*1]               ; Y pel 0.
        mov     dh,PB YDither[ebx+4]            ; Dithered pel 1.
        mov     bl,PB [esi+ebp*1+3+4]           ; Pel 3 of the next group.
        mov     dl,PB YDither[ecx+2]            ; Dithered pel 0.
        mov     cl,PB [esi+ebp*1+2+4]           ; Pel 2 of the next group.
        add     eax,edx
        mov     edx,ChromaContribution+ChromaContribBytes+4
        mov     Ze [edi+esi],eax                ; Store four pels.
        mov     eax,ChromaContribution+ChromaContribBytes
        shl     edx,16
        add     esi,4
        shr     eax,16                          ; Zero only at the end-of-line marker.
        lea     esp,[esp+8]                     ; Advance without disturbing ZF.
        jne     Line1Quad

        and     esp,FrameAlignMask              ; Back to the aligned frame base.
        add     esp,FrameAlignSpan

Line1Done:

; ---- Chroma pass B ---------------------------------------------------------
; Identical to pass A except that the two tables swap roles: V indexes
; UVDitherLine23 and U indexes UVDitherLine01.

        mov     edi,VCursor
        mov     ebp,DistanceFromVToU
        lea     esi,ChromaContribution
        mov     eax,ChromaLineLen
        mov     edx,ChromaPitch
        add     eax,edi
        mov     EndOfChromaLine,eax
        add     edx,edi
        mov     bl,[edi]                        ; First V pel.
        and     ebx,ChromaPelMask
        mov     cl,[edi+ebp*1]                  ; First U pel.
        and     ecx,ChromaPelMask
        mov     VCursor,edx                     ; Next chroma line, for the next pass.

ChromaPairB:

        mov     edx,PD UVDitherLine23[ebx]
        mov     bl,[edi+1]
        mov     eax,PD UVDitherLine01[ecx]
        mov     cl,[edi+ebp*1+1]
        lea     edx,[edx+edx*2+PackedVOffset]
        and     bl,ChromaPelMask
        add     eax,edx
        and     cl,ChromaPelMask
        mov     edx,PD UVDitherLine23[ebx]
        mov     [esi],eax
        mov     eax,PD UVDitherLine01[ecx]
        mov     bl,[edi+2]
        lea     edx,[edx+edx*2+PackedVOffset]
        mov     cl,[edi+ebp*1+2]
        add     eax,edx
        mov     edx,EndOfChromaLine
        mov     [esi+4],eax
        add     edi,2
        and     bl,ChromaPelMask
        and     cl,ChromaPelMask
        add     esi,8
        sub     edx,edi
        jne     ChromaPairB

; ---- Luma line 2 -----------------------------------------------------------
; Same structure as line 0, with a different set of YDither displacements.

        mov     [esi],edx                       ; End-of-line marker (edx is zero).
        mov     edx,AspectCount
        mov     esi,YCursor
        dec     edx
        mov     AspectCount,edx
        jne     EmitLine2

        mov     edx,AspectAdjustmentCount
        mov     AspectCount,edx
        jmp     Line2Done

EmitLine2:

        mov     edi,CCOCursor
        mov     eax,CCOPitch
        add     eax,edi
        mov     edx,ChromaContribution+4
        mov     CCOCursor,eax
        sub     edi,esi
        mov     bl,[esi+3]
        and     edx,HighPairMask
        mov     eax,ChromaContribution
        sub     esp,ChromaContribBytes-8        ; esp becomes the chroma cursor.
        and     eax,LowPairMask
        mov     cl,[esi+2]

Line2Quad:

        or      eax,edx
        mov     dh,PB YDither[ebx+2]
        mov     dl,PB YDither[ecx+4]
        mov     bl,PB [esi+1]
        shl     edx,16
        mov     cl,PB [esi]
        mov     dh,PB YDither[ebx+0]
        mov     bl,PB [esi+3+4]
        mov     dl,PB YDither[ecx+6]
        mov     cl,PB [esi+2+4]
        add     eax,edx
        mov     edx,ChromaContribution+ChromaContribBytes+4
        mov     Ze [edi+esi],eax
        mov     eax,ChromaContribution+ChromaContribBytes
        and     edx,HighPairMask
        add     esi,4
        add     esp,8
        and     eax,LowPairMask
        jne     Line2Quad

        and     esp,FrameAlignMask              ; Back to the aligned frame base.
        add     esp,FrameAlignSpan

Line2Done:

; ---- Luma line 3 -----------------------------------------------------------
; Same structure as line 1, with a different set of YDither displacements.

        mov     esi,YCursor
        mov     ebp,LumaPitch
        mov     edx,AspectCount
        mov     edi,CCOCursor
        lea     eax,[esi+ebp*2]
        dec     edx
        mov     YCursor,eax
        mov     eax,CCOPitch
        mov     AspectCount,edx
        jne     EmitLine3

        mov     edx,AspectAdjustmentCount
        mov     AspectCount,edx
        jmp     Line3Done

EmitLine3:

        add     eax,edi
        mov     edx,ChromaContribution+4
        mov     CCOCursor,eax
        sub     edi,esi
        mov     bl,[esi+ebp*1+3]
        mov     eax,ChromaContribution
        shl     edx,16
        sub     esp,ChromaContribBytes-8        ; esp becomes the chroma cursor.
        shr     eax,16
        mov     cl,[esi+ebp*1+2]

Line3Quad:

        or      eax,edx
        mov     dh,PB YDither[ebx+0]
        mov     dl,PB YDither[ecx+6]
        mov     bl,PB [esi+ebp*1+1]
        shl     edx,16
        mov     cl,PB [esi+ebp*1]
        mov     dh,PB YDither[ebx+2]
        mov     bl,PB [esi+ebp*1+3+4]
        mov     dl,PB YDither[ecx+4]
        mov     cl,PB [esi+ebp*1+2+4]
        add     eax,edx
        mov     edx,ChromaContribution+ChromaContribBytes+4
        mov     Ze [edi+esi],eax
        mov     eax,ChromaContribution+ChromaContribBytes
        shl     edx,16
        add     esi,4
        shr     eax,16
        lea     esp,[esp+8]                     ; Advance without disturbing ZF.
        jne     Line3Quad

        and     esp,FrameAlignMask              ; Back to the aligned frame base.
        add     esp,FrameAlignSpan

Line3Done:

; ---- Next group of four lines, or exit -------------------------------------

        mov     esi,YCursor
        mov     eax,YLimit
        cmp     eax,esi
        jne     FourLineGroup                   ; Continue until the cursor equals YLimit.

        mov     esp,StashESP                    ; Discard the local frame.
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        rturn

YUV12ToCLUT8 endp

END