You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
495 lines
17 KiB
495 lines
17 KiB
;-------------------------------------------------------------------------
|
|
; INTEL Corporation Proprietary Information
|
|
;
|
|
; This listing is supplied under the terms of a license
|
|
; agreement with INTEL Corporation and may not be copied
|
|
; nor disclosed except in accordance with the terms of
|
|
; that agreement.
|
|
;
|
|
; Copyright (c) 1996 Intel Corporation.
|
|
; All Rights Reserved.
|
|
;
|
|
;-------------------------------------------------------------------------
|
|
|
|
;-------------------------------------------------------------------------
|
|
;// $Header: S:\h26x\src\dec\cx51281.asv
|
|
;//
|
|
;// $Log: S:\h26x\src\dec\cxm1281.asv $
|
|
;//
|
|
;// Rev 1.7 25 Jul 1996 13:47:58 AGUPTA2
|
|
;// Fixed blockiness problem; dither matrices were not created properly.
|
|
;//
|
|
;// Rev 1.6 14 Jun 1996 16:28:24 AGUPTA2
|
|
;// Cosmetic changes to adhere to common coding convention.
|
|
;//
|
|
;// Rev 1.5 13 May 1996 11:01:34 AGUPTA2
|
|
;// Final drop from IDC.
|
|
;//
|
|
;// Rev 1.1 20 Mar 1996 11:19:24 RMCKENZX
|
|
;// March 96 version.
|
|
;//
|
|
;// Rev 1.2 01 Feb 1996 10:45:58 vladip
|
|
;// Reduced number of locals, DataSegment changed to PARA
|
|
;//
|
|
;// Rev 1.1 29 Jan 1996 18:53:40 vladip
|
|
;//
|
|
;// IFDEF TIMING is added
|
|
;//
|
|
;// Rev 1.0 29 Jan 1996 17:28:06 vladip
|
|
;// Initial mmx version.
|
|
;//
|
|
;-------------------------------------------------------------------------
|
|
;
|
|
; +---------- Color convertor.
|
|
; |+--------- For both H261 and H263.
|
|
; ||+-------- MMx Version.
|
|
; |||++------ Convert from YUV12.
|
|
; |||||+----- Convert to CLUT8.
|
|
; ||||||+---- Zoom by one, i.e. non-zoom.
|
|
; |||||||
|
|
; cxm1281 -- This function performs YUV12 to CLUT8 color conversion for H26x.
|
|
; It dithers among 9 chroma points and 26 luma points, mapping the
|
|
; 8 bit luma pels into the 26 luma points by clamping the ends and
|
|
; stepping the luma by 8.
|
|
;
|
|
; Color convertor is not destructive.
|
|
; Requirement:
|
|
; U and V plane SHOULD be followed by 4 bytes (for read only)
|
|
; Y plane SHOULD be followed by 8 bytes (for read only)
|
|
|
|
;-------------------------------------------------------------------------
; Assembler configuration and segment declarations (MASM syntax, 32-bit).
;
; - Identifiers are case sensitive.
; - The assembler generates no prologue; the procedure saves registers and
;   builds its frame by hand.
; - Every `ret` is expanded through ReturnAndRelieveEpilogueMacro.
;   NOTE(review): that macro is not defined in this file -- presumably it
;   comes from memmodel.inc; confirm what it emits.
;-------------------------------------------------------------------------
OPTION CASEMAP:NONE
OPTION PROLOGUE:None
OPTION EPILOGUE:ReturnAndRelieveEpilogueMacro

.586
.xlist                                  ; keep the include files out of the listing
include iammx.inc                       ; NOTE(review): presumably supplies the MMX mnemonics (movq, movdt, ...) -- confirm
include memmodel.inc
.list

; Both segments are declared once here with their full attributes
; (paragraph aligned, 32-bit, public) and re-opened by name further down.
MMXCODE1 SEGMENT PARA USE32 PUBLIC 'CODE'
MMXCODE1 ENDS

MMXDATA1 SEGMENT PARA USE32 PUBLIC 'DATA'
MMXDATA1 ENDS

MMXDATA1 SEGMENT
ALIGN 8

PUBLIC Y0_low
PUBLIC Y1_low
PUBLIC U_low_value
PUBLIC V_low_value
PUBLIC U2_V0high_bound
PUBLIC U2_V0low_bound
PUBLIC V2_U0high_bound
PUBLIC V2_U0low_bound
PUBLIC return_from_Y_high
PUBLIC saturate_to_Y_high
PUBLIC clean_MSB_mask
PUBLIC convert_to_sign

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; U,V,Y impacts are calculated as follows:
;                0        U < 64h
;    U impact    1ah      64h <= U < 84h
;                34h      U >= 84h          (U_low_value is added twice)
;
;                0        V < 64h
;    V impact    4eh      64h <= V < 84h
;                9ch      V >= 84h          (V_low_value is added twice)
;
;                0        Y < 1bh
;    Y impact    Y/8      1bh <= Y < ebh
;                19h      Y >= ebh
; and the dither pattern is added to the input Y,U,V values and is a
; 4X4 matrix as defined below:
;    U
;        10h   8  18h   0
;        18h   0  10h   8
;          8 10h    0 18h
;          0 18h    8 10h
;    V
;          8 10h    0 18h
;          0 18h    8 10h
;        10h   8  18h   0
;        18h   0  10h   8
;    Y
;          4   2    6   0
;          6   0    4   2
;          2   4    0   6
;          0   6    2   4
; Note the following equalities in the dither matrices, which explain the
; unusual data declarations below:
;    U0=V2
;    U1=V3
;    U2=V0
;    U3=V1
; More details can be found in the color convertor document written
; by IDC.
;
; Table layout (the declaration ORDER below is significant):
;   The code indexes every table with a byte offset of 0 (lines 0,1 of each
;   group of four) or 8 (lines 2,3).  Each "xx_low/high_bound" pair is laid
;   out as three consecutive qwords A, B, A, so that
;       first_label[0] = A,  first_label[8]  = B
;       second_label[0] = B, second_label[8] = A
;   i.e. the U and V thresholds swap roles for the second pair of lines.
;
; Bound encoding:
;   Each stored byte is  threshold - 80h - 1.  The -80h matches the bias
;   applied to U/V with convert_to_sign before the signed compare, and the
;   extra -1 turns the strict "greater than" of pcmpgtb into ">= threshold".
;   The hex strings in the comments are the unbiased thresholds.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
V2_U0low_bound   DWORD  0f3ebfbe3h, 0f3ebfbe3h      ; 746c7c64746c7c64 - 8080808080808080 - 0101010101010101
U2_V0low_bound   DWORD  0ebf3e3fbh, 0ebf3e3fbh,     ; 6c74647c6c74647c - 8080808080808080 - 0101010101010101
                        0f3ebfbe3h, 0f3ebfbe3h      ; 746c7c64746c7c64 - 8080808080808080 - 0101010101010101

U3_V1low_bound   DWORD  0e3fbebf3h, 0e3fbebf3h      ; 647c6c74647c6c74 - 8080808080808080 - 0101010101010101
V3_U1low_bound   DWORD  0fbe3f3ebh, 0fbe3f3ebh,     ; 7c64746c7c64746c - 8080808080808080 - 0101010101010101
                        0e3fbebf3h, 0e3fbebf3h      ; 647c6c74647c6c74 - 8080808080808080 - 0101010101010101

V2_U0high_bound  DWORD  0130b1b03h, 0130b1b03h      ; 948c9c84948c9c84 - 8080808080808080 - 0101010101010101
U2_V0high_bound  DWORD  00b13031bh, 00b13031bh,     ; 8c94849c8c94849c - 8080808080808080 - 0101010101010101
                        0130b1b03h, 0130b1b03h      ; 948c9c84948c9c84 - 8080808080808080 - 0101010101010101

U3_V1high_bound  DWORD  0031b0b13h, 0031b0b13h      ; 849c8c94849c8c94 - 8080808080808080 - 0101010101010101
V3_U1high_bound  DWORD  01b03130bh, 01b03130bh,     ; 9c84948c9c84948c - 8080808080808080 - 0101010101010101
                        0031b0b13h, 0031b0b13h      ; 849c8c94849c8c94 - 8080808080808080 - 0101010101010101

; Per-byte impact added for each chroma bound that is passed.
U_low_value      DWORD  01a1a1a1ah, 01a1a1a1ah
V_low_value      DWORD  04e4e4e4eh, 04e4e4e4eh
; Subtracted from U/V bytes so that the signed pcmpgtb can be used on them.
convert_to_sign  DWORD  080808080h, 080808080h

; Y0_low, Y1_low are two-entry arrays (indexed with byte offset 0 or 8).
; Each byte is 1bh minus the Y dither value for that pel.
Y0_low           DWORD  01719151bh, 01719151bh,     ; 1b1b1b1b1b1b1b1b - 0402060004020600 ; for line%4=0
                        019171b15h, 019171b15h      ; 1b1b1b1b1b1b1b1b - 0204000602040006 ; for line%4=2

Y1_low           DWORD  0151b1719h, 0151b1719h,     ; 1b1b1b1b1b1b1b1b - 0600040206000402 ; for line%4=1
                        01b151917h, 01b151917h      ; 1b1b1b1b1b1b1b1b - 0006020400060204 ; for line%4=3

; After the qword-wide "psrlq 3", the top three bits of every byte hold bits
; shifted in from the neighbouring byte; this mask clears them.
clean_MSB_mask      DWORD  01f1f1f1fh, 01f1f1f1fh
; Saturating add of (ffh-19h) followed by saturating subtract of
; (ffh-19h-0ah) clamps the luma step to 19h and adds 0ah to it.
saturate_to_Y_high  DWORD  0e6e6e6e6h, 0e6e6e6e6h  ; ffh-19h
return_from_Y_high  DWORD  0dcdcdcdch, 0dcdcdcdch  ; ffh-19h-0ah (return back and ADD 0ah)

MMXDATA1 ENDS

MMXCODE1 SEGMENT

;-------------------------------------------------------------------------
; MMX_YUV12ToCLUT8 -- YUV12 to CLUT8 colour conversion, no zoom.
;
; Two Y lines (an even/odd pair) are converted per pass of the outer loop and
; 8 pels of each line per pass of the inner loop.  Each inner pass reads 4 U
; and 4 V bytes, duplicates every byte horizontally and uses the same chroma
; bytes for both lines; the chroma pointers advance by ChromaPitch once per
; line pair.
;
; Every output byte is the sum of three "impacts":
;   U: U_low_value added once for each of the low/high bounds that U passes
;   V: V_low_value added once for each of the low/high bounds that V passes
;   Y: ((Y -sat- Y_low) >> 3), clamped to 19h, plus 0ah
; The bounds and Y_low values already contain the dither pattern and are
; selected per line from the tables in MMXDATA1.
;
; Arguments (on the stack, see the EQUs below):
;   AYPlane, AVPlane, AUPlane     input plane addresses
;   AFrameWidth, AFrameHeight     frame size in pels / lines
;   AYPitch, AVPitch              byte pitch of the Y plane / of both chroma
;                                 planes (AVPitch is used for U and V)
;   AAspectAdjustmentCnt          line-drop period; 0 disables dropping,
;                                 1 makes the routine return immediately
;   AColorConvertedFrame, ADCIOffset, ACCOffsetToLine0
;                                 summed to form the output start address
;   ACCOPitch                     byte pitch of the output
;   ACCType                       not referenced by this routine
;
; Side effects / register usage:
;   - esi, edi, ebp, ebx are saved and restored; eax, ecx, edx and mm0-mm7
;     are clobbered.
;   - The FrameWidth argument slot on the stack is overwritten with
;     -(FrameWidth/2).
;   - The inner loop pre-loads the next 4 U, 4 V and 8 even-line Y bytes, so
;     it reads past the end of each line (see the "Requirement" note in the
;     file header).
;   - No EMMS is executed here.  NOTE(review): presumably the caller is
;     expected to issue it -- confirm.
;-------------------------------------------------------------------------
MMX_YUV12ToCLUT8 PROC DIST LANG PUBLIC,
  AYPlane:              DWORD,
  AVPlane:              DWORD,
  AUPlane:              DWORD,
  AFrameWidth:          DWORD,
  AFrameHeight:         DWORD,
  AYPitch:              DWORD,
  AVPitch:              DWORD,
  AAspectAdjustmentCnt: DWORD,
  AColorConvertedFrame: DWORD,
  ADCIOffset:           DWORD,
  ACCOffsetToLine0:     DWORD,
  ACCOPitch:            DWORD,
  ACCType:              DWORD

LocalFrameSize      = 108
RegisterStorageSize = 16                ; four registers pushed in the prologue

argument_base   EQU ebp + RegisterStorageSize
local_base      EQU esp

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Arguments (the +4 skips the return address):
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
YPlane                  EQU argument_base +  4
VPlane                  EQU argument_base +  8
UPlane                  EQU argument_base + 12
FrameWidth              EQU argument_base + 16
FrameHeight             EQU argument_base + 20
YPitch                  EQU argument_base + 24
ChromaPitch             EQU argument_base + 28
AspectAdjustmentCount   EQU argument_base + 32
ColorConvertedFrame     EQU argument_base + 36
DCIOffset               EQU argument_base + 40
CCOffsetToLine0         EQU argument_base + 44
CCOPitch                EQU argument_base + 48
CCType                  EQU argument_base + 52

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Locals (on local stack frame)
;   (local_base is aligned to a 32-byte boundary in the prologue)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
tmpV2_U0low_bound       EQU local_base +   0    ; qword
tmpU2_V0low_bound       EQU local_base +   8    ; qword
tmpU3_V1low_bound       EQU local_base +  16    ; qword
tmpV3_U1low_bound       EQU local_base +  24    ; qword
tmpV2_U0high_bound      EQU local_base +  32    ; qword
tmpU2_V0high_bound      EQU local_base +  40    ; qword
tmpU3_V1high_bound      EQU local_base +  48    ; qword
tmpV3_U1high_bound      EQU local_base +  56    ; qword
tmpY0_low               EQU local_base +  64    ; qword
tmpY1_low               EQU local_base +  72    ; qword
tmpBlockParity          EQU local_base +  80    ; table byte offset: 0 or 8
YLimit                  EQU local_base +  84    ; end value for the odd-line Y cursor
AspectCount             EQU local_base +  88    ; countdown to the next dropped line
tmpYCursorEven          EQU local_base +  92
tmpYCursorOdd           EQU local_base +  96
tmpCCOPitch             EQU local_base + 100    ; CCOPitch, or 0 when a line is dropped
StashESP                EQU local_base + 104    ; esp as it was after the register pushes

; The same register serves as both the "low" and the "high" impact because
; the impact is simply added once per bound passed.
U_low                   EQU mm6
V_low                   EQU mm7
U_high                  EQU U_low
V_high                  EQU V_low

        push    esi
        push    edi
        push    ebp
        push    ebx

        mov     ebp, esp                        ; ebp anchors the argument EQUs
        sub     esp, LocalFrameSize
        and     esp, -32                        ; align the locals on a 32-byte boundary
        mov     [StashESP], ebp                 ; needed to undo the alignment on exit

        mov     ecx, [YPitch]
        mov     edx, [FrameHeight]
        mov     ebx, [FrameWidth]
        imul    edx, ecx                        ; edx = FrameHeight * YPitch
        mov     eax, [YPlane]
        add     edx, eax                        ; edx = YPlane + FrameHeight*YPitch
        add     eax, ebx                        ; eax = end of the first even Y line
        mov     [tmpYCursorEven], eax
        add     eax, ecx                        ; + YPitch = end of the first odd Y line
        mov     [tmpYCursorOdd], eax
        lea     edx, [edx+2*ebx]                ; limit the odd-line cursor is compared against
        mov     [YLimit], edx
        mov     esi, [VPlane]
        mov     edx, [UPlane]
        mov     eax, [ColorConvertedFrame]
        add     eax, [DCIOffset]
        add     eax, [CCOffsetToLine0]          ; eax = start of the first output line
        sar     ebx, 1                          ; ebx = FrameWidth / 2 (chroma bytes per line)
        add     esi, ebx                        ; esi = end of the V line
        add     edx, ebx                        ; edx = end of the U line
        lea     edi, [eax+2*ebx]                ; edi = end of the even output line (CCOCursor)

        ; AspectAdjustmentCount = 0 means "never drop a line": the counter
        ; is started at 0FFFFFFFFh instead.
        mov     ecx, [AspectAdjustmentCount]
        test    ecx, ecx
        jnz     non_zero_AspectCount
        dec     ecx
non_zero_AspectCount:
        mov     [AspectCount], ecx
        cmp     ecx, 1
        jbe     finish                          ; a count of 1: return without converting anything

        neg     ebx
        mov     [FrameWidth], ebx               ; argument slot now holds -(FrameWidth/2)
        movq    mm6, U_low_value                ; keep frequently used values in registers
        movq    mm7, V_low_value
        xor     eax, eax
        mov     [tmpBlockParity], eax           ; start with the tables for lines 0,1

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Register Usage:
;
; esi -- points to the end of the V line.
; edx -- points to the end of the U line.
; edi -- points to the end of the even line of output.
; ebp -- points to the end of the odd line of output (inside the inner
;        loop only; everywhere else it is the argument frame anchor).
;
; ecx -- points to the end of the odd Y line (inside the inner loop).
; eax -- 8*(line&2)  == 0,  on line%4=0,1
;                    == 8,  on line%4=2,3
;        in the loop, eax points to the end of the even Y line.
; ebx -- number of pels not done yet, multiplied by -0.5.
;
; The noise matrix is 4x4, so the first pair of lines in each group of four
; uses different noise values from the second pair.  The loop converts 2
; lines at a time, so the constants for the next two lines are prepared
; here.  This code runs each time a new pair of lines is started.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
PrepareNext2Lines:
        mov     eax, [tmpBlockParity]

        ; constants for the odd line
        movq    mm0, V3_U1low_bound[eax]
        movq    mm1, V3_U1high_bound[eax]
        movq    mm2, U3_V1low_bound[eax]
        movq    mm3, U3_V1high_bound[eax]
        movq    [tmpV3_U1low_bound], mm0
        movq    [tmpV3_U1high_bound], mm1
        movq    [tmpU3_V1low_bound], mm2
        movq    [tmpU3_V1high_bound], mm3

        ; constants for the even line (left in mm0-mm3 for the drop-line
        ; path below)
        movq    mm0, V2_U0low_bound[eax]
        movq    mm1, V2_U0high_bound[eax]
        movq    mm2, U2_V0low_bound[eax]
        movq    mm3, U2_V0high_bound[eax]
        movq    [tmpV2_U0low_bound], mm0
        movq    [tmpV2_U0high_bound], mm1
        movq    [tmpU2_V0low_bound], mm2
        movq    [tmpU2_V0high_bound], mm3

        ; constants for the Y values
        movq    mm4, Y0_low[eax]
        movq    mm5, Y1_low[eax]
        xor     eax, 8                          ; toggle between the two table entries
        mov     [tmpBlockParity], eax
        movq    [tmpY0_low], mm4
        movq    [tmpY1_low], mm5

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Aspect-ratio line dropping.
;   AspectCount  > 2 : convert both lines normally, AspectCount -= 2.
;   AspectCount <= 2 : one line of this pair is dropped.  Two lines are
;     still converted, but both are written to the same output line
;     (tmpCCOPitch = 0), and AspectCount is reloaded as
;     AspectCount - 2 + AspectAdjustmentCount.
;       AspectCount == 1 : the odd line is written last and so replaces
;                          the even line.
;       AspectCount == 2 : the odd pass is redirected to re-convert the
;                          even line with the even constants, so the even
;                          line is what remains.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        mov     eax, [CCOPitch]
        mov     ebx, [AspectCount]
        xor     ecx, ecx
        sub     ebx, 2                          ; flags from this SUB drive both branches below
        mov     [tmpCCOPitch], eax
        ja      continue                        ; AspectCount was > 2: nothing to drop
        mov     eax, [AspectAdjustmentCount]
        mov     [tmpCCOPitch], ecx              ; 0: odd output overlays even output
        lea     ebx, [ebx+eax]                  ; new AspectCount (LEA leaves the flags alone)
        jnz     continue                        ; AspectCount was 1: dropping the even line

        ; AspectCount was 2: drop the odd line.  Make the odd constants
        ; equal to the even constants and point the odd Y cursor at the
        ; even line, so the odd pass repeats the even line.
        mov     eax, [tmpYCursorEven]
        movq    [tmpV3_U1low_bound], mm0
        movq    [tmpV3_U1high_bound], mm1
        movq    [tmpU3_V1low_bound], mm2
        movq    [tmpU3_V1high_bound], mm3
        movq    [tmpY1_low], mm4
        mov     [tmpYCursorOdd], eax

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; All preparations are done.  Load the first chroma/luma data and enter
; the main loop, which starts at the do_next_2x8_block label.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
continue:
        mov     [AspectCount], ebx
        mov     ebx, [FrameWidth]               ; -(FrameWidth/2); last use of ebp as frame anchor
        mov     ebp, edi
        add     ebp, [tmpCCOPitch]              ; ebp points to the end of the odd output line
        mov     eax, [tmpYCursorEven]
        mov     ecx, [tmpYCursorOdd]
        movdt   mm0, [edx+ebx]                  ; 0:0:0:0|u3:u2:u1:u0  unsigned
        movdt   mm2, [esi+ebx]                  ; 0:0:0:0|v3:v2:v1:v0  unsigned
        punpcklbw mm0, mm0                      ; u3:u3:u2:u2|u1:u1:u0:u0  unsigned
        psubb   mm0, convert_to_sign            ; u3:u3:u2:u2|u1:u1:u0:u0  signed
        punpcklbw mm2, mm2                      ; v3:v3:v2:v2|v1:v1:v0:v0  unsigned
        movq    mm4, [eax+2*ebx]                ; y7|..|y0 of the even line
        movq    mm1, mm0                        ; u3:u3:u2:u2|u1:u1:u0:u0

; On entry to each pass: mm0 = mm1 = signed U, mm2 = unsigned V (both byte-
; doubled), mm4 = 8 even-line Y bytes.  Even-line and odd-line work is
; interleaved; the instruction order is deliberate and must be kept.
do_next_2x8_block:
        psubb   mm2, convert_to_sign            ; v3:v3:v2:v2|v1:v1:v0:v0  signed
        movq    mm5, mm1                        ; copy of U for the odd line
        pcmpgtb mm0, [tmpV2_U0low_bound]        ; even: U >= low threshold ?
        movq    mm3, mm2                        ; copy of V
        pcmpgtb mm1, [tmpV2_U0high_bound]       ; even: U >= high threshold ?
        pand    mm0, U_low
        psubusb mm4, [tmpY0_low]                ; even Y: remove the dithered floor (saturating)
        pand    mm1, U_high
        pcmpgtb mm2, [tmpU2_V0low_bound]        ; even: V >= low threshold ?
        psrlq   mm4, 3                          ; divide by 8 (shift crosses byte boundaries)
        pand    mm4, clean_MSB_mask             ; drop bits shifted in from the neighbouring byte
        pand    mm2, V_low
        paddusb mm4, saturate_to_Y_high         ; clamp the luma step at 19h ...
        paddb   mm0, mm1                        ; U03:U03:U02:U02|U01:U01:U00:U00 (even U impact)
        psubusb mm4, return_from_Y_high         ; ... and come back down, leaving +0ah
        movq    mm1, mm5
        pcmpgtb mm5, [tmpV3_U1low_bound]        ; odd: U >= low threshold ?
        paddd   mm0, mm2                        ; even: + low V impact
        pcmpgtb mm1, [tmpV3_U1high_bound]       ; odd: U >= high threshold ?
        pand    mm5, U_low
        paddd   mm0, mm4                        ; even: + Y impact
        movq    mm2, mm3
        pcmpgtb mm3, [tmpU2_V0high_bound]       ; even: V >= high threshold ?
        pand    mm1, U_high
        movq    mm4, [ecx+2*ebx]                ; read 8 Y points from the odd line
        paddb   mm5, mm1                        ; U impact on the odd line
        psubusb mm4, [tmpY1_low]
        movq    mm1, mm2
        pcmpgtb mm2, [tmpU3_V1low_bound]        ; odd: V >= low threshold ?
        psrlq   mm4, 3
        pand    mm4, clean_MSB_mask
        pand    mm2, V_low
        paddusb mm4, saturate_to_Y_high
        paddd   mm5, mm2                        ; odd: + low V impact
        psubusb mm4, return_from_Y_high
        pand    mm3, V_high
        pcmpgtb mm1, [tmpU3_V1high_bound]       ; odd: V >= high threshold ?
        paddb   mm3, mm0                        ; even result = high V impact + (U + low V + Y)
        movdt   mm0, [edx+ebx+4]                ; read next 4 U points
        pand    mm1, V_high
        movdt   mm2, [esi+ebx+4]                ; read next 4 V points
        paddd   mm5, mm4                        ; odd: + Y impact
        movq    mm4, [eax+2*ebx+8]              ; read next 8 Y points from the even line
        paddb   mm5, mm1                        ; odd result complete
        psubb   mm0, convert_to_sign
        punpcklbw mm2, mm2                      ; v3:v3:v2:v2|v1:v1:v0:v0
        movq    [edi+2*ebx], mm3                ; write even line
        punpcklbw mm0, mm0                      ; u3:u3:u2:u2|u1:u1:u0:u0
        movq    [ebp+2*ebx], mm5                ; write odd line
        movq    mm1, mm0                        ; u3:u3:u2:u2|u1:u1:u0:u0
        add     ebx, 4                          ; 4 chroma samples = 8 pels done
        jl      do_next_2x8_block

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Update the pointers to the input and output buffers so that they point
; to the next pair of lines.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
        mov     ebp, [StashESP]                 ; ebp is the argument frame anchor again
        mov     eax, [tmpYCursorEven]
        mov     ecx, [YPitch]
        add     edi, [CCOPitch]                 ; go to the end of the next output line
        add     edi, [tmpCCOPitch]              ; and past the odd line (0 if a line was dropped)
        lea     eax, [eax+2*ecx]
        mov     [tmpYCursorEven], eax
        add     eax, [YPitch]
        mov     [tmpYCursorOdd], eax
        add     esi, [ChromaPitch]
        mov     ecx, [YLimit]                   ; done with the last line?
        add     edx, [ChromaPitch]
        cmp     eax, ecx
        jb      PrepareNext2Lines

finish:
        mov     esp, [StashESP]                 ; undo the frame allocation and alignment
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret                                     ; expanded by the EPILOGUE macro selected above

MMX_YUV12ToCLUT8 ENDP

MMXCODE1 ENDS

END