Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

312 lines
7.5 KiB

;*************************************************************************
;** INTEL Corporation Proprietary Information
;**
;** This listing is supplied under the terms of a license
;** agreement with INTEL Corporation and may not be copied
;** nor disclosed except in accordance with the terms of
;** that agreement.
;**
;** Copyright (c) 1995 Intel Corporation.
;** All Rights Reserved.
;**
;*************************************************************************
;//
;// $Header: S:\h26x\src\dec\cx512yuv.asv 1.5 30 Dec 1996 20:02:08 MDUDA $
;//
;// $Log: S:\h26x\src\dec\cx512yuv.asv $
;//
;// Rev 1.5 30 Dec 1996 20:02:08 MDUDA
;// Fixed problem where buffer boundaries were being over-written.
;//
;// Rev 1.4 11 Dec 1996 14:58:52 JMCVEIGH
;//
;// Changed to support widths that are multiples of 4.
;//
;// Rev 1.3 18 Jul 1996 12:52:58 KLILLEVO
;// changed cache heating to speed things up a bit
;//
;// Rev 1.2 18 Jul 1996 09:39:34 KLILLEVO
;//
;// added PVCS header and log
;; Very straightforward implementation of the YUV pitch changer
;; Does 16 pels at a time. If the width is not a multiple of 16
;; the remainder pels are handled as a special case. We assume
;; that the width is at least a multiple of 4
; Assembler set-up.
; Automatic prologue generation is turned off; the procedure in this file
; saves registers and reserves its stack locals by hand.
OPTION PROLOGUE: None
; Epilogue generation is delegated to a named macro. The macro is not defined
; in this file - presumably it comes from memmodel.inc (TODO confirm).
OPTION EPILOGUE: ReturnAndRelieveEpilogueMacro
; Keep the contents of the include file out of the assembly listing.
.xlist
include memmodel.inc
.list
.DATA
; any data would go here
.CODE
; All segment registers are assumed to address the flat segment.
ASSUME cs: FLAT
ASSUME ds: FLAT
ASSUME es: FLAT
ASSUME fs: FLAT
ASSUME gs: FLAT
ASSUME ss: FLAT
; Make the routine visible to other modules.
PUBLIC YUV12ToYUV
;--------------------------------------------------------------------------
; YUV12ToYUV
;
; Copies three source planes into one contiguous run of output bytes,
; dropping the per-row padding of the source (pitch - width):
;
;   1. uHeight   rows of uWidth   bytes from uYPlane (row stride uYPitch)
;   2. uHeight/2 rows of uWidth/2 bytes from uUPlane (row stride uUVPitch)
;   3. uHeight/2 rows of uWidth/2 bytes from uVPlane (row stride uUVPitch)
;
; The destination starts at uCCOutputBuffer + uOffsetToLine0 + lOutput and
; is written strictly sequentially (no destination pitch is applied).
;
; Arguments (all DWORD, on the stack; the routine removes 52 bytes on return):
;   uYPlane, uVPlane, uUPlane - source plane addresses
;   uWidth, uHeight           - luma width in bytes / number of luma rows
;   uYPitch, uUVPitch         - source row strides for luma / chroma
;   uCCOutputBuffer, lOutput, uOffsetToLine0 - summed to form the
;                               destination address
;   bShapingFlag, intPitch, CCType - not referenced by this routine
;
; Notes:
;   * Luma rows are copied in 16/8/4-byte pieces, chroma rows in 8/4/2-byte
;     pieces; width bits below those granularities are not copied, so the
;     width is expected to be a multiple of 4 (see the file header).
;   * The uWidth and uHeight argument slots on the stack are overwritten
;     with the halved chroma values.
;   * If uHeight is 0 nothing is copied; if uHeight/2 is 0 only luma is
;     copied.
;   * eax is not set to any defined return value.
;
; Register roles inside the copy loops:
;   esi = source pointer        edi = destination pointer
;   ecx = bytes left in row     edx = rows left in plane
;   eax, ebx = copy scratch     ebp = saved/restored but otherwise unused
;
; DIST and LANG are not defined in this file - presumably supplied by
; memmodel.inc (TODO confirm).
;--------------------------------------------------------------------------
YUV12ToYUV proc DIST LANG AuYPlane: DWORD,
AuVPlane: DWORD,
AuUPlane: DWORD,
AuWidth: DWORD,
AuHeight: DWORD,
AuYPitch: DWORD,
AUVPitch: DWORD,
AbShapingFlag: DWORD,
AuCCOutputBuffer: DWORD,
AlOutput: DWORD,
AuOffsetToLine0: DWORD,
AintPitch: DWORD,
ACCType: DWORD

; Stack picture once the prologue below has run (offsets from esp):
;   [esp + 0  .. 11]  locals
;   [esp + 12 .. 27]  saved ebp, edi, esi, ebx
;   [esp + 28]        return address
;   [esp + 32 ...]    arguments
LocalFrameSize = 12
RegisterStorageSize = 16 ; 4 registers pushed

; Argument offsets (after register pushed)
uYPlane = LocalFrameSize + RegisterStorageSize + 4
uVPlane = LocalFrameSize + RegisterStorageSize + 8
uUPlane = LocalFrameSize + RegisterStorageSize + 12
uWidth = LocalFrameSize + RegisterStorageSize + 16
uHeight = LocalFrameSize + RegisterStorageSize + 20
uYPitch = LocalFrameSize + RegisterStorageSize + 24
uUVPitch = LocalFrameSize + RegisterStorageSize + 28
bShapingFlag = LocalFrameSize + RegisterStorageSize + 32
uCCOutputBuffer = LocalFrameSize + RegisterStorageSize + 36
lOutput = LocalFrameSize + RegisterStorageSize + 40
uOffsetToLine0 = LocalFrameSize + RegisterStorageSize + 44
intPitch = LocalFrameSize + RegisterStorageSize + 48
CCType = LocalFrameSize + RegisterStorageSize + 52

; Local offsets (after register pushes)
LineAdd = 0 ; 1
LineWidth = 4 ; 2

; Arguments relative to esp
_uYPlane EQU [esp + uYPlane]
_uVPlane EQU [esp + uVPlane]
_UUPlane EQU [esp + uUPlane]
_uWidth EQU [esp + uWidth ]
_uHeight EQU [esp + uHeight]
_uYPitch EQU [esp + uYPitch]
_uUVPitch EQU [esp + uUVPitch]
_bShapingFlag EQU [esp + bShapingFlag]
_uCCOutputBuffer EQU [esp + uCCOutputBuffer]
_lOutput EQU [esp + lOutput]
_uOffsetToLine0 EQU [esp + uOffsetToLine0]
_intPitch EQU [esp + intPitch]
_uCCType EQU [esp + CCType]

; Locals relative to esp
; NOTE(review): _LineWidth and _uRemainderEdgePels are never referenced
; below, and no uRemainderEdgePels offset is defined in this file; they are
; kept only as text equates.
_LineAdd EQU [esp + LineAdd]
_LineWidth EQU [esp + LineWidth]
_uRemainderEdgePels EQU [esp + uRemainderEdgePels]

; Save registers and start working
        push    ebx
        push    esi
        push    edi
        push    ebp
        sub     esp, LocalFrameSize

; edi = uCCOutputBuffer + uOffsetToLine0 + lOutput
        mov     eax, _uCCOutputBuffer
        add     eax, _uOffsetToLine0
        mov     ecx, _lOutput
        add     eax, ecx
        mov     ebx, _uYPitch
        mov     ecx, _uWidth
        mov     esi, _uYPlane
        mov     edi, eax

;--------------------------------------------------------------------------
; Luma plane
;--------------------------------------------------------------------------
        sub     ebx, ecx                        ; ebx = pitch - width
        mov     edx, _uHeight
        mov     eax, _uWidth                    ; eax is overwritten before it is read
        mov     _LineAdd, ebx
        test    edx, edx
        jz      YUVDone                         ; no rows: dec/jnz below would wrap

L2:
        test    ecx, 0FFFFFFF0H
        jz      LEdgePels                       ; fewer than 16 bytes left in row

L1:
        mov     ebx, DWORD PTR [edi]            ; heat cache (value is discarded)
        add     edi, 16
        mov     eax, DWORD PTR [esi + 0]
        mov     ebx, DWORD PTR [esi + 4]
        mov     DWORD PTR [edi - 16], eax
        mov     DWORD PTR [edi - 12], ebx
        mov     eax, DWORD PTR [esi + 8]
        mov     ebx, DWORD PTR [esi +12]
        mov     DWORD PTR [edi - 8], eax
        mov     DWORD PTR [edi - 4], ebx
        add     esi, 16
        sub     ecx, 16
        test    ecx, 0FFFFFFF0H
        jnz     L1

LEdgePels:
; Do edge pels if needed (width a multiple of 4, but not of 16)
; Check 8 edge pels
        test    ecx, 08H
        jz      Lchk4
        mov     eax, DWORD PTR [esi + 0]        ; Input pels 0-3
        mov     ebx, DWORD PTR [esi + 4]        ; Input pels 4-7
        mov     DWORD PTR [edi + 0], eax        ; Output pels 0-3
        mov     DWORD PTR [edi + 4], ebx        ; Output pels 4-7
        add     esi, 8
        add     edi, 8

Lchk4:
; Check 4 edge pels
        test    ecx, 04H
        jz      L2_cont
        mov     eax, DWORD PTR [esi + 0]        ; Input pels 0-3
        add     esi, 4
        mov     DWORD PTR [edi + 0], eax        ; Output pels 0-3
        add     edi, 4

L2_cont:
        add     esi, _LineAdd                   ; skip source padding to next row
        mov     ecx, _uWidth
        dec     edx
        jnz     L2

;--------------------------------------------------------------------------
; Chroma, U plane
;--------------------------------------------------------------------------
        mov     esi, _uUPlane
        mov     ecx, _uWidth
        shr     ecx, 1
        mov     ebx, _uUVPitch
        sub     ebx, ecx                        ; ebx = pitch - width/2
        mov     edx, _uHeight
        shr     edx, 1
        mov     _LineAdd, ebx
        mov     _uWidth, ecx                    ; argument slot now holds width/2
        mov     _uHeight, edx                   ; argument slot now holds height/2
        test    edx, edx
        jz      YUVDone                         ; no chroma rows: skip U and V

U2:
        test    ecx, 0FFFFFFF8H
        jz      UEdgePels                       ; fewer than 8 bytes left in row

U1:
        mov     ebx, DWORD PTR [edi]            ; heat cache (value is discarded)
        add     edi, 8
        mov     eax, DWORD PTR [esi + 0]
        mov     ebx, DWORD PTR [esi + 4]
        mov     DWORD PTR [edi - 8], eax
        mov     DWORD PTR [edi - 4], ebx
        add     esi, 8
        sub     ecx, 8
        test    ecx, 0FFFFFFF8H
        jnz     U1

UEdgePels:
; Do edge pels if needed (row width a multiple of 2, but not of 8)
; Check 4 edge pels
        test    ecx, 04H
        jz      Uchk4
        mov     eax, DWORD PTR [esi + 0]        ; Input pels 0-3
        add     esi, 4
        mov     DWORD PTR [edi + 0], eax        ; Output pels 0-3
        add     edi, 4

Uchk4:
; Check 2 edge pels
        test    ecx, 02H
        jz      U2_cont
        mov     ax, WORD PTR [esi + 0]          ; Input pels 0-1
        add     esi, 2
        mov     WORD PTR [edi + 0], ax          ; Output pels 0-1
        add     edi, 2

U2_cont:
        add     esi, _LineAdd                   ; skip source padding to next row
        mov     ecx, _uWidth
        dec     edx
        jnz     U2

;--------------------------------------------------------------------------
; Chroma, V plane (same row width, row count and LineAdd as the U plane)
;--------------------------------------------------------------------------
        mov     esi, _uVPlane
        mov     ecx, _uWidth
        mov     edx, _uHeight                   ; non-zero: checked before the U plane
        nop

V2:
        test    ecx, 0FFFFFFF8H
; Must branch to the V edge code. Branching to UEdgePels would continue in
; the U loop, which falls back into this V set-up when its row counter
; reaches zero and so never terminates for rows narrower than 8 bytes.
        jz      VEdgePels                       ; fewer than 8 bytes left in row

V1:
        mov     ebx, DWORD PTR [edi]            ; heat cache (value is discarded)
        add     edi, 8
        mov     eax, DWORD PTR [esi + 0]
        mov     ebx, DWORD PTR [esi + 4]
        mov     DWORD PTR [edi - 8], eax
        mov     DWORD PTR [edi - 4], ebx
        add     esi, 8
        sub     ecx, 8
        test    ecx, 0FFFFFFF8H
        jnz     V1

VEdgePels:
; Do edge pels if needed (row width a multiple of 2, but not of 8)
; Check 4 edge pels
        test    ecx, 04H
        jz      Vchk4
        mov     eax, DWORD PTR [esi + 0]        ; Input pels 0-3
        add     esi, 4
        mov     DWORD PTR [edi + 0], eax        ; Output pels 0-3
        add     edi, 4

Vchk4:
; Check 2 edge pels
        test    ecx, 02H
        jz      V2_cont
        mov     ax, WORD PTR [esi + 0]          ; Input pels 0-1
        add     esi, 2
        mov     WORD PTR [edi + 0], ax          ; Output pels 0-1
        add     edi, 2

V2_cont:
        add     esi, _LineAdd                   ; skip source padding to next row
        mov     ecx, _uWidth
        dec     edx
        jnz     V2

YUVDone:
        add     esp, LocalFrameSize             ; restore esp to registers
        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret     52                              ; 13*4 bytes of arguments
YUV12ToYUV ENDP
END