You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
312 lines
7.5 KiB
312 lines
7.5 KiB
;*************************************************************************
|
|
;** INTEL Corporation Proprietary Information
|
|
;**
|
|
;** This listing is supplied under the terms of a license
|
|
;** agreement with INTEL Corporation and may not be copied
|
|
;** nor disclosed except in accordance with the terms of
|
|
;** that agreement.
|
|
;**
|
|
;** Copyright (c) 1995 Intel Corporation.
|
|
;** All Rights Reserved.
|
|
;**
|
|
;*************************************************************************
|
|
;//
|
|
;// $Header: S:\h26x\src\dec\cx512yuv.asv 1.5 30 Dec 1996 20:02:08 MDUDA $
|
|
;//
|
|
;// $Log: S:\h26x\src\dec\cx512yuv.asv $
|
|
;//
|
|
;// Rev 1.5 30 Dec 1996 20:02:08 MDUDA
|
|
;// Fixed problem where buffer boundaries were being over-written.
|
|
;//
|
|
;// Rev 1.4 11 Dec 1996 14:58:52 JMCVEIGH
|
|
;//
|
|
;// Changed to support widths that are multiples of 4.
|
|
;//
|
|
;// Rev 1.3 18 Jul 1996 12:52:58 KLILLEVO
|
|
;// changed cache heating to speed things up a bit
|
|
;//
|
|
;// Rev 1.2 18 Jul 1996 09:39:34 KLILLEVO
|
|
;//
|
|
;// added PVCS header and log
|
|
|
|
;; Very straightforward implementation of the YUV pitch changer.
;; Copies 16 pels at a time. If the width is not a multiple of 16,
;; the remainder pels are handled as a special case. We assume
;; that the width is at least a multiple of 4.
|
|
|
|
; Disable the assembler-generated prologue: the procedure below pushes its
; own registers and reserves its own stack frame.
OPTION PROLOGUE: None
|
|
; Custom epilogue macro; it is not defined in this file.
; NOTE(review): presumably provided by memmodel.inc -- confirm.
OPTION EPILOGUE: ReturnAndRelieveEpilogueMacro
|
|
|
|
; Keep the contents of the include file out of the assembly listing.
.xlist
|
|
include memmodel.inc
|
|
.list
|
|
; Data segment (intentionally empty).
.DATA
|
|
|
|
; any data would go here
|
|
|
|
.CODE
|
|
|
|
; Tell the assembler that every segment register addresses the FLAT group.
ASSUME cs: FLAT
|
|
ASSUME ds: FLAT
|
|
ASSUME es: FLAT
|
|
ASSUME fs: FLAT
|
|
ASSUME gs: FLAT
|
|
ASSUME ss: FLAT
|
|
|
|
; Export the routine so other modules can link against it.
PUBLIC YUV12ToYUV
|
|
|
|
|
|
;--------------------------------------------------------------------------
; YUV12ToYUV
;
; Copies three source planes (Y, then U, then V) into one contiguous
; output area, dropping the per-row padding of the source planes.
;
;   destination = uCCOutputBuffer + uOffsetToLine0 + lOutput
;
;   Y plane : uHeight   rows of uWidth   bytes, source stride uYPitch
;   U plane : uHeight/2 rows of uWidth/2 bytes, source stride uUVPitch
;   V plane : uHeight/2 rows of uWidth/2 bytes, source stride uUVPitch
;
; The destination pointer only advances by the number of bytes copied,
; so rows (and planes) are packed back to back in the output.
;
; Width handling: luma rows are copied 16 bytes at a time with an 8- and
; a 4-byte tail; chroma rows are copied 8 bytes at a time with a 4- and a
; 2-byte tail.  uWidth is therefore expected to be a multiple of 4.
;
; Arguments not read by this routine: bShapingFlag, intPitch, CCType.
;
; Stack / registers:
;   - ebx, esi, edi, ebp are saved on entry and restored on exit.
;   - eax, ecx, edx are clobbered.
;   - The uWidth and uHeight argument slots on the stack are overwritten
;     with their halved (chroma) values.
;   - "ret 52" removes the 13 DWORD arguments (callee clean-up).
;
; DIST, LANG and the epilogue macro are not defined in this file.
; NOTE(review): presumably supplied by memmodel.inc -- confirm.
;
; Changes relative to the previous revision:
;   - The V-plane loop branched to UEdgePels instead of VEdgePels when the
;     chroma width was below 8, which re-entered the U-plane loop and
;     never terminated correctly.
;   - A row count of zero (uHeight == 0, or uHeight/2 == 0 for chroma)
;     is now skipped instead of wrapping the "dec edx / jnz" counter.
;   - The _uUPlane equate is spelled the same way it is used.
;   - Removed the unused _uRemainderEdgePels equate (it expanded to an
;     undefined symbol) and a dead load of uWidth into eax.
;--------------------------------------------------------------------------
YUV12ToYUV proc DIST LANG AuYPlane: DWORD,
                AuVPlane: DWORD,
                AuUPlane: DWORD,
                AuWidth: DWORD,
                AuHeight: DWORD,
                AuYPitch: DWORD,
                AUVPitch: DWORD,
                AbShapingFlag: DWORD,
                AuCCOutputBuffer: DWORD,
                AlOutput: DWORD,
                AuOffsetToLine0: DWORD,
                AintPitch: DWORD,
                ACCType: DWORD

LocalFrameSize = 12             ; bytes reserved for locals (only 8 are used)

RegisterStorageSize = 16        ; 4 registers pushed

; Argument offsets from esp once the registers are pushed and the local
; frame is reserved.  The extra 4 bytes skip the return address.

uYPlane         = LocalFrameSize + RegisterStorageSize + 4
uVPlane         = LocalFrameSize + RegisterStorageSize + 8
uUPlane         = LocalFrameSize + RegisterStorageSize + 12
uWidth          = LocalFrameSize + RegisterStorageSize + 16
uHeight         = LocalFrameSize + RegisterStorageSize + 20
uYPitch         = LocalFrameSize + RegisterStorageSize + 24
uUVPitch        = LocalFrameSize + RegisterStorageSize + 28
bShapingFlag    = LocalFrameSize + RegisterStorageSize + 32
uCCOutputBuffer = LocalFrameSize + RegisterStorageSize + 36
lOutput         = LocalFrameSize + RegisterStorageSize + 40
uOffsetToLine0  = LocalFrameSize + RegisterStorageSize + 44
intPitch        = LocalFrameSize + RegisterStorageSize + 48
CCType          = LocalFrameSize + RegisterStorageSize + 52

; Local offsets from esp (inside the reserved local frame)

LineAdd         = 0             ; source pitch minus bytes copied per row
LineWidth       = 4             ; not referenced by the code below

; Arguments relative to esp

_uYPlane            EQU [esp + uYPlane]
_uVPlane            EQU [esp + uVPlane]
_uUPlane            EQU [esp + uUPlane]
_uWidth             EQU [esp + uWidth ]
_uHeight            EQU [esp + uHeight]
_uYPitch            EQU [esp + uYPitch]
_uUVPitch           EQU [esp + uUVPitch]
_bShapingFlag       EQU [esp + bShapingFlag]
_uCCOutputBuffer    EQU [esp + uCCOutputBuffer]
_lOutput            EQU [esp + lOutput]
_uOffsetToLine0     EQU [esp + uOffsetToLine0]
_intPitch           EQU [esp + intPitch]
_uCCType            EQU [esp + CCType]

; Locals relative to esp

_LineAdd            EQU [esp + LineAdd]
_LineWidth          EQU [esp + LineWidth]

; Save registers and start working

        push    ebx
        push    esi
        push    edi
        push    ebp

        sub     esp, LocalFrameSize

        ; edi = uCCOutputBuffer + uOffsetToLine0 + lOutput (destination)
        ; esi = start of the source Y plane
        mov     eax, _uCCOutputBuffer
        add     eax, _uOffsetToLine0
        mov     ecx, _lOutput
        add     eax, ecx
        mov     ebx, _uYPitch
        mov     ecx, _uWidth
        mov     esi, _uYPlane
        mov     edi, eax

; ---- luma ---------------------------------------------------------------
; ecx = bytes left in the current row, edx = rows left

        sub     ebx, ecx                    ; ebx = pitch - width
        mov     edx, _uHeight
        mov     _LineAdd, ebx

        test    edx, edx
        jz      CopyDone                    ; no rows at all: nothing to copy

L2:
        test    ecx, 0FFFFFFF0H
        jz      LEdgePels                   ; fewer than 16 pels left in the row

L1:
        mov     ebx, DWORD PTR [edi]        ; heat cache (value is discarded)
        add     edi, 16
        mov     eax, DWORD PTR [esi + 0]
        mov     ebx, DWORD PTR [esi + 4]
        mov     DWORD PTR [edi - 16], eax
        mov     DWORD PTR [edi - 12], ebx
        mov     eax, DWORD PTR [esi + 8]
        mov     ebx, DWORD PTR [esi +12]
        mov     DWORD PTR [edi - 8], eax
        mov     DWORD PTR [edi - 4], ebx

        add     esi, 16
        sub     ecx, 16

        test    ecx, 0FFFFFFF0H
        jnz     L1

LEdgePels:
        ; Copy the edge pels if needed (width a multiple of 4, but not of 16)

        ; Check 8 edge pels
        test    ecx, 08H
        jz      Lchk4
        mov     eax, DWORD PTR [esi + 0]    ; Input pels 0-3
        mov     ebx, DWORD PTR [esi + 4]    ; Input pels 4-7
        mov     DWORD PTR [edi + 0], eax    ; Output pels 0-3
        mov     DWORD PTR [edi + 4], ebx    ; Output pels 4-7
        add     esi, 8
        add     edi, 8

Lchk4:
        ; Check 4 edge pels
        test    ecx, 04H
        jz      L2_cont
        mov     eax, DWORD PTR [esi + 0]    ; Input pels 0-3
        add     esi, 4
        mov     DWORD PTR [edi + 0], eax    ; Output pels 0-3
        add     edi, 4

L2_cont:
        add     esi, _LineAdd               ; skip the source row padding
        mov     ecx, _uWidth                ; reload the per-row byte count
        dec     edx
        jnz     L2

; ---- chroma (U plane) ---------------------------------------------------
; The uWidth / uHeight argument slots are reused to hold the halved
; chroma dimensions for both chroma planes.

        mov     esi, _uUPlane
        mov     ecx, _uWidth
        shr     ecx, 1
        mov     ebx, _uUVPitch
        sub     ebx, ecx                    ; ebx = pitch - width/2
        mov     edx, _uHeight
        shr     edx, 1
        mov     _LineAdd, ebx
        mov     _uWidth, ecx
        mov     _uHeight, edx

        test    edx, edx
        jz      CopyDone                    ; no chroma rows (uHeight < 2)

U2:
        test    ecx, 0FFFFFFF8H
        jz      UEdgePels                   ; fewer than 8 pels left in the row

U1:
        mov     ebx, DWORD PTR [edi]        ; heat cache (value is discarded)
        add     edi, 8
        mov     eax, DWORD PTR [esi + 0]
        mov     ebx, DWORD PTR [esi + 4]
        mov     DWORD PTR [edi - 8], eax
        mov     DWORD PTR [edi - 4], ebx

        add     esi, 8
        sub     ecx, 8

        test    ecx, 0FFFFFFF8H
        jnz     U1

UEdgePels:
        ; Copy the edge pels if needed (chroma width not a multiple of 8)

        ; Check 4 edge pels
        test    ecx, 04H
        jz      Uchk2
        mov     eax, DWORD PTR [esi + 0]    ; Input pels 0-3
        add     esi, 4
        mov     DWORD PTR [edi + 0], eax    ; Output pels 0-3
        add     edi, 4

Uchk2:
        ; Check 2 edge pels
        test    ecx, 02H
        jz      U2_cont
        mov     ax, WORD PTR [esi + 0]      ; Input pels 0-1
        add     esi, 2
        mov     WORD PTR [edi + 0], ax      ; Output pels 0-1
        add     edi, 2

U2_cont:
        add     esi, _LineAdd               ; skip the source row padding
        mov     ecx, _uWidth                ; reload the chroma row byte count
        dec     edx
        jnz     U2

; ---- chroma (V plane) ---------------------------------------------------
; _uWidth, _uHeight and _LineAdd still hold the chroma values set above;
; a zero chroma row count has already been routed to CopyDone.

        mov     esi, _uVPlane
        mov     ecx, _uWidth
        mov     edx, _uHeight
        nop                                 ; kept from the original; no functional effect

V2:
        test    ecx, 0FFFFFFF8H
        jz      VEdgePels                   ; fewer than 8 pels left in the row

V1:
        mov     ebx, DWORD PTR [edi]        ; heat cache (value is discarded)
        add     edi, 8
        mov     eax, DWORD PTR [esi + 0]
        mov     ebx, DWORD PTR [esi + 4]
        mov     DWORD PTR [edi - 8], eax
        mov     DWORD PTR [edi - 4], ebx

        add     esi, 8
        sub     ecx, 8

        test    ecx, 0FFFFFFF8H
        jnz     V1

VEdgePels:
        ; Copy the edge pels if needed (chroma width not a multiple of 8)

        ; Check 4 edge pels
        test    ecx, 04H
        jz      Vchk2
        mov     eax, DWORD PTR [esi + 0]    ; Input pels 0-3
        add     esi, 4
        mov     DWORD PTR [edi + 0], eax    ; Output pels 0-3
        add     edi, 4

Vchk2:
        ; Check 2 edge pels
        test    ecx, 02H
        jz      V2_cont
        mov     ax, WORD PTR [esi + 0]      ; Input pels 0-1
        add     esi, 2
        mov     WORD PTR [edi + 0], ax      ; Output pels 0-1
        add     edi, 2

V2_cont:
        add     esi, _LineAdd               ; skip the source row padding
        mov     ecx, _uWidth                ; reload the chroma row byte count
        dec     edx
        jnz     V2

CopyDone:
        add     esp, LocalFrameSize         ; restore esp to registers

        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret     52                          ; 13*4 bytes of arguments

YUV12ToYUV ENDP
|
|
|
|
|
|
END
|