You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
198 lines
5.2 KiB
198 lines
5.2 KiB
;*************************************************************************
|
|
;** INTEL Corporation Proprietary Information
|
|
;**
|
|
;** This listing is supplied under the terms of a license
|
|
;** agreement with INTEL Corporation and may not be copied
|
|
;** nor disclosed except in accordance with the terms of
|
|
;** that agreement.
|
|
;**
|
|
;** Copyright (c) 1995, 1996 Intel Corporation.
|
|
;** All Rights Reserved.
|
|
;**
|
|
;*************************************************************************
|
|
.486
|
|
.Model FLAT, C
|
|
APP_32BIT equ 1
|
|
;.CODE
|
|
IACODE2 SEGMENT PARA USE32 PUBLIC 'CODE'
|
|
IACODE2 ENDS
|
|
|
|
IACODE2 SEGMENT
|
|
|
|
LoopFilter PROC C PUBLIC USES esi edi ebx ebp in8x8:DWORD, out8x8:DWORD, pitch:DWORD
|
|
LOCAL filt_temp[32]:DWORD, loop_count:DWORD
|
|
; **************************************************
|
|
; output pitch is hard coded to 8 for uFilterBBuffer
|
|
; input pitch is 384 (as passed parameter)
|
|
; **************************************************
|
|
|
|
mov esi,in8x8
|
|
; mov edi,out8x8 ; for debug
|
|
lea edi,filt_temp ; use temporary storage
|
|
mov loop_count,8
|
|
xor eax,eax
|
|
|
|
|
|
|
|
; filter 8x8 block horizontally
|
|
; input is 8-bit, output is 16-bit temporary storage
|
|
ALIGN 4
|
|
do_row:
|
|
; pixel 0
|
|
mov al,byte ptr [esi] ; get p0, eax = a
|
|
xor ebx,ebx
|
|
mov edx,eax ; copy pixel 0
|
|
xor ecx,ecx
|
|
shl edx,2 ; a<<2
|
|
; pixel 0 + pixel 1
|
|
mov bl,byte ptr [esi+1] ; get p1, ebx = b
|
|
mov [edi],dx ; output p0 = a<<2
|
|
add eax,ebx ; eax = (a+b)
|
|
mov cl,byte ptr [esi+2] ; get p2, ecx = c
|
|
; pixel 1 + pixel 2
|
|
xor edx,edx
|
|
add ebx,ecx ; ebx = (b+c)
|
|
mov dl,byte ptr [esi+3] ; get p3, edx = c
|
|
add eax,ebx ; eax = (a+b) + (b+c)
|
|
add ecx,edx ; ecx = (b+c)
|
|
mov [edi+2],ax ; output p1 = (a+b) + (b+c)
|
|
add ebx,ecx ; ebx = (a+b) + (b+c)
|
|
; pixel 2 + pixel 3
|
|
mov [edi+4],bx ; output p2 = (a+b) + (b+c)
|
|
xor eax,eax
|
|
mov al,byte ptr [esi+4] ; get p4, eax = c
|
|
; pixel 3 + pixel 4
|
|
xor ebx,ebx
|
|
add edx,eax ; edx = (b+c)
|
|
mov bl,byte ptr [esi+5] ; get p5, ebx = c
|
|
add ecx,edx ; ecx = (a+b) + (b+c)
|
|
add eax,ebx ; eax = (b+c)
|
|
mov [edi+6],cx ; output p3 = (a+b) + (b+c)
|
|
add edx,eax ; edx = (a+b) + (b+c)
|
|
; pixel 4 + pixel 5
|
|
mov [edi+8],dx ; output p4 = (a+b) + (b+c)
|
|
xor ecx,ecx
|
|
mov cl,byte ptr [esi+6] ; get p6, ecx = c
|
|
; pixel 5 + pixel 6
|
|
xor edx,edx
|
|
add ebx,ecx ; ebx = (a+b)
|
|
mov dl,byte ptr [esi+7] ; get p7, edx = c
|
|
add eax,ebx ; eax = (a+b) + (b+c)
|
|
add ecx,edx ; ecx = (b+c)
|
|
shl edx,2 ; p7<<2
|
|
add ebx,ecx ; ebx = (a+b) + (b+c)
|
|
mov [edi+10],ax ; output p5 = (a+b) + (b+c)
|
|
; pixel 6 + pixel 7
|
|
xor eax,eax ; for next iteration
|
|
mov [edi+12],bx ; output p6 = (a+b) + (b+c)
|
|
mov ecx,loop_count
|
|
mov [edi+14],dx ; output p7 = c<<2
|
|
mov ebx,pitch
|
|
add edi,16
|
|
add esi,ebx ; inc input ptr
|
|
dec ecx
|
|
mov loop_count,ecx
|
|
jnz do_row
|
|
|
|
; filter 8x8 block vertically
|
|
; input is 16-bit from temporary storage, output is 8-bit
|
|
|
|
lea esi,filt_temp
|
|
mov edi,out8x8
|
|
|
|
mov loop_count,4 ; loop counter
|
|
|
|
;
|
|
;
|
|
ALIGN 4
|
|
row0:
|
|
mov eax,[esi] ; eax = a
|
|
; row0 + row1
|
|
mov ebx,[esi+16] ; get b
|
|
mov edx,eax ; copy a
|
|
add eax,ebx ; eax = (a+b)
|
|
add edx,00020002h ; round result
|
|
mov ecx,[esi+32] ; get c
|
|
shr edx,2 ; divide by 4
|
|
add ebx,ecx ; ebx = (b+c)
|
|
and edx,00ff00ffh ; convert back to 8-bit
|
|
add eax,ebx ; eax = (a+b) + (b+c)
|
|
mov [edi],dl ; output a for column 0
|
|
add eax,00080008h ; round
|
|
shr edx,16
|
|
shr eax,4
|
|
mov [edi+1],dl ; output a for column 1
|
|
; row1 + row2
|
|
mov edx,[esi+48] ; get c
|
|
and eax,00ff00ffh
|
|
add ecx,edx ; ecx = (b+c)
|
|
mov [edi+8],al ; output b for column 0
|
|
shr eax,16
|
|
add ebx,ecx ; ebx = (a+b) + (b+c)
|
|
mov [edi+9],al ; output b for column 1
|
|
add ebx,00080008h ; round
|
|
shr ebx,4
|
|
; row2 + row3
|
|
mov eax,[esi+64] ; get c
|
|
and ebx,00ff00ffh
|
|
add edx,eax ; edx = (b+c)
|
|
mov [edi+16],bl ; output c for column 0
|
|
add ecx,edx ; ecx = (a+b) + (b+c)
|
|
shr ebx,16
|
|
add ecx,00080008h ; round
|
|
shr ecx,4
|
|
mov [edi+17],bl ; output c for column 1
|
|
and ecx,00ff00ffh
|
|
; row3 + row4
|
|
mov ebx,[esi+80] ; get c
|
|
mov [edi+24],cl ; output c
|
|
add eax,ebx ; eax = (b+c)
|
|
shr ecx,16
|
|
add edx,eax ; edx = (a+b) + (b+c)
|
|
mov [edi+25],cl ; output c
|
|
add edx,00080008h ; round
|
|
shr edx,4
|
|
; row4 + row5
|
|
mov ecx,[esi+96] ; get c
|
|
and edx,00ff00ffh
|
|
add ebx,ecx ; ebx = (b+c)
|
|
mov [edi+32],dl ; output c
|
|
add eax,ebx ; eax = (a+b) + (b+c)
|
|
shr edx,16
|
|
add eax,00080008h ; round
|
|
shr eax,4
|
|
mov [edi+33],dl ; output c
|
|
and eax,00ff00ffh
|
|
; row5 + row6
|
|
mov edx,[esi+112] ; get c
|
|
mov [edi+40],al ; output c
|
|
add ecx,edx ; ecx = (b+c)
|
|
shr eax,16
|
|
; row6 + row7
|
|
add edx,00020002h ; round result
|
|
shr edx,2 ; divide by 4
|
|
mov [edi+41],al ; output c
|
|
add ebx,ecx ; ebx = (a+b) + (b+c)
|
|
and edx,00ff00ffh ; convert back to 8-bit
|
|
add ebx,00080008h ; round
|
|
mov [edi+56],dl ; output c
|
|
shr ebx,4
|
|
mov ecx,loop_count
|
|
shr edx,16
|
|
and ebx,00ff00ffh
|
|
mov [edi+48],bl ; output c
|
|
mov [edi+57],dl ; output c
|
|
shr ebx,16
|
|
add esi,4 ; inc input ptr
|
|
mov [edi+49],bl ; output c
|
|
add edi,2
|
|
dec ecx
|
|
mov loop_count,ecx
|
|
jnz row0
|
|
|
|
ret
|
|
LoopFilter EndP
|
|
|
|
IACODE2 ENDS
|
|
|
|
END
|