|
|
/* *************************************************************************
** INTEL Corporation Proprietary Information ** ** This listing is supplied under the terms of a license ** agreement with INTEL Corporation and may not be copied ** nor disclosed except in accordance with the terms of ** that agreement. ** ** Copyright (c) 1995 Intel Corporation. ** Copyright (c) 1996 Intel Corporation. ** All Rights Reserved. ** ** ************************************************************************* */
#include "precomp.h"
/* map of coded and not-coded blocks */ extern char coded_map[][22+1]; /* QP map */ extern char QP_map[][22];
#if defined(H263P) // { if defined(H263P)
/* table for de-blocking filter */ /* currently requires 2048 bytes */ signed char dxQP[64][32];
#if 0 // { 0
static void HorizEdgeFilter(unsigned char *rec, int width, int height, int pitch, int shift) {
int i, j, k; int d, delta; int mbc; int mod_div = 1 << shift; unsigned char *r = rec + (pitch << 3); unsigned char *r_2 = r - (pitch << 1); unsigned char *r_1 = r - pitch; unsigned char *r1 = r + pitch; char *pcoded_row0 = &coded_map[8>>shift][0]; char *pcoded_row1 = pcoded_row0 + sizeof(coded_map[0]); char *pQP_map = &QP_map[0][0];
for (j = 8; j < height; ) { for (i = 0; i < width; i += 8) { mbc = i >> shift; if (pcoded_row0[mbc+1] || pcoded_row1[mbc+1]) { for (k = i; k < i+8; k++) { d = (r_2[k]+(r_2[k]<<1)-(r_1[k]<<3)+(r[k]<<3)-(r1[k]+(r1[k]<<1)))>>4; if (d && (d >= -32) && (d < 32)) { delta = dxQP[d+32][pQP_map[mbc]]; r[k] = ClampTbl[r[k]-delta+CLAMP_BIAS]; r_1[k] = ClampTbl[r_1[k]+delta+CLAMP_BIAS]; } } } } r_2 += (pitch<<3); r_1 += (pitch<<3); r += (pitch<<3); r1 += (pitch<<3); if (0 == ((j+=8)%mod_div)) { pcoded_row0 += sizeof(coded_map[0]); pcoded_row1 += sizeof(coded_map[0]); pQP_map += sizeof(QP_map[0]); } } }
static void VertEdgeFilter(unsigned char *rec, int width, int height, int pitch, int shift) {
unsigned char *r = rec; int i, j, k; int mbc; int d, delta; int mod_div = 1 << shift; char *pcoded_row1 = &coded_map[1][0]; char *pQP_map = &QP_map[0][0];
for (j = 0; j < height; ) { for (i = 8; i < width; i += 8) { mbc = i >> shift; if (pcoded_row1[mbc] || pcoded_row1[mbc+1]) { for (k = 0; k < 8; k++) { d = (r[i-2]+(r[i-2]<<1)-(r[i-1]<<3)+(r[i]<<3)-(r[i+1]+(r[i+1]<<1)))>>4; if (d && (d > -32) && (d < 32)) { delta = dxQP[d+32][pQP_map[mbc]]; r[i] = ClampTbl[r[i]-delta+CLAMP_BIAS]; r[i-1] = ClampTbl[r[i-1]+delta+CLAMP_BIAS]; } r += pitch; } r -= pitch<<3; } } r += pitch<<3; if (0 == ((j+=8)%mod_div)) { pcoded_row1 += sizeof(coded_map[0]); pQP_map += sizeof(QP_map[0]); } } }
#else // }{ 0
__declspec(naked) static void HorizEdgeFilter(unsigned char *rec, int width, int height, int pitch, int shift) {
// Permanent (callee-save) registers - ebx, esi, edi, ebp
// Temporary (caller-save) registers - eax, ecx, edx
//
// Stack frame layout
// | shift | + 68
// | pitch | + 64
// | height | + 60
// | width | + 56
// | rec | + 52
// -----------------------------
// | return addr | + 48
// | saved ebp | + 44
// | saved ebx | + 40
// | saved esi | + 36
// | saved edi | + 32
#define LOCALSIZE 32
#define SHIFT 68
#define PITCH_PARM 64
#define HEIGHT 60
#define WIDTH 56
#define REC 52
#define LOOP_I 28
#define LOOP_J 24
#define LOOP_K 20
#define PCODED_ROW0 16
#define PCODED_ROW1 12
#define PQP_MAP 8
#define MBC 4
#define LOOP_K_LIMIT 0
_asm {
push ebp push ebx push esi push edi sub esp, LOCALSIZE
// r = rec + (pitch << 3)
// r_2 = r - (pitch << 1)
// r_1 = r - pitch
// r1 = r + pitch
// assign(esi, r_2)
// assign(edi, r1)
// assign(ebp, pitch)
mov ebp, [esp + PITCH_PARM] mov esi, [esp + REC] lea esi, [esi + ebp*4] lea esi, [esi + ebp*2] lea edi, [esi + ebp*2] lea edi, [edi + ebp] // pcoded_row0 = &coded_map[8>>shift][0]
// pcoded_row1 = pcoded_row0 + sizeof(coded_map[0])
// pQP_map = &QP_map[0][0]
mov eax, 8 mov ecx, [esp + SHIFT] shr eax, cl mov ebx, TYPE coded_map[0] imul eax, ebx lea eax, [coded_map + eax] mov [esp + PCODED_ROW0], eax add eax, ebx mov [esp + PCODED_ROW1], eax lea eax, [QP_map] mov [esp + PQP_MAP], eax
// for (j = 8; j < height; )
mov DWORD PTR [esp + LOOP_J], 8 L1: // for (i = 0; i < width; i += 8)
mov DWORD PTR [esp + LOOP_I], 0 L2: // mbc = i >> shift
// if (pcoded_row0[mbc+1] || pcoded_row1[mbc+1])
mov eax, [esp + LOOP_I] mov ecx, [esp + SHIFT] shr eax, cl mov ebx, [esp + PCODED_ROW0] mov [esp + MBC], eax mov cl, [ebx+eax+1] mov ebx, [esp + PCODED_ROW1] test ecx, ecx jnz L3 mov cl, [ebx+eax+1] test ecx, ecx jz L4 L3: // for (k = i; k < i+8; k++)
mov eax, [esp + LOOP_I] xor ebx, ebx add eax, 8 // read r_1[k]
mov bl, [esi+ebp] mov [esp + LOOP_K_LIMIT], eax xor eax, eax L5: // d = (r_2[k]+(r_2[k]<<1)-(r_1[k]<<3)+(r[k]<<3)-(r1[k]+(r1[k]<<1)))>>4
// read r_2[k]
mov al, [esi] xor ecx, ecx // read r[k]
mov cl, [esi+ebp*2] xor edx, edx // read r1[k] and compute r_2[k]*3
mov dl, [edi] lea eax,[eax+eax*2] // compute r_1[k]*8 and r[k]*8
lea ebx, [ebx*8] lea ecx, [ecx*8] // compute r1[k]*3 and (r_2[k]*3 - r_1[k]*8)
lea edx, [edx+edx*2] sub eax, ebx // compute (r_2[k]*3 - r_1[k]*8 + r[k]*8)
add eax, ecx xor ecx, ecx // compute (r_2[k]*3 - r_1[k]*8 + r[k]*8 - r1[k]*3)
sub eax, edx xor edx, edx // compute (r_2[k]*3 - r_1[k]*8 + r[k]*8 - r1[k]*3) >> 4
sar eax, 4 mov ebx, [esp + PQP_MAP] // if (d && (d >= -32) && (d < 32))
add ebx, [esp + MBC] test eax, eax jz L6 cmp eax, -32 jl L6 cmp eax, 32 jge L6 // delta = dxQP[d+32][pQP_map[mbc]]
// r[k] = ClampTbl[r[k]-delta+CLAMP_BIAS]
// r_1[k] = ClampTbl[r_1[k]+delta+CLAMP_BIAS]
lea eax, [eax + 32] mov cl, [ebx] shl eax, 5 mov dl, [esi+ebp] mov al, dxQP[eax+ecx] mov cl, [esi+ebp*2] movsx eax, al sub ecx, eax mov dl, ClampTbl[edx + eax + CLAMP_BIAS] mov cl, ClampTbl[ecx + CLAMP_BIAS] mov [esi+ebp], dl mov [esi+ebp*2], cl nop L6: mov edx, [esp + LOOP_I] inc esi inc edx inc edi xor eax, eax xor ebx, ebx mov [esp + LOOP_I], edx mov bl, [esi+ebp] cmp edx, [esp + LOOP_K_LIMIT] jl L5 jmp L4a L4: mov eax, [esp + LOOP_I] lea esi, [esi+8] add eax, 8 lea edi, [edi+8] mov [esp + LOOP_I],eax nop L4a: mov eax, [esp + LOOP_I] cmp eax, [esp + WIDTH] jl L2 // r_2 += (pitch<<3)
// r_1 += (pitch<<3)
// r += (pitch<<3)
// r1 += (pitch<<3)
mov eax, ebp shl eax, 3 sub eax, [esp + WIDTH] lea esi, [esi + eax] lea edi, [edi + eax] // if (0 == ((j+=8)%mod_div))
mov eax, [esp + LOOP_J] add eax, 8 mov [esp + LOOP_J], eax mov ebx, eax mov ecx, [esp + SHIFT] shr eax, cl shl eax, cl sub ebx, eax jnz L7 // pcoded_row0 += sizeof(coded_map[0])
// pcoded_row1 += sizeof(coded_map[0])
// pQP_map += sizeof(QP_map[0])
mov eax, [esp + PCODED_ROW0] mov ebx, [esp + PCODED_ROW1] mov ecx, [esp + PQP_MAP] add eax, TYPE coded_map[0] add ebx, TYPE coded_map[0] add ecx, TYPE QP_map[0] mov [esp + PCODED_ROW0], eax mov [esp + PCODED_ROW1], ebx mov [esp + PQP_MAP], ecx L7: mov eax, [esp + LOOP_J] cmp eax, [esp + HEIGHT] jl L1
add esp, LOCALSIZE pop edi pop esi pop ebx pop ebp ret
} }
#undef LOCALSIZE
#undef SHIFT
#undef PITCH_PARM
#undef HEIGHT
#undef WIDTH
#undef REC
#undef LOOP_I
#undef LOOP_J
#undef LOOP_K
#undef PCODED_ROW0
#undef PCODED_ROW1
#undef PQP_MAP
#undef MBC
#undef LOOP_K_LIMIT
__declspec(naked) static void VertEdgeFilter(unsigned char *rec, int width, int height, int pitch, int shift) {
// Permanent (callee-save) registers - ebx, esi, edi, ebp
// Temporary (caller-save) registers - eax, ecx, edx
//
// Stack frame layout
// | shift | + 56
// | pitch | + 52
// | height | + 48
// | width | + 44
// | rec | + 40
// -----------------------------
// | return addr | + 36
// | saved ebp | + 32
// | saved ebx | + 28
// | saved esi | + 24
// | saved edi | + 20
#define LOCALSIZE 20
#define SHIFT 56
#define PITCH_PARM 52
#define HEIGHT 48
#define WIDTH 44
#define REC 40
#define LOOP_K 16
#define LOOP_J 12
#define PCODED_ROW1 8
#define PQP_MAP 4
#define MBC 0
_asm {
push ebp push ebx push esi push edi sub esp, LOCALSIZE
// assign(esi, r)
mov esi, [esp + REC] // assign(edi, pitch)
mov edi, [esp + PITCH_PARM] // pcoded_row1 = &coded_map[1][0]
mov eax, TYPE coded_map[0] lea eax, [coded_map + eax] mov [esp + PCODED_ROW1], eax // pQP_map = &QP_map[0][0]
lea eax, [QP_map] mov [esp + PQP_MAP], eax // for (j = 0; j < height; )
xor eax, eax mov [esp + LOOP_J], eax L1: // for (i = 8; i < width; i += 8)
// assign(ebp,i)
mov ebp, 8 // mbc = i >> shift
L2: mov eax, ebp mov ecx, [esp + SHIFT] shr eax, cl mov [esp + MBC], eax // if (pcoded_row1[mbc] || pcoded_row1[mbc+1])
xor ecx, ecx mov ebx, [esp + PCODED_ROW1] mov cl, [ebx+eax] test ecx, ecx jnz L3 mov cl, [ebx+eax+1] test ecx, ecx jz L4 L3: // for (k = 0; k < 8; k++)
mov DWORD PTR [esp + LOOP_K], 8 xor eax, eax xor ebx, ebx xor ecx, ecx xor edx, edx L5: // d = (r[i-2]+(r[i-2]<<1)-(r[i-1]<<3)+(r[i]<<3)-(r[i+1]+(r[i+1]<<1)))>>4
// read r[i-2] and r[i]
mov al, [esi+ebp-2] mov bl, [esi+ebp] // read r[i-1] and r[i+1]
mov cl, [esi+ebp-1] mov dl, [esi+ebp+1] // compute r[i-2]*3 and r[i]*8
lea eax, [eax+eax*2] lea ebx, [ebx*8] // compute r[i-1]*8 and r[i+1]*3
lea ecx, [ecx*8] lea edx, [edx+edx*2] // compute (r[i-2]*3 + r[i]*8) and (r[i-1]*8 + r[i+1]*3)
add eax, ebx add ecx, edx // compute (r[i-2]*3 - r[i-1]*8 + r[i]*8 - r[i+1]*3)
sub eax, ecx xor ecx, ecx // compute ((r[i-2]*3 - r[i-1]*8 + r[i]*8 - r[i+1]*3) >> 4)
sar eax, 4 xor edx, edx // if (d && (d >= -32) && (d < 32))
test eax, eax jz L6 cmp eax, -32 jl L6 cmp eax, 32 jge L6 // delta = dxQP[d+32][pQP_map[mbc]]
// r[i] = ClampTbl[r[i]-delta+CLAMP_BIAS]
// r[i-1] = ClampTbl[r[i-1]+delta+CLAMP_BIAS]
lea eax, [eax + 32] mov ebx, [esp + PQP_MAP] shl eax, 5 add ebx, [esp + MBC] mov cl, [ebx] xor ebx, ebx mov al, dxQP[eax+ecx] mov bl, [esi+ebp] movsx eax, al sub ebx, eax mov cl, [esi+ebp-1] mov bl, ClampTbl[ebx + CLAMP_BIAS] mov cl, ClampTbl[ecx + eax + CLAMP_BIAS] mov [esi+ebp], bl mov [esi+ebp-1], cl L6: add esi, edi mov eax, [esp + LOOP_K] xor ebx, ebx dec eax mov [esp + LOOP_K], eax jnz L5 // r -= (pitch<<3)
mov eax, edi shl eax, 3 sub esi, eax L4: add ebp, 8 cmp ebp, [esp + WIDTH] jl L2 // r += (pitch<<3)
mov eax, edi shl eax, 3 lea esi, [esi + eax] // if (0 == ((j+=8)%mod_div))
mov eax, [esp + LOOP_J] add eax, 8 mov [esp + LOOP_J], eax mov ebx, eax mov ecx, [esp + SHIFT] shr eax, cl shl eax, cl sub ebx, eax jnz L7 // pcoded_row1 += sizeof(coded_map[0])
// pQP_map += sizeof(QP_map[0])
mov eax, [esp + PCODED_ROW1] mov ebx, [esp + PQP_MAP] add eax, TYPE coded_map[0] add ebx, TYPE QP_map[0] mov [esp + PCODED_ROW1], eax mov [esp + PQP_MAP], ebx L7: mov eax, [esp + LOOP_J] cmp eax, [esp + HEIGHT] jl L1
add esp, LOCALSIZE pop edi pop esi pop ebx pop ebp ret
} }
#undef LOCALSIZE
#undef SHIFT
#undef PITCH_PARM
#undef HEIGHT
#undef WIDTH
#undef REC
#undef LOOP_K
#undef LOOP_J
#undef PCODED_ROW1
#undef PQP_MAP
#undef MBC
#endif // } 0
#define abs(x) (((x)>0)?(x):(-(x)))
#define sign(x) (((x)<0)?(-1):(1))
void InitEdgeFilterTab() { int d,QP;
for (d = 0; d < 64; d++) { // -32 <= d < 32
for (QP = 0; QP < 32; QP++) { // 0 <= QP < 32
dxQP[d][QP] = sign(d-32)*(max(0,(abs(d-32)-max(0,((2*abs(d-32))-QP))))); } } }
/**********************************************************************
* * Name: EdgeFilter * Description: performs deblocking filtering on * reconstructed frames * * Input: pointers to reconstructed frame and difference * image * Returns: * Side effects: * * Date: 951129 Author: Gisle.Bjontegaard@fou.telenor.no * Karl.Lillevold@nta.no * Modified for annex J in H.263+: 961120 Karl O. Lillevold * ***********************************************************************/ // C version of block edge filter functions
// takes about 3 ms for QCIF and 12 ms for CIF on a Pentium 120.
void EdgeFilter(unsigned char *lum, unsigned char *Cb, unsigned char *Cr, int width, int height, int pitch) {
/* Luma */ HorizEdgeFilter(lum, width, height, pitch, 4); VertEdgeFilter (lum, width, height, pitch, 4);
/* Chroma */ HorizEdgeFilter(Cb, width>>1, height>>1, pitch, 3); VertEdgeFilter (Cb, width>>1, height>>1, pitch, 3); HorizEdgeFilter(Cr, width>>1, height>>1, pitch, 3); VertEdgeFilter (Cr, width>>1, height>>1, pitch, 3);
return; }
#else // Karl's original version }{
/* currently requires 11232 bytes */ signed char dtab[352*32];
/***********************************************************************/ static void HorizEdgeFilter(unsigned char *rec, int width, int height, int pitch, int chr) { int i,j,k; int delta; int mbc, mbr, do_filter; unsigned char *r_2, *r_1, *r, *r1; signed char *deltatab;
/* horizontal edges */ r = rec + 8*pitch; r_2 = r - 2*pitch; r_1 = r - pitch; r1 = r + pitch;
for (j = 8; j < height; j += 8) { for (i = 0; i < width; i += 8) {
if (!chr) { mbr = (j >> 4); mbc = (i >> 4); } else { mbr = (j >> 3); mbc = (i >> 3); }
deltatab = dtab + 176 + 351 * (QP_map[mbr][mbc] - 1);
do_filter = coded_map[mbr+1][mbc+1] || coded_map[mbr][mbc+1];
if (do_filter) { for (k = i; k < i+8; k++) { delta = (int)deltatab[ (( (int)(*(r_2 + k) * 3) - (int)(*(r_1 + k) * 8) + (int)(*(r + k) * 8) - (int)(*(r1 + k) * 3)) >>4)]; *(r + k) = ClampTbl[ (int)(*(r + k)) - delta + CLAMP_BIAS]; *(r_1 + k) = ClampTbl[ (int)(*(r_1 + k)) + delta + CLAMP_BIAS];
} } } r += (pitch<<3); r1 += (pitch<<3); r_1 += (pitch<<3); r_2 += (pitch<<3); } return; }
static void VertEdgeFilter(unsigned char *rec, int width, int height, int pitch, int chr) { int i,j,k; int delta; int mbc, mbr; int do_filter; signed char *deltatab; unsigned char *r;
/* vertical edges */ for (i = 8; i < width; i += 8) { r = rec; for (j = 0; j < height; j +=8) { if (!chr) { mbr = (j >> 4); mbc = (i >> 4); } else { mbr = (j >> 3); mbc = (i >> 3); } deltatab = dtab + 176 + 351 * (QP_map[mbr][mbc] - 1);
do_filter = coded_map[mbr+1][mbc+1] || coded_map[mbr+1][mbc];
if (do_filter) { for (k = 0; k < 8; k++) { delta = (int)deltatab[(( (int)(*(r + i-2 ) * 3) - (int)(*(r + i-1 ) * 8) + (int)(*(r + i ) * 8) - (int)(*(r + i+1 ) * 3) ) >>4)];
*(r + i ) = ClampTbl[ (int)(*(r + i )) - delta + CLAMP_BIAS]; *(r + i-1 ) = ClampTbl[ (int)(*(r + i-1)) + delta + CLAMP_BIAS]; r += pitch; } } else { r += (pitch<<3); } } } return; }
/**********************************************************************
* * Name: EdgeFilter * Description: performs deblocking filtering on * reconstructed frames * * Input: pointers to reconstructed frame and difference * image * Returns: * Side effects: * * Date: 951129 Author: Gisle.Bjontegaard@fou.telenor.no * Karl.Lillevold@nta.no * Modified for annex J in H.263+: 961120 Karl O. Lillevold * ***********************************************************************/
void EdgeFilter(unsigned char *lum, unsigned char *Cb, unsigned char *Cr, int width, int height, int pitch) {
/* Luma */ HorizEdgeFilter(lum, width, height, pitch, 0); VertEdgeFilter (lum, width, height, pitch, 0);
/* Chroma */ HorizEdgeFilter(Cb, width>>1, height>>1, pitch, 1); VertEdgeFilter (Cb, width>>1, height>>1, pitch, 1); HorizEdgeFilter(Cr, width>>1, height>>1, pitch, 1); VertEdgeFilter (Cr, width>>1, height>>1, pitch, 1);
return; }
#define sign(a) ((a) < 0 ? -1 : 1)
void InitEdgeFilterTab() { int i,QP; for (QP = 1; QP <= 31; QP++) { for (i = -176; i <= 175; i++) { dtab[i+176 +(QP-1)*351] = sign(i) * (max(0,abs(i)-max(0,2*abs(i) - QP))); } } }
#endif // } if defined(H263P)
|