|
|
/* File: sv_h263_dct.c */ /*****************************************************************************
** Copyright (c) Digital Equipment Corporation, 1995, 1997 ** ** ** ** All Rights Reserved. Unpublished rights reserved under the copyright ** ** laws of the United States. ** ** ** ** The software contained on this media is proprietary to and embodies ** ** the confidential technology of Digital Equipment Corporation. ** ** Possession, use, duplication or dissemination of the software and ** ** media is authorized only pursuant to a valid written license from ** ** Digital Equipment Corporation. ** ** ** ** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. ** ** Government is subject to restrictions as set forth in Subparagraph ** ** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. ** ******************************************************************************/
#include <math.h>
#include "sv_h263.h"
#include "proto.h"
#define F (float)
#define S (short)
/*
** tdzz[r*8 + c] is the zigzag-scan position of the coefficient at row r,
** column c of an 8x8 block.  The inverse transforms in this file walk this
** table to pick their inputs out of a zigzag-ordered buffer.
*/
static const unsigned int tdzz[64] = { 0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, 3, 8, 12, 17, 25, 30, 41, 43, 9, 11, 18, 24, 31, 40, 44, 53, 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63};
/*
** ttdzz is the transpose of tdzz: ttdzz[c*8 + r] == tdzz[r*8 + c].
** The forward transforms finish one column at a time, so they walk this
** table sequentially to scatter each column's outputs into zigzag order.
*/
static const unsigned int ttdzz[64] = { 0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36, 5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49, 14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58, 27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63};
/**********************************************************************
* * Name: Dct * Description: Does dct on an 8x8 block, does zigzag-scanning of * coefficients * * Input: 64 pixels in a 1D array * Returns: 64 coefficients in a 1D array * Side effects: * **********************************************************************/
/*
** Name: ScFDCT8x8s_C ** Purpose: 2-d Forward DCT (C version) for (8x8) blocks ** ** update: Wei-Lien Hsu, store in ZZ order. */
/*
** Floating-point multipliers for the forward DCT butterflies.
** W0 is approximately cos(pi/4); W1..W7 are approximately cos(k*pi/16)/2
** for k = 1..7 (so W4 is approximately cos(pi/4)/2).
*/
static const float W0=(float).7071068, W1=(float).4903926, W2=(float).4619398, W3=(float).4157348, W4=(float).3535534, W5=(float).2777851, W6=(float).1913417, W7=(float).0975452;
int sv_H263DCT( short *block, short *coeff, int QP, int Mode) { int i; register float b0, b1, b2, b3, b4, b5, b6, b7, tmp, t0, t1, t2; float tmpbuf[64]; const unsigned int *ptdzz=ttdzz;
register short *blockptr, *coeffptr ; register float *dptr;
#if 1
short val, halfQ;
/* check significant signals in Inter-frame */ if(!(Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) { halfQ = QP >> 1; blockptr = block; for (i=0; i < 64; i++) { val = *blockptr++; if((val > halfQ) | (val < -halfQ)) break; } if(i==64){ memset(coeff,0,128) ; return 0; } } #endif
/* Horizontal transform */ dptr = tmpbuf; blockptr = block; for (i = 0; i < 8; i++) { t0 = *blockptr++; t1 = *blockptr++; t2 = *blockptr++; tmp= *blockptr++; b4 = *blockptr++; b5 = *blockptr++; b6 = *blockptr++; b7 = *blockptr++;
b0 = t0 + b7; b7 = t0 - b7;
b1 = t1 + b6; b6 = t1 - b6;
b2 = t2 + b5; b5 = t2 - b5;
b3 = tmp + b4; b4 = tmp - b4;
t0 = b0 + b3; b3 = b0 - b3;
t1 = b1 + b2; b2 = b1 - b2;
tmp = b5; b5 = (b6 - b5) * W0; b6 = (b6 + tmp) * W0;
t2 = b4 + b5; b5 = b4 - b5;
tmp = b7 + b6; b6 = b7 - b6;
*dptr++ = (t0 + t1) * W4; *dptr++ = t2 * W7 + tmp * W1; *dptr++ = b2 * W6 + b3 * W2; *dptr++ = b6 * W3 - b5 * W5; *dptr++ = (t0 - t1) * W4; *dptr++ = b5 * W3 + b6 * W5; *dptr++ = b3 * W6 - b2 * W2; *dptr++ = tmp * W7 - t2 * W1; }
/* Vertical transform */ dptr = tmpbuf; coeffptr = coeff; for (i = 0; i < 8; i++, dptr++) { b0 = *dptr; tmp = *(dptr + 56) ; b7 = b0 - tmp ; b0 += tmp;
b1 = *(dptr + 8); tmp = *(dptr + 48) ; b6 = b1 - tmp; b1 += tmp;
b2 = *(dptr + 16); tmp = *(dptr + 40) ; b5 = b2 - tmp; b2 += tmp;
b3 = *(dptr + 24); tmp = *(dptr + 32) ; b4 = b3 - tmp; b3 += tmp;
t0 = b0 + b3; b3 = b0 - b3;
t1 = b1 + b2; b2 = b1 - b2;
tmp = b5; b5 = (b6 - b5) * W0; b6 = (b6 + tmp) * W0;
t2 = b4 + b5; b5 = b4 - b5;
tmp = b7 + b6; b6 = b7 - b6;
*(coeffptr + *ptdzz++) = S ((t0 + t1) * W4); *(coeffptr + *ptdzz++) = S (t2 * W7 + tmp * W1); *(coeffptr + *ptdzz++) = S (b2 * W6 + b3 * W2); *(coeffptr + *ptdzz++) = S (b6 * W3 - b5 * W5); *(coeffptr + *ptdzz++) = S ((t0 - t1) * W4); *(coeffptr + *ptdzz++) = S (b5 * W3 + b6 * W5); *(coeffptr + *ptdzz++) = S (b3 * W6 - b2 * W2); *(coeffptr + *ptdzz++) = S (tmp * W7 - t2 * W1); }
return 1; }
/**********************************************************************
* * Description: Does zone-filter on an 8x8 block-dct, * does zigzag-scanning of coefficients * * Input: 64 pixels in a 1D array * Returns: 64 coefficients in a 1D array * Side effects: * **********************************************************************/
int sv_H263ZoneDCT( short *block, short *coeff, int QP, int Mode) { int i; register float b0, b1, b2, b3, b4, b5, b6, b7, tmp, t0, t1, t2; float tmpbuf[64]; const unsigned int *ptdzz=ttdzz;
register short *blockptr, *coeffptr ; register float *dptr;
#if 1
short val, halfQ;
/* check significant signals in Inter-frame */ if(!(Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) { halfQ = QP >> 1; blockptr = block; for (i=0; i < 64; i++) { val = *blockptr++; if((val > halfQ) | (val < -halfQ)) break; } if(i==64){ memset(coeff,0,128) ; return 0; } } #endif
/* Horizontal transform */ dptr = tmpbuf; blockptr = block; for (i = 0; i < 8; i++) { t0 = *blockptr++; t1 = *blockptr++; t2 = *blockptr++; tmp= *blockptr++; b4 = *blockptr++; b5 = *blockptr++; b6 = *blockptr++; b7 = *blockptr++;
b0 = t0 + b7; b7 = t0 - b7;
b1 = t1 + b6; b6 = t1 - b6;
b2 = t2 + b5; b5 = t2 - b5;
b3 = tmp + b4; b4 = tmp - b4;
t0 = b0 + b3; b3 = b0 - b3;
t1 = b1 + b2; b2 = b1 - b2;
tmp = b5; b5 = (b6 - b5) * W0; b6 = (b6 + tmp) * W0;
t2 = b4 + b5; b5 = b4 - b5;
tmp = b7 + b6; b6 = b7 - b6;
*dptr++ = (t0 + t1) * W4; *dptr++ = t2 * W7 + tmp * W1; *dptr++ = b2 * W6 + b3 * W2; *dptr++ = b6 * W3 - b5 * W5; dptr+= 4; }
/* Vertical transform */ dptr = tmpbuf; coeffptr = coeff;
memset(coeff,0,128) ;
for (i = 0; i < 4; i++, dptr++) { b0 = *dptr; tmp = *(dptr + 56) ; b7 = b0 - tmp ; b0 += tmp;
b1 = *(dptr + 8); tmp = *(dptr + 48) ; b6 = b1 - tmp; b1 += tmp;
b2 = *(dptr + 16); tmp = *(dptr + 40) ; b5 = b2 - tmp; b2 += tmp;
b3 = *(dptr + 24); tmp = *(dptr + 32) ; b4 = b3 - tmp; b3 += tmp;
t0 = b0 + b3; b3 = b0 - b3;
t1 = b1 + b2; b2 = b1 - b2;
tmp = b5; b5 = (b6 - b5) * W0; b6 = (b6 + tmp) * W0;
t2 = b4 + b5; b5 = b4 - b5;
tmp = b7 + b6; b6 = b7 - b6;
*(coeffptr + *ptdzz++) = S ((t0 + t1) * W4); *(coeffptr + *ptdzz++) = S (t2 * W7 + tmp * W1); *(coeffptr + *ptdzz++) = S (b2 * W6 + b3 * W2); *(coeffptr + *ptdzz++) = S (b6 * W3 - b5 * W5);
ptdzz+=4; }
return 1; }
/**********************************************************************
* * Name: idct * Description: inverse dct on 64 coefficients * * Input: 64 coefficients, block for 64 pixels * Returns: 0 * Side effects: * **********************************************************************/
/*
** Function: ScIDCT8x8s ** Note: This scheme uses the direct transposition of the forward ** DCT. This may not be the preferred way in Hardware ** Implementations ** #define W1 2841 */ /* 2048*sqrt(2)*cos(1*pi/16)
** #define W2 2676 */ /* 2048*sqrt(2)*cos(2*pi/16)
** #define W5 1609 */ /* 2048*sqrt(2)*cos(5*pi/16)
*/
/* Integer multipliers for the fixed-point inverse DCT below. */
#define WW3 2408 /* 2048*sqrt(2)*cos(3*pi/16) */
#define WW6 1108 /* 2048*sqrt(2)*cos(6*pi/16) */
#define WW7 565 /* 2048*sqrt(2)*cos(7*pi/16) */
/*
** Pre-combined pairs: AWxy is the sum and DWxy the difference of the two
** scaled cosines with indices x and y (values 2841, 2676, 1609 for indices
** 1, 2, 5 are the ones listed in the comment above).
*/
#define AW26 3784 /* 2676 + 1108 */
#define DW26 1568 /* 2676 - 1108 */
#define AW17 3406 /* 2841 + 565 */
#define DW17 2276 /* 2841 - 565 */
#define AW35 4017 /* 2408 + 1609 */
#define DW35 799 /* 2408 - 1609 */
/* Final right-shift applied to the outputs of the row pass. */
#define IDCTSHIFTR 8
/* Final right-shift applied to the outputs of the column pass. */
#define IDCTSHIFTC 14
#ifndef USE_C
void sv_H263FillX0_S(short *stream, short wd); #endif
int sv_H263IDCT(short *inbuf, short *outbuf, int QP, int Mode, int outbuf_size) { int i; const unsigned int *ptdzz=tdzz; register int tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8; register short *inblk, *outblk; register short *tmpblk; short tmpbuf[64]; int Q2,QP_1; int p1, p2, p3, p4, p5, p6, p7;
/* double quantization step */ Q2 = QP << 1; QP_1 = QP - 1;
inblk = inbuf; tmpblk = tmpbuf;
if((QP %2) == 0){ for (i=0; i<8; i++) { /* read in ZZ order */ x0 = inblk[*ptdzz++]; x4 = inblk[*ptdzz++]; x3 = inblk[*ptdzz++]; x7 = inblk[*ptdzz++]; x1 = inblk[*ptdzz++]; x6 = inblk[*ptdzz++]; x2 = inblk[*ptdzz++]; x5 = inblk[*ptdzz++];
/* dequantize DC */ if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) x0 = x0 << 3; else if(x0) x0 = (x0 > 0) ? Q2*x0+QP-1 : Q2*x0-QP+1 ;
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) { if(!x0) memset(tmpblk, 0, 16) ; else { #ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ; #else
*tmpblk = *(tmpblk+1) = *(tmpblk+2) = *(tmpblk+3) = *(tmpblk+4) = *(tmpblk+5) = *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ; #endif
} tmpblk += 8; } else { /* dequantize AC */ if(x1) x1 = (x1 > 0) ? Q2*x1+QP_1 : Q2*x1-QP_1 ; if(x2) x2 = (x2 > 0) ? Q2*x2+QP_1 : Q2*x2-QP_1 ; if(x3) x3 = (x3 > 0) ? Q2*x3+QP_1 : Q2*x3-QP_1 ; if(x4) x4 = (x4 > 0) ? Q2*x4+QP_1 : Q2*x4-QP_1 ; if(x5) x5 = (x5 > 0) ? Q2*x5+QP_1 : Q2*x5-QP_1 ; if(x6) x6 = (x6 > 0) ? Q2*x6+QP_1 : Q2*x6-QP_1 ; if(x7) x7 = (x7 > 0) ? Q2*x7+QP_1 : Q2*x7-QP_1 ;
x1 = x1<<11;
tmp0 = x4 + x5; tmp0 = WW7*tmp0;
x0 = x0<<11; x0 = x0 + 128; x8 = x0 + x1;
tmp1 = x6 + x7; x0 = x0 - x1; tmp1 = WW3*tmp1; tmp2 = AW26*x2; tmp3 = DW26*x3;
x4 = DW17*x4; x5 = AW17*x5;
x4 = tmp0 + x4; x1 = x3 + x2; x5 = tmp0 - x5; x1 = WW6*x1; tmp0 = DW35*x6; x7 = AW35*x7; x2 = x1 - tmp2; x3 = x1 + tmp3; tmp0 = tmp1 - tmp0; x7 = tmp1 - x7; x1 = x4 + tmp0; x4 = x4 - tmp0; x6 = x5 + x7; /* F */ x5 = x5 - x7; /* F */ tmp0 = x4 + x5; tmp0 = 181*tmp0; x7 = x8 + x3; /* F */ tmp1 = x4 - x5; x8 = x8 - x3; /* F */ tmp1 = 181*tmp1; x3 = x0 + x2; /* F */ x0 = x0 - x2; /* F */ x2 = tmp0 + 128; x4 = tmp1 + 128; x2 = x2>>8; /* F */ x4 = x4>>8; /* F */
tmp0 = x7+x1; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x3+x2; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x0+x4; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x8+x6; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; tmp0 = x8-x6; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x0-x4; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x3-x2; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x7-x1; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; } } } else{ for (i=0; i<8; i++) { /* read in ZZ order */ x0 = inblk[*ptdzz++]; x4 = inblk[*ptdzz++]; x3 = inblk[*ptdzz++]; x7 = inblk[*ptdzz++]; x1 = inblk[*ptdzz++]; x6 = inblk[*ptdzz++]; x2 = inblk[*ptdzz++]; x5 = inblk[*ptdzz++];
/* quantize DC */ if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) x0 = x0 << 3; else if(x0) x0 = (x0 > 0) ? Q2*x0+QP : Q2*x0-QP ;
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) { if(!x0) memset(tmpblk, 0, 16) ; else { #ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ; #else
*tmpblk = *(tmpblk+1) = *(tmpblk+2) = *(tmpblk+3) = *(tmpblk+4) = *(tmpblk+5) = *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ; #endif
} tmpblk += 8; } else { /* dequantize AC */ if(x1) x1 = (x1 > 0) ? Q2*x1+QP : Q2*x1-QP ; if(x2) x2 = (x2 > 0) ? Q2*x2+QP : Q2*x2-QP ; if(x3) x3 = (x3 > 0) ? Q2*x3+QP : Q2*x3-QP ; if(x4) x4 = (x4 > 0) ? Q2*x4+QP : Q2*x4-QP ; if(x5) x5 = (x5 > 0) ? Q2*x5+QP : Q2*x5-QP ; if(x6) x6 = (x6 > 0) ? Q2*x6+QP : Q2*x6-QP ; if(x7) x7 = (x7 > 0) ? Q2*x7+QP : Q2*x7-QP ;
x1 = x1<<11;
tmp0 = x4 + x5; tmp0 = WW7*tmp0;
x0 = x0<<11; x0 = x0 + 128; x8 = x0 + x1;
tmp1 = x6 + x7; x0 = x0 - x1; tmp1 = WW3*tmp1; tmp2 = AW26*x2; tmp3 = DW26*x3;
x4 = DW17*x4; x5 = AW17*x5;
x4 = tmp0 + x4; x1 = x3 + x2; x5 = tmp0 - x5; x1 = WW6*x1; tmp0 = DW35*x6; x7 = AW35*x7; x2 = x1 - tmp2; x3 = x1 + tmp3; tmp0 = tmp1 - tmp0; x7 = tmp1 - x7; x1 = x4 + tmp0; x4 = x4 - tmp0; x6 = x5 + x7; /* F */ x5 = x5 - x7; /* F */ tmp0 = x4 + x5; tmp0 = 181*tmp0; x7 = x8 + x3; /* F */ tmp1 = x4 - x5; x8 = x8 - x3; /* F */ tmp1 = 181*tmp1; x3 = x0 + x2; /* F */ x0 = x0 - x2; /* F */ x2 = tmp0 + 128; x4 = tmp1 + 128; x2 = x2>>8; /* F */ x4 = x4>>8; /* F */
tmp0 = x7+x1; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x3+x2; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x0+x4; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x8+x6; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; tmp0 = x8-x6; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x0-x4; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x3-x2; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x7-x1; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; } } }
/* output position */ p1 = outbuf_size; p2 = p1 + outbuf_size; p3 = p2 + outbuf_size; p4 = p3 + outbuf_size; p5 = p4 + outbuf_size; p6 = p5 + outbuf_size; p7 = p6 + outbuf_size;
tmpblk = tmpbuf; outblk = outbuf; for (i=0; i<8; i++, tmpblk++, outblk++) { /* shortcut */ x0 = tmpblk[0]; x1 = tmpblk[32]; x2 = tmpblk[48]; x3 = tmpblk[16]; x4 = tmpblk[8]; x5 = tmpblk[56]; x6 = tmpblk[40]; x7 = tmpblk[24]; if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) { tmp0=(x0+32)>>6; outblk[0]=outblk[p1]=outblk[p2]=outblk[p3]=outblk[p4]=outblk[p5]= outblk[p6]=outblk[p7]= (short)tmp0 ; } else { x1 = x1 <<8; tmp0 = x4+x5; x0 = x0<<8; tmp0 = WW7*tmp0; x0 = x0 + 8192; tmp1 = x6+x7; tmp0 = tmp0 + 4; tmp1 = WW3*tmp1; tmp1 = tmp1 + 4; x8 = x0 + x1; tmp2 = AW26*x2; x0 = x0 - x1; x1 = x3 + x2; x1 = WW6*x1; tmp3 = DW26*x3; x1 = x1 + 4; x4 = DW17*x4; x4 = tmp0 + x4; x4 = x4>>3; x5 = AW17*x5; x2 = x1 - tmp2; x3 = x1 + tmp3; x6 = DW35*x6; x2 = x2>>3; x5 = tmp0 - x5; x5 = x5>>3; x6 = tmp1 - x6; x6 = x6>>3; x7 = AW35*x7; x7 = tmp1 - x7; x3 = x3>>3; x7 = x7>>3; x1 = x4 + x6; /* F */ x4 = x4 - x6; x6 = x5 + x7; /* F */ x5 = x5 - x7; /* F */ tmp1 = x4 + x5; x7 = x8 + x3; /* F */ tmp1 = 181*tmp1; x8 = x8 - x3; /* F */ x3 = x0 + x2; /* F */ tmp2 = x4 - x5; x0 = x0 - x2; /* F */ tmp2 = 181*tmp2; x2 = tmp1+128; x4 = tmp2+128; x2 = x2>>8; /* F */ x4 = x4>>8; /* F */
/* fourth stage */ tmp0=x7+x1; tmp1=x3+x2; tmp0=tmp0>>IDCTSHIFTC; tmp2=x0+x4; tmp1=tmp1>>IDCTSHIFTC; tmp3=x8+x6; tmp2=tmp2>>IDCTSHIFTC; tmp3=tmp3>>IDCTSHIFTC;
outblk[0] = (short)tmp0; outblk[p1] = (short)tmp1; outblk[p2] = (short)tmp2; outblk[p3] = (short)tmp3;
tmp0=x8-x6; tmp1=x0-x4; tmp0=tmp0>>IDCTSHIFTC; tmp2=x3-x2; tmp1=tmp1>>IDCTSHIFTC; tmp3=x7-x1; tmp2=tmp2>>IDCTSHIFTC; tmp3=tmp3>>IDCTSHIFTC;
outblk[p4] = (short)tmp0; outblk[p5] = (short)tmp1; outblk[p6] = (short)tmp2; outblk[p7] = (short)tmp3; } }
return 0; }
/**********************************************************************
* * Description: inverse zone-dct on 64 coefficients * * Input: 64 coefficients, block for 64 pixels * Returns: 0 * Side effects: * **********************************************************************/
int sv_H263ZoneIDCT(short *inbuf, short *outbuf, int QP, int Mode, int outbuf_size) { int i; const unsigned int *ptdzz=tdzz; register int tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8; register short *inblk, *outblk; register short *tmpblk; short tmpbuf[64]; int Q2,QP_1; int p1, p2, p3, p4, p5, p6, p7;
/* double quantization step */ Q2 = QP << 1; QP_1 = QP - 1;
inblk = inbuf; tmpblk = tmpbuf;
memset(tmpblk, 0, 128) ;
if((QP %2) == 0){ for (i=0; i<4; i++) { /* read in ZZ order */ x0 = inblk[*ptdzz++]; x4 = inblk[*ptdzz++]; x3 = inblk[*ptdzz++]; x7 = inblk[*ptdzz++]; x1 = x6 = x2 = x5 = 0; ptdzz += 4;
/* dequantize DC */ if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) x0 = x0 << 3; else if(x0) x0 = (x0 > 0) ? Q2*x0+QP-1 : Q2*x0-QP+1 ;
if (!(x3 | x4 | x7)) { if(!x0) memset(tmpblk, 0, 16) ; else { #ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ; #else
*tmpblk = *(tmpblk+1) = *(tmpblk+2) = *(tmpblk+3) = *(tmpblk+4) = *(tmpblk+5) = *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ; #endif
} tmpblk += 8; } else { /* dequantize AC */ if(x3) x3 = (x3 > 0) ? Q2*x3+QP_1 : Q2*x3-QP_1 ; if(x4) x4 = (x4 > 0) ? Q2*x4+QP_1 : Q2*x4-QP_1 ; if(x7) x7 = (x7 > 0) ? Q2*x7+QP_1 : Q2*x7-QP_1 ;
tmp0 = x4; tmp0 = WW7*tmp0;
x0 = x0<<11; x0 = x0 + 128; x8 = x0;
tmp1 = WW3*x7; tmp3 = DW26*x3;
x4 = DW17*x4;
x4 = tmp0 + x4; x1 = x3; x5 = tmp0;
x7 = AW35*x7; x2 = x1; x3 = x1 + tmp3; tmp0 = tmp1; x7 = tmp1 - x7; x1 = x4 + tmp0; x4 = x4 - tmp0; x6 = x5 + x7; /* F */ x5 = x5 - x7; /* F */ tmp0 = x4 + x5; tmp0 = 181*tmp0; x7 = x8 + x3; /* F */ tmp1 = x4 - x5; x8 = x8 - x3; /* F */ tmp1 = 181*tmp1; x3 = x0 + x2; /* F */ x0 = x0 - x2; /* F */ x2 = tmp0 + 128; x4 = tmp1 + 128; x2 = x2>>8; /* F */ x4 = x4>>8; /* F */
tmp0 = x7+x1; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x3+x2; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x0+x4; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x8+x6; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; tmp0 = x8-x6; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x0-x4; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x3-x2; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x7-x1; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; } } } else{ for (i=0; i<4; i++) { /* read in ZZ order */ x0 = inblk[*ptdzz++]; x4 = inblk[*ptdzz++]; x3 = inblk[*ptdzz++]; x7 = inblk[*ptdzz++]; x1 = x6 = x2 = x5 = 0; ptdzz += 4;
/* quantize DC */ if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) x0 = x0 << 3; else if(x0) x0 = (x0 > 0) ? Q2*x0+QP : Q2*x0-QP ;
if (!(x3 | x4 | x7)) { if(!x0) memset(tmpblk, 0, 16) ; else { #ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ; #else
*tmpblk = *(tmpblk+1) = *(tmpblk+2) = *(tmpblk+3) = *(tmpblk+4) = *(tmpblk+5) = *(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ; #endif
} tmpblk += 8; } else { /* dequantize AC */ if(x3) x3 = (x3 > 0) ? Q2*x3+QP : Q2*x3-QP ; if(x4) x4 = (x4 > 0) ? Q2*x4+QP : Q2*x4-QP ; if(x7) x7 = (x7 > 0) ? Q2*x7+QP : Q2*x7-QP ;
tmp0 = x4; tmp0 = WW7*tmp0;
x0 = x0<<11; x0 = x0 + 128; x8 = x0;
tmp1 = WW3*x7; tmp3 = DW26*x3;
x4 = DW17*x4;
x4 = tmp0 + x4; x1 = x3; x5 = tmp0; x1 = WW6*x1; tmp0 = 0; x7 = AW35*x7; x2 = x1 - tmp2; x3 = x1 + tmp3; tmp0 = tmp1; x7 = tmp1 - x7; x1 = x4 + tmp0; x4 = x4 - tmp0; x6 = x5 + x7; /* F */ x5 = x5 - x7; /* F */ tmp0 = x4 + x5; tmp0 = 181*tmp0; x7 = x8 + x3; /* F */ tmp1 = x4 - x5; x8 = x8 - x3; /* F */ tmp1 = 181*tmp1; x3 = x0 + x2; /* F */ x0 = x0 - x2; /* F */ x2 = tmp0 + 128; x4 = tmp1 + 128; x2 = x2>>8; /* F */ x4 = x4>>8; /* F */
tmp0 = x7+x1; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x3+x2; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x0+x4; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x8+x6; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; tmp0 = x8-x6; tmp0 = tmp0>>IDCTSHIFTR; tmp1 = x0-x4; tmp1 = tmp1>>IDCTSHIFTR; tmp2 = x3-x2; tmp2 = tmp2>>IDCTSHIFTR; tmp3 = x7-x1; tmp3 = tmp3>>IDCTSHIFTR; *tmpblk++ = (short)tmp0; *tmpblk++ = (short)tmp1; *tmpblk++ = (short)tmp2; *tmpblk++ = (short)tmp3; } } }
/* output position */ p1 = outbuf_size; p2 = p1 + outbuf_size; p3 = p2 + outbuf_size; p4 = p3 + outbuf_size; p5 = p4 + outbuf_size; p6 = p5 + outbuf_size; p7 = p6 + outbuf_size;
tmpblk = tmpbuf; outblk = outbuf; for (i=0; i<8; i++, tmpblk++, outblk++) { /* shortcut */ x0 = tmpblk[0]; x1 = tmpblk[32]; x2 = tmpblk[48]; x3 = tmpblk[16]; x4 = tmpblk[8]; x5 = tmpblk[56]; x6 = tmpblk[40]; x7 = tmpblk[24]; if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) { tmp0=(x0+32)>>6; outblk[0]=outblk[p1]=outblk[p2]=outblk[p3]=outblk[p4]=outblk[p5]= outblk[p6]=outblk[p7]= (short)tmp0 ; } else { x1 = x1 <<8; tmp0 = x4+x5; x0 = x0<<8; tmp0 = WW7*tmp0; x0 = x0 + 8192; tmp1 = x6+x7; tmp0 = tmp0 + 4; tmp1 = WW3*tmp1; tmp1 = tmp1 + 4; x8 = x0 + x1; tmp2 = AW26*x2; x0 = x0 - x1; x1 = x3 + x2; x1 = WW6*x1; tmp3 = DW26*x3; x1 = x1 + 4; x4 = DW17*x4; x4 = tmp0 + x4; x4 = x4>>3; x5 = AW17*x5; x2 = x1 - tmp2; x3 = x1 + tmp3; x6 = DW35*x6; x2 = x2>>3; x5 = tmp0 - x5; x5 = x5>>3; x6 = tmp1 - x6; x6 = x6>>3; x7 = AW35*x7; x7 = tmp1 - x7; x3 = x3>>3; x7 = x7>>3; x1 = x4 + x6; /* F */ x4 = x4 - x6; x6 = x5 + x7; /* F */ x5 = x5 - x7; /* F */ tmp1 = x4 + x5; x7 = x8 + x3; /* F */ tmp1 = 181*tmp1; x8 = x8 - x3; /* F */ x3 = x0 + x2; /* F */ tmp2 = x4 - x5; x0 = x0 - x2; /* F */ tmp2 = 181*tmp2; x2 = tmp1+128; x4 = tmp2+128; x2 = x2>>8; /* F */ x4 = x4>>8; /* F */
/* fourth stage */ tmp0=x7+x1; tmp1=x3+x2; tmp0=tmp0>>IDCTSHIFTC; tmp2=x0+x4; tmp1=tmp1>>IDCTSHIFTC; tmp3=x8+x6; tmp2=tmp2>>IDCTSHIFTC; tmp3=tmp3>>IDCTSHIFTC;
outblk[0] = (short)tmp0; outblk[p1] = (short)tmp1; outblk[p2] = (short)tmp2; outblk[p3] = (short)tmp3;
tmp0=x8-x6; tmp1=x0-x4; tmp0=tmp0>>IDCTSHIFTC; tmp2=x3-x2; tmp1=tmp1>>IDCTSHIFTC; tmp3=x7-x1; tmp2=tmp2>>IDCTSHIFTC; tmp3=tmp3>>IDCTSHIFTC;
outblk[p4] = (short)tmp0; outblk[p5] = (short)tmp1; outblk[p6] = (short)tmp2; outblk[p7] = (short)tmp3; } }
return 0; }
#if 0
/*
** Function: ZigzagMatrix() ** Purpose: Performs a zig-zag translation on the input imatrix ** and puts the output in omatrix. */ void svH263ZigzagMatrix(short *imatrix, short *omatrix) { const unsigned int *ptdzz=tdzz; int k;
for(k=64; k; k--) omatrix[*ptdzz++] = *imatrix++; }
/*
** Function: InvZigzagMatrix() ** Purpose: Performs an inverse zig-zag translation on the input imatrix ** and puts the output in omatrix. */ void svH263InvZigzagMatrix(short *imatrix, short *omatrix) { const unsigned int *ptdzz=tdzz; int k;
for(k=64; k; k--) *omatrix++ = imatrix[*ptdzz++];
} #endif
#ifndef PI
# ifdef M_PI
# define PI M_PI
# else
# define PI 3.14159265358979323846
# endif
#endif
/*
** zigzag[row][col] is the zigzag-scan position of the coefficient at
** (row, col); it holds the same values as tdzz in 2-D form.  It has
** external linkage and is read by sv_H263idctref.
*/
int zigzag[8][8] = { {0, 1, 5, 6,14,15,27,28}, {2, 4, 7,13,16,26,29,42}, {3, 8,12,17,25,30,41,43}, {9,11,18,24,31,40,44,53}, {10,19,23,32,39,45,52,54}, {20,22,33,38,46,51,55,60}, {21,34,37,47,50,56,59,61}, {35,36,48,49,57,58,62,63}, };
/* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
* direct matrix multiply) Inverse Discrete Cosine Transform */
/* Here we use math.h to generate constants. Compiler results may
vary a little */
/* private data */
/* cosine transform matrix for 8x1 IDCT */
/* Indexed c[frequency][sample position].  All zeros until
   sv_H263init_idctref() has been called to fill it in. */
static double c[8][8];
/*
** Fill the reference-IDCT basis table:
**   c[u][x] = s(u) * cos((PI/8) * u * (x + 0.5)),
** with s(0) = sqrt(1/8) and s(u) = 1/2 for u = 1..7.
** Must run before sv_H263idctref() is used.
*/
void sv_H263init_idctref()
{
  int u, x;

  for (u = 0; u < 8; u++) {
    const double norm = (u == 0) ? sqrt(0.125) : 0.5;
    const double step = (PI/8.0)*u;

    for (x = 0; x < 8; x++)
      c[u][x] = norm*cos(step*(x + 0.5));
  }
}
/*
** Reference inverse DCT for one 8x8 block, done as two double-precision
** matrix multiplies against the table c[][].
**
** coeff - 64 coefficients, addressed through zigzag[][]
** block - receives 64 samples, row-major, each rounded to the nearest
**         integer (floor(x + 0.5)) and clamped to [-256, 255]
**
** sv_H263init_idctref() must have filled c[][] beforehand.
*/
void sv_H263idctref(short *coeff, short *block)
{
  extern int zigzag[8][8];
  int natural[64];     /* coefficients after undoing the zigzag scan */
  double rows[64];     /* result of the first (row) multiply */
  double acc;
  int r, col, k, rounded;

  /* undo the zigzag scan */
  for (k = 0; k < 64; k++)
    natural[k] = coeff[zigzag[k / 8][k % 8]];

  /* first multiply: transform along each row */
  for (r = 0; r < 8; r++) {
    for (col = 0; col < 8; col++) {
      acc = 0.0;
      for (k = 0; k < 8; k++)
        acc += c[k][col]*natural[8*r+k];
      rows[8*r+col] = acc;
    }
  }

  /* second multiply: transform along each column; the transpose is folded
     into the indexing of rows[] */
  for (r = 0; r < 8; r++) {
    for (col = 0; col < 8; col++) {
      acc = 0.0;
      for (k = 0; k < 8; k++)
        acc += c[k][r]*rows[8*k+col];

      rounded = (int)floor(acc+0.5);
      if (rounded < -256)
        rounded = -256;
      else if (rounded > 255)
        rounded = 255;
      block[8*r+col] = rounded;
    }
  }
}
|