Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1212 lines
29 KiB

/* File: sv_h263_dct.c */
/*****************************************************************************
** Copyright (c) Digital Equipment Corporation, 1995, 1997 **
** **
** All Rights Reserved. Unpublished rights reserved under the copyright **
** laws of the United States. **
** **
** The software contained on this media is proprietary to and embodies **
** the confidential technology of Digital Equipment Corporation. **
** Possession, use, duplication or dissemination of the software and **
** media is authorized only pursuant to a valid written license from **
** Digital Equipment Corporation. **
** **
** RESTRICTED RIGHTS LEGEND Use, duplication, or disclosure by the U.S. **
** Government is subject to restrictions as set forth in Subparagraph **
** (c)(1)(ii) of DFARS 252.227-7013, or in FAR 52.227-19, as applicable. **
******************************************************************************/
#include <math.h>
#include "sv_h263.h"
#include "proto.h"
/* Cast shorthands: "F expr" expands to "(float) expr" and "S expr" to
 * "(short) expr". */
#define F (float)
#define S (short)
/*
 * Raster-to-zigzag index map for an 8x8 block.
 * Entry [8*row + col] is the index, within a zigzag-ordered array of 64
 * coefficients, of the coefficient located at (row, col).
 */
static const unsigned int tdzz[64] = {
     0,  1,  5,  6, 14, 15, 27, 28,
     2,  4,  7, 13, 16, 26, 29, 42,
     3,  8, 12, 17, 25, 30, 41, 43,
     9, 11, 18, 24, 31, 40, 44, 53,
    10, 19, 23, 32, 39, 45, 52, 54,
    20, 22, 33, 38, 46, 51, 55, 60,
    21, 34, 37, 47, 50, 56, 59, 61,
    35, 36, 48, 49, 57, 58, 62, 63
};
/*
 * Transposed raster-to-zigzag index map for an 8x8 block.
 * Entry [8*col + row] is the zigzag index of the coefficient at (row, col),
 * so a loop that walks a block column by column can step through this
 * table sequentially.
 */
static const unsigned int ttdzz[64] = {
     0,  2,  3,  9, 10, 20, 21, 35,
     1,  4,  8, 11, 19, 22, 34, 36,
     5,  7, 12, 18, 23, 33, 37, 48,
     6, 13, 17, 24, 32, 38, 47, 49,
    14, 16, 25, 31, 39, 46, 50, 57,
    15, 26, 30, 40, 45, 51, 56, 58,
    27, 29, 41, 44, 52, 55, 59, 62,
    28, 42, 43, 53, 54, 60, 61, 63
};
/**********************************************************************
*
* Name: Dct
* Description: Does dct on an 8x8 block, does zigzag-scanning of
* coefficients
*
* Input: 64 pixels in a 1D array
* Returns: 64 coefficients in a 1D array
* Side effects:
*
**********************************************************************/
/*
** Name: ScFDCT8x8s_C
** Purpose: 2-d Forward DCT (C version) for (8x8) blocks
**
** update: Wei-Lien Hsu, store in ZZ order.
*/
/*
 * Forward-DCT multipliers.  W0 is approximately cos(pi/4); Wk for k = 1..7
 * is approximately 0.5 * cos(k*pi/16).
 */
static const float W0 = (float)0.7071068;
static const float W1 = (float)0.4903926;
static const float W2 = (float)0.4619398;
static const float W3 = (float)0.4157348;
static const float W4 = (float)0.3535534;
static const float W5 = (float)0.2777851;
static const float W6 = (float)0.1913417;
static const float W7 = (float)0.0975452;
int sv_H263DCT( short *block, short *coeff, int QP, int Mode)
{
int i;
register float b0, b1, b2, b3, b4, b5, b6, b7, tmp, t0, t1, t2;
float tmpbuf[64];
const unsigned int *ptdzz=ttdzz;
register short *blockptr, *coeffptr ;
register float *dptr;
#if 1
short val, halfQ;
/* check significant signals in Inter-frame */
if(!(Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) {
halfQ = QP >> 1;
blockptr = block;
for (i=0; i < 64; i++) {
val = *blockptr++;
if((val > halfQ) | (val < -halfQ)) break;
}
if(i==64){ memset(coeff,0,128) ; return 0; }
}
#endif
/* Horizontal transform */
dptr = tmpbuf;
blockptr = block;
for (i = 0; i < 8; i++)
{
t0 = *blockptr++;
t1 = *blockptr++;
t2 = *blockptr++;
tmp= *blockptr++;
b4 = *blockptr++;
b5 = *blockptr++;
b6 = *blockptr++;
b7 = *blockptr++;
b0 = t0 + b7;
b7 = t0 - b7;
b1 = t1 + b6;
b6 = t1 - b6;
b2 = t2 + b5;
b5 = t2 - b5;
b3 = tmp + b4;
b4 = tmp - b4;
t0 = b0 + b3;
b3 = b0 - b3;
t1 = b1 + b2;
b2 = b1 - b2;
tmp = b5;
b5 = (b6 - b5) * W0;
b6 = (b6 + tmp) * W0;
t2 = b4 + b5;
b5 = b4 - b5;
tmp = b7 + b6;
b6 = b7 - b6;
*dptr++ = (t0 + t1) * W4;
*dptr++ = t2 * W7 + tmp * W1;
*dptr++ = b2 * W6 + b3 * W2;
*dptr++ = b6 * W3 - b5 * W5;
*dptr++ = (t0 - t1) * W4;
*dptr++ = b5 * W3 + b6 * W5;
*dptr++ = b3 * W6 - b2 * W2;
*dptr++ = tmp * W7 - t2 * W1;
}
/* Vertical transform */
dptr = tmpbuf;
coeffptr = coeff;
for (i = 0; i < 8; i++, dptr++)
{
b0 = *dptr;
tmp = *(dptr + 56) ;
b7 = b0 - tmp ;
b0 += tmp;
b1 = *(dptr + 8);
tmp = *(dptr + 48) ;
b6 = b1 - tmp;
b1 += tmp;
b2 = *(dptr + 16);
tmp = *(dptr + 40) ;
b5 = b2 - tmp;
b2 += tmp;
b3 = *(dptr + 24);
tmp = *(dptr + 32) ;
b4 = b3 - tmp;
b3 += tmp;
t0 = b0 + b3;
b3 = b0 - b3;
t1 = b1 + b2;
b2 = b1 - b2;
tmp = b5;
b5 = (b6 - b5) * W0;
b6 = (b6 + tmp) * W0;
t2 = b4 + b5;
b5 = b4 - b5;
tmp = b7 + b6;
b6 = b7 - b6;
*(coeffptr + *ptdzz++) = S ((t0 + t1) * W4);
*(coeffptr + *ptdzz++) = S (t2 * W7 + tmp * W1);
*(coeffptr + *ptdzz++) = S (b2 * W6 + b3 * W2);
*(coeffptr + *ptdzz++) = S (b6 * W3 - b5 * W5);
*(coeffptr + *ptdzz++) = S ((t0 - t1) * W4);
*(coeffptr + *ptdzz++) = S (b5 * W3 + b6 * W5);
*(coeffptr + *ptdzz++) = S (b3 * W6 - b2 * W2);
*(coeffptr + *ptdzz++) = S (tmp * W7 - t2 * W1);
}
return 1;
}
/**********************************************************************
*
* Description: Does zone-filter on an 8x8 block-dct,
* does zigzag-scanning of coefficients
*
* Input: 64 pixels in a 1D array
* Returns: 64 coefficients in a 1D array
* Side effects:
*
**********************************************************************/
int sv_H263ZoneDCT( short *block, short *coeff, int QP, int Mode)
{
int i;
register float b0, b1, b2, b3, b4, b5, b6, b7, tmp, t0, t1, t2;
float tmpbuf[64];
const unsigned int *ptdzz=ttdzz;
register short *blockptr, *coeffptr ;
register float *dptr;
#if 1
short val, halfQ;
/* check significant signals in Inter-frame */
if(!(Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q)) {
halfQ = QP >> 1;
blockptr = block;
for (i=0; i < 64; i++) {
val = *blockptr++;
if((val > halfQ) | (val < -halfQ)) break;
}
if(i==64){ memset(coeff,0,128) ; return 0; }
}
#endif
/* Horizontal transform */
dptr = tmpbuf;
blockptr = block;
for (i = 0; i < 8; i++)
{
t0 = *blockptr++;
t1 = *blockptr++;
t2 = *blockptr++;
tmp= *blockptr++;
b4 = *blockptr++;
b5 = *blockptr++;
b6 = *blockptr++;
b7 = *blockptr++;
b0 = t0 + b7;
b7 = t0 - b7;
b1 = t1 + b6;
b6 = t1 - b6;
b2 = t2 + b5;
b5 = t2 - b5;
b3 = tmp + b4;
b4 = tmp - b4;
t0 = b0 + b3;
b3 = b0 - b3;
t1 = b1 + b2;
b2 = b1 - b2;
tmp = b5;
b5 = (b6 - b5) * W0;
b6 = (b6 + tmp) * W0;
t2 = b4 + b5;
b5 = b4 - b5;
tmp = b7 + b6;
b6 = b7 - b6;
*dptr++ = (t0 + t1) * W4;
*dptr++ = t2 * W7 + tmp * W1;
*dptr++ = b2 * W6 + b3 * W2;
*dptr++ = b6 * W3 - b5 * W5;
dptr+= 4;
}
/* Vertical transform */
dptr = tmpbuf;
coeffptr = coeff;
memset(coeff,0,128) ;
for (i = 0; i < 4; i++, dptr++)
{
b0 = *dptr;
tmp = *(dptr + 56) ;
b7 = b0 - tmp ;
b0 += tmp;
b1 = *(dptr + 8);
tmp = *(dptr + 48) ;
b6 = b1 - tmp;
b1 += tmp;
b2 = *(dptr + 16);
tmp = *(dptr + 40) ;
b5 = b2 - tmp;
b2 += tmp;
b3 = *(dptr + 24);
tmp = *(dptr + 32) ;
b4 = b3 - tmp;
b3 += tmp;
t0 = b0 + b3;
b3 = b0 - b3;
t1 = b1 + b2;
b2 = b1 - b2;
tmp = b5;
b5 = (b6 - b5) * W0;
b6 = (b6 + tmp) * W0;
t2 = b4 + b5;
b5 = b4 - b5;
tmp = b7 + b6;
b6 = b7 - b6;
*(coeffptr + *ptdzz++) = S ((t0 + t1) * W4);
*(coeffptr + *ptdzz++) = S (t2 * W7 + tmp * W1);
*(coeffptr + *ptdzz++) = S (b2 * W6 + b3 * W2);
*(coeffptr + *ptdzz++) = S (b6 * W3 - b5 * W5);
ptdzz+=4;
}
return 1;
}
/**********************************************************************
*
* Name: idct
* Description: inverse dct on 64 coefficients
*
* Input: 64 coefficients, block for 64 pixels
* Returns: 0
* Side effects:
*
**********************************************************************/
/*
** Function: ScIDCT8x8s
** Note: This scheme uses the direct transposition of the forward
** DCT. This may not be the preferred way in hardware
** implementations.
**
** Multipliers that are not defined as macros of their own:
** W1 = 2841 = 2048*sqrt(2)*cos(1*pi/16)
** W2 = 2676 = 2048*sqrt(2)*cos(2*pi/16)
** W5 = 1609 = 2048*sqrt(2)*cos(5*pi/16)
*/
/*
 * Fixed-point IDCT multipliers, Wk = 2048*sqrt(2)*cos(k*pi/16):
 * W1 = 2841, W2 = 2676, W3 = 2408, W5 = 1609, W6 = 1108, W7 = 565.
 * AWij is Wi + Wj and DWij is Wi - Wj.
 */
#define WW3 2408            /* 2048*sqrt(2)*cos(3*pi/16) */
#define WW6 1108            /* 2048*sqrt(2)*cos(6*pi/16) */
#define WW7 565             /* 2048*sqrt(2)*cos(7*pi/16) */
#define AW26 (2676 + 1108)  /* W2 + W6 = 3784 */
#define DW26 (2676 - 1108)  /* W2 - W6 = 1568 */
#define AW17 (2841 + 565)   /* W1 + W7 = 3406 */
#define DW17 (2841 - 565)   /* W1 - W7 = 2276 */
#define AW35 (2408 + 1609)  /* W3 + W5 = 4017 */
#define DW35 (2408 - 1609)  /* W3 - W5 = 799 */
/* Final right shifts applied to row-pass and column-pass results. */
#define IDCTSHIFTR 8
#define IDCTSHIFTC 14
#ifndef USE_C
/* Helper implemented outside this file and used when USE_C is not defined.
 * The IDCT routines call it where the USE_C build stores one value into
 * eight consecutive shorts, so it presumably fills stream[0..7] with wd --
 * TODO confirm against its implementation. */
void sv_H263FillX0_S(short *stream, short wd);
#endif
int sv_H263IDCT(short *inbuf, short *outbuf, int QP, int Mode, int outbuf_size)
{
int i;
const unsigned int *ptdzz=tdzz;
register int tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8;
register short *inblk, *outblk;
register short *tmpblk;
short tmpbuf[64];
int Q2,QP_1;
int p1, p2, p3, p4, p5, p6, p7;
/* double quantization step */
Q2 = QP << 1;
QP_1 = QP - 1;
inblk = inbuf;
tmpblk = tmpbuf;
if((QP %2) == 0){
for (i=0; i<8; i++)
{
/* read in ZZ order */
x0 = inblk[*ptdzz++];
x4 = inblk[*ptdzz++];
x3 = inblk[*ptdzz++];
x7 = inblk[*ptdzz++];
x1 = inblk[*ptdzz++];
x6 = inblk[*ptdzz++];
x2 = inblk[*ptdzz++];
x5 = inblk[*ptdzz++];
/* dequantize DC */
if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
x0 = x0 << 3;
else
if(x0) x0 = (x0 > 0) ? Q2*x0+QP-1 : Q2*x0-QP+1 ;
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
if(!x0) memset(tmpblk, 0, 16) ;
else {
#ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
#else
*tmpblk = *(tmpblk+1) =
*(tmpblk+2) = *(tmpblk+3) =
*(tmpblk+4) = *(tmpblk+5) =
*(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
#endif
}
tmpblk += 8;
}
else
{
/* dequantize AC */
if(x1) x1 = (x1 > 0) ? Q2*x1+QP_1 : Q2*x1-QP_1 ;
if(x2) x2 = (x2 > 0) ? Q2*x2+QP_1 : Q2*x2-QP_1 ;
if(x3) x3 = (x3 > 0) ? Q2*x3+QP_1 : Q2*x3-QP_1 ;
if(x4) x4 = (x4 > 0) ? Q2*x4+QP_1 : Q2*x4-QP_1 ;
if(x5) x5 = (x5 > 0) ? Q2*x5+QP_1 : Q2*x5-QP_1 ;
if(x6) x6 = (x6 > 0) ? Q2*x6+QP_1 : Q2*x6-QP_1 ;
if(x7) x7 = (x7 > 0) ? Q2*x7+QP_1 : Q2*x7-QP_1 ;
x1 = x1<<11;
tmp0 = x4 + x5;
tmp0 = WW7*tmp0;
x0 = x0<<11;
x0 = x0 + 128;
x8 = x0 + x1;
tmp1 = x6 + x7;
x0 = x0 - x1;
tmp1 = WW3*tmp1;
tmp2 = AW26*x2;
tmp3 = DW26*x3;
x4 = DW17*x4;
x5 = AW17*x5;
x4 = tmp0 + x4;
x1 = x3 + x2;
x5 = tmp0 - x5;
x1 = WW6*x1;
tmp0 = DW35*x6;
x7 = AW35*x7;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
tmp0 = tmp1 - tmp0;
x7 = tmp1 - x7;
x1 = x4 + tmp0;
x4 = x4 - tmp0;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp0 = x4 + x5;
tmp0 = 181*tmp0;
x7 = x8 + x3; /* F */
tmp1 = x4 - x5;
x8 = x8 - x3; /* F */
tmp1 = 181*tmp1;
x3 = x0 + x2; /* F */
x0 = x0 - x2; /* F */
x2 = tmp0 + 128;
x4 = tmp1 + 128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
tmp0 = x7+x1;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x3+x2;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x0+x4;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x8+x6;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
tmp0 = x8-x6;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x0-x4;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x3-x2;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x7-x1;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
}
}
}
else{
for (i=0; i<8; i++)
{
/* read in ZZ order */
x0 = inblk[*ptdzz++];
x4 = inblk[*ptdzz++];
x3 = inblk[*ptdzz++];
x7 = inblk[*ptdzz++];
x1 = inblk[*ptdzz++];
x6 = inblk[*ptdzz++];
x2 = inblk[*ptdzz++];
x5 = inblk[*ptdzz++];
/* quantize DC */
if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
x0 = x0 << 3;
else
if(x0) x0 = (x0 > 0) ? Q2*x0+QP : Q2*x0-QP ;
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
if(!x0) memset(tmpblk, 0, 16) ;
else {
#ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
#else
*tmpblk = *(tmpblk+1) =
*(tmpblk+2) = *(tmpblk+3) =
*(tmpblk+4) = *(tmpblk+5) =
*(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
#endif
}
tmpblk += 8;
}
else
{
/* dequantize AC */
if(x1) x1 = (x1 > 0) ? Q2*x1+QP : Q2*x1-QP ;
if(x2) x2 = (x2 > 0) ? Q2*x2+QP : Q2*x2-QP ;
if(x3) x3 = (x3 > 0) ? Q2*x3+QP : Q2*x3-QP ;
if(x4) x4 = (x4 > 0) ? Q2*x4+QP : Q2*x4-QP ;
if(x5) x5 = (x5 > 0) ? Q2*x5+QP : Q2*x5-QP ;
if(x6) x6 = (x6 > 0) ? Q2*x6+QP : Q2*x6-QP ;
if(x7) x7 = (x7 > 0) ? Q2*x7+QP : Q2*x7-QP ;
x1 = x1<<11;
tmp0 = x4 + x5;
tmp0 = WW7*tmp0;
x0 = x0<<11;
x0 = x0 + 128;
x8 = x0 + x1;
tmp1 = x6 + x7;
x0 = x0 - x1;
tmp1 = WW3*tmp1;
tmp2 = AW26*x2;
tmp3 = DW26*x3;
x4 = DW17*x4;
x5 = AW17*x5;
x4 = tmp0 + x4;
x1 = x3 + x2;
x5 = tmp0 - x5;
x1 = WW6*x1;
tmp0 = DW35*x6;
x7 = AW35*x7;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
tmp0 = tmp1 - tmp0;
x7 = tmp1 - x7;
x1 = x4 + tmp0;
x4 = x4 - tmp0;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp0 = x4 + x5;
tmp0 = 181*tmp0;
x7 = x8 + x3; /* F */
tmp1 = x4 - x5;
x8 = x8 - x3; /* F */
tmp1 = 181*tmp1;
x3 = x0 + x2; /* F */
x0 = x0 - x2; /* F */
x2 = tmp0 + 128;
x4 = tmp1 + 128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
tmp0 = x7+x1;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x3+x2;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x0+x4;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x8+x6;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
tmp0 = x8-x6;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x0-x4;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x3-x2;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x7-x1;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
}
}
}
/* output position */
p1 = outbuf_size;
p2 = p1 + outbuf_size;
p3 = p2 + outbuf_size;
p4 = p3 + outbuf_size;
p5 = p4 + outbuf_size;
p6 = p5 + outbuf_size;
p7 = p6 + outbuf_size;
tmpblk = tmpbuf;
outblk = outbuf;
for (i=0; i<8; i++, tmpblk++, outblk++)
{
/* shortcut */
x0 = tmpblk[0];
x1 = tmpblk[32];
x2 = tmpblk[48];
x3 = tmpblk[16];
x4 = tmpblk[8];
x5 = tmpblk[56];
x6 = tmpblk[40];
x7 = tmpblk[24];
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
{
tmp0=(x0+32)>>6;
outblk[0]=outblk[p1]=outblk[p2]=outblk[p3]=outblk[p4]=outblk[p5]=
outblk[p6]=outblk[p7]= (short)tmp0 ;
}
else
{
x1 = x1 <<8;
tmp0 = x4+x5;
x0 = x0<<8;
tmp0 = WW7*tmp0;
x0 = x0 + 8192;
tmp1 = x6+x7;
tmp0 = tmp0 + 4;
tmp1 = WW3*tmp1;
tmp1 = tmp1 + 4;
x8 = x0 + x1;
tmp2 = AW26*x2;
x0 = x0 - x1;
x1 = x3 + x2;
x1 = WW6*x1;
tmp3 = DW26*x3;
x1 = x1 + 4;
x4 = DW17*x4;
x4 = tmp0 + x4;
x4 = x4>>3;
x5 = AW17*x5;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
x6 = DW35*x6;
x2 = x2>>3;
x5 = tmp0 - x5;
x5 = x5>>3;
x6 = tmp1 - x6;
x6 = x6>>3;
x7 = AW35*x7;
x7 = tmp1 - x7;
x3 = x3>>3;
x7 = x7>>3;
x1 = x4 + x6; /* F */
x4 = x4 - x6;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp1 = x4 + x5;
x7 = x8 + x3; /* F */
tmp1 = 181*tmp1;
x8 = x8 - x3; /* F */
x3 = x0 + x2; /* F */
tmp2 = x4 - x5;
x0 = x0 - x2; /* F */
tmp2 = 181*tmp2;
x2 = tmp1+128;
x4 = tmp2+128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
/* fourth stage */
tmp0=x7+x1;
tmp1=x3+x2;
tmp0=tmp0>>IDCTSHIFTC;
tmp2=x0+x4;
tmp1=tmp1>>IDCTSHIFTC;
tmp3=x8+x6;
tmp2=tmp2>>IDCTSHIFTC;
tmp3=tmp3>>IDCTSHIFTC;
outblk[0] = (short)tmp0;
outblk[p1] = (short)tmp1;
outblk[p2] = (short)tmp2;
outblk[p3] = (short)tmp3;
tmp0=x8-x6;
tmp1=x0-x4;
tmp0=tmp0>>IDCTSHIFTC;
tmp2=x3-x2;
tmp1=tmp1>>IDCTSHIFTC;
tmp3=x7-x1;
tmp2=tmp2>>IDCTSHIFTC;
tmp3=tmp3>>IDCTSHIFTC;
outblk[p4] = (short)tmp0;
outblk[p5] = (short)tmp1;
outblk[p6] = (short)tmp2;
outblk[p7] = (short)tmp3;
}
}
return 0;
}
/**********************************************************************
*
* Description: inverse zone-dct on 64 coefficients
*
* Input: 64 coefficients, block for 64 pixels
* Returns: 0
* Side effects:
*
**********************************************************************/
int sv_H263ZoneIDCT(short *inbuf, short *outbuf, int QP, int Mode, int outbuf_size)
{
int i;
const unsigned int *ptdzz=tdzz;
register int tmp0, tmp1, tmp2, tmp3, x0, x1, x2, x3, x4, x5, x6, x7, x8;
register short *inblk, *outblk;
register short *tmpblk;
short tmpbuf[64];
int Q2,QP_1;
int p1, p2, p3, p4, p5, p6, p7;
/* double quantization step */
Q2 = QP << 1;
QP_1 = QP - 1;
inblk = inbuf;
tmpblk = tmpbuf;
memset(tmpblk, 0, 128) ;
if((QP %2) == 0){
for (i=0; i<4; i++)
{
/* read in ZZ order */
x0 = inblk[*ptdzz++];
x4 = inblk[*ptdzz++];
x3 = inblk[*ptdzz++];
x7 = inblk[*ptdzz++];
x1 = x6 = x2 = x5 = 0;
ptdzz += 4;
/* dequantize DC */
if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
x0 = x0 << 3;
else
if(x0) x0 = (x0 > 0) ? Q2*x0+QP-1 : Q2*x0-QP+1 ;
if (!(x3 | x4 | x7)) {
if(!x0) memset(tmpblk, 0, 16) ;
else {
#ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
#else
*tmpblk = *(tmpblk+1) =
*(tmpblk+2) = *(tmpblk+3) =
*(tmpblk+4) = *(tmpblk+5) =
*(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
#endif
}
tmpblk += 8;
}
else
{
/* dequantize AC */
if(x3) x3 = (x3 > 0) ? Q2*x3+QP_1 : Q2*x3-QP_1 ;
if(x4) x4 = (x4 > 0) ? Q2*x4+QP_1 : Q2*x4-QP_1 ;
if(x7) x7 = (x7 > 0) ? Q2*x7+QP_1 : Q2*x7-QP_1 ;
tmp0 = x4;
tmp0 = WW7*tmp0;
x0 = x0<<11;
x0 = x0 + 128;
x8 = x0;
tmp1 = WW3*x7;
tmp3 = DW26*x3;
x4 = DW17*x4;
x4 = tmp0 + x4;
x1 = x3;
x5 = tmp0;
x7 = AW35*x7;
x2 = x1;
x3 = x1 + tmp3;
tmp0 = tmp1;
x7 = tmp1 - x7;
x1 = x4 + tmp0;
x4 = x4 - tmp0;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp0 = x4 + x5;
tmp0 = 181*tmp0;
x7 = x8 + x3; /* F */
tmp1 = x4 - x5;
x8 = x8 - x3; /* F */
tmp1 = 181*tmp1;
x3 = x0 + x2; /* F */
x0 = x0 - x2; /* F */
x2 = tmp0 + 128;
x4 = tmp1 + 128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
tmp0 = x7+x1;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x3+x2;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x0+x4;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x8+x6;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
tmp0 = x8-x6;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x0-x4;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x3-x2;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x7-x1;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
}
}
}
else{
for (i=0; i<4; i++)
{
/* read in ZZ order */
x0 = inblk[*ptdzz++];
x4 = inblk[*ptdzz++];
x3 = inblk[*ptdzz++];
x7 = inblk[*ptdzz++];
x1 = x6 = x2 = x5 = 0;
ptdzz += 4;
/* quantize DC */
if (!i && (Mode == H263_MODE_INTRA || Mode == H263_MODE_INTRA_Q))
x0 = x0 << 3;
else
if(x0) x0 = (x0 > 0) ? Q2*x0+QP : Q2*x0-QP ;
if (!(x3 | x4 | x7)) {
if(!x0) memset(tmpblk, 0, 16) ;
else {
#ifndef USE_C
sv_H263FillX0_S(tmpblk, (short)(x0 << 3)) ;
#else
*tmpblk = *(tmpblk+1) =
*(tmpblk+2) = *(tmpblk+3) =
*(tmpblk+4) = *(tmpblk+5) =
*(tmpblk+6) = *(tmpblk+7) = (short)(x0 << 3) ;
#endif
}
tmpblk += 8;
}
else
{
/* dequantize AC */
if(x3) x3 = (x3 > 0) ? Q2*x3+QP : Q2*x3-QP ;
if(x4) x4 = (x4 > 0) ? Q2*x4+QP : Q2*x4-QP ;
if(x7) x7 = (x7 > 0) ? Q2*x7+QP : Q2*x7-QP ;
tmp0 = x4;
tmp0 = WW7*tmp0;
x0 = x0<<11;
x0 = x0 + 128;
x8 = x0;
tmp1 = WW3*x7;
tmp3 = DW26*x3;
x4 = DW17*x4;
x4 = tmp0 + x4;
x1 = x3;
x5 = tmp0;
x1 = WW6*x1;
tmp0 = 0;
x7 = AW35*x7;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
tmp0 = tmp1;
x7 = tmp1 - x7;
x1 = x4 + tmp0;
x4 = x4 - tmp0;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp0 = x4 + x5;
tmp0 = 181*tmp0;
x7 = x8 + x3; /* F */
tmp1 = x4 - x5;
x8 = x8 - x3; /* F */
tmp1 = 181*tmp1;
x3 = x0 + x2; /* F */
x0 = x0 - x2; /* F */
x2 = tmp0 + 128;
x4 = tmp1 + 128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
tmp0 = x7+x1;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x3+x2;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x0+x4;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x8+x6;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
tmp0 = x8-x6;
tmp0 = tmp0>>IDCTSHIFTR;
tmp1 = x0-x4;
tmp1 = tmp1>>IDCTSHIFTR;
tmp2 = x3-x2;
tmp2 = tmp2>>IDCTSHIFTR;
tmp3 = x7-x1;
tmp3 = tmp3>>IDCTSHIFTR;
*tmpblk++ = (short)tmp0;
*tmpblk++ = (short)tmp1;
*tmpblk++ = (short)tmp2;
*tmpblk++ = (short)tmp3;
}
}
}
/* output position */
p1 = outbuf_size;
p2 = p1 + outbuf_size;
p3 = p2 + outbuf_size;
p4 = p3 + outbuf_size;
p5 = p4 + outbuf_size;
p6 = p5 + outbuf_size;
p7 = p6 + outbuf_size;
tmpblk = tmpbuf;
outblk = outbuf;
for (i=0; i<8; i++, tmpblk++, outblk++)
{
/* shortcut */
x0 = tmpblk[0];
x1 = tmpblk[32];
x2 = tmpblk[48];
x3 = tmpblk[16];
x4 = tmpblk[8];
x5 = tmpblk[56];
x6 = tmpblk[40];
x7 = tmpblk[24];
if (!(x1 | x2 | x3 | x4 | x5 | x6 | x7))
{
tmp0=(x0+32)>>6;
outblk[0]=outblk[p1]=outblk[p2]=outblk[p3]=outblk[p4]=outblk[p5]=
outblk[p6]=outblk[p7]= (short)tmp0 ;
}
else
{
x1 = x1 <<8;
tmp0 = x4+x5;
x0 = x0<<8;
tmp0 = WW7*tmp0;
x0 = x0 + 8192;
tmp1 = x6+x7;
tmp0 = tmp0 + 4;
tmp1 = WW3*tmp1;
tmp1 = tmp1 + 4;
x8 = x0 + x1;
tmp2 = AW26*x2;
x0 = x0 - x1;
x1 = x3 + x2;
x1 = WW6*x1;
tmp3 = DW26*x3;
x1 = x1 + 4;
x4 = DW17*x4;
x4 = tmp0 + x4;
x4 = x4>>3;
x5 = AW17*x5;
x2 = x1 - tmp2;
x3 = x1 + tmp3;
x6 = DW35*x6;
x2 = x2>>3;
x5 = tmp0 - x5;
x5 = x5>>3;
x6 = tmp1 - x6;
x6 = x6>>3;
x7 = AW35*x7;
x7 = tmp1 - x7;
x3 = x3>>3;
x7 = x7>>3;
x1 = x4 + x6; /* F */
x4 = x4 - x6;
x6 = x5 + x7; /* F */
x5 = x5 - x7; /* F */
tmp1 = x4 + x5;
x7 = x8 + x3; /* F */
tmp1 = 181*tmp1;
x8 = x8 - x3; /* F */
x3 = x0 + x2; /* F */
tmp2 = x4 - x5;
x0 = x0 - x2; /* F */
tmp2 = 181*tmp2;
x2 = tmp1+128;
x4 = tmp2+128;
x2 = x2>>8; /* F */
x4 = x4>>8; /* F */
/* fourth stage */
tmp0=x7+x1;
tmp1=x3+x2;
tmp0=tmp0>>IDCTSHIFTC;
tmp2=x0+x4;
tmp1=tmp1>>IDCTSHIFTC;
tmp3=x8+x6;
tmp2=tmp2>>IDCTSHIFTC;
tmp3=tmp3>>IDCTSHIFTC;
outblk[0] = (short)tmp0;
outblk[p1] = (short)tmp1;
outblk[p2] = (short)tmp2;
outblk[p3] = (short)tmp3;
tmp0=x8-x6;
tmp1=x0-x4;
tmp0=tmp0>>IDCTSHIFTC;
tmp2=x3-x2;
tmp1=tmp1>>IDCTSHIFTC;
tmp3=x7-x1;
tmp2=tmp2>>IDCTSHIFTC;
tmp3=tmp3>>IDCTSHIFTC;
outblk[p4] = (short)tmp0;
outblk[p5] = (short)tmp1;
outblk[p6] = (short)tmp2;
outblk[p7] = (short)tmp3;
}
}
return 0;
}
#if 0
/*
** Function: ZigzagMatrix()
** Purpose: Performs a zig-zag translation on the input imatrix
** and puts the output in omatrix.
*/
void svH263ZigzagMatrix(short *imatrix, short *omatrix)
{
const unsigned int *ptdzz=tdzz;
int k;
for(k=64; k; k--)
omatrix[*ptdzz++] = *imatrix++;
}
/*
** Function: InvZigzagMatrix()
** Purpose: Performs an inverse zig-zag translation on the input imatrix
** and puts the output in omatrix.
*/
void svH263InvZigzagMatrix(short *imatrix, short *omatrix)
{
const unsigned int *ptdzz=tdzz;
int k;
for(k=64; k; k--)
*omatrix++ = imatrix[*ptdzz++];
}
#endif
/* PI: keep an existing definition; otherwise use M_PI when it is defined,
   or fall back to a literal. */
#if !defined(PI)
# if defined(M_PI)
#  define PI M_PI
# else
#  define PI 3.14159265358979323846
# endif
#endif
/* zigzag[row][col] is the index, within a zigzag-ordered coefficient
   array, of the coefficient at (row, col). */
int zigzag[8][8] = {
  {  0,  1,  5,  6, 14, 15, 27, 28 },
  {  2,  4,  7, 13, 16, 26, 29, 42 },
  {  3,  8, 12, 17, 25, 30, 41, 43 },
  {  9, 11, 18, 24, 31, 40, 44, 53 },
  { 10, 19, 23, 32, 39, 45, 52, 54 },
  { 20, 22, 33, 38, 46, 51, 55, 60 },
  { 21, 34, 37, 47, 50, 56, 59, 61 },
  { 35, 36, 48, 49, 57, 58, 62, 63 }
};
/* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
* direct matrix multiply) Inverse Discrete Cosine Transform
*/
/* Here we use math.h to generate constants. Compiler results may
vary a little */
/* private data */
/* cosine transform matrix for 8x1 IDCT */
static double c[8][8];

/* Fill the transform matrix used by sv_H263idctref:
   c[u][x] = scale(u) * cos((PI/8) * u * (x + 0.5)),
   with scale(0) = sqrt(0.125) and scale(u) = 0.5 for u > 0.
   Must run before the first call to sv_H263idctref. */
void sv_H263init_idctref()
{
  int u, x;
  double scale;

  for (u = 0; u < 8; u++)
  {
    if (u == 0)
      scale = sqrt(0.125);
    else
      scale = 0.5;

    for (x = 0; x < 8; x++)
      c[u][x] = scale*cos((PI/8.0)*u*(x + 0.5));
  }
}
/* perform IDCT matrix multiply for 8x8 coefficient block */
/* Reference inverse DCT in double precision.
   coeff - 64 coefficients, addressed through the zigzag table
   block - receives 64 samples in raster order, each rounded to the
           nearest integer and clamped to [-256, 255]
   Uses the matrix c[][], which sv_H263init_idctref fills in. */
void sv_H263idctref(short *coeff, short *block)
{
  int row, col, k, rounded;
  double acc;
  double rowres[64];
  int raster[64];
  extern int zigzag[8][8];

  /* bring the coefficients into raster order */
  for (row = 0; row < 8; row++)
    for (col = 0; col < 8; col++)
      raster[8*row + col] = coeff[zigzag[row][col]];

  /* 1-D transform along each row */
  for (row = 0; row < 8; row++)
    for (col = 0; col < 8; col++)
    {
      acc = 0.0;
      for (k = 0; k < 8; k++)
        acc += c[k][col]*raster[8*row + k];
      rowres[8*row + col] = acc;
    }

  /* 1-D transform along each column, then round and clamp */
  for (col = 0; col < 8; col++)
    for (row = 0; row < 8; row++)
    {
      acc = 0.0;
      for (k = 0; k < 8; k++)
        acc += c[k][row]*rowres[8*k + col];

      rounded = (int)floor(acc + 0.5);
      if (rounded < -256)
        rounded = -256;
      else if (rounded > 255)
        rounded = 255;
      block[8*row + col] = rounded;
    }
}