Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1841 lines
37 KiB

//
// ITU-T G.723 Floating Point Speech Coder ANSI C Source Code. Version 1.00
// copyright (c) 1995, AudioCodes, DSP Group, France Telecom,
// Universite de Sherbrooke, Intel Corporation. All rights reserved.
//
#include "opt.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <memory.h>
#include "typedef.h"
#include "cst_lbc.h"
#include "tab_lbc.h"
#include "util_lbc.h"
#include "exc_lbc.h"
#include "timer.h"
#include "mmxutil.h"
// This file contains pitch and excitation related functions.
//------------------------------------------------------
#if COMPILE_MMX
int Estim_Int(float *Dpnt, int Start)
{
int i,k;
#define NCOR (PitchMax+1-PitchMin) // = 128 (rounded up to mult of 4)
#define NTAPS (2*SubFrLen+12) // = 132
int Pr,Indx = PitchMin;
float MaxE = 1.0f;
float MaxC = 0.0f;
float E,C2,E2,Diff;
int corr[NCOR];
typedef struct
{
short taps[4][NTAPS]; //** These two arrays need
short temp[PitchMax-3+2*SubFrLen]; //** to be 8-byte aligned
double foo;
} EstimStruct;
EstimStruct est,*e;
e = &est;
ASM
{
mov eax,e;
add e,7;
and e,0fffffff8h;
mov e,eax;
}
// Convert just the necessary portion of Dpnt to 16-bit integers,
// store the result in 'temp'. 4 guard bits are needed since the
// correlations are length 120, which means 7 guard bits are needed.
// So we use 4 so that 4+4=8 guard bits are present in the product.
FloatToShortScaled(&Dpnt[Start-PitchMax+3],e->temp,PitchMax-3+2*SubFrLen,4);
MakeAligned4(&e->temp[PitchMax-3],e->taps[0],2*NTAPS);
MakeAligned2(&e->temp[PitchMax-4],e->taps[1],2*NTAPS);
MakeAligned0(&e->temp[PitchMax-5],e->taps[2],2*NTAPS);
MakeAligned6(&e->temp[PitchMax-6],e->taps[3],2*NTAPS);
for (i=0; i<4; i++)
{
for (k=0; k<i; k++)
e->taps[i][k] = 0;
for (k=NTAPS-12+i; k<NTAPS; k++)
e->taps[i][k] = 0;
}
// Compute cross-correlations, store in corr[] array
CorrelateInt4(e->taps[0],e->temp,corr,NTAPS-12,NCOR>>2);
for (i=1; i<4; i++)
CorrelateInt4(e->taps[i],e->temp,&corr[i],NTAPS,NCOR>>2);
// Now do the actual pitch search.
Pr = Start - PitchMin;
k = PitchMax-PitchMin-3;
E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen); // first energy value
for (i=0; i < (PitchMax-2-PitchMin); i++)
{
// Update energy
E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];
// Check for new maximum
if (corr[k] > 0)
{
C2 = ((float)corr[k]) * ((float)corr[k]);
E2 = C2*MaxE;
Diff = (E2 - E*MaxC)*4.0f;
if (asint(Diff) > asint(E2) || (Diff > 0.0f && ((i - Indx) < PitchMin)))
{
Indx = i;
MaxE = E;
MaxC = C2;
}
}
Pr--;
k--;
}
return(Indx+PitchMin);
}
#endif
//------------------------------------------------------
int Estim_Pitch(float *Dpnt, int Start)
{
int i;
int Pr,Indx = PitchMin;
float MaxE = 1.0f;
float MaxC = 0.0f;
float E,C,C2,E2,Diff;
int *dptr,*eptr;
dptr = (int *)&Diff;
eptr = (int *)&E2;
Pr = Start - PitchMin + 1;
// Init the energy estimate
E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);
// Main Open loop pitch search loop
for (i=PitchMin; i <= PitchMax-3; i++)
{
Pr--;
// Update energy, compute cross
C = DotProd(&Dpnt[Start],&Dpnt[Pr],2*SubFrLen);
E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];
C2 = C*C;
// Check for new maximum
E2 = C2*MaxE;
Diff = (E2 - E*MaxC)*4.0f;
if (C > 0.0f && E > 0.0f &&
(*dptr > *eptr || (Diff > 0.0f && ((i - Indx) < PitchMin))))
{
Indx = i;
MaxE = E;
MaxC = C2;
}
}
return Indx;
}
//------------------------------------------------------
PWDEF Comp_Pw(float *Dpnt, int Start, int Olp)
{
int i,k;
float Energy,C,E,C2,MaxE,MaxC2,MaxC,Gopt;
PWDEF Pw;
// Compute target energy
Energy = DotProd(&Dpnt[Start],&Dpnt[Start],SubFrLen);
// Find position with maximum C2/E value
MaxE = 1.0f;
MaxC = 0.0f;
MaxC2 = 0.0f;
Pw.Indx = -1;
Pw.Gain = 0.0f;
k = Start - (Olp-PwRange);
E = DotProd(&Dpnt[k],&Dpnt[k],SubFrLen);
for (i=0; i <= 2*PwRange; i++)
{
C = DotProd(&Dpnt[Start],&Dpnt[k],SubFrLen);
if (E > 0.0f && C > 0.0f)
{
C2 = C*C;
if (C2*MaxE > E*MaxC2)
{
Pw.Indx = i;
MaxE = E;
MaxC = C;
MaxC2 = C2;
}
}
k--;
if (k < 0)
{
break;
}
E = E - Dpnt[k+SubFrLen]*Dpnt[k+SubFrLen] + Dpnt[k]*Dpnt[k];
}
if (Pw.Indx == -1)
{
Pw.Indx = Olp;
return Pw;
}
Pw.Gain = 0.0f;
if (MaxC2 > MaxE*Energy*0.375f)
{
if (MaxC > MaxE || MaxE == 0.0f)
Gopt = 1.0f;
else
Gopt = (float) fabs(MaxC)/MaxE;
Pw.Gain = 0.3125f*Gopt;
}
Pw.Indx = Olp - PwRange + Pw.Indx;
return Pw;
}
//--------------------------------------------------------------
void Filt_Pw(float *DataBuff, float *Dpnt, int Start, PWDEF Pw)
{
int i;
// Perform the harmonic weighting
for (i=0; i < SubFrLen; i++)
DataBuff[Start+i] = Dpnt[PitchMax+Start+i] -
Pw.Gain*Dpnt[PitchMax+Start-Pw.Indx+i];
}
//-----------------------------------------------------------------
void Find_Fcbk(float *Dpnt, float *ImpResp, LINEDEF *Line, int Sfc, enum Crate WrkRate, int flags, int UseMMX)
{
int i;
int Srate,T0_acelp;
float gain_T0;
BESTDEF Best = {0};
switch(WrkRate)
{
case Rate63:
Srate = Nb_puls[Sfc];
Best.MaxErr = -99999999.9f;
if (flags & SC_FINDB)
{
if ((*Line).Olp[Sfc>>1] < SubFrLen-2)
Find_Best(&Best, Dpnt, ImpResp, Srate, (*Line).Olp[Sfc>>1]);
else
Find_Best(&Best, Dpnt, ImpResp, Srate, SubFrLen);
}
else
{
Find_Best(&Best, Dpnt, ImpResp, Srate, SubFrLen);
if ((*Line).Olp[Sfc>>1] < SubFrLen-2)
Find_Best(&Best, Dpnt, ImpResp, Srate, (*Line).Olp[Sfc>>1]);
}
// Reconstruct the excitation
for (i=0; i < SubFrLen; i++)
Dpnt[i] = 0.0f;
for (i=0; i < Srate; i++)
Dpnt[Best.Ploc[i]] = Best.Pamp[i];
// Code the excitation
Fcbk_Pack(Dpnt, &((*Line).Sfs[Sfc]), &Best, Srate);
if (Best.UseTrn == 1)
Gen_Trn(Dpnt, Dpnt, (*Line).Olp[Sfc>>1]);
break;
case Rate53:
T0_acelp = search_T0 ((*Line).Olp[Sfc>>1]-1+(*Line).Sfs[Sfc].AcLg,
(*Line).Sfs[Sfc].AcGn, &gain_T0);
#if COMPILE_MMX
if (UseMMX)
{
(*Line).Sfs[Sfc].Ppos = ACELP_LBC_code_int(Dpnt, ImpResp, T0_acelp, Dpnt,
&(*Line).Sfs[Sfc].Mamp, &(*Line).Sfs[Sfc].Grid,
&(*Line).Sfs[Sfc].Pamp, gain_T0, flags);
}
else
#endif //COMPILE_MMX
{
(*Line).Sfs[Sfc].Ppos = ACELP_LBC_code(Dpnt, ImpResp, T0_acelp, Dpnt,
&(*Line).Sfs[Sfc].Mamp, &(*Line).Sfs[Sfc].Grid,
&(*Line).Sfs[Sfc].Pamp, gain_T0, flags);
}
(*Line).Sfs[Sfc].Tran = 0;
break;
}
return;
}
//---------------------------------------------------------
void Fcbk_Unpk(float *Tv, SFSDEF Sfs, int Olp, int Sfc, enum Crate WrkRate)
{
int i,j,Np;
float Tv_tmp[SubFrLen+4];
float acelp_gain,gain_T0;
int acelp_sign, acelp_shift, acelp_pos;
int offset, ipos, T0_acelp;
Word32 Acc0;
switch(WrkRate)
{
case Rate63:
{
Np = Nb_puls[Sfc];
for (i=0; i < SubFrLen; i++)
Tv[i] = 0.0f;
if (Sfs.Ppos >= MaxPosTable[Sfc])
return;
// Decode the amplitudes and positions
j = MaxPulseNum - Np;
Acc0 = Sfs.Ppos;
for (i = 0; i < SubFrLen/Sgrid; i++)
{
Acc0 -= CombinatorialTable[j][i];
if (Acc0 < (Word32) 0)
{
Acc0 += CombinatorialTable[j][i];
j++;
if ((Sfs.Pamp & (1 << (MaxPulseNum-j))) != 0)
Tv[Sfs.Grid + Sgrid*i] = -FcbkGainTable[Sfs.Mamp];
else
Tv[Sfs.Grid + Sgrid*i] = FcbkGainTable[Sfs.Mamp];
if (j == MaxPulseNum)
break;
}
}
if (Sfs.Tran == 1)
Gen_Trn(Tv, Tv, Olp);
break;
}
case Rate53:
{
for (i = 0; i < SubFrLen+4; i++)
Tv_tmp[i] = 0.0f;
acelp_gain = FcbkGainTable[Sfs.Mamp];
acelp_shift = Sfs.Grid;
acelp_sign = Sfs.Pamp;
acelp_pos = Sfs.Ppos;
offset = 0;
for(i=0; i<4; i++)
{
ipos = (acelp_pos & 7);
ipos = (ipos << 3) + acelp_shift + offset;
if((acelp_sign & 1)== 1)
Tv_tmp[ipos] = acelp_gain;
else
Tv_tmp[ipos] = -acelp_gain;
offset += 2;
acelp_pos = acelp_pos >> 3;
acelp_sign = acelp_sign >> 1;
}
for (i = 0; i < SubFrLen; i++)
Tv[i] = Tv_tmp[i];
T0_acelp = search_T0(Olp-1+Sfs.AcLg, Sfs.AcGn, &gain_T0);
if (T0_acelp < SubFrLen-2)
{
for (i = T0_acelp; i < SubFrLen; i++)
Tv[i] += Tv[i-T0_acelp]*gain_T0;
}
break;
}
}
return;
}
//---------------------------------------------------------------------
void Acbk_Filt(float *output,float *input,float fac,float *impresp)
{
#if OPT_ACBKF
ASM
{
push esi;
push edi;
push ebx;
mov eax,58;
mov esi,input;
mov edi,output;
mov ebx,impresp;
loop1:
fld DP[ebx+4*eax];
fmul fac;
fld DP[ebx+4*eax-4];
fmul fac;
fld DP[ebx+4*eax-8];
fmul fac;
fld DP[ebx+4*eax-12];
fmul fac; // a3 a2 a1 a0
fxch ST(3);
fadd DP[esi+4*eax]; // b0 a2 a1 a3
fxch ST(2);
fadd DP[esi+4*eax-4]; // b1 a2 b0 a3
fxch ST(1);
fadd DP[esi+4*eax-8]; // b2 b1 b0 a3
fxch ST(3);
fadd DP[esi+4*eax-12]; // b3 b1 b0 b2
fxch ST(2);
fstp DP[edi+4*eax]; // b1 b3 b2
fstp DP[edi+4*eax-4]; // b3 b2
fxch ST(1);
fstp DP[edi+4*eax-8];
fstp DP[edi+4*eax-12];
sub eax,4;
cmp eax,2;
jg loop1;
pop ebx;
pop edi;
pop esi;
}
#else
int i;
for (i=58; i>2; i-=4)
{
output[i-0] = fac*impresp[i-0] + input[i-0];
output[i-1] = fac*impresp[i-1] + input[i-1];
output[i-2] = fac*impresp[i-2] + input[i-2];
output[i-3] = fac*impresp[i-3] + input[i-3];
}
#endif
output[2] = fac*impresp[2] + input[2];
output[1] = fac*impresp[1] + input[1];
output[0] = fac*impresp[0] + input[0];
}
//---------------------------------------------------------------------
#if COMPILE_MMX
void Find_AcbkInt(float *Tv, float *ImpResp, float *PrevExc, LINEDEF
*Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
{
int i,j,k;
float RezBuf[SubFrLen+ClPitchOrd-1];
short TvInt[SubFrLen];
int Tvxi[SubFrLen];
short *lPntInt,*sPntInt,*PtrInt;
int CorBufInt[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
int *lPntd;
long Acc0l, Acc1l;
int Olp,Lid,Gid,Hb,t,k1,k2;
int Bound[3];
int Lag1, Lag2;
int MaxInt,off_filt;
int shift,Tshift,mx;
DECLARE_SHORT(FltBuf0Int,63);
DECLARE_SHORT(FltBuf1Int,63);
DECLARE_SHORT(FltBuf2Int,63);
DECLARE_SHORT(FltBuf3Int,63);
DECLARE_SHORT(FltBuf4Int,63) ;
DECLARE_SHORT(CorVctInt,4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)) ;
DECLARE_SHORT(RezTmpInt,16) ;
DECLARE_SHORT(RezBufInt,SubFrLen+ClPitchOrd-1) ;
DECLARE_SHORT(ImpRespInt,63);
DECLARE_SHORT(Ix,2*SubFrLen+16) ;
DECLARE_SHORT(Rx,2*SubFrLen+16);
DECLARE_INT(Temp,64);
ALIGN_ARRAY(FltBuf0Int);
ALIGN_ARRAY(FltBuf1Int);
ALIGN_ARRAY(FltBuf2Int);
ALIGN_ARRAY(FltBuf3Int);
ALIGN_ARRAY(FltBuf4Int);
ALIGN_ARRAY(RezBufInt);
ALIGN_ARRAY(RezTmpInt);
ALIGN_ARRAY(ImpRespInt);
ALIGN_ARRAY(CorVctInt);
ALIGN_ARRAY(Ix);
ALIGN_ARRAY(Rx);
ALIGN_ARRAY(Temp);
Olp = (*Line).Olp[Sfc>>1];
Lid = Pstep;
Gid = 0;
Hb = 3 + (Sfc & 1);
// For even frames only
if ((Sfc & 1) == 0)
{
if (Olp == PitchMin)
Olp++;
if (Olp > (PitchMax-5))
Olp = PitchMax-5;
}
if (flags & SC_LAG1)
{
lPntInt = &CorVctInt[20];
k1 = 1;
k2 = 2;
}
else
{
lPntInt = CorVctInt;
k1 = 0;
k2 = Hb;
}
//TIMER_SPOT_ON(Conversion);
//Convert Tv to 16-bit
ConstFloatToInt(Tv, Tvxi, SubFrLen, 32768.0f);
for(i=0; i<SubFrLen; i++) TvInt[i] = (short)(((Tvxi[i]<<1)+0x00008000)>>16);
//Convert ImpResp to 16-bit
//Scale by 2^14 & truncate bits right of decimal
ConstFloatToShort(ImpResp,ImpRespInt,SubFrLen,16384.0f);
for (k=k1; k<k2; k++)
{
lPntd = &CorBufInt[k*20];
// Get residual from the excitation buffer
Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);
//Convert RezBuf to 16-bit
ConstFloatToShort(RezBuf,RezBufInt,SubFrLen+ClPitchOrd-1,1.0f);
// Filter the last one (ClPitchOrd-1) using the impulse responce
//TIMER_SPOT_OFF(Conversion);
//TIMER_SPOT_ON(Convolution);
ab2abbcw(&RezBufInt[ClPitchOrd-1], Rx, SubFrLen);
j=0;
for(i=0; i<SubFrLen; i+=2){
Ix[j] =Ix[j+2]=ImpRespInt[SubFrLen-1-i];
Ix[j+1]=Ix[j+3]=ImpRespInt[SubFrLen-2-i];
j+=4;
}
for(i=0; i<16; i++)
Ix[j+i]=0;
ConvMMX(Rx, Ix, Temp, 60);
for(i=0; i<SubFrLen; i++) FltBuf4Int[i] = (short)(((Temp[i]<<1)+0x00008000)>>16);
//TIMER_SPOT_OFF(Convolution);
//TIMER_SPOT_ON(FbufCalc);
// Update the others (ClPitchOrd-2 down to 0)
Acc0l = ((RezBufInt[3]<<13)+0x00004000)>>15;
FltBuf3Int[0] = (short)Acc0l;
Acc0l = ((RezBufInt[2]<<13)+0x00004000)>>15;
FltBuf2Int[0] = (short)Acc0l;
Acc0l = ((RezBufInt[1]<<13)+0x00004000)>>15;
FltBuf1Int[0] = (short)Acc0l;
Acc0l = ((RezBufInt[0]<<13)+0x00004000)>>15;
FltBuf0Int[0] = (short)Acc0l;
DupRezBuf(RezBufInt,RezTmpInt);
FBufCalcInt(FltBuf4Int,FltBuf3Int,ImpRespInt,RezTmpInt,0);
FBufCalcInt(FltBuf3Int,FltBuf2Int,ImpRespInt,RezTmpInt,1);
FBufCalcInt(FltBuf2Int,FltBuf1Int,ImpRespInt,RezTmpInt,2);
FBufCalcInt(FltBuf1Int,FltBuf0Int,ImpRespInt,RezTmpInt,3);
//TIMER_SPOT_OFF(FbufCalc);
//TIMER_SPOT_ON(Dots);
// Compute the cross products with the signal
*lPntd++ = DotMMX60(TvInt,FltBuf0Int)<<1;
*lPntd++ = DotMMX60(TvInt,FltBuf1Int)<<1;
*lPntd++ = DotMMX60(TvInt,FltBuf2Int)<<1;
*lPntd++ = DotMMX60(TvInt,FltBuf3Int)<<1;
*lPntd++ = DotMMX60(TvInt,FltBuf4Int)<<1;
// Compute the energies
*lPntd++ = DotMMX60(FltBuf0Int,FltBuf0Int)<<1;
*lPntd++ = DotMMX60(FltBuf1Int,FltBuf1Int)<<1;
*lPntd++ = DotMMX60(FltBuf2Int,FltBuf2Int)<<1;
*lPntd++ = DotMMX60(FltBuf3Int,FltBuf3Int)<<1;
*lPntd++ = DotMMX60(FltBuf4Int,FltBuf4Int)<<1;
// Compute the between crosses
*lPntd++ = DotMMX60(FltBuf1Int,FltBuf0Int)<<2;
*lPntd++ = DotMMX60(FltBuf2Int,FltBuf0Int)<<2;
*lPntd++ = DotMMX60(FltBuf2Int,FltBuf1Int)<<2;
*lPntd++ = DotMMX60(FltBuf3Int,FltBuf0Int)<<2;
*lPntd++ = DotMMX60(FltBuf3Int,FltBuf1Int)<<2;
*lPntd++ = DotMMX60(FltBuf3Int,FltBuf2Int)<<2;
*lPntd++ = DotMMX60(FltBuf4Int,FltBuf0Int)<<2;
*lPntd++ = DotMMX60(FltBuf4Int,FltBuf1Int)<<2;
*lPntd++ = DotMMX60(FltBuf4Int,FltBuf2Int)<<2;
*lPntd++ = DotMMX60(FltBuf4Int,FltBuf3Int)<<2;
//TIMER_SPOT_OFF(Dots);
}
//Convert k1 through k2 indices of CorBufInt to 16-bit
// values
Acc1l = 0L;
for(j=k1; j<k2; j++)
{
for(i=0; i<20; i++)
{
Acc0l = abs(CorBufInt[j*20 + i]);
if( Acc0l > Acc1l) Acc1l = Acc0l;
}
}
//Need a convert_long_to_short routine
shift = norm(Acc1l);
for(j=k1; j<k2; j++)
{
for(i=0; i<20; i++)
{
CorBufInt[j*20 + i]=CorBufInt[j*20 + i]<<shift;
CorBufInt[j*20 + i] += 0x00008000L; //round up to 16 MSBs
*lPntInt++=(short)(CorBufInt[j*20 + i]>>16);
}
}
/* Test potential error */
Lag1 = Olp - Pstep;
Lag2 = Olp - Pstep + Hb - 1;
off_filt = Test_Err(Lag1, Lag2, CodStat);
Bound[0] = NbFilt085_min + (off_filt << 2);
if(Bound[0] > NbFilt085) Bound[0] = NbFilt085;
Bound[1] = NbFilt170_min + (off_filt << 3);
if(Bound[1] > NbFilt170) Bound[1] = NbFilt170;
Bound[2] = 85; //Use subset table in the case t=2
MaxInt = 0;
for (k=k1; k<k2; k++)
{
// Select Quantization table
t = 0;
if (WrkRate == Rate63)
{
if ((Sfc & 1) == 0)
{
if (Olp-Pstep+k >= SubFrLen-2)
t = 1;
}
else
{
if (Olp >= SubFrLen-2)
t = 1;
}
}
else
t = 1;
/* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
Else, use full table with limited Bound.*/
//if (t==1 && (flags & SC_GAIN) && Bound[t]==NbFilt170)
if ((WrkRate == Rate53) && (flags & SC_GAIN) && (Bound[t]==NbFilt170))
t = 2;
// Search for maximum
//t=1;
sPntInt = AcbkGainTablePtrInt[t];
PtrInt = &CorVctInt[k*20];
//TIMER_SPOT_ON(CodeBook);
CodeBkSrch(PtrInt, sPntInt, Bound[t], &Gid, &MaxInt);
//TIMER_SPOT_OFF(CodeBook);
if (t==2)
Gid = GainScramble[Gid];
//else
//Gid = Gid;
Lid = k;
}
// Modify Olp for even sub frames
if ((Sfc & 1) == 0)
{
Olp = Olp - Pstep + Lid;
Lid = Pstep;
}
// Save Lag, Gain and Olp
(*Line).Sfs[Sfc].AcLg = Lid;
(*Line).Sfs[Sfc].AcGn = Gid;
(*Line).Olp[Sfc>>1] = Olp;
//ASM emms;
/* ------------------------------ FLOAT -----------------------*/
// Decode the Acbk contribution and subtract it
Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);
//TIMER_SPOT_ON(LastConvolv);
mx = FloatToShortScaled(RezBuf, RezBufInt, SubFrLen+ClPitchOrd-1, 3);
Tshift = 11 - (mx-126);
if(mx==0) Tshift = 0;
ab2abbcw(RezBufInt, Rx, 60);
ConvMMX(Rx, Ix, Temp, SubFrLen);
//ASM emms;
if (Tshift >=0) {
for(j=0; j<SubFrLen; j++){
Temp[j] = Temp[j]>>Tshift;
Tv[j]=((float)(Tvxi[j] - Temp[j]))*0.00003052f;
}
}
else
{
for(j=0; j<SubFrLen; j++){
Temp[j] = Temp[j]<<(-Tshift);
Tv[j]=((float)(Tvxi[j] - Temp[j]))*0.00003052f;
}
}
//TIMER_SPOT_OFF(LastConvolv);
}
#endif //COMPILE_MMX
short norm(long L_var1)
{
short var_out;
if (L_var1 == 0L) {
var_out = (short)0;
}
else {
if (L_var1 == (long)0xffffffffL) {
var_out = (short)31;
}
else {
if (L_var1 < 0L) {
L_var1 = ~L_var1;
}
for(var_out = (short)0;L_var1 < 0x40000000L;var_out++)
{
L_var1 <<= 1L;
}
}
}
return(var_out);
}
/*---------------------------------------------------------------------*/
void Find_Acbk(float *Tv, float *ImpResp, float *PrevExc, LINEDEF
*Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
{
int i,j,k;
float Acc0,Max;
float RezBuf[SubFrLen+ClPitchOrd-1];
float FltBuf[ClPitchOrd][SubFrLen];
float CorVct[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
float *lPnt;
float *sPnt,*Ptr;
int Olp,Lid,Gid,Hb,t,k1,k2;
int Bound[3];
int Lag1, Lag2;
int off_filt;
Olp = (*Line).Olp[Sfc>>1];
Lid = Pstep;
Gid = 0;
Hb = 3 + (Sfc & 1);
// For even frames only
if ((Sfc & 1) == 0)
{
if (Olp == PitchMin)
Olp++;
if (Olp > (PitchMax-5))
Olp = PitchMax-5;
}
if (flags & SC_LAG1)
{
lPnt = &CorVct[20];
k1 = 1;
k2 = 2;
}
else
{
lPnt = CorVct;
k1 = 0;
k2 = Hb;
}
for (k=k1; k<k2; k++)
{
// Get residual from the exitation buffer
Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);
// Filter the last one (ClPitchOrd-1) using the impulse responce
for (i=0; i < SubFrLen; i++)
FltBuf[ClPitchOrd-1][i] = DotRev(&RezBuf[ClPitchOrd-1],ImpResp,i+1);
// Update the others (ClPitchOrd-2 down to 0)
for (i=ClPitchOrd-2; i >= 0; i --)
{
FltBuf[i][0] = RezBuf[i]*0.5f;
Acbk_Filt(&FltBuf[i][1],&FltBuf[i+1][0],RezBuf[i],&ImpResp[1]);
// for (j = 1; j < SubFrLen; j++)
// FltBuf[i][j] = RezBuf[i]*ImpResp[j] + FltBuf[i+1][j-1];
}
// Compute the cross products with the signal
for (i=0; i < ClPitchOrd; i++)
*lPnt++ = DotProd(Tv, FltBuf[i], SubFrLen);
// Compute the energies
for (i=0; i < ClPitchOrd; i++)
*lPnt++ = 0.5f*DotProd(FltBuf[i], FltBuf[i], SubFrLen);
// Compute the between crosses
for (i=1; i < ClPitchOrd; i++)
for (j = 0; j < i; j++)
*lPnt++ = DotProd(FltBuf[i], FltBuf[j], SubFrLen);
}
/* Test potential error */
Lag1 = Olp - Pstep;
Lag2 = Olp - Pstep + Hb - 1;
off_filt = Test_Err(Lag1, Lag2, CodStat);
Bound[0] = NbFilt085_min + (off_filt << 2);
if(Bound[0] > NbFilt085) Bound[0] = NbFilt085;
Bound[1] = NbFilt170_min + (off_filt << 3);
if(Bound[1] > NbFilt170) Bound[1] = NbFilt170;
Bound[2] = 85; //Use subset table in the case t=2
Max = 0.0f;
for (k=k1; k<k2; k++)
{
// Select Quantization table
t = 0;
if (WrkRate == Rate63)
{
if ((Sfc & 1) == 0)
{
if (Olp-Pstep+k >= SubFrLen-2)
t = 1;
}
else
{
if (Olp >= SubFrLen-2)
t = 1;
}
}
else
t = 1;
/* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
Else, use full table with limited Bound.*/
if (t==1 && (flags & SC_GAIN) && Bound[t]==NbFilt170)
t = 2;
// Search for maximum
sPnt = AcbkGainTablePtr[t];
Ptr = &CorVct[k*20];
for (i=0; i < Bound[t]; i++)
{
Acc0 = Ptr[0]*sPnt[0] + Ptr[1]*sPnt[1] +
Ptr[2]*sPnt[2] + Ptr[3]*sPnt[3] +
Ptr[4]*sPnt[4] + Ptr[5]*sPnt[5] +
Ptr[6]*sPnt[6] + Ptr[7]*sPnt[7] +
Ptr[8]*sPnt[8] + Ptr[9]*sPnt[9] +
Ptr[10]*sPnt[10] + Ptr[11]*sPnt[11] +
Ptr[12]*sPnt[12] + Ptr[13]*sPnt[13] +
Ptr[14]*sPnt[14] + Ptr[15]*sPnt[15] +
Ptr[16]*sPnt[16] + Ptr[17]*sPnt[17] +
Ptr[18]*sPnt[18] + Ptr[19]*sPnt[19];
sPnt += 20;
if (asint(Acc0) > asint(Max)) // integer cmp, since Max is not negative.
{
Max = Acc0;
if (t==2)
Gid = GainScramble[i];
else
Gid = i;
Lid = k;
}
}
}
// Modify Olp for even sub frames
if ((Sfc & 1) == 0)
{
Olp = Olp - Pstep + Lid;
Lid = Pstep;
}
// Save Lag, Gain and Olp
(*Line).Sfs[Sfc].AcLg = Lid;
(*Line).Sfs[Sfc].AcGn = Gid;
(*Line).Olp[Sfc>>1] = Olp;
// Decode the Acbk contribution and subtract it
Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);
for (i=0; i < SubFrLen; i++)
Tv[i] -= DotRev(RezBuf,ImpResp,i+1);
}
//-----------------------------------------------------------------
void Get_Rez(float *Tv, float *PrevExc, int Lag)
{
int i,n,div,mod;
float *src,*dst;
for (i=0; i < ClPitchOrd/2; i++)
Tv[i] = PrevExc[PitchMax - Lag - ClPitchOrd/2 + i];
n = SubFrLen+ClPitchOrd/2;
div = n/Lag;
mod = n%Lag;
dst = &Tv[ClPitchOrd/2];
src = &PrevExc[PitchMax-Lag];
for (i=0; i<div; i++)
{
memcpy(dst,src,4*Lag);
dst += Lag;
}
memcpy(dst,src,4*mod);
}
//-----------------------------------------------------------------
void Decod_Acbk(float *Tv, float *PrevExc, int Olp, int Lid, int Gid, enum Crate WrkRate)
{
int i;
float RezBuf[SubFrLen+ClPitchOrd-1];
float *sPnt;
Get_Rez(RezBuf, PrevExc, (Olp + Lid) - Pstep);
// Select Quantization tables
i = 0;
if (WrkRate == Rate63)
{
if (Olp >= (SubFrLen-2))
i++;
}
else
i=1;
sPnt = AcbkGainTablePtr[i] + Gid*20;
// Compute output vector
for (i=0; i < SubFrLen; i++)
Tv[i] = RezBuf[i]*sPnt[0] + RezBuf[i+1]*sPnt[1] + RezBuf[i+2]*sPnt[2] +
RezBuf[i+3]*sPnt[3] + RezBuf[i+4]*sPnt[4];
}
//-----------------------------------------------
int Comp_Info(float Buff[60], int Olp)
{
int i;
float Acc0;
float Tenr;
float Ccr,Enr;
int Indx;
if (Olp > (PitchMax-3))
Olp = (PitchMax-3);
Indx = Olp;
Ccr = 0.0f;
for (i=Olp-3; i <= Olp+3; i++)
{
Acc0 = DotProd(&Buff[PitchMax+Frame-2*SubFrLen],
&Buff[PitchMax+Frame-2*SubFrLen-i],2*SubFrLen);
if (Acc0 > Ccr)
{
Ccr = Acc0;
Indx = i;
}
}
// Compute target energy
Tenr = DotProd(&Buff[PitchMax+Frame-2*SubFrLen],
&Buff[PitchMax+Frame-2*SubFrLen],2*SubFrLen);
// Compute best energy
Enr = DotProd(&Buff[PitchMax+Frame-2*SubFrLen-Indx],
&Buff[PitchMax+Frame-2*SubFrLen-Indx],2*SubFrLen);
if (Ccr <= 0.0f)
return 0;
if (((0.125f*Enr*Tenr) - (Ccr*Ccr)) < 0.0f)
return Indx;
else
return 0;
}
//------------------------------------------------------------------
void Regen(float *DataBuff, float *Buff, int Lag, float Gain,
int Ecount, int *Sd)
{
int i;
// Test for clearing
if (Ecount >= ErrMaxNum)
{
for (i = 0; i < Frame; i++)
DataBuff[i] = 0.0f;
for (i = 0; i < Frame+PitchMax; i++)
Buff[i] = 0.0f;
}
else
{
// Interpolate accordingly to the voicing estimation
if (Lag != 0)
{
// Voiced case
for (i = 0; i < Frame; i++)
Buff[PitchMax+i] = Buff[PitchMax-Lag+i];
for (i = 0; i < Frame; i++)
DataBuff[i] = Buff[PitchMax+i] = Buff[PitchMax+i] * 0.75f;
}
else
{
//Unvoiced case
for (i = 0; i < Frame; i++)
DataBuff[i] = Gain*(float)Rand_lbc(Sd)*(1.0f/16384.0f);
//Clear buffer to reset memory
for (i = 0; i < Frame+PitchMax; i++)
Buff[i] = 0.0f;
}
}
}
//------------------------------------------------------
//Comp_Lpf
//------------------------------------------------------
//Find_B
//------------------------------------------------------
//Find_F
//------------------------------------------------------
//Get_Ind
//------------------------------------------------------
//Filt_Lpf
//---------------------------------------------------------------
int search_T0 (int T0, int Gid, float *gain_T0)
{
int T0_mod;
T0_mod = T0+epsi170[Gid];
*gain_T0 = gain170[Gid];
return(T0_mod);
}
/*
**
** Function: Update_Err()
**
** Description: Estimation of the excitation error associated
** to the excitation signal when it is disturbed at
** the decoder, the disturbing signal being filtered
** by the long term synthesis filters
** one value for (SubFrLen/2) samples
** Updates the table CodStat.Err
**
** Links to text: Section
**
** Arguments:
**
** int Olp Center value for pitch delay
** int AcLg Offset value for pitch delay
** int AcGn Index of Gain LT filter
**
** Outputs: None
**
** Return value: None
**
*/
#define MAX 256.0f
void Update_Err(int Olp, int AcLg, int AcGn, CODDEF *CodStat)
{
int i, iz, temp2;
int Lag;
float Worst1, Worst0, wtemp;
float beta,*ptr_tab;
Lag = Olp - Pstep + AcLg;
/* Select Quantization tables */
i = 0 ;
ptr_tab = tabgain85;
if ( CodStat->WrkRate == Rate63 ) {
if ( Olp >= (SubFrLen-2) ) ptr_tab = tabgain170;
}
else {
ptr_tab = tabgain170;
}
beta = ptr_tab[(int)AcGn];
if(Lag <= (SubFrLen/2))
{
Worst0 = CodStat->Err[0]*beta + Err0;
Worst1 = Worst0;
}
else
{
iz = (Lag*1092) >> 15;
temp2 = 30*(iz+1);
if (temp2 != Lag)
{
if(iz == 1)
{
Worst0 = CodStat->Err[0]*beta + Err0;
Worst1 = CodStat->Err[1]*beta + Err0;
if (Worst0 > Worst1)
Worst1 = Worst0;
else
Worst0 = Worst1;
}
else
{
wtemp = CodStat->Err[iz-1]*beta + Err0;
Worst0 = CodStat->Err[iz-2]*beta + Err0;
if (wtemp > Worst0) Worst0 = wtemp;
Worst1 = CodStat->Err[iz]*beta + Err0;
if (wtemp > Worst1) Worst1 = wtemp;
}
}
else
{
Worst0 = CodStat->Err[iz-1]*beta + Err0;
Worst1 = CodStat->Err[iz]*beta + Err0;
}
}
if (Worst0 > MAX) Worst0 = MAX;
if (Worst1 > MAX) Worst1 = MAX;
for(i=4; i>=2; i--)
CodStat->Err[i] = CodStat->Err[i-2];
CodStat->Err[0] = Worst0;
CodStat->Err[1] = Worst1;
return;
}
/*
**
** Function: Test_Err()
**
** Description: Check the error excitation maximum for
** the subframe and computes an index iTest used to
** calculate the maximum nb of filters (in Find_Acbk) :
** Bound = Min(Nmin + iTest x pas, Nmax) , with
** AcbkGainTable085 : pas = 2, Nmin = 51, Nmax = 85
** AcbkGainTable170 : pas = 4, Nmin = 93, Nmax = 170
** iTest depends on the relative difference between
** errmax and a fixed threshold
**
** Links to text: Section
**
** Arguments:
**
** Word16 Lag1 1st long term Lag of the tested zone
** Word16 Lag2 2nd long term Lag of the tested zone
**
** Outputs: None
**
** Return value:
** Word16 index iTest used to compute Acbk number of filters
*/
int Test_Err(int Lag1, int Lag2, CODDEF *CodStat)
{
int i, i1, i2;
int zone1, zone2, iTest;
float Err_max;
i2 = Lag2 + ClPitchOrd/2;
zone2 = i2/30;
i1 = - SubFrLen + 1 + Lag1 - ClPitchOrd/2;
if (i1 <= 0) i1 = 1;
zone1 = i1/30;
Err_max = -1.0f;
for(i=zone2; i>=zone1; i--)
{
if (CodStat->Err[i] > Err_max)
Err_max = CodStat->Err[i];
}
if((Err_max > ThreshErr) || (CodStat->SinDet < 0 ) )
{
iTest = 0;
//ount_clip++;
}
else
{
iTest = (int)(ThreshErr - Err_max);
}
return(iTest);
}
#if COMPILE_MMX
#if ASM_FACBK
int DotMMX60(short *ind, short *oud)
{
int dotprod;
#define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define acc0 mm6
#define inx esi
#define oux edi
#define dot eax
#define jcnt ebx
#define l(n) ASM movq reg##n,QP[inx+8*n]
#define m(n) ASM pmaddwd reg##n,QP[oux+8*n]
#define a(n) ASM paddd acc0,reg##n
ASM
{
mov inx,ind;
mov oux,oud;
mov jcnt,5;
}
//Begin loop
ASM pxor acc0,acc0;
ASM pxor reg1,reg1; //make first a(1) a nop
ASM pxor reg2,reg2; //make first a(2) a nop
inner:
//------------------
l(0);
a(1);
m(0);
l(1);
a(2);
m(1);
l(2);
a(0);
m(2);
//-------------------
ASM add inx,24;
ASM add oux,24;
ASM sub jcnt,1;
ASM jg inner;
a(1);
a(2);
ASM
{
//Add the two halves of acc0
movq reg0,acc0;
psrlq acc0,32;
paddd acc0,reg0;
movd dot,acc0; //store
mov dotprod,dot
}
ASM emms;
return(dotprod);
#undef reg0
#undef reg1
#undef reg2
#undef acc0
#undef inx
#undef oux
#undef dot
#undef jcnt
#undef l
#undef m
#undef a
}
#else
int DotMMX60(short *in, short *out)
{
int dotprod;
int j;
dotprod=0;
for(i=0; i < 60; i++)
{
dotprod += in[j]*out[j];
}
return(dotprod);
}
#endif
#if ASM_FACBK
void DupRezBuf(short *rezbuf, short *reztemp)
{
#define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define rbuf edi
#define rztmp esi
//rezbuf duplication operations
#define cr(r0,r1) ASM movq reg##r0,reg##r1
#define uph(r0) ASM punpckhwd reg##r0,reg##r0
#define upl(r0) ASM punpcklwd reg##r0,reg##r0
#define sto(r0,i) ASM movq QP[rztmp+8*i],reg##r0
#define sl(r0) ASM psllw reg##r0,1
#define l(r0) ASM movq reg##r0,QP[rbuf]
//Duplicate first 4 rezbuf values 4 times each
// and store into 4 QWORDS in reztemp
//Multiply by two while we're at it
ASM mov rbuf,rezbuf;
ASM mov rztmp,reztemp;
l(0);
sl(0);
cr(2,0);
uph(0);
upl(2);
cr(1,0);
cr(3,2);
uph(0);
sto(0,0);
upl(1);
sto(1,1);
uph(2);
sto(2,2);
upl(3);
sto(3,3);
ASM emms;
}
#undef reg0
#undef reg1
#undef reg2
#undef reg3
#undef rbuf
#undef rztmp
#undef cr
#undef uph
#undef upl
#undef sto
#undef sl
#undef l
#endif
#if ASM_FACBK
void FBufCalcInt(short *fi, short *fo, short *impresp, short *reztemp, int n)
{
#define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define reg4 mm4
#define reg5 mm5
#define reg6 mm6
#define reg7 mm1
#define reg8 mm7
#define fbufi esi
#define rbuf edi
#define imp edx
#define fbufo ebx
#define jcnt ecx
#define rzv eax
//Diagonal array operations
#define l1(r0,j) ASM movq reg##r0,QP[fbufi+8*j]
#define l2(r0,j) ASM movq reg##r0,QP[fbufi+8+8*j]
#define c3(r0) ASM movq reg##r0,QP[rbuf+8*rzv]
#define m1(r0,j) ASM pmulhw reg##r0,QP[imp+8+8*j]
#define a1(r0,r1) ASM paddsw reg##r0,reg##r1
#define sto(r0,j) ASM movq QP[fbufo+8+8*j], reg##r0
#define s1(r0) ASM psrlq reg##r0,48
#define s2(r0) ASM psllq reg##r0,16
#define or(r0,r1) ASM por reg##r0,reg##r1
//Loop setup
ASM
{
mov rbuf,reztemp
mov jcnt,5;
mov fbufi,fi;
mov fbufo,fo;
mov imp,impresp;
mov rzv,n
}
//Compute initial values
//Zero-th QWORD is different
ASM
{
movq reg0,QP[fbufo];
psllq reg0,48;
psrlq reg0,48;
//zero-th part of fbufo now in reg0
movq reg2,QP[rbuf+8*rzv];
pmulhw reg2,QP[imp+2];
paddsw reg2,QP[fbufi];
psllq reg2,16;
por reg0,reg2;
movq QP[fbufo],reg0;
}
//begin loop
l2(0,0);
l1(1,0);
s2(0);
s1(1);
c3(2);
m1(2,0);
l2(3,1);
l1(4,1);
s2(3);
s1(4);
or(0,1);
inner:
//-------------------------
l2(6,2);
a1(0,2);
c3(5);
m1(5,1);
or(3,4);
l1(7,2);
s2(6);
sto(0,0);
s1(7);
l2(0,3);
a1(3,5);
c3(8);
m1(8,2);
or(6,7);
l1(1,3);
s2(0);
sto(3,1);
s1(1);
l2(3,4);
a1(6,8);
c3(2);
m1(2,3);
or(0,1);
l1(4,4);
s2(3);
sto(6,2);
s1(4);
//-------------------------
ASM add fbufo,24;
ASM add fbufi,24;
ASM add imp,24;
ASM sub jcnt,1;
ASM jg inner;
ASM emms;
}
#undef reg0
#undef reg1
#undef reg2
#undef reg3
#undef reg4
#undef reg5
#undef reg6
#undef reg7
#undef reg8
#undef fbufi
#undef rbuf
#undef imp
#undef fbufo
#undef jcnt
#undef rzv
#undef l1
#undef l2
#undef c3
#undef m1
#undef a1
#undef sto
#undef s1
#undef s2
#undef or
#else
void FBufCalcInt(short *fi, short *fo, short *impresp, short *rezbuf, short *reztemp, int n)
{
long Acc0l;
int j;
#define MAX16 32767
#define MIN16 -32768
for(j=1; j<SubFrLen; j++)
{
Acc0l = fi[j-1];
Acc0l += (((rezbuf[4-n]<<1)*impresp[j]))>>16;
if (Acc0l > MAX16) Acc0l = MAX16;
else if(Acc0l < MIN16) Acc0l = MIN16;
fo[j] = (short)(Acc0l);
}
}
#endif
#if ASM_FACBK
//#if 0
void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
{
#define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define reg4 mm4
#define acc1 mm5
#define acc0 mm6
#define gdx mm3
#define gd mm7
#define icx mm2
#define lp esi
#define sp edi
#define maxx eax
#define gidx edx
#define icnt ebx
// In the following macros, 'n' is the column number.
#define l(n) ASM movq reg##n,QP[lp+8*n]
#define m(n) ASM pmaddwd reg##n,QP[sp+8*n]
#define a(n) ASM paddd acc0,reg##n
ASM
{
mov sp,spint;
mov lp,lpint;
mov icnt,numvecs;
mov gidx,gid;
mov maxx,max;
}
ASM movd gd,numvecs;//load gd with top codebook index
ASM movd acc1,DP[maxx];//load acc1 with previous max
//Begin loop
outer:
//inner:
ASM pxor acc0,acc0;
ASM pxor reg1,reg1; //make first a(1) a nop
ASM pxor reg2,reg2; //make first a(2) a nop
//--------------------------
l(0);
a(1);
m(0);
l(1);
a(2);
m(1);
l(2);
a(0);
m(2);
l(3);
a(1);
m(3);
l(4);
a(2);
m(4);
ASM add sp,40;
a(3);
ASM movq gdx,gd;
ASM movd icx,icnt;
a(4);
ASM
{
movq reg0,acc0;
psrlq acc0,32;
pxor gd,icx;//gd=MASK
paddd acc0,reg0;
movq reg0,acc0; //copy acc0
movq reg1,acc1; //copy old max
pxor reg1,acc0
pcmpgtd reg0,acc1; //reg0=0xFF or 0x00
pand reg1,reg0; //reg1=MASK or 0x00
pxor acc1,reg1; //acc1=acc0 or acc1
pand gd,reg0; //gd=MASK or 0x00
pxor gd,gdx; //gd=icnt or previous value
sub icnt,1;
jg outer;
}
ASM movd reg0,numvecs;
ASM psubd reg0,gd;
ASM movd DP[gidx],reg0;//return gid
ASM movd DP[maxx],acc1;//return max
ASM emms;
}
#undef reg0
#undef reg1
#undef reg2
#undef reg3
#undef reg4
#undef acc1
#undef acc0
#undef gdx
#undef gd
#undef icx
#undef lp
#undef sp
#undef maxx
#undef gidx
#undef icnt
#undef l
#undef m
#undef a
#else
void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
{
int acc0;
int i,j;
for(i=0; i < numvecs; i++)
{
acc0 = 0;
for(j=0; j<20; j++)
acc0 += lpint[j]*spint[j];
if (acc0 > *max)
{
*max = acc0;
*gid = i;
}
spint += 20;
}
}
#endif
#endif //COMPILE_MMX