// ITU-T G.723 Floating Point Speech Coder ANSI C Source Code. Version 1.00
// copyright (c) 1995, AudioCodes, DSP Group, France Telecom,
// Universite de Sherbrooke, Intel Corporation. All rights reserved.
#include "opt.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <memory.h>
#include "typedef.h"
#include "cst_lbc.h"
#include "tab_lbc.h"
#include "util_lbc.h"
#include "exc_lbc.h"
#include "timer.h"
#include "mmxutil.h"
// This file contains pitch and excitation related functions.
// Estim_Int: open-loop pitch search that takes its cross-correlations from
// 16-bit integer kernels (CorrelateInt4) while tracking the energy of the
// lagged window in floating point directly from Dpnt.
//
//   Dpnt   sample buffer
//   Start  index in Dpnt of the first sample of the analysed window
//
// Returns Indx+PitchMin, where Indx is the loop index of the selected lag.
int Estim_Int(float *Dpnt, int Start)
{
  int i,k;

#define NCOR (PitchMax+1-PitchMin)  // = 128 (rounded up to mult of 4)
#define NTAPS (2*SubFrLen+12)  // = 132

  int  Pr,Indx = PitchMin;
  float MaxE = 1.0f;
  float MaxC = 0.0f;
  float E,C2,E2,Diff;
  int corr[NCOR];

  typedef struct {
    short taps[4][NTAPS];               //** These two arrays need
    short temp[PitchMax-3+2*SubFrLen];  //** to be 8-byte aligned
    double foo;  // presumably slack so the rounded-up pointer still fits in est
  } EstimStruct;

  EstimStruct est,*e;
  e = &est;

  // Round e up to the next 8-byte boundary.  The arithmetic must be done
  // in eax: modifying e in memory and then storing the untouched eax back
  // would leave e exactly where it started.
  // One instruction per line: ASM presumably expands to MSVC __asm, where
  // ';' starts a comment that runs to the end of the line.
  ASM
  {
    mov eax,e;
    add eax,7;
    and eax,0fffffff8h;
    mov e,eax;
  }

  // Convert just the necessary portion of Dpnt to 16-bit integers,
  // store the result in 'temp'.  4 guard bits are needed since the
  // correlations are length 120, which means 7 guard bits are needed.
  // So we use 4 so that 4+4=8 guard bits are present in the product.
  // NOTE(review): no statement in this function writes e->temp before it
  // is read below - the conversion call described above looks like it was
  // lost from this copy of the file; confirm against the reference source.

  MakeAligned4(&e->temp[PitchMax-3],e->taps[0],2*NTAPS);
  MakeAligned2(&e->temp[PitchMax-4],e->taps[1],2*NTAPS);
  MakeAligned0(&e->temp[PitchMax-5],e->taps[2],2*NTAPS);
  MakeAligned6(&e->temp[PitchMax-6],e->taps[3],2*NTAPS);

  // Zero the first i and the last 12-i entries of tap set i.
  for (i=0; i<4; i++) {
    for (k=0; k<i; k++)
      e->taps[i][k] = 0;
    for (k=NTAPS-12+i; k<NTAPS; k++)
      e->taps[i][k] = 0;
  }

  // Compute cross-correlations, store in corr[] array
  CorrelateInt4(e->taps[0],e->temp,corr,NTAPS-12,NCOR>>2);
  for (i=1; i<4; i++)
    CorrelateInt4(e->taps[i],e->temp,&corr[i],NTAPS,NCOR>>2);

  // Now do the actual pitch search.
  Pr = Start - PitchMin;
  k = PitchMax-PitchMin-3;
  E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);  // first energy value

  // NOTE(review): Estim_Pitch seeds its energy one sample later
  // (Start-PitchMin+1) and decrements Pr before the update; here the first
  // update subtracts Dpnt[Pr+2*SubFrLen], which is outside the seeded
  // window.  Left unchanged - verify against the reference implementation.
  for (i=0; i < (PitchMax-2-PitchMin); i++) {
    // Update energy
    E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];

    // Check for new maximum (only positive correlations are candidates)
    if (corr[k] > 0) {
      C2 = ((float)corr[k]) * ((float)corr[k]);
      E2 = C2*MaxE;
      Diff = (E2 - E*MaxC)*4.0f;
      // Accept when 4*(E2 - E*MaxC) exceeds E2 (compared via asint), or
      // on any improvement while the candidate is less than PitchMin
      // away from the current best.
      if (asint(Diff) > asint(E2) || (Diff > 0.0f && ((i - Indx) < PitchMin))) {
        Indx = i;
        MaxE = E;
        MaxC = C2;
      }
    }
    Pr--;
    k--;
  }
  return(Indx+PitchMin);
}
// Estim_Pitch: floating-point open-loop pitch search.
//
//   Dpnt   sample buffer
//   Start  index in Dpnt of the first sample of the analysed window
//
// For every lag i in [PitchMin, PitchMax-3] the cross-correlation C between
// the window at Start and the window i samples earlier is computed, along
// with the energy E of the earlier window (updated recursively).  Returns
// the selected lag; PitchMin if no candidate is ever accepted.
int Estim_Pitch(float *Dpnt, int Start)
{
  int i;
  int Pr,Indx = PitchMin;
  float MaxE = 1.0f;
  float MaxC = 0.0f;
  float E,C,C2;

  // Diff and E2 are compared through their integer bit patterns.  A union
  // is used for the reinterpretation instead of casting float* to int*,
  // which violates the strict-aliasing rule.  Like the original pointer
  // casts, this assumes int and float have the same size.
  union { float f; int bits; } Diff,E2;

  Pr = Start - PitchMin + 1;

  // Init the energy estimate
  E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);

  // Main Open loop pitch search loop
  for (i=PitchMin; i <= PitchMax-3; i++) {
    Pr--;

    // Update energy, compute cross
    C = DotProd(&Dpnt[Start],&Dpnt[Pr],2*SubFrLen);
    E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];
    C2 = C*C;

    // Check for new maximum
    E2.f = C2*MaxE;
    Diff.f = (E2.f - E*MaxC)*4.0f;
    if (C > 0.0f && E > 0.0f &&
        (Diff.bits > E2.bits ||
         (Diff.f > 0.0f && ((i - Indx) < PitchMin)))) {
      Indx = i;
      MaxE = E;
      MaxC = C2;
    }
  }
  return Indx;
}
// Comp_Pw: compute the harmonic weighting parameters for one subframe.
//
//   Dpnt   sample buffer
//   Start  index of the first sample of the subframe in Dpnt
//   Olp    open-loop pitch lag; lags Olp-PwRange .. Olp+PwRange are searched
//
// Returns a PWDEF whose Indx is the selected lag and whose Gain is the
// weighting gain.  If no lag has positive correlation and energy, Indx is
// Olp and Gain is 0.
PWDEF Comp_Pw(float *Dpnt, int Start, int Olp)
{
  int i,k;
  float Energy,C,E,C2,MaxE,MaxC2,MaxC,Gopt;
  PWDEF Pw;

  // Compute target energy
  Energy = DotProd(&Dpnt[Start],&Dpnt[Start],SubFrLen);

  // Find position with maximum C2/E value
  MaxE = 1.0f;
  MaxC = 0.0f;
  MaxC2 = 0.0f;
  Pw.Indx = -1;
  Pw.Gain = 0.0f;
  k = Start - (Olp-PwRange);
  E = DotProd(&Dpnt[k],&Dpnt[k],SubFrLen);

  for (i=0; i <= 2*PwRange; i++) {
    C = DotProd(&Dpnt[Start],&Dpnt[k],SubFrLen);

    if (E > 0.0f && C > 0.0f) {
      C2 = C*C;
      // C2/E > MaxC2/MaxE, cross-multiplied to avoid the divisions
      if (C2*MaxE > E*MaxC2) {
        Pw.Indx = i;
        MaxE = E;
        MaxC = C;
        MaxC2 = C2;
      }
    }
    k--;

    // Stop before the recursive update would read Dpnt[k] with k < 0
    if (k < 0) {
      break;
    }

    // Slide the energy window one sample towards the past
    E = E - Dpnt[k+SubFrLen]*Dpnt[k+SubFrLen] + Dpnt[k]*Dpnt[k];
  }

  // Nothing usable found: report the open-loop lag with zero gain
  if (Pw.Indx == -1) {
    Pw.Indx = Olp;
    return Pw;
  }

  Pw.Gain = 0.0f;
  if (MaxC2 > MaxE*Energy*0.375f) {
    if (MaxC > MaxE || MaxE == 0.0f)
      Gopt = 1.0f;
    else
      Gopt = (float) fabs(MaxC)/MaxE;

    Pw.Gain = 0.3125f*Gopt;
  }

  // Convert the search index back to an absolute lag
  Pw.Indx = Olp - PwRange + Pw.Indx;
  return Pw;
}
// Filt_Pw: apply the harmonic weighting to one subframe.
// Writes SubFrLen samples to DataBuff starting at Start; each one is the
// sample at PitchMax+Start+n in Dpnt minus Pw.Gain times the sample
// Pw.Indx positions earlier.
void Filt_Pw(float *DataBuff, float *Dpnt, int Start, PWDEF Pw)
{
  float *Cur  = &Dpnt[PitchMax+Start];   // current subframe
  float *Past = Cur - Pw.Indx;           // same subframe, Pw.Indx earlier
  float *Out  = &DataBuff[Start];
  int n;

  for (n = 0; n < SubFrLen; n++) {
    Out[n] = Cur[n] - Pw.Gain*Past[n];
  }
}
// Find_Fcbk: fixed-codebook search for one subframe.
//
//   Dpnt     in: target vector; out: the reconstructed excitation
//   ImpResp  impulse response passed to the search routines
//   Line     coded-frame structure; Sfs[Sfc] receives the result
//   Sfc      subframe index
//   WrkRate  Rate63 or Rate53; selects the search below
//   flags    search-control flags (SC_FINDB is tested here; the whole
//            word is also forwarded to the ACELP search)
//   UseMMX   non-zero selects the integer ACELP search when it is built
void Find_Fcbk(float *Dpnt, float *ImpResp, LINEDEF *Line, int Sfc, enum Crate WrkRate, int flags, int UseMMX)
{
  int i;
  int Srate,T0_acelp;
  float gain_T0;
  BESTDEF Best = {0};

  switch(WrkRate) {

  case Rate63:
    Srate = Nb_puls[Sfc];
    Best.MaxErr = -99999999.9f;

    // With SC_FINDB only one Find_Best pass is made (its last argument is
    // Olp when Olp < SubFrLen-2, else SubFrLen).  Without it, a pass with
    // SubFrLen always runs and the Olp pass is added when it applies.
    if (flags & SC_FINDB) {
      if ((*Line).Olp[Sfc>>1] < SubFrLen-2)
        Find_Best(&Best, Dpnt, ImpResp, Srate, (*Line).Olp[Sfc>>1]);
      else
        Find_Best(&Best, Dpnt, ImpResp, Srate, SubFrLen);
    }
    else {
      Find_Best(&Best, Dpnt, ImpResp, Srate, SubFrLen);
      if ((*Line).Olp[Sfc>>1] < SubFrLen-2)
        Find_Best(&Best, Dpnt, ImpResp, Srate, (*Line).Olp[Sfc>>1]);
    }

    // Reconstruct the excitation
    for (i=0; i < SubFrLen; i++)
      Dpnt[i] = 0.0f;
    for (i=0; i < Srate; i++)
      Dpnt[Best.Ploc[i]] = Best.Pamp[i];

    // Code the excitation
    Fcbk_Pack(Dpnt, &((*Line).Sfs[Sfc]), &Best, Srate);

    if (Best.UseTrn == 1)
      Gen_Trn(Dpnt, Dpnt, (*Line).Olp[Sfc>>1]);
    break;

  case Rate53:
    T0_acelp = search_T0 ((*Line).Olp[Sfc>>1]-1+(*Line).Sfs[Sfc].AcLg,
                          (*Line).Sfs[Sfc].AcGn, &gain_T0);

    // The "#endif //COMPILE_MMX" was present without its opening
    // directive; the guard is restored around the MMX-only branch so the
    // float search is the unconditional fallback.
#if COMPILE_MMX
    if (UseMMX) {
      (*Line).Sfs[Sfc].Ppos = ACELP_LBC_code_int(Dpnt, ImpResp, T0_acelp, Dpnt,
        &(*Line).Sfs[Sfc].Mamp, &(*Line).Sfs[Sfc].Grid,
        &(*Line).Sfs[Sfc].Pamp, gain_T0, flags);
    }
    else
#endif //COMPILE_MMX
    {
      (*Line).Sfs[Sfc].Ppos = ACELP_LBC_code(Dpnt, ImpResp, T0_acelp, Dpnt,
        &(*Line).Sfs[Sfc].Mamp, &(*Line).Sfs[Sfc].Grid,
        &(*Line).Sfs[Sfc].Pamp, gain_T0, flags);
    }
    (*Line).Sfs[Sfc].Tran = 0;
    break;
  }

  return;
}
// Fcbk_Unpk: rebuild the fixed-codebook excitation of one subframe from
// its coded parameters.
//
//   Tv       out: SubFrLen excitation samples
//   Sfs      coded subframe (Ppos, Pamp, Grid, Mamp, Tran, AcLg, AcGn)
//   Olp      pitch lag associated with the subframe
//   Sfc      subframe index
//   WrkRate  Rate63 or Rate53; selects the decoder below
void Fcbk_Unpk(float *Tv, SFSDEF Sfs, int Olp, int Sfc, enum Crate WrkRate)
{
  int n,Used,Np;
  float Scratch[SubFrLen+4];
  float Amp,PitchGain;
  int SignBits,Shift,PosBits;
  int Track,Slot,PitchLag;
  Word32 Rem;

  switch (WrkRate) {

  case Rate63:
    Np = Nb_puls[Sfc];

    for (n = 0; n < SubFrLen; n++)
      Tv[n] = 0.0f;

    // Position index outside the table range: leave the vector at zero
    if (Sfs.Ppos >= MaxPosTable[Sfc])
      return;

    // Decode the amplitudes and positions
    Used = MaxPulseNum - Np;
    Rem = Sfs.Ppos;

    for (n = 0; n < SubFrLen/Sgrid; n++) {
      Rem -= CombinatorialTable[Used][n];
      if (Rem >= (Word32) 0)
        continue;

      // Went negative: undo the subtraction and place a pulse on slot n
      Rem += CombinatorialTable[Used][n];
      Used++;

      if ((Sfs.Pamp & (1 << (MaxPulseNum-Used))) != 0)
        Tv[Sfs.Grid + Sgrid*n] = -FcbkGainTable[Sfs.Mamp];
      else
        Tv[Sfs.Grid + Sgrid*n] =  FcbkGainTable[Sfs.Mamp];

      if (Used == MaxPulseNum)
        break;
    }

    if (Sfs.Tran == 1)
      Gen_Trn(Tv, Tv, Olp);
    break;

  case Rate53:
    for (n = 0; n < SubFrLen+4; n++)
      Scratch[n] = 0.0f;

    Amp      = FcbkGainTable[Sfs.Mamp];
    Shift    = Sfs.Grid;
    SignBits = Sfs.Pamp;
    PosBits  = Sfs.Ppos;

    // Four pulses: 3 position bits and 1 sign bit each; pulse number
    // Track sits at 8*position + Shift + 2*Track
    for (Track = 0; Track < 4; Track++) {
      Slot = ((PosBits & 7) << 3) + Shift + 2*Track;
      if ((SignBits & 1) == 1)
        Scratch[Slot] = Amp;
      else
        Scratch[Slot] = -Amp;
      PosBits  = PosBits >> 3;
      SignBits = SignBits >> 1;
    }

    for (n = 0; n < SubFrLen; n++)
      Tv[n] = Scratch[n];

    // Add the pitch-lagged contribution when the lag is short enough
    PitchLag = search_T0(Olp-1+Sfs.AcLg, Sfs.AcGn, &PitchGain);
    if (PitchLag < SubFrLen-2) {
      for (n = PitchLag; n < SubFrLen; n++)
        Tv[n] += Tv[n-PitchLag]*PitchGain;
    }
    break;
  }
}
// Acbk_Filt: output[i] = fac*impresp[i] + input[i] for i = 0..58.
//
// Elements 58 down to 3 are produced four at a time, either by the x87
// assembly (OPT_ACBKF non-zero) or by the equivalent C loop; elements
// 2, 1 and 0 are always done in C.  All three arrays must hold at least
// 59 floats.
void Acbk_Filt(float *output,float *input,float fac,float *impresp)
{
#if OPT_ACBKF

  // One instruction per line: ASM presumably expands to MSVC __asm, where
  // ';' starts a comment that runs to the end of the line.
  // The trailing comments show the FPU stack, top of stack first:
  // aN = fac*impresp[..], bN = aN + input[..].
  ASM
  {
    push esi;
    push edi;
    push ebx;

    mov eax,58;
    mov esi,input;
    mov edi,output;
    mov ebx,impresp;

loop1:
    fld DP[ebx+4*eax];
    fmul fac;
    fld DP[ebx+4*eax-4];
    fmul fac;
    fld DP[ebx+4*eax-8];
    fmul fac;
    fld DP[ebx+4*eax-12];
    fmul fac;                 // a3 a2 a1 a0

    fxch ST(3);
    fadd DP[esi+4*eax];       // b0 a2 a1 a3
    fxch ST(2);
    fadd DP[esi+4*eax-4];     // b1 a2 b0 a3
    fxch ST(1);
    fadd DP[esi+4*eax-8];     // b2 b1 b0 a3
    fxch ST(3);
    fadd DP[esi+4*eax-12];    // b3 b1 b0 b2

    fxch ST(2);
    fstp DP[edi+4*eax];       // b1 b3 b2
    fstp DP[edi+4*eax-4];     // b3 b2
    fxch ST(1);
    fstp DP[edi+4*eax-8];
    fstp DP[edi+4*eax-12];

    sub eax,4;
    cmp eax,2;
    jg loop1;

    pop ebx;
    pop edi;
    pop esi;
  }

#else

  int i;

  for (i=58; i>2; i-=4) {
    output[i-0] = fac*impresp[i-0] + input[i-0];
    output[i-1] = fac*impresp[i-1] + input[i-1];
    output[i-2] = fac*impresp[i-2] + input[i-2];
    output[i-3] = fac*impresp[i-3] + input[i-3];
  }

#endif

  // Remaining three elements, shared by both paths
  output[2] = fac*impresp[2] + input[2];
  output[1] = fac*impresp[1] + input[1];
  output[0] = fac*impresp[0] + input[0];
}
//---------------------------------------------------------------------
#if COMPILE_MMX
// Find_AcbkInt: 16-bit integer (MMX) adaptive-codebook search; its
// structure parallels Find_Acbk.
//
//   Tv       in: target vector; out: target minus the selected
//            adaptive-codebook contribution
//   ImpResp  impulse response
//   PrevExc  previous excitation buffer
//   Line     coded-frame structure; Olp[Sfc>>1], Sfs[Sfc].AcLg and
//            Sfs[Sfc].AcGn are written
//   Sfc      subframe index
//   WrkRate  Rate63 or Rate53
//   flags    search-control flags (SC_LAG1, SC_GAIN are tested)
//   CodStat  coder state, forwarded to Test_Err
//
// The trailing "#endif //COMPILE_MMX" had no opening directive and the
// per-lag loop below had no closing brace; both are restored here.  No
// other statement was changed.
//
// NOTE(review): ImpRespInt and RezBufInt are read inside the first loop
// but no visible statement fills them there, and DupRezBuf is never
// called to fill RezTmpInt.  The comments "Convert ImpResp to 16-bit" and
// "Convert RezBuf to 16-bit" have no code after them, so conversion calls
// appear to be missing from this copy.  Check against the reference.
void Find_AcbkInt(float *Tv, float *ImpResp, float *PrevExc,
                  LINEDEF *Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
{
  int i,j,k;

  float RezBuf[SubFrLen+ClPitchOrd-1];

  short TvInt[SubFrLen];
  int Tvxi[SubFrLen];

  short *lPntInt,*sPntInt,*PtrInt;

  int CorBufInt[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
  int *lPntd;

  long Acc0l, Acc1l;
  int Olp,Lid,Gid,Hb,t,k1,k2;
  int Bound[3];
  int Lag1, Lag2;
  int MaxInt,off_filt;

  int shift,Tshift,mx;

  DECLARE_SHORT(FltBuf0Int,63);
  DECLARE_SHORT(FltBuf1Int,63);
  DECLARE_SHORT(FltBuf2Int,63);
  DECLARE_SHORT(FltBuf3Int,63);
  DECLARE_SHORT(FltBuf4Int,63) ;
  DECLARE_SHORT(CorVctInt,4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)) ;
  DECLARE_SHORT(RezTmpInt,16) ;
  DECLARE_SHORT(RezBufInt,SubFrLen+ClPitchOrd-1) ;
  DECLARE_SHORT(ImpRespInt,63);

  DECLARE_SHORT(Ix,2*SubFrLen+16) ;
  DECLARE_SHORT(Rx,2*SubFrLen+16);
  DECLARE_INT(Temp,64);

  Olp = (*Line).Olp[Sfc>>1];
  Lid = Pstep;
  Gid = 0;
  Hb  = 3 + (Sfc & 1);

  // For even frames only
  if ((Sfc & 1) == 0) {
    if (Olp == PitchMin)
      Olp++;
    if (Olp > (PitchMax-5))
      Olp = PitchMax-5;
  }

  // SC_LAG1 restricts the search to the single lag offset k = 1
  if (flags & SC_LAG1) {
    lPntInt = &CorVctInt[20];
    k1 = 1;
    k2 = 2;
  }
  else {
    lPntInt = CorVctInt;
    k1 = 0;
    k2 = Hb;
  }

  //Convert Tv to 16-bit
  ConstFloatToInt(Tv, Tvxi, SubFrLen, 32768.0f);
  for(i=0; i<SubFrLen; i++)
    TvInt[i] = (short)(((Tvxi[i]<<1)+0x00008000)>>16);

  //Convert ImpResp to 16-bit
  //Scale by 2^14 & truncate bits right of decimal

  for (k=k1; k<k2; k++) {
    lPntd = &CorBufInt[k*20];

    // Get residual from the excitation buffer
    Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);

    //Convert RezBuf to 16-bit

    // Filter the last one (ClPitchOrd-1) using the impulse responce
    ab2abbcw(&RezBufInt[ClPitchOrd-1], Rx, SubFrLen);

    // Ix: time-reversed impulse response, each pair stored twice
    j=0;
    for(i=0; i<SubFrLen; i+=2){
      Ix[j]  =Ix[j+2]=ImpRespInt[SubFrLen-1-i];
      Ix[j+1]=Ix[j+3]=ImpRespInt[SubFrLen-2-i];
      j+=4;
    }
    for(i=0; i<16; i++)
      Ix[j+i]=0;
    ConvMMX(Rx, Ix, Temp, 60);
    for(i=0; i<SubFrLen; i++)
      FltBuf4Int[i] = (short)(((Temp[i]<<1)+0x00008000)>>16);

    // Update the others (ClPitchOrd-2 down to 0)
    Acc0l = ((RezBufInt[3]<<13)+0x00004000)>>15;
    FltBuf3Int[0] = (short)Acc0l;

    Acc0l = ((RezBufInt[2]<<13)+0x00004000)>>15;
    FltBuf2Int[0] = (short)Acc0l;

    Acc0l = ((RezBufInt[1]<<13)+0x00004000)>>15;
    FltBuf1Int[0] = (short)Acc0l;

    Acc0l = ((RezBufInt[0]<<13)+0x00004000)>>15;
    FltBuf0Int[0] = (short)Acc0l;

    FBufCalcInt(FltBuf4Int,FltBuf3Int,ImpRespInt,RezTmpInt,0);
    FBufCalcInt(FltBuf3Int,FltBuf2Int,ImpRespInt,RezTmpInt,1);
    FBufCalcInt(FltBuf2Int,FltBuf1Int,ImpRespInt,RezTmpInt,2);
    FBufCalcInt(FltBuf1Int,FltBuf0Int,ImpRespInt,RezTmpInt,3);

    // Compute the cross products with the signal
    *lPntd++ = DotMMX60(TvInt,FltBuf0Int)<<1;
    *lPntd++ = DotMMX60(TvInt,FltBuf1Int)<<1;
    *lPntd++ = DotMMX60(TvInt,FltBuf2Int)<<1;
    *lPntd++ = DotMMX60(TvInt,FltBuf3Int)<<1;
    *lPntd++ = DotMMX60(TvInt,FltBuf4Int)<<1;

    // Compute the energies
    *lPntd++ = DotMMX60(FltBuf0Int,FltBuf0Int)<<1;
    *lPntd++ = DotMMX60(FltBuf1Int,FltBuf1Int)<<1;
    *lPntd++ = DotMMX60(FltBuf2Int,FltBuf2Int)<<1;
    *lPntd++ = DotMMX60(FltBuf3Int,FltBuf3Int)<<1;
    *lPntd++ = DotMMX60(FltBuf4Int,FltBuf4Int)<<1;

    // Compute the between crosses
    *lPntd++ = DotMMX60(FltBuf1Int,FltBuf0Int)<<2;

    *lPntd++ = DotMMX60(FltBuf2Int,FltBuf0Int)<<2;
    *lPntd++ = DotMMX60(FltBuf2Int,FltBuf1Int)<<2;

    *lPntd++ = DotMMX60(FltBuf3Int,FltBuf0Int)<<2;
    *lPntd++ = DotMMX60(FltBuf3Int,FltBuf1Int)<<2;
    *lPntd++ = DotMMX60(FltBuf3Int,FltBuf2Int)<<2;

    *lPntd++ = DotMMX60(FltBuf4Int,FltBuf0Int)<<2;
    *lPntd++ = DotMMX60(FltBuf4Int,FltBuf1Int)<<2;
    *lPntd++ = DotMMX60(FltBuf4Int,FltBuf2Int)<<2;
    *lPntd++ = DotMMX60(FltBuf4Int,FltBuf3Int)<<2;
  }  // restored: end of the per-lag loop.  The normalisation below walks
     // every lag k1..k2-1 at once, so it cannot be inside the loop.

  //Convert k1 through k2 indices of CorBufInt to 16-bit
  // values
  // First find the largest magnitude so a common shift can be used
  Acc1l = 0L;
  for(j=k1; j<k2; j++) {
    for(i=0; i<20; i++) {
      Acc0l = abs(CorBufInt[j*20 + i]);
      if( Acc0l > Acc1l)
        Acc1l = Acc0l;
    }
  }

  //Need a convert_long_to_short routine
  shift = norm(Acc1l);
  for(j=k1; j<k2; j++) {
    for(i=0; i<20; i++) {
      CorBufInt[j*20 + i]=CorBufInt[j*20 + i]<<shift;
      CorBufInt[j*20 + i] += 0x00008000L;       //round up to 16 MSBs
      *lPntInt++=(short)(CorBufInt[j*20 + i]>>16);
    }
  }

  /* Test potential error */
  Lag1 = Olp - Pstep;
  Lag2 = Olp - Pstep + Hb - 1;

  off_filt = Test_Err(Lag1, Lag2, CodStat);

  // Limit how many gain vectors may be searched in each table
  Bound[0] = NbFilt085_min + (off_filt << 2);
  if(Bound[0] > NbFilt085)
    Bound[0] = NbFilt085;
  Bound[1] = NbFilt170_min + (off_filt << 3);
  if(Bound[1] > NbFilt170)
    Bound[1] = NbFilt170;

  Bound[2] = 85;   //Use subset table in the case t=2

  MaxInt = 0;

  for (k=k1; k<k2; k++) {

    // Select Quantization table
    t = 0;
    if (WrkRate == Rate63) {
      if ((Sfc & 1) == 0) {
        if (Olp-Pstep+k >= SubFrLen-2)
          t = 1;
      }
      else {
        if (Olp >= SubFrLen-2)
          t = 1;
      }
    }
    else
      t = 1;

    /* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
       Else, use full table with limited Bound.*/
    //if (t==1 && (flags & SC_GAIN) && Bound[t]==NbFilt170)
    if ((WrkRate == Rate53) && (flags & SC_GAIN) && (Bound[t]==NbFilt170))
      t = 2;

    // Search for maximum
    sPntInt = AcbkGainTablePtrInt[t];
    PtrInt = &CorVctInt[k*20];

    CodeBkSrch(PtrInt, sPntInt, Bound[t], &Gid, &MaxInt);

    // NOTE(review): Gid is re-mapped and Lid is set to k on every pass,
    // whether or not CodeBkSrch raised MaxInt; Find_Acbk only updates
    // them on a new maximum.  A guard may be missing - please verify.
    if (t==2)
      Gid = GainScramble[Gid];
    //else
    //Gid = Gid;

    Lid = k;
  }

  // Modify Olp for even sub frames
  if ((Sfc & 1) == 0) {
    Olp = Olp - Pstep + Lid;
    Lid = Pstep;
  }

  // Save Lag, Gain and Olp
  (*Line).Sfs[Sfc].AcLg = Lid;
  (*Line).Sfs[Sfc].AcGn = Gid;
  (*Line).Olp[Sfc>>1] = Olp;

  //ASM emms;

  /* ------------------------------ FLOAT -----------------------*/
  // Decode the Acbk contribution and subtract it
  Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);

  // Tshift is derived from the value returned by FloatToShortScaled so
  // the integer convolution can be brought back to the scale of Tvxi
  mx = FloatToShortScaled(RezBuf, RezBufInt, SubFrLen+ClPitchOrd-1, 3);
  Tshift = 11 - (mx-126);
  if(mx==0)
    Tshift = 0;

  ab2abbcw(RezBufInt, Rx, 60);
  ConvMMX(Rx, Ix, Temp, SubFrLen);

  //ASM emms;

  if (Tshift >=0) {
    for(j=0; j<SubFrLen; j++){
      Temp[j] = Temp[j]>>Tshift;
      Tv[j]=((float)(Tvxi[j] - Temp[j]))*0.00003052f;
    }
  }
  else {
    for(j=0; j<SubFrLen; j++){
      Temp[j] = Temp[j]<<(-Tshift);
      Tv[j]=((float)(Tvxi[j] - Temp[j]))*0.00003052f;
    }
  }
}
#endif //COMPILE_MMX
// norm: number of left shifts needed to normalise a 32-bit value, i.e. to
// bring a positive value into [0x40000000, 0x7fffffff].
// Returns 0 for 0 and 31 for -1; other negative values are complemented
// first.
short norm(long L_var1)
{
  short var_out;

  if (L_var1 == 0L) {
    var_out = (short)0;
  }
  else {
    // Compare with -1 itself: (long)0xffffffffL equals -1 only when long
    // is 32 bits wide.  With a 64-bit long the old test missed -1, which
    // then became ~(-1) == 0 and never left the shift loop.
    if (L_var1 == -1L) {
      var_out = (short)31;
    }
    else {
      if (L_var1 < 0L) {
        L_var1 = ~L_var1;
      }

      for(var_out = (short)0;L_var1 < 0x40000000L;var_out++) {
        L_var1 <<= 1L;
      }
    }
  }

  return(var_out);
}

/*---------------------------------------------------------------------*/
// Find_Acbk: floating-point adaptive-codebook search for one subframe.
//
//   Tv       in: target vector; out: target minus the selected
//            adaptive-codebook contribution
//   ImpResp  impulse response
//   PrevExc  previous excitation buffer
//   Line     coded-frame structure; Olp[Sfc>>1], Sfs[Sfc].AcLg and
//            Sfs[Sfc].AcGn are written
//   Sfc      subframe index
//   WrkRate  Rate63 or Rate53
//   flags    search-control flags (SC_LAG1, SC_GAIN are tested)
//   CodStat  coder state, forwarded to Test_Err
//
// Three closing braces were missing from this function (end of the
// FltBuf update loop, end of the per-lag loop, end of the function); they
// are restored below.
void Find_Acbk(float *Tv, float *ImpResp, float *PrevExc,
               LINEDEF *Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
{
  int i,j,k;

  float Acc0,Max;

  float RezBuf[SubFrLen+ClPitchOrd-1];
  float FltBuf[ClPitchOrd][SubFrLen];
  float CorVct[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
  float *lPnt;
  float *sPnt,*Ptr;
  int Olp,Lid,Gid,Hb,t,k1,k2;
  int Bound[3];
  int Lag1, Lag2;
  int off_filt;

  Olp = (*Line).Olp[Sfc>>1];
  Lid = Pstep;
  Gid = 0;
  Hb  = 3 + (Sfc & 1);

  // For even frames only
  if ((Sfc & 1) == 0) {
    if (Olp == PitchMin)
      Olp++;
    if (Olp > (PitchMax-5))
      Olp = PitchMax-5;
  }

  // SC_LAG1 restricts the search to the single lag offset k = 1
  if (flags & SC_LAG1) {
    lPnt = &CorVct[20];
    k1 = 1;
    k2 = 2;
  }
  else {
    lPnt = CorVct;
    k1 = 0;
    k2 = Hb;
  }

  for (k=k1; k<k2; k++) {

    // Get residual from the exitation buffer
    Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);

    // Filter the last one (ClPitchOrd-1) using the impulse responce
    for (i=0; i < SubFrLen; i++)
      FltBuf[ClPitchOrd-1][i] = DotRev(&RezBuf[ClPitchOrd-1],ImpResp,i+1);

    // Update the others (ClPitchOrd-2 down to 0)
    for (i=ClPitchOrd-2; i >= 0; i --) {
      FltBuf[i][0] = RezBuf[i]*0.5f;
      Acbk_Filt(&FltBuf[i][1],&FltBuf[i+1][0],RezBuf[i],&ImpResp[1]);
      // Acbk_Filt replaces:
      //   for (j = 1; j < SubFrLen; j++)
      //     FltBuf[i][j] = RezBuf[i]*ImpResp[j] + FltBuf[i+1][j-1];
    }

    // Compute the cross products with the signal
    for (i=0; i < ClPitchOrd; i++)
      *lPnt++ = DotProd(Tv, FltBuf[i], SubFrLen);

    // Compute the energies
    for (i=0; i < ClPitchOrd; i++)
      *lPnt++ = 0.5f*DotProd(FltBuf[i], FltBuf[i], SubFrLen);

    // Compute the between crosses
    for (i=1; i < ClPitchOrd; i++)
      for (j = 0; j < i; j++)
        *lPnt++ = DotProd(FltBuf[i], FltBuf[j], SubFrLen);
  }

  /* Test potential error */
  Lag1 = Olp - Pstep;
  Lag2 = Olp - Pstep + Hb - 1;

  off_filt = Test_Err(Lag1, Lag2, CodStat);

  // Limit how many gain vectors may be searched in each table
  Bound[0] = NbFilt085_min + (off_filt << 2);
  if(Bound[0] > NbFilt085)
    Bound[0] = NbFilt085;
  Bound[1] = NbFilt170_min + (off_filt << 3);
  if(Bound[1] > NbFilt170)
    Bound[1] = NbFilt170;

  Bound[2] = 85;   //Use subset table in the case t=2

  Max = 0.0f;

  for (k=k1; k<k2; k++) {

    // Select Quantization table
    t = 0;
    if (WrkRate == Rate63) {
      if ((Sfc & 1) == 0) {
        if (Olp-Pstep+k >= SubFrLen-2)
          t = 1;
      }
      else {
        if (Olp >= SubFrLen-2)
          t = 1;
      }
    }
    else
      t = 1;

    /* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
       Else, use full table with limited Bound.*/
    if (t==1 && (flags & SC_GAIN) && Bound[t]==NbFilt170)
      t = 2;

    // Search for maximum
    sPnt = AcbkGainTablePtr[t];
    Ptr = &CorVct[k*20];

    for (i=0; i < Bound[t]; i++) {
      Acc0 = Ptr[0]*sPnt[0] + Ptr[1]*sPnt[1] + Ptr[2]*sPnt[2] + Ptr[3]*sPnt[3] +
             Ptr[4]*sPnt[4] + Ptr[5]*sPnt[5] + Ptr[6]*sPnt[6] + Ptr[7]*sPnt[7] +
             Ptr[8]*sPnt[8] + Ptr[9]*sPnt[9] + Ptr[10]*sPnt[10] + Ptr[11]*sPnt[11] +
             Ptr[12]*sPnt[12] + Ptr[13]*sPnt[13] + Ptr[14]*sPnt[14] + Ptr[15]*sPnt[15] +
             Ptr[16]*sPnt[16] + Ptr[17]*sPnt[17] + Ptr[18]*sPnt[18] + Ptr[19]*sPnt[19];

      sPnt += 20;

      if (asint(Acc0) > asint(Max))  // integer cmp, since Max is not negative.
      {
        Max = Acc0;

        if (t==2)
          Gid = GainScramble[i];
        else
          Gid = i;

        Lid = k;
      }
    }
  }

  // Modify Olp for even sub frames
  if ((Sfc & 1) == 0) {
    Olp = Olp - Pstep + Lid;
    Lid = Pstep;
  }

  // Save Lag, Gain and Olp
  (*Line).Sfs[Sfc].AcLg = Lid;
  (*Line).Sfs[Sfc].AcGn = Gid;
  (*Line).Olp[Sfc>>1] = Olp;

  // Decode the Acbk contribution and subtract it
  Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);

  for (i=0; i < SubFrLen; i++)
    Tv[i] -= DotRev(RezBuf,ImpResp,i+1);
}
// Get_Rez: build the residual vector for pitch lag Lag from the previous
// excitation.
//
//   Tv       out: SubFrLen+ClPitchOrd-1 samples
//   PrevExc  previous excitation; samples up to index PitchMax-1 are read
//   Lag      pitch lag; must be positive (it is used as a divisor)
//
// The first ClPitchOrd/2 outputs are the samples just before
// PrevExc[PitchMax-Lag]; the rest repeat the Lag samples starting at
// PrevExc[PitchMax-Lag] periodically.
void Get_Rez(float *Tv, float *PrevExc, int Lag)
{
  int i,n,div,mod;
  float *src,*dst;

  for (i=0; i < ClPitchOrd/2; i++)
    Tv[i] = PrevExc[PitchMax - Lag - ClPitchOrd/2 + i];

  // n samples remain: div whole periods followed by mod leftover samples
  n = SubFrLen+ClPitchOrd/2;
  div = n/Lag;
  mod = n%Lag;

  dst = &Tv[ClPitchOrd/2];
  src = &PrevExc[PitchMax-Lag];

  // Byte counts use sizeof rather than a hard-coded 4 bytes per float
  for (i=0; i<div; i++) {
    memcpy(dst,src,Lag*sizeof *dst);
    dst += Lag;
  }
  memcpy(dst,src,mod*sizeof *dst);
}
// Decod_Acbk: compute the adaptive-codebook contribution of a subframe.
//
//   Tv       out: SubFrLen samples
//   PrevExc  previous excitation buffer
//   Olp,Lid  the residual is fetched for lag (Olp + Lid) - Pstep
//   Gid      gain vector index within the selected table (20 entries per
//            vector, of which the first five are used here)
//   WrkRate  Rate63 uses table 0 unless Olp >= SubFrLen-2; any other rate
//            uses table 1
void Decod_Acbk(float *Tv, float *PrevExc, int Olp, int Lid, int Gid, enum Crate WrkRate)
{
  float Rez[SubFrLen+ClPitchOrd-1];
  float *Tap;
  int Tbl;
  int n;

  Get_Rez(Rez, PrevExc, (Olp + Lid) - Pstep);

  // Select Quantization tables
  if (WrkRate == Rate63)
    Tbl = (Olp >= (SubFrLen-2)) ? 1 : 0;
  else
    Tbl = 1;

  Tap = AcbkGainTablePtr[Tbl] + Gid*20;

  // Compute output vector: 5-tap filter over the residual
  for (n = 0; n < SubFrLen; n++) {
    Tv[n] = Rez[n]*Tap[0] + Rez[n+1]*Tap[1] + Rez[n+2]*Tap[2] +
            Rez[n+3]*Tap[3] + Rez[n+4]*Tap[4];
  }
}
// Comp_Info: decide whether the last 2*SubFrLen samples of Buff are
// periodic around lag Olp.
//
//   Buff  sample buffer; the analysed window starts at index
//         PitchMax+Frame-2*SubFrLen
//   Olp   centre lag; clamped to PitchMax-3, then Olp-3..Olp+3 are tried
//
// Returns the lag with the largest positive cross-correlation if
// Ccr*Ccr > 0.125*Enr*Tenr, otherwise 0.
int Comp_Info(float Buff[60], int Olp)
{
  float *Target;
  float Corr,Ccr,Tenr,Enr;
  int Lag,Indx;

  if (Olp > (PitchMax-3))
    Olp = (PitchMax-3);

  Target = &Buff[PitchMax+Frame-2*SubFrLen];

  // Search the seven lags around Olp for the largest correlation
  Indx = Olp;
  Ccr = 0.0f;
  for (Lag = Olp-3; Lag <= Olp+3; Lag++) {
    Corr = DotProd(Target, Target-Lag, 2*SubFrLen);
    if (Corr > Ccr) {
      Ccr = Corr;
      Indx = Lag;
    }
  }

  // Compute target energy
  Tenr = DotProd(Target, Target, 2*SubFrLen);

  // Compute best energy
  Enr = DotProd(Target-Indx, Target-Indx, 2*SubFrLen);

  if (Ccr <= 0.0f)
    return 0;

  if (((0.125f*Enr*Tenr) - (Ccr*Ccr)) < 0.0f)
    return Indx;

  return 0;
}
// Regen: synthesise a replacement frame of excitation.
//
//   DataBuff  out: Frame samples
//   Buff      excitation history, Frame+PitchMax samples, updated in place
//   Lag       non-zero selects pitch repetition, zero selects noise
//   Gain      scale of the noise in the Lag == 0 case
//   Ecount    error counter; at ErrMaxNum or above everything is zeroed
//   Sd        state passed to Rand_lbc
void Regen(float *DataBuff, float *Buff, int Lag, float Gain, int Ecount, int *Sd)
{
  int n;
  float Scaled;

  // Test for clearing
  if (Ecount >= ErrMaxNum) {
    for (n = 0; n < Frame; n++)
      DataBuff[n] = 0.0f;
    for (n = 0; n < Frame+PitchMax; n++)
      Buff[n] = 0.0f;
    return;
  }

  // Interpolate accordingly to the voicing estimation
  if (Lag != 0) {
    // Voiced case: repeat the history at distance Lag, then attenuate
    for (n = 0; n < Frame; n++)
      Buff[PitchMax+n] = Buff[PitchMax-Lag+n];
    for (n = 0; n < Frame; n++) {
      Scaled = Buff[PitchMax+n] * 0.75f;
      Buff[PitchMax+n] = Scaled;
      DataBuff[n] = Scaled;
    }
    return;
  }

  // Unvoiced case: scaled random samples
  for (n = 0; n < Frame; n++)
    DataBuff[n] = Gain*(float)Rand_lbc(Sd)*(1.0f/16384.0f);

  //Clear buffer to reset memory
  for (n = 0; n < Frame+PitchMax; n++)
    Buff[n] = 0.0f;
}
// search_T0: look up the lag correction and gain that belong to gain
// index Gid.  Stores gain170[Gid] in *gain_T0 and returns
// T0 + epsi170[Gid].
int search_T0 (int T0, int Gid, float *gain_T0)
{
  *gain_T0 = gain170[Gid];
  return T0 + epsi170[Gid];
}
/*
**
** Function:        Update_Err()
**
** Description:     Estimation of the excitation error associated
**                  with the excitation signal when it is disturbed at
**                  the decoder, the disturbing signal being filtered
**                  by the long term synthesis filters.
**                  One value for (SubFrLen/2) samples.
**                  Updates the table CodStat.Err
**
** Links to text:   Section
**
** Arguments:
**
**  int Olp         Center value for pitch delay
**  int AcLg        Offset value for pitch delay
**  int AcGn        Index of Gain LT filter
**
** Outputs:         None
**
** Return value:    None
**
*/
#define MAX 256.0f
// Update_Err: push two new error estimates onto CodStat->Err.
//
//   Olp, AcLg  the pitch lag used is Olp - Pstep + AcLg
//   AcGn       index into the gain table
//   CodStat    coder state; WrkRate is read, Err[0..4] is updated
//
// The estimates are Err[z]*beta + Err0 for the Err entries selected by the
// lag, each capped at MAX.  Err[0..2] move to Err[2..4] and the new values
// are stored in Err[0] and Err[1].
void Update_Err(int Olp, int AcLg, int AcGn, CODDEF *CodStat)
{
  int n, Zone;
  int Lag;
  float Lo, Hi, Mid;
  float Beta, *Tab;

  Lag = Olp - Pstep + AcLg;

  /* Select Quantization tables: tabgain85 only for Rate63 with a short
     Olp, tabgain170 otherwise */
  Tab = tabgain170;
  if (CodStat->WrkRate == Rate63 && !(Olp >= (SubFrLen-2)))
    Tab = tabgain85;
  Beta = Tab[(int)AcGn];

  if (Lag <= (SubFrLen/2)) {
    Lo = CodStat->Err[0]*Beta + Err0;
    Hi = Lo;
  }
  else {
    // 1092/32768 is about 1/30, the width of one Err zone
    Zone = (Lag*1092) >> 15;

    if (30*(Zone+1) == Lag) {
      // Lag is an exact multiple of 30
      Lo = CodStat->Err[Zone-1]*Beta + Err0;
      Hi = CodStat->Err[Zone]*Beta + Err0;
    }
    else if (Zone == 1) {
      // Both outputs take the larger of the two candidates
      Lo = CodStat->Err[0]*Beta + Err0;
      Hi = CodStat->Err[1]*Beta + Err0;
      if (Lo > Hi)
        Hi = Lo;
      else
        Lo = Hi;
    }
    else {
      Mid = CodStat->Err[Zone-1]*Beta + Err0;

      Lo = CodStat->Err[Zone-2]*Beta + Err0;
      if (Mid > Lo)
        Lo = Mid;

      Hi = CodStat->Err[Zone]*Beta + Err0;
      if (Mid > Hi)
        Hi = Mid;
    }
  }

  if (Lo > MAX) Lo = MAX;
  if (Hi > MAX) Hi = MAX;

  // Age the history by two slots, newest values in front
  for (n = 4; n >= 2; n--)
    CodStat->Err[n] = CodStat->Err[n-2];

  CodStat->Err[0] = Lo;
  CodStat->Err[1] = Hi;
}
/*
**
** Function:        Test_Err()
**
** Description:     Checks the maximum excitation error for
**                  the subframe and computes an index iTest used to
**                  calculate the maximum number of filters (in Find_Acbk):
**                  Bound = Min(Nmin + iTest x pas, Nmax), with
**                  AcbkGainTable085 : pas = 2, Nmin = 51, Nmax = 85
**                  AcbkGainTable170 : pas = 4, Nmin = 93, Nmax = 170
**                  ("pas" is the step size).
**                  iTest depends on the relative difference between
**                  errmax and a fixed threshold
**
** Links to text:   Section
**
** Arguments:
**
**  int Lag1        1st long term Lag of the tested zone
**  int Lag2        2nd long term Lag of the tested zone
**
** Outputs:         None
**
** Return value:
**  int             index iTest used to compute Acbk number of filters
*/
// Test_Err: return the index iTest derived from the largest Err value in
// the zones covered by lags Lag1..Lag2.  The result is 0 when that maximum
// exceeds ThreshErr or when CodStat->SinDet is negative, otherwise
// (int)(ThreshErr - maximum).
int Test_Err(int Lag1, int Lag2, CODDEF *CodStat)
{
  int z, First, Last, Start;
  float Worst;

  // Zones are 30 samples wide
  Last = (Lag2 + ClPitchOrd/2)/30;

  Start = - SubFrLen + 1 + Lag1 - ClPitchOrd/2;
  if (Start <= 0)
    Start = 1;
  First = Start/30;

  Worst = -1.0f;
  for (z = Last; z >= First; z--) {
    if (CodStat->Err[z] > Worst)
      Worst = CodStat->Err[z];
  }

  if ((Worst > ThreshErr) || (CodStat->SinDet < 0))
    return 0;

  return (int)(ThreshErr - Worst);
}
// DotMMX60 (MMX version): dot product of two vectors of 60 16-bit
// integers, accumulated in 32 bits.
//
// Each of the 5 loop passes handles three quadwords (12 shorts).  The
// loop is software-pipelined: the products of one pass are added during
// the next, so two adds remain after the loop.
//
// The function's closing brace was missing and the #undef list started on
// the same line as the return statement; both are repaired.  Every
// instruction is on its own line because ASM presumably expands to MSVC
// __asm, where ';' starts a comment that runs to the end of the line.
int DotMMX60(short *ind, short *oud)
{
  int dotprod;

#define reg0  mm0
#define reg1  mm1
#define reg2  mm2
#define acc0  mm6

#define inx   esi
#define oux   edi
#define dot   eax
#define jcnt  ebx

#define l(n)  ASM movq    reg##n,QP[inx+8*n]
#define m(n)  ASM pmaddwd reg##n,QP[oux+8*n]
#define a(n)  ASM paddd   acc0,reg##n

  ASM
  {
    mov inx,ind;
    mov oux,oud;
    mov jcnt,5;
  }

  //Begin loop
  ASM pxor acc0,acc0;
  ASM pxor reg1,reg1;   //make first a(1) a nop
  ASM pxor reg2,reg2;   //make first a(2) a nop

inner:
  //------------------
  l(0);
  a(1);
  m(0);
  l(1);
  a(2);
  m(1);
  l(2);
  a(0);
  m(2);
  //-------------------

  ASM add inx,24;
  ASM add oux,24;

  ASM sub jcnt,1;
  ASM jg inner;

  // Drain the pipeline: products of the last pass
  a(1);
  a(2);

  ASM
  {
    //Add the two halves of acc0
    movq  reg0,acc0;
    psrlq acc0,32;
    paddd acc0,reg0;
    movd  dot,acc0;    //store
    mov dotprod,dot
  }

  ASM emms;

  return(dotprod);
}
#undef reg0
#undef reg1
#undef reg2
#undef acc0
#undef inx
#undef oux
#undef dot
#undef jcnt
#undef l
#undef m
#undef a
// DotMMX60 (portable C version): dot product of two vectors of 60 16-bit
// integers, accumulated in an int.
//
// The previous text looped on an undeclared 'i' while indexing with an
// uninitialised 'j', and the loop body was never closed; the loop now
// runs j over 0..59.
int DotMMX60(short *in, short *out)
{
  int dotprod;
  int j;

  dotprod=0;
  for(j=0; j < 60; j++) {
    dotprod += in[j]*out[j];
  }

  return(dotprod);
}
void DupRezBuf(short *rezbuf, short *reztemp) { #define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define rbuf edi
#define rztmp esi
//rezbuf duplication operations
#define cr(r0,r1) ASM movq reg##r0,reg##r1
#define uph(r0) ASM punpckhwd reg##r0,reg##r0
#define upl(r0) ASM punpcklwd reg##r0,reg##r0
#define sto(r0,i) ASM movq QP[rztmp+8*i],reg##r0
#define sl(r0) ASM psllw reg##r0,1
#define l(r0) ASM movq reg##r0,QP[rbuf]
//Duplicate first 4 rezbuf values 4 times each
// and store into 4 QWORDS in reztemp
//Multiply by two while we're at it
ASM mov rbuf,rezbuf; ASM mov rztmp,reztemp;
l(0); sl(0); cr(2,0); uph(0); upl(2); cr(1,0); cr(3,2); uph(0); sto(0,0); upl(1); sto(1,1); uph(2); sto(2,2); upl(3); sto(3,3);
ASM emms;
} #undef reg0
#undef reg1
#undef reg2
#undef reg3
#undef rbuf
#undef rztmp
#undef cr
#undef uph
#undef upl
#undef sto
#undef sl
#undef l
void FBufCalcInt(short *fi, short *fo, short *impresp, short *reztemp, int n) { #define reg0 mm0
#define reg1 mm1
#define reg2 mm2
#define reg3 mm3
#define reg4 mm4
#define reg5 mm5
#define reg6 mm6
#define reg7 mm1
#define reg8 mm7
#define fbufi esi
#define rbuf edi
#define imp edx
#define fbufo ebx
#define jcnt ecx
#define rzv eax
//Diagonal array operations
#define l1(r0,j) ASM movq reg##r0,QP[fbufi+8*j]
#define l2(r0,j) ASM movq reg##r0,QP[fbufi+8+8*j]
#define c3(r0) ASM movq reg##r0,QP[rbuf+8*rzv]
#define m1(r0,j) ASM pmulhw reg##r0,QP[imp+8+8*j]
#define a1(r0,r1) ASM paddsw reg##r0,reg##r1
#define sto(r0,j) ASM movq QP[fbufo+8+8*j], reg##r0
#define s1(r0) ASM psrlq reg##r0,48
#define s2(r0) ASM psllq reg##r0,16
#define or(r0,r1) ASM por reg##r0,reg##r1
//Loop setup
ASM { mov rbuf,reztemp mov jcnt,5; mov fbufi,fi; mov fbufo,fo; mov imp,impresp; mov rzv,n } //Compute initial values
//Zero-th QWORD is different
ASM { movq reg0,QP[fbufo]; psllq reg0,48; psrlq reg0,48;
//zero-th part of fbufo now in reg0
movq reg2,QP[rbuf+8*rzv]; pmulhw reg2,QP[imp+2]; paddsw reg2,QP[fbufi]; psllq reg2,16; por reg0,reg2;
movq QP[fbufo],reg0; } //begin loop
l2(0,0); l1(1,0); s2(0); s1(1); c3(2); m1(2,0); l2(3,1); l1(4,1); s2(3); s1(4); or(0,1); inner: //-------------------------
l2(6,2); a1(0,2); c3(5); m1(5,1); or(3,4); l1(7,2); s2(6); sto(0,0); s1(7); l2(0,3); a1(3,5); c3(8); m1(8,2); or(6,7); l1(1,3); s2(0); sto(3,1); s1(1); l2(3,4); a1(6,8); c3(2); m1(2,3); or(0,1); l1(4,4); s2(3); sto(6,2); s1(4); //-------------------------
ASM add fbufo,24; ASM add fbufi,24; ASM add imp,24;
ASM sub jcnt,1; ASM jg inner;
ASM emms; } #undef reg0
#undef reg1
#undef reg2
#undef reg3
#undef reg4
#undef reg5
#undef reg6
#undef reg7
#undef reg8
#undef fbufi
#undef rbuf
#undef imp
#undef fbufo
#undef jcnt
#undef rzv
#undef l1
#undef l2
#undef c3
#undef m1
#undef a1
#undef sto
#undef s1
#undef s2
#undef or
void FBufCalcInt(short *fi, short *fo, short *impresp, short *rezbuf, short *reztemp, int n) { long Acc0l; int j;
#define MAX16 32767
#define MIN16 -32768
for(j=1; j<SubFrLen; j++) { Acc0l = fi[j-1]; Acc0l += (((rezbuf[4-n]<<1)*impresp[j]))>>16; if (Acc0l > MAX16) Acc0l = MAX16; else if(Acc0l < MIN16) Acc0l = MIN16; fo[j] = (short)(Acc0l); } } #endif
//#if 0
// CodeBkSrch (MMX version): search numvecs code vectors of 20 shorts each
// (stored back to back at spint) for the largest dot product with the 20
// shorts at lpint.
//
//   lpint    correlation vector, 20 shorts
//   spint    code vectors, numvecs * 20 shorts
//   numvecs  number of vectors to test
//   gid      out: index of the winning vector
//   max      in: maximum so far; out: updated maximum
//
// Selection is branch-free: icnt counts down from numvecs, gd keeps the
// icnt value of the best vector, and the index returned is numvecs - gd.
// NOTE(review): if no vector exceeds the incoming *max, gd stays at
// numvecs and *gid is written as 0, whereas the C version leaves *gid
// untouched.  Callers should be checked for this.
//
// Fixes: the fifth product (reg4) was computed but never added to the
// accumulator, so only 16 of the 20 terms were summed - a(4) is added.
// "pxor reg1,acc0 pcmpgtd reg0,acc1" had no separator, and the
// #define/#undef lists shared lines with code.  Every instruction now has
// its own line (ASM presumably expands to MSVC __asm, where ';' starts a
// comment running to the end of the line).
void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
{

#define reg0  mm0
#define reg1  mm1
#define reg2  mm2
#define reg3  mm3
#define reg4  mm4
#define acc1  mm5
#define acc0  mm6
#define gdx   mm3   // reuses mm3 once reg3 has been accumulated
#define gd    mm7
#define icx   mm2   // reuses mm2 once reg2 has been accumulated

#define lp    esi
#define sp    edi
#define maxx  eax
#define gidx  edx
#define icnt  ebx

// In the following macros, 'n' is the column number.

#define l(n)  ASM movq    reg##n,QP[lp+8*n]
#define m(n)  ASM pmaddwd reg##n,QP[sp+8*n]
#define a(n)  ASM paddd   acc0,reg##n

  ASM
  {
    mov sp,spint;
    mov lp,lpint;
    mov icnt,numvecs;
    mov gidx,gid;
    mov maxx,max;
  }
  ASM movd gd,numvecs;    //load gd with top codebook index
  ASM movd acc1,DP[maxx]; //load acc1 with previous max

  //Begin loop
outer:
  //inner:
  ASM pxor acc0,acc0;
  ASM pxor reg1,reg1;   //make first a(1) a nop
  ASM pxor reg2,reg2;   //make first a(2) a nop

  l(0);
  a(1);
  m(0);
  l(1);
  a(2);
  m(1);
  l(2);
  a(0);
  m(2);
  l(3);
  a(1);
  m(3);
  l(4);
  a(2);
  m(4);

  ASM add sp,40;
  a(3);
  a(4);                 // fifth product; must precede the reduction below
  ASM movq gdx,gd;
  ASM movd icx,icnt;

  ASM
  {
    movq  reg0,acc0;
    psrlq acc0,32;

    pxor gd,icx;        //gd=MASK

    paddd acc0,reg0;
    movq reg0,acc0;     //copy acc0

    movq reg1,acc1;     //copy old max

    pxor reg1,acc0
    pcmpgtd reg0,acc1;  //reg0=0xFF or 0x00
    pand reg1,reg0;     //reg1=MASK or 0x00
    pxor acc1,reg1;     //acc1=acc0 or acc1
    pand gd,reg0;       //gd=MASK or 0x00
    pxor gd,gdx;        //gd=icnt or previous value

    sub icnt,1;
    jg outer;
  }

  ASM movd reg0,numvecs;
  ASM psubd reg0,gd;
  ASM movd DP[gidx],reg0; //return gid

  ASM movd DP[maxx],acc1; //return max

  ASM emms;
}
#undef reg0
#undef reg1
#undef reg2
#undef reg3
#undef reg4
#undef acc1
#undef acc0
#undef gdx
#undef gd
#undef icx
#undef lp
#undef sp
#undef maxx
#undef gidx
#undef icnt
#undef l
#undef m
#undef a
// CodeBkSrch (portable C version): search numvecs code vectors of 20
// shorts each (stored back to back at spint) for the largest dot product
// with the 20 shorts at lpint.
//
//   lpint    correlation vector, 20 shorts
//   spint    code vectors, numvecs * 20 shorts
//   numvecs  number of vectors to test
//   gid      in/out: set to the index of a vector whose product exceeds
//            *max; untouched if none does
//   max      in/out: running maximum
//
// The function's closing brace was missing and is restored.
void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
{
  int acc0;
  int i,j;

  for(i=0; i < numvecs; i++) {
    acc0 = 0;

    for(j=0; j<20; j++)
      acc0 += lpint[j]*spint[j];

    // Strictly greater: on a tie the earlier vector wins
    if (acc0 > *max) {
      *max = acc0;
      *gid = i;
    }

    spint += 20;
  }
}
#endif //COMPILE_MMX