windows-server-2003/enduser/netmeeting/av/codecs/intel/g723/exc_lbc.c


								//

								//	ITU-T G.723 Floating Point Speech Coder	ANSI C Source Code.	Version 1.00

								//	copyright (c) 1995, AudioCodes, DSP Group, France Telecom,

								//	Universite de Sherbrooke, Intel Corporation.  All rights reserved.

								//


								#include "opt.h"


								#include <stdlib.h>

								#include <stdio.h>

								#include <math.h>

								#include <memory.h>

								#include "typedef.h"

								#include "cst_lbc.h"

								#include "tab_lbc.h"

								#include "util_lbc.h"

								#include "exc_lbc.h"

								#include "timer.h"

								#include "mmxutil.h"


								// This file contains pitch and excitation related functions.

								//------------------------------------------------------

								#if COMPILE_MMX


								// Estim_Int: integer-correlation variant of the open-loop pitch search.
								// Only compiled when COMPILE_MMX is non-zero.
								//   Dpnt  - float sample buffer; it is handed to FloatToShortScaled
								//           starting at Dpnt[Start-PitchMax+3].
								//   Start - index into Dpnt of the analysis window.
								// Returns Indx+PitchMin, where Indx is the loop index of the candidate
								// that was selected by the search loop at the bottom of the function.
								int Estim_Int(float *Dpnt, int Start)

								{

								  int  i,k;


								#define NCOR (PitchMax+1-PitchMin)   // = 128 (rounded up to mult of 4)

								#define NTAPS (2*SubFrLen+12)        // = 132


								  int Pr,Indx = PitchMin;

								  float MaxE = 1.0f;

								  float MaxC = 0.0f;

								  float E,C2,E2,Diff;

								  int corr[NCOR];


								  // Scratch storage for the 16-bit copies of the signal.  'foo' is not
								  // referenced anywhere in this function; presumably it is padding so
								  // the struct can be shifted for alignment - TODO confirm.
								  typedef struct

								  {

								    short taps[4][NTAPS];                  //**  These two arrays need

								    short temp[PitchMax-3+2*SubFrLen];     //**  to be 8-byte aligned

								    double foo;

								  } EstimStruct;


								  EstimStruct est,*e;

								  e = &est;


								  // NOTE(review): the sequence below copies e into eax, rounds e up to
								  // a multiple of 8, and then stores the saved eax back into e, which
								  // appears to discard the rounding.  Confirm whether the add/and were
								  // meant to operate on eax instead.
								  ASM

								  {

								    mov eax,e;

								    add e,7;

								    and e,0fffffff8h;

								    mov e,eax;

								  }


								// Convert just the necessary portion of Dpnt to 16-bit integers,

								// store the result in 'temp'.  4 guard bits are needed since the

								// correlations are length 120, which means 7 guard bits are needed.

								// So we use 4 so that 4+4=8 guard bits are present in the product.


								  FloatToShortScaled(&Dpnt[Start-PitchMax+3],e->temp,PitchMax-3+2*SubFrLen,4);


								  // Build four copies of the target window, each starting one sample
								  // earlier in 'temp' than the previous one (offsets PitchMax-3 .. -6).
								  // The MakeAlignedN helpers are defined elsewhere.
								  MakeAligned4(&e->temp[PitchMax-3],e->taps[0],2*NTAPS);

								  MakeAligned2(&e->temp[PitchMax-4],e->taps[1],2*NTAPS);

								  MakeAligned0(&e->temp[PitchMax-5],e->taps[2],2*NTAPS);

								  MakeAligned6(&e->temp[PitchMax-6],e->taps[3],2*NTAPS);


								  // For copy i, clear the first i entries and the last 12-i entries so
								  // that only 2*SubFrLen taps stay non-zero in each copy.
								  for (i=0; i<4; i++)

								  {

								    for (k=0; k<i; k++)

								      e->taps[i][k] = 0;

								    for (k=NTAPS-12+i; k<NTAPS; k++)

								      e->taps[i][k] = 0;

								  }


								// Compute cross-correlations, store in corr[] array


								  CorrelateInt4(e->taps[0],e->temp,corr,NTAPS-12,NCOR>>2);

								  for (i=1; i<4; i++)

								    CorrelateInt4(e->taps[i],e->temp,&corr[i],NTAPS,NCOR>>2);


								// Now do the actual pitch search.


								  // corr[] is walked backwards (k decreasing) while i counts upwards
								  // and Pr moves one sample earlier on every iteration.
								  Pr = Start - PitchMin;

								  k = PitchMax-PitchMin-3;

								  E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);  // first energy value


								  for (i=0; i < (PitchMax-2-PitchMin); i++)

								  {

								// Update energy


								    // NOTE(review): Estim_Pitch decrements Pr before applying this
								    // same recursion; here it is applied at the position E was just
								    // computed for - confirm the intended window.
								    E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];


								// Check for new maximum


								    if (corr[k] > 0)

								    {

								      C2 = ((float)corr[k]) * ((float)corr[k]);

								      E2 = C2*MaxE;

								      Diff = (E2 - E*MaxC)*4.0f;

								      // asint() is defined outside this file; presumably it views the
								      // float's bits as an int so the compare is done with integers.
								      // NOTE(review): i is zero-based here while Indx starts at
								      // PitchMin - confirm the (i - Indx) distance test is as intended.
								      if (asint(Diff) > asint(E2) || (Diff > 0.0f && ((i - Indx) < PitchMin)))

								      {

								        Indx = i;

								        MaxE = E;

								        MaxC = C2;

								      }

								    }

								    Pr--;

								    k--;

								  }

								  return(Indx+PitchMin);

								}


								#endif


								//------------------------------------------------------

								// Estim_Pitch: floating-point open-loop pitch search.
								//   Dpnt  - sample buffer; Dpnt[Start .. Start+2*SubFrLen-1] is the target
								//           window and earlier samples supply the lagged windows.
								//   Start - index of the first target sample.
								// Every lag i in [PitchMin, PitchMax-3] is scored by C*C/E, where C is the
								// cross-correlation of the target with the window i samples earlier and E
								// is that window's energy (updated recursively).  A candidate replaces the
								// current best when 4*(C2*MaxE - E*MaxC) exceeds C2*MaxE, or when it is
								// merely better and lies less than PitchMin lags above the current best.
								// Returns the selected lag (PitchMin when no candidate qualifies).
								int Estim_Pitch(float *Dpnt, int Start)
								{
								  int  i;
								  int Pr,Indx = PitchMin;
								  float MaxE = 1.0f;
								  float MaxC = 0.0f;
								  float E,C,C2,E2,Diff;
								  int DiffBits,E2Bits;

								  Pr = Start - PitchMin + 1;

								// Init the energy estimate

								  E = DotProd(&Dpnt[Pr],&Dpnt[Pr],2*SubFrLen);

								// Main Open loop pitch search loop

								  for (i=PitchMin; i <= PitchMax-3; i++)
								  {
								    Pr--;

								// Update energy, compute cross

								    C = DotProd(&Dpnt[Start],&Dpnt[Pr],2*SubFrLen);
								    E = E - Dpnt[Pr+2*SubFrLen]*Dpnt[Pr+2*SubFrLen] + Dpnt[Pr]*Dpnt[Pr];
								    C2 = C*C;

								// Check for new maximum

								    E2 = C2*MaxE;
								    Diff = (E2 - E*MaxC)*4.0f;

								    // The first comparison is done on the raw bit patterns of the two
								    // floats.  Copy the bits with memcpy rather than reading the floats
								    // through int pointers, which the C aliasing rules do not permit.
								    // Like the original pointer casts, this assumes int and float have
								    // the same size.
								    memcpy(&DiffBits,&Diff,sizeof(DiffBits));
								    memcpy(&E2Bits,&E2,sizeof(E2Bits));

								    if (C > 0.0f && E > 0.0f &&
								      (DiffBits > E2Bits || (Diff > 0.0f && ((i - Indx) < PitchMin))))
								    {
								      Indx = i;
								      MaxE = E;
								      MaxC = C2;
								    }
								  }
								  return Indx;
								}


								//------------------------------------------------------

								// Comp_Pw: pick the lag and gain for the harmonic weighting filter.
								//   Dpnt  - sample buffer
								//   Start - index of the SubFrLen-sample target
								//   Olp   - centre lag; lags Olp-PwRange .. Olp+PwRange are examined
								// Returns a PWDEF whose Indx is the chosen lag (Olp when nothing
								// qualified) and whose Gain is 0 unless the best candidate is strong
								// enough relative to the target energy.
								PWDEF Comp_Pw(float *Dpnt, int Start, int Olp)
								{
								  int   step,pos;
								  float tgtEnergy,cross,lagEnergy,crossSq,gain;
								  float bestEnergy  = 1.0f;
								  float bestCross   = 0.0f;
								  float bestCrossSq = 0.0f;
								  PWDEF Result;

								  // Energy of the target itself
								  tgtEnergy = DotProd(&Dpnt[Start],&Dpnt[Start],SubFrLen);

								  Result.Indx = -1;
								  Result.Gain = 0.0f;

								  // Start at the shortest lag and slide one sample back per step
								  pos = Start - (Olp-PwRange);
								  lagEnergy = DotProd(&Dpnt[pos],&Dpnt[pos],SubFrLen);

								  for (step=0; step <= 2*PwRange; step++)
								  {
								    cross = DotProd(&Dpnt[Start],&Dpnt[pos],SubFrLen);

								    if (lagEnergy > 0.0f && cross > 0.0f)
								    {
								      // Compare cross^2/energy ratios without dividing
								      crossSq = cross*cross;
								      if (crossSq*bestEnergy > lagEnergy*bestCrossSq)
								      {
								        Result.Indx = step;
								        bestEnergy  = lagEnergy;
								        bestCross   = cross;
								        bestCrossSq = crossSq;
								      }
								    }

								    pos--;
								    if (pos < 0)
								      break;

								    // Recursive energy update for the window that now starts at pos
								    lagEnergy = lagEnergy - Dpnt[pos+SubFrLen]*Dpnt[pos+SubFrLen] + Dpnt[pos]*Dpnt[pos];
								  }

								  // No usable candidate: report the open-loop lag with zero gain
								  if (Result.Indx == -1)
								  {
								    Result.Indx = Olp;
								    return Result;
								  }

								  if (bestCrossSq > bestEnergy*tgtEnergy*0.375f)
								  {
								    if (bestCross > bestEnergy || bestEnergy == 0.0f)
								      gain = 1.0f;
								    else
								      gain = (float) fabs(bestCross)/bestEnergy;

								    Result.Gain = 0.3125f*gain;
								  }

								  // Convert the step number back into an absolute lag
								  Result.Indx = Olp - PwRange + Result.Indx;
								  return Result;
								}


								//--------------------------------------------------------------

								// Filt_Pw: apply the harmonic weighting to one subframe.
								//   DataBuff - output; DataBuff[Start .. Start+SubFrLen-1] is written
								//   Dpnt     - input, read at offset PitchMax+Start and Pw.Indx earlier
								//   Start    - subframe offset
								//   Pw       - lag (Indx) and gain (Gain) of the weighting filter
								void  Filt_Pw(float *DataBuff, float *Dpnt, int Start, PWDEF Pw)
								{
								  int   n;
								  int   base = PitchMax + Start;
								  float cur,lagged;

								  for (n=0; n < SubFrLen; n++)
								  {
								    cur    = Dpnt[base+n];
								    lagged = Dpnt[base-Pw.Indx+n];
								    DataBuff[Start+n] = cur - Pw.Gain*lagged;
								  }
								}


								//-----------------------------------------------------------------

								// Find_Fcbk: fixed-codebook search for one subframe.
								//   Dpnt    - target on entry, overwritten with the coded excitation
								//   ImpResp - impulse response handed to the search routines
								//   Line    - line structure; Olp[Sfc>>1] and Sfs[Sfc] are read/updated
								//   Sfc     - subframe number
								//   WrkRate - Rate63 selects the Find_Best path, Rate53 the ACELP path
								//   flags   - option bits (SC_FINDB is tested here, the rest passed on)
								//   UseMMX  - when COMPILE_MMX is set, selects ACELP_LBC_code_int
								void  Find_Fcbk(float *Dpnt, float *ImpResp, LINEDEF *Line, int Sfc, enum Crate WrkRate, int flags, int UseMMX)
								{
								  int  n;
								  int  Np,Olp,shortLag,T0_acelp;
								  float gain_T0;

								  BESTDEF  Best = {0};

								  switch(WrkRate)
								  {
								    case Rate63:
								      Np = Nb_puls[Sfc];
								      Best.MaxErr = -99999999.9f;

								      Olp = Line->Olp[Sfc>>1];
								      shortLag = (Olp < SubFrLen-2);

								      // The SubFrLen search is skipped only when SC_FINDB is set and
								      // the lag is short; the lag-based search runs for short lags.
								      if (!(flags & SC_FINDB) || !shortLag)
								        Find_Best(&Best, Dpnt, ImpResp, Np, SubFrLen);
								      if (shortLag)
								        Find_Best(&Best, Dpnt, ImpResp, Np, Olp);

								      // Rebuild the excitation from the winning pulses
								      for (n=0; n < SubFrLen; n++)
								        Dpnt[n] = 0.0f;
								      for (n=0; n < Np; n++)
								        Dpnt[Best.Ploc[n]] = Best.Pamp[n];

								      // Pack the result into the subframe structure
								      Fcbk_Pack(Dpnt, &(Line->Sfs[Sfc]), &Best, Np);

								      if (Best.UseTrn == 1)
								        Gen_Trn(Dpnt, Dpnt, Line->Olp[Sfc>>1]);
								      break;

								    case Rate53:
								      T0_acelp = search_T0 (Line->Olp[Sfc>>1]-1+Line->Sfs[Sfc].AcLg,
								        Line->Sfs[Sfc].AcGn, &gain_T0);

								#if COMPILE_MMX
								      if (UseMMX)
								      {
								        Line->Sfs[Sfc].Ppos = ACELP_LBC_code_int(Dpnt, ImpResp, T0_acelp, Dpnt,
								              &Line->Sfs[Sfc].Mamp,  &Line->Sfs[Sfc].Grid,
								              &Line->Sfs[Sfc].Pamp, gain_T0, flags);
								      }
								      else
								#endif //COMPILE_MMX
								      {
								        Line->Sfs[Sfc].Ppos = ACELP_LBC_code(Dpnt, ImpResp, T0_acelp, Dpnt,
								              &Line->Sfs[Sfc].Mamp,  &Line->Sfs[Sfc].Grid,
								              &Line->Sfs[Sfc].Pamp, gain_T0, flags);
								      }
								      Line->Sfs[Sfc].Tran = 0;
								      break;

								    default:
								      break;
								  }
								}


								//---------------------------------------------------------

								// Fcbk_Unpk: rebuild the fixed-codebook excitation from coded parameters.
								//   Tv      - output vector of SubFrLen samples
								//   Sfs     - coded subframe parameters (Ppos, Pamp, Mamp, Grid, Tran,
								//             AcLg, AcGn are read)
								//   Olp     - lag for the subframe pair
								//   Sfc     - subframe number
								//   WrkRate - Rate63 or Rate53; any other value leaves Tv untouched
								void  Fcbk_Unpk(float *Tv, SFSDEF Sfs, int Olp, int Sfc, enum Crate WrkRate)
								{
								  int  n,used,Np;
								  float  Scratch[SubFrLen+4];
								  float  amp,gain_T0;
								  int  signs,posBits,track,slot,T0_acelp;
								  Word32 Rem;

								  switch(WrkRate)
								  {
								    case Rate63:
								      Np = Nb_puls[Sfc];

								      for (n=0; n < SubFrLen; n++)
								        Tv[n] = 0.0f;

								      // Position codes at or above the table limit yield an all-zero vector
								      if (Sfs.Ppos >= MaxPosTable[Sfc])
								        return;

								      // Walk the combinatorial table to recover pulse positions and signs
								      used = MaxPulseNum - Np;
								      Rem = Sfs.Ppos;

								      for (n=0; n < SubFrLen/Sgrid; n++)
								      {
								        Rem -= CombinatorialTable[used][n];
								        if (Rem >= (Word32) 0)
								          continue;

								        // A pulse sits at this grid position
								        Rem += CombinatorialTable[used][n];
								        used++;

								        amp = FcbkGainTable[Sfs.Mamp];
								        if (Sfs.Pamp & (1 << (MaxPulseNum-used)))
								          Tv[Sfs.Grid + Sgrid*n] = -amp;
								        else
								          Tv[Sfs.Grid + Sgrid*n] =  amp;

								        if (used == MaxPulseNum)
								          break;
								      }

								      if (Sfs.Tran == 1)
								        Gen_Trn(Tv, Tv, Olp);
								      break;

								    case Rate53:
								      // The pulses are placed in a slightly longer scratch buffer and
								      // only the first SubFrLen samples are kept
								      for (n=0; n < SubFrLen+4; n++)
								        Scratch[n] = 0.0f;

								      amp     = FcbkGainTable[Sfs.Mamp];
								      signs   = Sfs.Pamp;
								      posBits = Sfs.Ppos;

								      // Four pulses: 3 position bits and 1 sign bit each
								      for (track=0; track<4; track++)
								      {
								        slot = ((posBits & 7) << 3) + Sfs.Grid + 2*track;
								        Scratch[slot] = (signs & 1) ? amp : -amp;

								        posBits >>= 3;
								        signs   >>= 1;
								      }

								      for (n=0; n < SubFrLen; n++)
								        Tv[n] = Scratch[n];

								      // Repeat the vector at lag T0_acelp when that lag is short enough
								      T0_acelp = search_T0(Olp-1+Sfs.AcLg, Sfs.AcGn, &gain_T0);
								      if (T0_acelp < SubFrLen-2)
								      {
								        for (n=T0_acelp; n < SubFrLen; n++)
								          Tv[n] += Tv[n-T0_acelp]*gain_T0;
								      }
								      break;

								    default:
								      break;
								  }
								}


								//---------------------------------------------------------------------

								// Acbk_Filt: output[n] = fac*impresp[n] + input[n] for n = 58 down to 0.
								//   output  - receives 59 floats
								//   input   - 59 floats added to the scaled response
								//   fac     - scale factor applied to impresp
								//   impresp - 59 floats
								// When OPT_ACBKF is non-zero, elements 58..3 are produced by the inline
								// assembly below; otherwise by the plain C loop.  Elements 2..0 are
								// always produced in C.
								void Acbk_Filt(float *output,float *input,float fac,float *impresp)
								{
								  int n;

								#if OPT_ACBKF

								  ASM
								  {
								    push esi;
								    push edi;
								    push ebx;

								    mov eax,58;
								    mov esi,input;
								    mov edi,output;
								    mov ebx,impresp;

								loop1:
								    fld  DP[ebx+4*eax];
								    fmul fac;
								    fld  DP[ebx+4*eax-4];
								    fmul fac;
								    fld  DP[ebx+4*eax-8];
								    fmul fac;
								    fld  DP[ebx+4*eax-12];
								    fmul fac;                // a3 a2 a1 a0

								    fxch ST(3);
								    fadd DP[esi+4*eax];      // b0 a2 a1 a3
								    fxch ST(2);
								    fadd DP[esi+4*eax-4];    // b1 a2 b0 a3
								    fxch ST(1);
								    fadd DP[esi+4*eax-8];    // b2 b1 b0 a3
								    fxch ST(3);
								    fadd DP[esi+4*eax-12];   // b3 b1 b0 b2

								    fxch ST(2);
								    fstp DP[edi+4*eax];      // b1 b3 b2
								    fstp DP[edi+4*eax-4];    // b3 b2
								    fxch ST(1);
								    fstp DP[edi+4*eax-8];
								    fstp DP[edi+4*eax-12];

								    sub eax,4;
								    cmp eax,2;
								    jg  loop1;

								    pop ebx;
								    pop edi;
								    pop esi;
								  }

								#else

								  // Portable path: same elements, same descending order
								  for (n=58; n>=3; n--)
								    output[n] = fac*impresp[n] + input[n];

								#endif

								  // Remaining three elements, common to both paths
								  for (n=2; n>=0; n--)
								    output[n] = fac*impresp[n] + input[n];
								}

								//---------------------------------------------------------------------

								#if COMPILE_MMX

								// Find_AcbkInt: 16-bit integer variant of the adaptive codebook search
								// (the float counterpart in this file is Find_Acbk).  Only compiled when
								// COMPILE_MMX is non-zero.
								//   Tv      - target vector; on exit the selected contribution has been
								//             subtracted from it
								//   ImpResp - impulse response (converted to 16-bit here)
								//   PrevExc - previous excitation, read through Get_Rez / Decod_Acbk
								//   Line    - Olp[Sfc>>1] is read and rewritten; Sfs[Sfc].AcLg and
								//             Sfs[Sfc].AcGn are written
								//   Sfc     - subframe number (its parity selects the lag range)
								//   WrkRate - Rate63 / Rate53, used for gain table selection
								//   flags   - SC_LAG1 and SC_GAIN are tested here
								//   CodStat - passed to Test_Err
								// The conversion, convolution and dot-product helpers (ConstFloatToInt,
								// ConvMMX, DotMMX60, FBufCalcInt, CodeBkSrch, ...) and the
								// DECLARE_* / ALIGN_ARRAY macros are defined outside this file.
								void  Find_AcbkInt(float *Tv, float *ImpResp, float *PrevExc, LINEDEF

								*Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)

								{

								  int  i,j,k;


								  float RezBuf[SubFrLen+ClPitchOrd-1];


								  short TvInt[SubFrLen];

								  int   Tvxi[SubFrLen];


								  short *lPntInt,*sPntInt,*PtrInt;


								  int	CorBufInt[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];

								  int  *lPntd;


								  long	Acc0l, Acc1l;


								  int   Olp,Lid,Gid,Hb,t,k1,k2;

								  int	Bound[3];

								  int	Lag1, Lag2;

								  int	MaxInt,off_filt;


								  int shift,Tshift,mx;


								  DECLARE_SHORT(FltBuf0Int,63);

								  DECLARE_SHORT(FltBuf1Int,63);

								  DECLARE_SHORT(FltBuf2Int,63);

								  DECLARE_SHORT(FltBuf3Int,63);

								  DECLARE_SHORT(FltBuf4Int,63) ;

								  DECLARE_SHORT(CorVctInt,4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)) ;

								  DECLARE_SHORT(RezTmpInt,16) ;

								  DECLARE_SHORT(RezBufInt,SubFrLen+ClPitchOrd-1) ;

								  DECLARE_SHORT(ImpRespInt,63);


								  DECLARE_SHORT(Ix,2*SubFrLen+16) ;

								  DECLARE_SHORT(Rx,2*SubFrLen+16);

								  DECLARE_INT(Temp,64);


								  ALIGN_ARRAY(FltBuf0Int);

								  ALIGN_ARRAY(FltBuf1Int);

								  ALIGN_ARRAY(FltBuf2Int);

								  ALIGN_ARRAY(FltBuf3Int);

								  ALIGN_ARRAY(FltBuf4Int);

								  ALIGN_ARRAY(RezBufInt);

								  ALIGN_ARRAY(RezTmpInt);

								  ALIGN_ARRAY(ImpRespInt);

								  ALIGN_ARRAY(CorVctInt);


								  ALIGN_ARRAY(Ix);

								  ALIGN_ARRAY(Rx);

								  ALIGN_ARRAY(Temp);


								  Olp = (*Line).Olp[Sfc>>1];

								  Lid = Pstep;

								  Gid = 0;

								  Hb  = 3 + (Sfc & 1);


								// For even frames only


								  if ((Sfc & 1) == 0)

								  {

								    if (Olp == PitchMin)

								      Olp++;

								    if (Olp > (PitchMax-5))

								      Olp = PitchMax-5;

								  }


								  // With SC_LAG1 only candidate k=1 is evaluated; otherwise candidates
								  // 0 .. Hb-1.  lPntInt is positioned on the first candidate's 20-entry
								  // slot of CorVctInt.
								  if (flags & SC_LAG1)

								  {

								   lPntInt = &CorVctInt[20];

								    k1 = 1;

								    k2 = 2;

								  }

								  else

								  {

								   lPntInt = CorVctInt;

								    k1 = 0;

								    k2 = Hb;

								  }


								//TIMER_SPOT_ON(Conversion);

								  //Convert Tv to 16-bit

								  ConstFloatToInt(Tv, Tvxi, SubFrLen, 32768.0f);

								  for(i=0; i<SubFrLen; i++) TvInt[i] = (short)(((Tvxi[i]<<1)+0x00008000)>>16);


								  //Convert ImpResp to 16-bit

								  //Scale by 2^14 & truncate bits right of decimal

								  ConstFloatToShort(ImpResp,ImpRespInt,SubFrLen,16384.0f);


								  // First pass: for each candidate lag fill 20 entries of CorBufInt
								  // (5 cross terms, 5 energies, 10 between-crosses).
								  for (k=k1; k<k2; k++)

								  {

								   lPntd = &CorBufInt[k*20];


								// Get residual from the excitation buffer


								  	Get_Rez(RezBuf, PrevExc, Olp-Pstep+k);


									//Convert RezBuf to 16-bit

									ConstFloatToShort(RezBuf,RezBufInt,SubFrLen+ClPitchOrd-1,1.0f);


									// Filter the last one (ClPitchOrd-1) using the impulse response

								//TIMER_SPOT_OFF(Conversion);

								//TIMER_SPOT_ON(Convolution);


									ab2abbcw(&RezBufInt[ClPitchOrd-1], Rx, SubFrLen);


									// Ix receives the impulse response in reversed order, each pair
									// of samples stored twice, followed by 16 zeros.
									j=0;

									for(i=0; i<SubFrLen;    i+=2){

										Ix[j]  =Ix[j+2]=ImpRespInt[SubFrLen-1-i];

										Ix[j+1]=Ix[j+3]=ImpRespInt[SubFrLen-2-i];

										j+=4;

									}

									for(i=0; i<16; i++)

										Ix[j+i]=0;


									ConvMMX(Rx, Ix, Temp, 60);

									for(i=0; i<SubFrLen; i++) FltBuf4Int[i] = (short)(((Temp[i]<<1)+0x00008000)>>16);


								//TIMER_SPOT_OFF(Convolution);

								//TIMER_SPOT_ON(FbufCalc);


									// Update the others (ClPitchOrd-2 down to 0)

									Acc0l = ((RezBufInt[3]<<13)+0x00004000)>>15;

									FltBuf3Int[0] = (short)Acc0l;


									Acc0l = ((RezBufInt[2]<<13)+0x00004000)>>15;

									FltBuf2Int[0] = (short)Acc0l;


									Acc0l = ((RezBufInt[1]<<13)+0x00004000)>>15;

									FltBuf1Int[0] = (short)Acc0l;


									Acc0l = ((RezBufInt[0]<<13)+0x00004000)>>15;

									FltBuf0Int[0] = (short)Acc0l;


									DupRezBuf(RezBufInt,RezTmpInt);


									FBufCalcInt(FltBuf4Int,FltBuf3Int,ImpRespInt,RezTmpInt,0);

									FBufCalcInt(FltBuf3Int,FltBuf2Int,ImpRespInt,RezTmpInt,1);

									FBufCalcInt(FltBuf2Int,FltBuf1Int,ImpRespInt,RezTmpInt,2);

									FBufCalcInt(FltBuf1Int,FltBuf0Int,ImpRespInt,RezTmpInt,3);


								//TIMER_SPOT_OFF(FbufCalc);

								//TIMER_SPOT_ON(Dots);

									// Compute the cross products with the signal


									*lPntd++ = DotMMX60(TvInt,FltBuf0Int)<<1;

									*lPntd++ = DotMMX60(TvInt,FltBuf1Int)<<1;

									*lPntd++ = DotMMX60(TvInt,FltBuf2Int)<<1;

									*lPntd++ = DotMMX60(TvInt,FltBuf3Int)<<1;

									*lPntd++ = DotMMX60(TvInt,FltBuf4Int)<<1;


								// Compute the energies


									*lPntd++ = DotMMX60(FltBuf0Int,FltBuf0Int)<<1;

									*lPntd++ = DotMMX60(FltBuf1Int,FltBuf1Int)<<1;

									*lPntd++ = DotMMX60(FltBuf2Int,FltBuf2Int)<<1;

									*lPntd++ = DotMMX60(FltBuf3Int,FltBuf3Int)<<1;

									*lPntd++ = DotMMX60(FltBuf4Int,FltBuf4Int)<<1;


								// Compute the between crosses


									*lPntd++ = DotMMX60(FltBuf1Int,FltBuf0Int)<<2;


									*lPntd++ = DotMMX60(FltBuf2Int,FltBuf0Int)<<2;

									*lPntd++ = DotMMX60(FltBuf2Int,FltBuf1Int)<<2;


									*lPntd++ = DotMMX60(FltBuf3Int,FltBuf0Int)<<2;

									*lPntd++ = DotMMX60(FltBuf3Int,FltBuf1Int)<<2;

									*lPntd++ = DotMMX60(FltBuf3Int,FltBuf2Int)<<2;


									*lPntd++ = DotMMX60(FltBuf4Int,FltBuf0Int)<<2;

									*lPntd++ = DotMMX60(FltBuf4Int,FltBuf1Int)<<2;

									*lPntd++ = DotMMX60(FltBuf4Int,FltBuf2Int)<<2;

									*lPntd++ = DotMMX60(FltBuf4Int,FltBuf3Int)<<2;


								//TIMER_SPOT_OFF(Dots);


								  }


								  //Convert k1 through k2 indices of CorBufInt to 16-bit

								  //	values

								  // Acc1l ends up holding the largest absolute value among the entries
								  // that were just computed.
								  Acc1l = 0L;

								  for(j=k1; j<k2; j++)

								  {

								  	 for(i=0; i<20; i++)

									 {

								  	 	Acc0l = abs(CorBufInt[j*20 + i]);

									 	if( Acc0l > Acc1l) Acc1l = Acc0l;

									 }

								  }


								  //Need a convert_long_to_short routine

								  // Shift every entry left by the normalisation count of the largest
								  // magnitude, round, and keep the upper 16 bits in CorVctInt.
								  shift = norm(Acc1l);

								  for(j=k1; j<k2; j++)

								  {

									 for(i=0; i<20; i++)

									 {

								  	  	CorBufInt[j*20 + i]=CorBufInt[j*20 + i]<<shift;

									  	CorBufInt[j*20 + i] += 0x00008000L; //round up to 16 MSBs

									  	*lPntInt++=(short)(CorBufInt[j*20 + i]>>16);

									  }

								  }


								  /* Test potential error */

								  Lag1 = Olp - Pstep;

								  Lag2 = Olp - Pstep + Hb - 1;


								  off_filt = Test_Err(Lag1, Lag2, CodStat);


								  // Number of gain-table entries searched, capped at the table sizes
								  Bound[0] =  NbFilt085_min + (off_filt << 2);

								  if(Bound[0] > NbFilt085) Bound[0] = NbFilt085;

								  Bound[1] =  NbFilt170_min + (off_filt << 3);

								  if(Bound[1] > NbFilt170) Bound[1] = NbFilt170;


								  Bound[2] = 85; //Use subset table in the case t=2


								  MaxInt = 0;


								  // Second pass: search the gain table for every candidate lag
								  for (k=k1; k<k2; k++)

								  {


								// Select Quantization table


								    t = 0;

								    if (WrkRate == Rate63)

								    {

								      if ((Sfc & 1) == 0)

								      {

								        if (Olp-Pstep+k >= SubFrLen-2)

								          t = 1;

								      }

								      else

								      {

								        if (Olp >= SubFrLen-2)

								          t = 1;

								      }

								    }

								    else

								      t = 1;


									/* If Bound=170 and SC_GAIN=TRUE, use 170subset table.

									   Else, use full table with limited Bound.*/

								    //if (t==1 && (flags & SC_GAIN) && Bound[t]==NbFilt170)

									if ((WrkRate == Rate53) && (flags & SC_GAIN) && (Bound[t]==NbFilt170))

								      t = 2;


								// Search for maximum

								//t=1;


									sPntInt = AcbkGainTablePtrInt[t];

									PtrInt = &CorVctInt[k*20];


								//TIMER_SPOT_ON(CodeBook);


									CodeBkSrch(PtrInt, sPntInt, Bound[t], &Gid, &MaxInt);


								//TIMER_SPOT_OFF(CodeBook);


								    // NOTE(review): this remapping runs on every iteration; whether
								    // CodeBkSrch rewrites Gid each time is not visible here, so with
								    // more than one candidate an unchanged Gid might be remapped
								    // again - confirm.
								    if (t==2)

									 Gid = GainScramble[Gid];

								    //else

								        //Gid = Gid;


									// NOTE(review): Lid is set to k unconditionally, so after the loop
									// it always equals k2-1.  Find_Acbk only updates Lid when a new
									// maximum is found - confirm this difference is intended.
									Lid = k;

								  }


								// Modify Olp for even sub frames


								  if ((Sfc & 1) == 0)

								  {

								    Olp = Olp - Pstep + Lid;

								    Lid = Pstep;

								  }


								// Save Lag, Gain and Olp


								  (*Line).Sfs[Sfc].AcLg = Lid;

								  (*Line).Sfs[Sfc].AcGn = Gid;

								  (*Line).Olp[Sfc>>1] = Olp;


								//ASM emms;


								/* ------------------------------ FLOAT -----------------------*/


								// Decode the Acbk contribution and subtract it


								  Decod_Acbk(RezBuf, PrevExc, Olp, Lid, Gid, WrkRate);


								//TIMER_SPOT_ON(LastConvolv);


								  // Tshift is derived from the value returned by FloatToShortScaled and
								  // is forced to 0 when that value is 0.
								  mx = FloatToShortScaled(RezBuf, RezBufInt, SubFrLen+ClPitchOrd-1, 3);

								  Tshift = 11 - (mx-126);

								  if(mx==0) Tshift = 0;


								  // Ix still holds the reversed impulse response written in the first
								  // candidate loop above.
								  ab2abbcw(RezBufInt, Rx, 60);

								  ConvMMX(Rx, Ix, Temp, SubFrLen);


								  //ASM emms;


								  // Rescale the convolution by Tshift (right shift when non-negative,
								  // left shift otherwise), subtract it from the integer target and
								  // convert back to float.
								  if (Tshift >=0) {

									for(j=0; j<SubFrLen; j++){

										Temp[j] = Temp[j]>>Tshift;

										Tv[j]=((float)(Tvxi[j] - Temp[j]))*0.00003052f;

									}

								  }

								  else

								  {

								    for(j=0; j<SubFrLen; j++){

										Temp[j] = Temp[j]<<(-Tshift);

										Tv[j]=((float)(Tvxi[j] - Temp[j]))*0.00003052f;

									}

								  }


								//TIMER_SPOT_OFF(LastConvolv);


								}

								#endif //COMPILE_MMX


								// norm: number of left shifts needed to normalise L_var1, i.e. to bring
								// a positive value into [0x40000000, ...) or the one's complement of a
								// negative value into that range.
								//   L_var1 - value to normalise
								// Returns 0 for an input of 0, 31 for an input of -1, otherwise the
								// shift count.
								short norm(long L_var1)
								{
								    short var_out = 0;

								    if (L_var1 == 0L)
								        return (short)0;

								    // Compare against -1 directly.  The former (long)0xffffffffL only
								    // equals -1 when long is 32 bits wide; with a wider long, -1 fell
								    // through to the loop below as ~(-1) == 0 and never terminated.
								    if (L_var1 == -1L)
								        return (short)31;

								    if (L_var1 < 0L)
								        L_var1 = ~L_var1;

								    while (L_var1 < 0x40000000L)
								    {
								        L_var1 <<= 1;
								        var_out++;
								    }

								    return(var_out);
								}

								/*---------------------------------------------------------------------*/

								// Find_Acbk: floating-point adaptive codebook search for one subframe.
								//   Tv      - target vector; the selected contribution is subtracted
								//             from it before returning
								//   ImpResp - impulse response
								//   PrevExc - previous excitation, read through Get_Rez / Decod_Acbk
								//   Line    - Olp[Sfc>>1] is read and rewritten; Sfs[Sfc].AcLg and
								//             Sfs[Sfc].AcGn are written
								//   Sfc     - subframe number (its parity selects the lag range)
								//   WrkRate - Rate63 / Rate53, used for gain table selection
								//   flags   - SC_LAG1 restricts the search to one lag, SC_GAIN enables
								//             the subset gain table
								//   CodStat - passed to Test_Err
								void  Find_Acbk(float *Tv, float *ImpResp, float *PrevExc, LINEDEF
								*Line, int Sfc, enum Crate WrkRate, int flags, CODDEF *CodStat)
								{
								  int   n,m,cand;
								  float Score,Best;

								  float Rez[SubFrLen+ClPitchOrd-1];
								  float Filt[ClPitchOrd][SubFrLen];
								  float Corr[4*(2*ClPitchOrd + ClPitchOrd*(ClPitchOrd-1)/2)];
								  float *wr;
								  float *gn,*cv;
								  int   Olp,Lid,Gid,Hb,tbl,first,last;
								  int   Bound[3];
								  int   off_filt;

								  Olp = Line->Olp[Sfc>>1];
								  Lid = Pstep;
								  Gid = 0;
								  Hb  = 3 + (Sfc & 1);

								  // Even subframes: keep the lag inside the searchable range
								  if (!(Sfc & 1))
								  {
								    if (Olp == PitchMin)
								      Olp++;
								    if (Olp > (PitchMax-5))
								      Olp = PitchMax-5;
								  }

								  // Candidate range: just index 1 under SC_LAG1, else 0 .. Hb-1.
								  // The write pointer starts on the first candidate's 20-entry slot.
								  first = (flags & SC_LAG1) ? 1 : 0;
								  last  = (flags & SC_LAG1) ? 2 : Hb;
								  wr = &Corr[20*first];

								  for (cand=first; cand<last; cand++)
								  {
								    // Residual for this candidate lag
								    Get_Rez(Rez, PrevExc, Olp-Pstep+cand);

								    // Filter the last tap (ClPitchOrd-1) with the impulse response
								    for (n=0; n < SubFrLen; n++)
								      Filt[ClPitchOrd-1][n] = DotRev(&Rez[ClPitchOrd-1],ImpResp,n+1);

								    // Derive the remaining taps (ClPitchOrd-2 down to 0) recursively
								    for (n=ClPitchOrd-2; n >= 0; n--)
								    {
								      Filt[n][0] = Rez[n]*0.5f;
								      Acbk_Filt(&Filt[n][1],&Filt[n+1][0],Rez[n],&ImpResp[1]);
								    }

								    // Cross products with the target
								    for (n=0; n < ClPitchOrd; n++)
								      *wr++ = DotProd(Tv, Filt[n], SubFrLen);

								    // Energies
								    for (n=0; n < ClPitchOrd; n++)
								      *wr++ = 0.5f*DotProd(Filt[n], Filt[n], SubFrLen);

								    // Crosses between different taps
								    for (n=1; n < ClPitchOrd; n++)
								      for (m=0; m < n; m++)
								        *wr++ = DotProd(Filt[n], Filt[m], SubFrLen);
								  }

								  /* Test potential error */
								  off_filt = Test_Err(Olp - Pstep, Olp - Pstep + Hb - 1, CodStat);

								  // Number of gain-table entries to search, capped at the table sizes
								  Bound[0] = NbFilt085_min + (off_filt << 2);
								  if (Bound[0] > NbFilt085)
								    Bound[0] = NbFilt085;
								  Bound[1] = NbFilt170_min + (off_filt << 3);
								  if (Bound[1] > NbFilt170)
								    Bound[1] = NbFilt170;
								  Bound[2] = 85; //Use subset table in the case t=2

								  Best = 0.0f;

								  for (cand=first; cand<last; cand++)
								  {
								    // Select the quantization table
								    if (WrkRate != Rate63)
								      tbl = 1;
								    else if (!(Sfc & 1))
								      tbl = (Olp-Pstep+cand >= SubFrLen-2) ? 1 : 0;
								    else
								      tbl = (Olp >= SubFrLen-2) ? 1 : 0;

								    /* If Bound=170 and SC_GAIN=TRUE, use 170subset table.
								       Else, use full table with limited Bound.*/
								    if (tbl==1 && (flags & SC_GAIN) && Bound[1]==NbFilt170)
								      tbl = 2;

								    gn = AcbkGainTablePtr[tbl];
								    cv = &Corr[cand*20];

								    // Search for the maximum score over the table entries
								    for (n=0; n < Bound[tbl]; n++)
								    {
								      Score = cv[0]*gn[0] + cv[1]*gn[1] +
								        cv[2]*gn[2] + cv[3]*gn[3] +
								        cv[4]*gn[4] + cv[5]*gn[5] +
								        cv[6]*gn[6] + cv[7]*gn[7] +
								        cv[8]*gn[8] + cv[9]*gn[9] +
								        cv[10]*gn[10] + cv[11]*gn[11] +
								        cv[12]*gn[12] + cv[13]*gn[13] +
								        cv[14]*gn[14] + cv[15]*gn[15] +
								        cv[16]*gn[16] + cv[17]*gn[17] +
								        cv[18]*gn[18] + cv[19]*gn[19];

								      gn += 20;

								      if (asint(Score) > asint(Best))  // integer cmp, since Best is not negative.
								      {
								        Best = Score;
								        Gid  = (tbl==2) ? GainScramble[n] : n;
								        Lid  = cand;
								      }
								    }
								  }

								  // Even subframes fold the winning offset into Olp
								  if (!(Sfc & 1))
								  {
								    Olp = Olp - Pstep + Lid;
								    Lid = Pstep;
								  }

								  // Save Lag, Gain and Olp
								  Line->Sfs[Sfc].AcLg = Lid;
								  Line->Sfs[Sfc].AcGn = Gid;
								  Line->Olp[Sfc>>1] = Olp;

								  // Decode the Acbk contribution and subtract it from the target
								  Decod_Acbk(Rez, PrevExc, Olp, Lid, Gid, WrkRate);

								  for (n=0; n < SubFrLen; n++)
								    Tv[n] -= DotRev(Rez,ImpResp,n+1);
								}


								//-----------------------------------------------------------------

								// Get_Rez: build the lagged residual used by the adaptive codebook.
								//   Tv      - output, receives ClPitchOrd/2 + SubFrLen + ClPitchOrd/2 floats
								//   PrevExc - previous excitation; samples around PrevExc[PitchMax-Lag]
								//             are read
								//   Lag     - pitch lag in samples
								// The first ClPitchOrd/2 outputs are the samples just before
								// PrevExc[PitchMax-Lag]; the remainder is the Lag-sample segment starting
								// at PrevExc[PitchMax-Lag] repeated as often as needed.
								// A Lag that is zero or negative cannot be used (it would divide by zero
								// or address outside the history), so the output is zero-filled instead.
								void  Get_Rez(float *Tv, float *PrevExc, int Lag)
								{
								  int  i,n,div,mod;
								  float *src,*dst;

								  n = SubFrLen+ClPitchOrd/2;

								  if (Lag <= 0)
								  {
								    for (i=0; i < ClPitchOrd/2 + n; i++)
								      Tv[i] = 0.0f;
								    return;
								  }

								  for (i=0; i < ClPitchOrd/2; i++)
								    Tv[i] = PrevExc[PitchMax - Lag - ClPitchOrd/2 + i];

								  div = n/Lag;
								  mod = n%Lag;

								  dst = &Tv[ClPitchOrd/2];
								  src = &PrevExc[PitchMax-Lag];

								  // Whole repetitions of the Lag-sample segment ...
								  for (i=0; i<div; i++)
								  {
								    memcpy(dst,src,Lag*sizeof(float));
								    dst += Lag;
								  }
								  // ... followed by the partial one
								  memcpy(dst,src,mod*sizeof(float));
								}


								//-----------------------------------------------------------------

								// Decod_Acbk: compute the adaptive codebook contribution.
								//   Tv      - output, SubFrLen samples
								//   PrevExc - previous excitation (read through Get_Rez)
								//   Olp     - lag for the subframe pair
								//   Lid     - lag index; the lag used is Olp + Lid - Pstep
								//   Gid     - gain vector index (20 table entries per index)
								//   WrkRate - with Rate63 table 1 is used only when Olp >= SubFrLen-2;
								//             any other rate always uses table 1
								void  Decod_Acbk(float *Tv, float *PrevExc, int Olp, int Lid, int Gid, enum Crate WrkRate)
								{
								  int   n,tbl;
								  float Rez[SubFrLen+ClPitchOrd-1];
								  float *gn;

								  Get_Rez(Rez, PrevExc, (Olp + Lid) - Pstep);

								  // Select the quantization table
								  tbl = (WrkRate != Rate63 || Olp >= (SubFrLen-2)) ? 1 : 0;
								  gn = AcbkGainTablePtr[tbl] + Gid*20;

								  // Five-tap combination of the lagged residual
								  for (n=0; n < SubFrLen; n++)
								    Tv[n] = Rez[n]*gn[0] + Rez[n+1]*gn[1] + Rez[n+2]*gn[2] +
								      Rez[n+3]*gn[3] + Rez[n+4]*gn[4];
								}


								//-----------------------------------------------

								// Comp_Info: decide whether the end of the buffer is periodic enough to
								// be extended by pitch repetition.
								//   Buff - excitation buffer; the last 2*SubFrLen samples before
								//          Buff[PitchMax+Frame] form the target
								//   Olp  - lag estimate (limited to PitchMax-3)
								// Lags Olp-3 .. Olp+3 are tried.  Returns the lag with the largest
								// positive cross-correlation when its square exceeds one eighth of the
								// product of the two energies, otherwise 0.
								int  Comp_Info(float Buff[60], int Olp)
								{
								  int    lag,best;
								  float  cross,bestCross;
								  float  tgtEnergy,lagEnergy;
								  float *tgt = &Buff[PitchMax+Frame-2*SubFrLen];

								  if (Olp > (PitchMax-3))
								    Olp = (PitchMax-3);

								  best = Olp;
								  bestCross = 0.0f;

								  for (lag=Olp-3; lag <= Olp+3; lag++)
								  {
								    cross = DotProd(tgt, tgt-lag, 2*SubFrLen);
								    if (cross > bestCross)
								    {
								      bestCross = cross;
								      best = lag;
								    }
								  }

								  // Energy of the target and of the best lagged window
								  tgtEnergy = DotProd(tgt, tgt, 2*SubFrLen);
								  lagEnergy = DotProd(tgt-best, tgt-best, 2*SubFrLen);

								  if (bestCross <= 0.0f)
								    return 0;

								  return (((0.125f*lagEnergy*tgtEnergy) - (bestCross*bestCross)) < 0.0f) ? best : 0;
								}


								//------------------------------------------------------------------

								// Regen: produce a replacement excitation frame.
								//   DataBuff - output frame (Frame samples)
								//   Buff     - excitation history of PitchMax+Frame samples, updated
								//   Lag      - non-zero: repeat the history with this lag; zero: noise
								//   Gain     - scale applied to the noise in the Lag == 0 case
								//   Ecount   - error count; at ErrMaxNum or above everything is cleared
								//   Sd       - random generator state handed to Rand_lbc
								void    Regen(float *DataBuff, float *Buff, int Lag, float Gain,
								              int Ecount, int *Sd)
								{
								  int  n;

								  // Too many errors: clear the output and the history
								  if (Ecount >= ErrMaxNum)
								  {
								    for (n=0; n < Frame; n++)
								      DataBuff[n] = 0.0f;
								    for (n=0; n < Frame+PitchMax; n++)
								      Buff[n] = 0.0f;
								    return;
								  }

								  if (Lag != 0)
								  {
								    // Voiced case: extend the history periodically, then attenuate.
								    // The two passes are kept separate so the extension reads
								    // unattenuated samples.
								    for (n=0; n < Frame; n++)
								      Buff[PitchMax+n] = Buff[PitchMax-Lag+n];
								    for (n=0; n < Frame; n++)
								    {
								      Buff[PitchMax+n] = Buff[PitchMax+n] *  0.75f;
								      DataBuff[n] = Buff[PitchMax+n];
								    }
								    return;
								  }

								  // Unvoiced case: scaled random samples
								  for (n=0; n < Frame; n++)
								    DataBuff[n] = Gain*(float)Rand_lbc(Sd)*(1.0f/16384.0f);

								  // Clear buffer to reset memory
								  for (n=0; n < Frame+PitchMax; n++)
								    Buff[n] = 0.0f;
								}


								//------------------------------------------------------

								//Comp_Lpf


								//------------------------------------------------------

								//Find_B


								//------------------------------------------------------

								//Find_F


								//------------------------------------------------------

								//Get_Ind


								//------------------------------------------------------

								//Filt_Lpf


								//---------------------------------------------------------------

								// search_T0
								//
								// Looks up entry Gid of the epsi170 / gain170 tables: stores gain170[Gid]
								// in *gain_T0 and returns T0 offset by epsi170[Gid].
								int search_T0 (int T0, int Gid, float *gain_T0)
								{
									const int Adjusted = T0 + epsi170[Gid];

									*gain_T0 = gain170[Gid];

									return Adjusted;
								}


								/*

								**

								** Function:    Update_Err()

								**

								** Description:   Estimation of the excitation error associated

								**          to the excitation signal when it is disturbed at

								**          the decoder, the disturbing signal being filtered

								**          by the long term synthesis filters

								**          one value for (SubFrLen/2) samples

								**          Updates the table CodStat.Err

								**

								** Links to text:   Section

								**

								** Arguments:

								**

								**  int Olp    Center value for pitch delay

								**  int AcLg   Offset value for pitch delay

								**  int AcGn   Index of Gain LT filter

								**  CODDEF *CodStat   Coder state: WrkRate is read, Err[] is read and updated

								**

								** Outputs: None

								**

								** Return value:  None

								**

								*/


								#define MAX 256.0f


								void Update_Err(int Olp, int AcLg, int AcGn, CODDEF *CodStat)
								{
								  // Two worst-case error values (W0, W1) are derived from the existing
								  // CodStat->Err[] history and the gain Beta, clipped to MAX, and pushed
								  // to the front of the history (older entries move back by two).
								  const int Lag = Olp - Pstep + AcLg;
								  float *GainTab;
								  float Beta;
								  float Cand;
								  float W0, W1;
								  int Zone;

								  // tabgain85 is only used at Rate63 with Olp below SubFrLen-2;
								  // every other case reads tabgain170.
								  if ((CodStat->WrkRate == Rate63) && (Olp < (SubFrLen-2)))
								    GainTab = tabgain85;
								  else
								    GainTab = tabgain170;

								  Beta = GainTab[AcGn];

								  if (Lag <= (SubFrLen/2))
								  {
								    // Short lag: both values come from the newest entry
								    W0 = CodStat->Err[0]*Beta + Err0;
								    W1 = W0;
								  }
								  else
								  {
								    // (Lag*1092) >> 15 is an integer approximation of Lag/30
								    Zone = (Lag*1092) >> 15;

								    if (30*(Zone+1) == Lag)
								    {
								      // Lag is exactly 30*(Zone+1): two entries, no maximum taken
								      W0 = CodStat->Err[Zone-1]*Beta + Err0;
								      W1 = CodStat->Err[Zone]*Beta + Err0;
								    }
								    else if (Zone == 1)
								    {
								      // Both outputs take the larger of the two newest entries
								      W0 = CodStat->Err[0]*Beta + Err0;
								      W1 = CodStat->Err[1]*Beta + Err0;

								      if (W0 > W1)
								        W1 = W0;
								      else
								        W0 = W1;
								    }
								    else
								    {
								      // The middle entry (Zone-1) contributes to both outputs
								      Cand = CodStat->Err[Zone-1]*Beta + Err0;

								      W0 = CodStat->Err[Zone-2]*Beta + Err0;
								      if (Cand > W0)
								        W0 = Cand;

								      W1 = CodStat->Err[Zone]*Beta + Err0;
								      if (Cand > W1)
								        W1 = Cand;
								    }
								  }

								  // Clip
								  if (W0 > MAX)
								    W0 = MAX;
								  if (W1 > MAX)
								    W1 = MAX;

								  // Age the history by two slots, oldest first
								  CodStat->Err[4] = CodStat->Err[2];
								  CodStat->Err[3] = CodStat->Err[1];
								  CodStat->Err[2] = CodStat->Err[0];

								  CodStat->Err[0] = W0;
								  CodStat->Err[1] = W1;
								}


								/*

								**

								** Function:    Test_Err()

								**

								** Description:   Check the error excitation maximum for

								**          the subframe and computes an index iTest used to

								**          calculate the maximum nb of filters (in Find_Acbk) :

								**          Bound = Min(Nmin + iTest x pas, Nmax) , with

								**          AcbkGainTable085 : pas = 2, Nmin = 51, Nmax = 85

								**          AcbkGainTable170 : pas = 4, Nmin = 93, Nmax = 170

								**          iTest depends on the relative difference between

								**          errmax and a fixed threshold

								**

								** Links to text:   Section

								**

								** Arguments:

								**

								**  int Lag1       1st long term Lag of the tested zone

								**  int Lag2       2nd long term Lag of the tested zone

								**  CODDEF *CodStat   Coder state: Err[] and SinDet are read

								**

								** Outputs: None

								**

								** Return value:

								**  int         index iTest used to compute Acbk number of filters

								*/


								int Test_Err(int Lag1, int Lag2, CODDEF *CodStat)
								{
								  // Scans CodStat->Err[] over the 30-sample zones spanned by the two
								  // lags and turns the largest entry into the index iTest.
								  int FirstZone, LastZone, Zone;
								  float Worst = -1.0f;

								  // Upper zone: Lag2 extended by half the pitch predictor order
								  LastZone = (Lag2 + ClPitchOrd/2) / 30;

								  // Lower zone: start position clamped to 1 when it is not positive
								  FirstZone = - SubFrLen + 1 + Lag1 - ClPitchOrd/2;
								  if (FirstZone <= 0)
								    FirstZone = 1;
								  FirstZone /= 30;

								  // Largest error over the zones, scanned from the top zone down
								  Zone = LastZone;
								  while (Zone >= FirstZone)
								  {
								    if (CodStat->Err[Zone] > Worst)
								      Worst = CodStat->Err[Zone];
								    Zone--;
								  }

								  // Above the threshold, or SinDet negative: index 0
								  if ((Worst > ThreshErr) || (CodStat->SinDet < 0 ))
								    return 0;

								  // Otherwise the index is the truncated distance to the threshold
								  return (int)(ThreshErr - Worst);
								}


								#if COMPILE_MMX


								#if ASM_FACBK


								// DotMMX60 (inline-assembly version, built when ASM_FACBK is non-zero)
								//
								// Returns the dot product of two vectors of 60 signed 16-bit values,
								// accumulated in 32 bits.
								//
								//   ind   first vector  (addressed through esi)
								//   oud   second vector (addressed through edi)
								//
								// The loop runs 5 times.  Each pass loads three qwords (12 words) from
								// ind, multiplies them with the matching qwords of oud using pmaddwd and
								// adds the results into acc0; both pointers then advance by 24 bytes.
								// The adds for columns 1 and 2 are issued one pass late (interleaved with
								// the next pass's loads), which is why reg1/reg2 are cleared before the
								// loop and two trailing adds follow it.
								//
								// ASM and QP are macros defined outside this function (presumably __asm
								// and "qword ptr" -- confirm in opt.h / mmxutil.h).
								int DotMMX60(short *ind, short *oud)

								{

								int dotprod;


								// MMX register aliases: three working registers and the accumulator
								#define reg0  mm0

								#define reg1  mm1

								#define reg2  mm2

								#define acc0  mm6


								// General-purpose register aliases: the two vector pointers, the result
								// register and the loop counter
								#define inx	  esi

								#define oux	  edi

								#define dot   eax

								#define jcnt  ebx


								// Per-column steps: l = load qword n of ind, m = multiply-add it with
								// qword n of oud, a = add column n into the accumulator
								#define l(n)  ASM movq    reg##n,QP[inx+8*n]

								#define m(n)  ASM pmaddwd reg##n,QP[oux+8*n]

								#define a(n)  ASM paddd   acc0,reg##n


								  ASM

								  {

								    mov		inx,ind;

								    mov		oux,oud;

									mov		jcnt,5;

								  }


								//Begin loop
								// Clear the accumulator and the two registers whose adds lag by a pass


								 ASM pxor	acc0,acc0;

								 ASM pxor	reg1,reg1;   //make first a(1) a nop

								 ASM pxor	reg2,reg2;   //make first a(2) a nop


								inner:

								//------------------
								// Column 0 is loaded, multiplied and added within the pass; a(1) and a(2)
								// add the products left in reg1/reg2 by the previous pass.

								l(0);

										a(1);

								m(0);

										l(1);

												a(2);

										m(1);

												l(2);

								a(0);

												m(2);

								//-------------------


								// Advance both vectors by three qwords
								ASM add inx,24;

								ASM add oux,24;


								// Repeat while the counter is still positive after the decrement
								ASM sub jcnt,1;

								ASM jg inner;


								// Flush the column 1 and 2 products of the final pass
								a(1);

								a(2);


								ASM

								{

									//Add the two halves of acc0

								    movq  reg0,acc0;

								    psrlq acc0,32;

								    paddd acc0,reg0;

									movd  dot,acc0; //store

									mov   dotprod,dot

								}


								// Leave MMX state before returning to C code
								ASM emms;


								 return(dotprod);

								#undef reg0

								#undef reg1

								#undef reg2

								#undef acc0


								#undef inx

								#undef oux

								#undef dot

								#undef jcnt


								#undef l

								#undef m

								#undef a


								}


								#else


								// DotMMX60 (C version, built when ASM_FACBK is zero)
								//
								// Returns the dot product of two vectors of 60 signed 16-bit values,
								// accumulated in an int.
								//
								//   in    first vector,  60 elements
								//   out   second vector, 60 elements
								int DotMMX60(short *in, short *out)
								{
								int dotprod;
								int j;

								dotprod=0;

								// The loop counter and the element index must be the same variable:
								// every one of the 60 element pairs contributes exactly once.
								for(j=0; j < 60; j++)
								{
								  dotprod += in[j]*out[j];
								}

								return(dotprod);
								}


								#endif


								#if ASM_FACBK


								// DupRezBuf (inline-assembly version, built when ASM_FACBK is non-zero)
								//
								// Reads four 16-bit values a,b,c,d from rezbuf[0..3], doubles each one
								// (psllw by 1; bits shifted out of a 16-bit lane are lost) and writes four
								// qwords to reztemp, each holding one doubled value replicated in all
								// four lanes.  The order is reversed: qword 0 holds d, qword 1 holds c,
								// qword 2 holds b and qword 3 holds a.
								//
								//   rezbuf    source, four words are read
								//   reztemp   destination, four qwords (16 words) are written
								//
								// ASM and QP are macros defined outside this function (presumably __asm
								// and "qword ptr" -- confirm in opt.h / mmxutil.h).
								void DupRezBuf(short *rezbuf, short *reztemp)

								{

								  #define reg0	mm0

								  #define reg1	mm1

								  #define reg2	mm2

								  #define reg3	mm3


								  #define rbuf	edi

								  #define rztmp esi


									//rezbuf duplication operations
									// cr = copy register, uph/upl = duplicate the high/low two words
									// of a register, sto = store to qword i of reztemp, sl = double
									// each word, l = load the first qword of rezbuf

								  #define cr(r0,r1) ASM movq reg##r0,reg##r1

								  #define uph(r0)   ASM punpckhwd reg##r0,reg##r0

								  #define upl(r0)   ASM punpcklwd reg##r0,reg##r0

								  #define sto(r0,i) ASM movq QP[rztmp+8*i],reg##r0

								  #define sl(r0)	ASM psllw reg##r0,1

								  #define l(r0)		ASM movq reg##r0,QP[rbuf]


								  //Duplicate first 4 rezbuf values 4 times each

								  //	and store into 4 QWORDS in reztemp

								  //Multiply by two while we're at it

								  ASM mov rbuf,rezbuf;

								  ASM mov rztmp,reztemp;


									// Lane contents below are listed low word first and refer to
									// the doubled values.
									l(0);        // reg0 = a b c d (before doubling)

									sl(0);       // reg0 = a b c d, each doubled

									cr(2,0);     // reg2 = a b c d

									 uph(0);     // reg0 = c c d d

									upl(2);      // reg2 = a a b b

									 cr(1,0);    // reg1 = c c d d

									cr(3,2);     // reg3 = a a b b

									 uph(0);     // reg0 = d d d d

									sto(0,0);    // qword 0 = d

									 upl(1);     // reg1 = c c c c

									sto(1,1);    // qword 1 = c

									 uph(2);     // reg2 = b b b b

									sto(2,2);    // qword 2 = b

									 upl(3);     // reg3 = a a a a

									sto(3,3);    // qword 3 = a


								// Leave MMX state before returning to C code
								ASM emms;


								}

								  #undef reg0

								  #undef reg1

								  #undef reg2

								  #undef reg3


								  #undef rbuf

								  #undef rztmp


								  #undef cr

								  #undef uph

								  #undef upl

								  #undef sto

								  #undef sl

								  #undef l


								#endif


								#if ASM_FACBK


								// FBufCalcInt (inline-assembly version, built when ASM_FACBK is non-zero)
								//
								// Working on 16-bit words, four at a time, computes
								//     fo[j] = saturate16( fi[j-1] + high16( r * impresp[j] ) )     (j >= 1)
								// where r comes from the qword at reztemp + 8*n (pmulhw keeps the high
								// 16 bits of each signed product, paddsw adds with signed saturation).
								// fo[0] is read and written back unchanged.
								//
								//   fi        input words
								//   fo        output words
								//   impresp   multiplier words
								//   reztemp   table of qwords; qword n supplies r
								//   n         qword index into reztemp
								//
								// Presumably each reztemp qword holds one value replicated in all four
								// lanes, as DupRezBuf produces -- confirm against the caller.
								//
								// The first qword of fo is built separately; the loop then runs 5 times
								// and stores three qwords per pass, with the loads for the next qword
								// interleaved between the operations of the current one.
								// NOTE(review): the prologue plus 15 loop stores write 16 qwords (64
								// words) of fo, and the loop reads ahead in fi and impresp; the buffers
								// are presumably padded beyond 60 words -- confirm against the caller.
								//
								// ASM and QP are macros defined outside this function (presumably __asm
								// and "qword ptr" -- confirm in opt.h / mmxutil.h).
								void FBufCalcInt(short *fi, short *fo, short *impresp, short *reztemp, int n)

								{

								  // MMX register aliases.  reg7 shares mm1 with reg1; the code below
								  // never has both live at the same time.
								  #define reg0	mm0

								  #define reg1	mm1

								  #define reg2	mm2

								  #define reg3	mm3

								  #define reg4	mm4

								  #define reg5	mm5

								  #define reg6	mm6

								  #define reg7	mm1

								  #define reg8	mm7


								  // General-purpose register aliases: pointers, loop counter, index n
								  #define fbufi	esi

								  #define rbuf	edi

								  #define imp	edx

								  #define fbufo	ebx

								  #define jcnt	ecx

								  #define rzv   eax


								  //Diagonal array operations
								  // l1/l2 = load qword j / j+1 of fi, c3 = load qword n of reztemp,
								  // m1 = multiply (high half) by qword j+1 of impresp, a1 = saturating
								  // add, sto = store to qword j+1 of fo, s1 = shift right by three
								  // words, s2 = shift left by one word, or = merge two registers

								  #define l1(r0,j)  ASM movq reg##r0,QP[fbufi+8*j]

								  #define l2(r0,j)  ASM movq reg##r0,QP[fbufi+8+8*j]

								  #define c3(r0)    ASM movq reg##r0,QP[rbuf+8*rzv]

								  #define m1(r0,j)  ASM pmulhw reg##r0,QP[imp+8+8*j]

								  #define a1(r0,r1) ASM paddsw reg##r0,reg##r1

								  #define sto(r0,j) ASM movq QP[fbufo+8+8*j], reg##r0

								  #define s1(r0)    ASM psrlq reg##r0,48

								  #define s2(r0)    ASM psllq reg##r0,16

								  #define or(r0,r1) ASM por reg##r0,reg##r1


								//Loop setup

								ASM

								{

									mov rbuf,reztemp

									mov jcnt,5;

									mov fbufi,fi;

									mov fbufo,fo;

									mov imp,impresp;

									mov rzv,n

								}

								//Compute initial values

								//Zero-th QWORD is different
								// fo[0] is kept; fo[1..3] = fi[0..2] + high16(r * impresp[1..3])


								ASM

								{

									movq	reg0,QP[fbufo];

									psllq	reg0,48;

									psrlq	reg0,48;


								//zero-th part of fbufo now in reg0

									movq    reg2,QP[rbuf+8*rzv];

									pmulhw	reg2,QP[imp+2];

									paddsw	reg2,QP[fbufi];

									psllq	reg2,16;

									por		reg0,reg2;


									movq	QP[fbufo],reg0;

								}

								//begin loop
								// Prime the pipeline: s2 of the next fi qword merged with s1 of the
								// current one yields the fi words delayed by one position.

									l2(0,0);

									l1(1,0);

									s2(0);

									s1(1);

									c3(2);

									m1(2,0);

											l2(3,1);

											l1(4,1);

											s2(3);

											s1(4);

									or(0,1);


								inner:

								 //-------------------------
								 // Three interleaved columns, one per output qword of this pass.
								 // The loads with index 3 and 4 near the end belong to the next pass.

													l2(6,2);

									a1(0,2);

											c3(5);

											m1(5,1);

											or(3,4);

													l1(7,2);

													s2(6);

									sto(0,0);

													s1(7);

									l2(0,3);

											a1(3,5);

													c3(8);

													m1(8,2);

													or(6,7);

									l1(1,3);

									s2(0);

											sto(3,1);

									s1(1);

											l2(3,4);

													a1(6,8);

									c3(2);

									m1(2,3);

									or(0,1);

											l1(4,4);

											s2(3);

													sto(6,2);

											s1(4);

								 //-------------------------


								 // Advance all three streams by three qwords
								 ASM add fbufo,24;

								 ASM add fbufi,24;

								 ASM add imp,24;


								 // Repeat while the counter is still positive after the decrement
								 ASM sub jcnt,1;

								 ASM jg inner;


								 // Leave MMX state before returning to C code
								 ASM emms;

								}

								  #undef reg0

								  #undef reg1

								  #undef reg2

								  #undef reg3

								  #undef reg4

								  #undef reg5

								  #undef reg6

								  #undef reg7

								  #undef reg8


								  #undef fbufi

								  #undef rbuf

								  #undef imp

								  #undef fbufo

								  #undef jcnt

								  #undef rzv


								  #undef l1

								  #undef l2

								  #undef c3

								  #undef m1

								  #undef a1

								  #undef sto

								  #undef s1

								  #undef s2

								  #undef or


								#else


								// FBufCalcInt (C version, built when ASM_FACBK is zero)
								//
								// For j = 1 .. SubFrLen-1 computes
								//     fo[j] = clamp16( fi[j-1] + ((2 * rezbuf[4-n] * impresp[j]) >> 16) )
								// fo[0] is not written.
								//
								//   fi        input words
								//   fo        output words
								//   impresp   multiplier words
								//   rezbuf    source of the scale value, read at index 4-n
								//   reztemp   not used by this version
								//   n         selects the rezbuf entry
								void FBufCalcInt(short *fi, short *fo, short *impresp, short *rezbuf, short *reztemp, int n)
								{
								  long Acc0l;
								  int j;

								  #define MAX16  32767
								  #define MIN16 -32768

								  (void)reztemp;   // accepted for interface compatibility only

									for(j=1; j<SubFrLen; j++)
									{
										Acc0l = fi[j-1];

										// Doubling is written as a multiplication: left-shifting a
										// negative signed value is undefined behaviour in C.
										// NOTE: the >> 16 of a negative product relies on the
										// compiler's arithmetic right shift, as the original did.
										Acc0l += (((rezbuf[4-n]*2)*impresp[j]))>>16;

										// Saturate to the 16-bit range before storing
										if	   (Acc0l > MAX16) Acc0l = MAX16;
									    else if(Acc0l < MIN16) Acc0l = MIN16;

										fo[j] = (short)(Acc0l);
									}
								}

								#endif


								#if ASM_FACBK

								//#if 0


								// CodeBkSrch (inline-assembly version, built when ASM_FACBK is non-zero)
								//
								// For each of numvecs consecutive 20-word vectors starting at spint,
								// forms the 32-bit dot product with the 20 words at lpint and keeps a
								// running maximum that starts at *max.
								//
								//   lpint     fixed 20-word vector (five qwords, addressed through esi)
								//   spint     numvecs vectors of 20 words each (edi, +40 bytes per vector)
								//   numvecs   number of vectors to test
								//   gid       out: zero-based index of the vector that last raised the
								//             maximum; 0 is stored when no vector exceeded the incoming
								//             *max (gd starts at numvecs and numvecs - gd is stored)
								//   max       in: starting maximum; out: final maximum
								//
								// The maximum and index are updated without branches, using a compare
								// mask and XOR/AND selects.
								// NOTE(review): unlike the C version in the #else branch, *gid is always
								// written here, even when nothing exceeded *max -- confirm callers do
								// not depend on the difference.
								//
								// ASM, QP and DP are macros defined outside this function (presumably
								// __asm, "qword ptr" and "dword ptr" -- confirm in opt.h / mmxutil.h).
								void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)

								{


								// MMX register aliases.  gdx shares mm3 with reg3 and icx shares mm2
								// with reg2; each is only written after the last add of that column.
								#define reg0  mm0

								#define reg1  mm1

								#define reg2  mm2

								#define reg3  mm3

								#define reg4  mm4

								#define acc1  mm5

								#define acc0  mm6

								#define gdx	  mm3

								#define gd	  mm7

								#define icx	  mm2


								// General-purpose register aliases: vector pointers, result pointers
								// and the down-counting vector counter
								#define lp	  esi

								#define sp	  edi

								#define maxx  eax

								#define gidx  edx

								#define icnt  ebx


								// In the following macros, 'n' is the column number.
								// l = load qword n of lpint, m = multiply-add it with qword n of the
								// current spint vector, a = add column n into acc0

								#define l(n)  ASM movq    reg##n,QP[lp+8*n]

								#define m(n)  ASM pmaddwd reg##n,QP[sp+8*n]

								#define a(n)  ASM paddd   acc0,reg##n


								  ASM

								  {

								    mov		sp,spint;

								    mov		lp,lpint;

									mov		icnt,numvecs;

									mov		gidx,gid;

									mov	    maxx,max;

								  }


								  ASM movd	gd,numvecs;//load gd with top codebook index

								  ASM movd  acc1,DP[maxx];//load acc1 with previous max


								//Begin loop


								outer:

								//inner:
								// reg1 and reg2 were reused as temporaries at the end of the previous
								// vector, so they are cleared again for every vector.

								  ASM pxor  acc0,acc0;

								  ASM pxor	reg1,reg1;   //make first a(1) a nop

								  ASM pxor	reg2,reg2;   //make first a(2) a nop

								//--------------------------
								// Five columns (20 words).  The first a(1) and a(2) add zero; the
								// second a(1) and a(2) add the real products of columns 1 and 2.

								l(0);

												a(1);

								m(0);

										l(1);

														a(2);

										m(1);

												l(2);

								a(0);

												m(2);

														l(3);

										a(1);

														m(3);

								l(4);

												a(2);

								m(4);


								// Step to the next 20-word vector
								ASM add	 sp,40;

														a(3);

								// Save the previous best counter value and fetch the current counter
								ASM movq  gdx,gd;

								ASM movd  icx,icnt;


								a(4);


								  ASM

								  {

								    movq  reg0,acc0;

								    psrlq acc0,32;


									pxor  gd,icx;//gd = previous gd XOR icnt (select mask)

								    paddd acc0,reg0;


									movq    reg0,acc0; //copy acc0

									movq    reg1,acc1; //copy old max


									pxor    reg1,acc0

									pcmpgtd reg0,acc1; //reg0 = all ones where acc0 > acc1, else 0

									pand    reg1,reg0; //reg1 = (acc0 XOR acc1) or 0

									pxor    acc1,reg1; //acc1 = acc0 if greater, else unchanged


									pand	gd,reg0; //gd = select mask or 0

									pxor	gd,gdx;  //gd=icnt or previous value


								    sub icnt,1;

								    jg  outer;

								  }


								  // Convert the down-counter value into a zero-based vector index
								  ASM movd  reg0,numvecs;

								  ASM psubd reg0,gd;

								  ASM movd  DP[gidx],reg0;//return gid

								  ASM movd  DP[maxx],acc1;//return max

								  ASM emms;


								}

								#undef reg0

								#undef reg1

								#undef reg2

								#undef reg3

								#undef reg4

								#undef acc1

								#undef acc0

								#undef gdx

								#undef gd

								#undef icx


								#undef lp

								#undef sp

								#undef maxx

								#undef gidx

								#undef icnt


								#undef l

								#undef m

								#undef a


								#else


								// CodeBkSrch (C version, built when ASM_FACBK is zero)
								//
								// Walks numvecs consecutive 20-word vectors starting at spint and forms
								// the dot product of each with the 20 words at lpint.  Whenever a product
								// is strictly greater than *max, *max takes that product and *gid the
								// zero-based index of the vector.  Both outputs are left untouched when
								// no product exceeds the incoming *max.
								void CodeBkSrch(short *lpint, short *spint, int numvecs, int *gid, int *max)
								{
								int vec;

								for (vec = 0; vec < numvecs; vec++, spint += 20)
								{
									const short *tgt = lpint;
									const short *cbk = spint;
									const short *end = spint + 20;
									int dot = 0;

									// 20-term dot product for this vector
									while (cbk != end)
										dot += *tgt++ * *cbk++;

									// Not better than the current best: try the next vector
									if (dot <= *max)
										continue;

									*max = dot;
									*gid = vec;
								}
								}


								#endif


								#endif //COMPILE_MMX