|
|
//
// ITU-T G.723 Floating Point Speech Coder ANSI C Source Code. Version 1.00
// copyright (c) 1995, AudioCodes, DSP Group, France Telecom,
// Universite de Sherbrooke, Intel Corporation. All rights reserved.
//
#include <stdio.h>
#include <math.h>
#include "opt.h"
#include "typedef.h"
#include "cst_lbc.h"
#include "tab_lbc.h"
#include "util_lbc.h"
#include "lsp.h"
#include "timer.h"
#include "mmxutil.h"
#if COMPILE_MMX
// This file includes all the Lsp related functions
//--------------------------------------------------------------
int mult(short x, short y) { return ( ((int)x)*((int)y) >> 16 ); } //--------------------------------------------------------------
int LspSearchInt(short *Lspw, short *LspTab) {
#if ASM_SVQ
int mem8000[2] = {0x80008000,0x80008000}, zero[2] = {0,0}; short maxes[4],mx; int retu; int *ptr,tmp,t,i,k;
#define lsp esi
#define tab edi
#define idx edx
#define pidx eax // packed index: n+3 n+2 n+1 n
#define maxi ecx
#define r0 mm0
#define r1 mm1
#define r2 mm2
#define r3 mm3
#define r4 mm4
#define max mm5
// Expand Lspw table so it's 0000 1111 2222 3333 4444 5555 6666 7777
ptr = (int *)Lspw; k = 14; for (i=7; i>=0; i--) { t = Lspw[i]; t = t & 0xffff; tmp = t; tmp = (tmp << 16) | t; ptr[k] = ptr[k+1] = tmp; k -= 2; }
// Need LspTab to be ordered 0 4 8 12 1 5 9 13 etc.
// so that lo word of each qword sees 0123, next sees 4567, etc.
//
// 'idx' counts iterations, so it goes 0 to 256 in steps of 4.
// The 4 LspTab sets at any given time are 8*idx, 8*idx+8, +16, +24
// Lspw[n] is lsp+8*n, as defined below
#define a(n) [tab+8*idx+8*n]
#define b(n) [lsp+8*n]
ASM { push lsp; push tab; push idx; push pidx; push maxi;
mov lsp,Lspw; mov tab,LspTab; xor idx,idx; xor maxi,maxi; mov pidx,003020100h; movq max,mem8000;
/*
The code below is interleaved with k=1. The structure is: four levels of indentation, one for each of the 4 terms of the sum. The instructions completely left-justified are the part of the loop that's wrapped around on itself. Note that the code reads 8 bytes past the end of the LspTab that's passed to it. This is made o.k. by making the table that's passed to it have 8 dummy bytes at the end. */
// Start up the pipeline
movq r0,a(0); movq r1,r0; pmulhw r0,b(4); psubsw r1,b(0); movq r2,a(1); movq r3,r2; psllw r0,2; pmulhw r0,r1; pmulhw r2,b(5); psubsw r3,b(1); movq r1,a(2); psllw r2,2; pmulhw r2,r3; movq r3,r1; pmulhw r1,b(6); psubsw r3,b(2); paddw r0,r2; movq r2,a(3); psllw r1,2; pmulhw r1,r3; movq r3,r2; pmulhw r2,b(7); psubsw r3,b(3); paddw r1,r0;
loop1: movq r0,a(4); movq r4,r1; // save accum so not wiped out by first half of loop
psllw r2,2; movq r1,r0; pmulhw r2,r3; pmulhw r0,b(4);
psubsw r1,b(0); paddw r4,r2; // now final answer is in r4
movq r2,a(5);
paddw r4,mem8000 // make final sum unsigned
movq r3,r2; // 0123
psllw r0,2; psubusw max,r4 // start to compute max
pmulhw r0,r1; // 0.23
pmulhw r2,b(5); paddw max,r4 // max now done
psubsw r3,b(1); pcmpeqw r4,max // now 1111's means a new max was found
movq r1,a(6); // 0123
packsswb r4,r4; // put all fields in low 32 bits
psllw r2,2; movd ebx,r4; pmulhw r2,r3; // 012.
xor ebx,0ffffffffh; // invert mask
and maxi,ebx; // get old index to keep
movq r3,r1; // 0123
pmulhw r1,b(6);
psubsw r3,b(2);
xor ebx,0ffffffffh; // invert mask
paddw r0,r2; // 01.3
and ebx,pidx; // get new index
psllw r1,2; movq r2,a(7);// 0123
pmulhw r1,r3; // 012.
movq r3,r2; // 0123
or maxi,ebx; // now maxi is done
pmulhw r2,b(7);
psubsw r3,b(3);
paddw r1,r0; // .123
add idx,4;
add pidx,004040404h;
cmp idx,256; jl loop1;
psubw max,mem8000; mov retu,maxi; movq maxes,max;
pop maxi; pop pidx; pop idx; pop tab; pop lsp; }
// find which of the 4 maxes is the max, and return the appropriate
// one of the 4 maxindices.
mx = maxes[0]; t = 0; if (maxes[1] >= mx) { mx = maxes[1]; t = 8; } if (maxes[2] >= mx) { mx = maxes[2]; t = 16; } if (maxes[3] >= mx) { mx = maxes[3]; t = 24; }
ASM emms; retu = (retu >> t) & 0xff;
return(retu);
#undef a
#undef b
#undef idx
#undef lsp
#undef tab
#undef max
#undef maxi
#else // if assembly code not selected, use C code
int Indx[4],i,s,ret; short Max[4],Err,mx; short m0,m1,m2,m3,m4,m5,m6,m7,t;
for (i=0; i<4; i++) { Max[i] = Indx[i] = 0; } for (i=0; i < LspCbSize; i++) { s = (i&3);
m0 = mult(Lspw[4],LspTab[s+0]); t = LspTab[s+0]-Lspw[0]; m1 = mult(t,m0<<2); m2 = mult(Lspw[5],LspTab[s+4]); t = LspTab[s+4]-Lspw[1]; m3 = mult(t,m2<<2); m4 = mult(Lspw[6],LspTab[s+8]); t = LspTab[s+8]-Lspw[2]; m5 = mult(t,m4<<2); m6 = mult(Lspw[7],LspTab[s+12]); t = LspTab[s+12]-Lspw[3]; m7 = mult(t,m6<<2);
Err = m1+m3+m5+m7;
if (Err >= Max[s]) { Max[s] = Err; Indx[s] = i; }
if (s==3) LspTab += 16; } mx = Max[0]; ret = Indx[0]; if (Max[1] >= mx) { mx = Max[1]; ret = Indx[1]; } if (Max[2] >= mx) { mx = Max[2]; ret = Indx[2]; } if (Max[3] >= mx) { mx = Max[3]; ret = Indx[3]; }
return(ret);
#endif
} //--------------------------------------------------------------
Word32 Svq_Int(float *Lsp, float *Wvect) { #define LSP_SCALE 256
int i;
Word32 Rez; int z; short Wint[LpcOrder],LspTemp[LpcOrder]; DECLARE_SHORT(Lspw,32);
ALIGN_ARRAY(Lspw);
for (i=0; i<LpcOrder; i++) { z = (int)(LSP_SCALE*2*Lsp[i]); if (z > 32767) LspTemp[i] = 32767; else if (z < -32768) LspTemp[i] = -32768; else LspTemp[i] = z; } FloatToShortScaled(Wvect,Wint,10,0); Rez = (Word32) 0;
// For each of the 3 bands
Lspw[0] = LspTemp[0]; Lspw[1] = LspTemp[1]; Lspw[2] = LspTemp[2]; Lspw[3] = 0; Lspw[4] = -Wint[0]; Lspw[5] = -Wint[1]; Lspw[6] = -Wint[2]; Lspw[7] = 0; Rez = LspSearchInt(Lspw,LspTableInt);
Lspw[0] = LspTemp[3]; Lspw[1] = LspTemp[4]; Lspw[2] = LspTemp[5]; Lspw[3] = 0; Lspw[4] = -Wint[3]; Lspw[5] = -Wint[4]; Lspw[6] = -Wint[5]; Lspw[7] = 0; Rez = (Rez<<8) + LspSearchInt(Lspw,&LspTableInt[1024]);
Lspw[0] = LspTemp[6]; Lspw[1] = LspTemp[7]; Lspw[2] = LspTemp[8]; Lspw[3] = LspTemp[9]; Lspw[4] = -Wint[6]; Lspw[5] = -Wint[7]; Lspw[6] = -Wint[8]; Lspw[7] = -Wint[9]; Rez = (Rez<<8) + LspSearchInt(Lspw,&LspTableInt[2048]);
return Rez;
} #endif
//---------------------------------------------------------------
float Polynomial(float *Lpq, int CosPtr) { return(Lpq[LpcOrder]*CosineTable[0] + Lpq[LpcOrder-2]*CosineTable[CosPtr] + Lpq[LpcOrder-4]*CosineTable[(CosPtr*2)&(CosineTableSize-1)] + Lpq[LpcOrder-6]*CosineTable[(CosPtr*3)&(CosineTableSize-1)] + Lpq[LpcOrder-8]*CosineTable[(CosPtr*4)&(CosineTableSize-1)] + Lpq[LpcOrder-10]*CosineTable[(CosPtr*5)&(CosineTableSize-1)]); }
//--------------------------------------------------------------
void AtoLsp(float *LspVect, float *Lpc, float *PrevLsp) { int i,j,k; int LspCnt; float Lpq[LpcOrder+2]; float PrevVal,CurrVal,AbsPrev,AbsCurr;
// Small additional bandwidth expansion
for (i=0; i < LpcOrder; i++) LspVect[i] = Lpc[i]*BandExpTable[i];
// Compute Lp and Lq
Lpq[0] = Lpq[1] = 1.0f;
for (i=0; i < LpcOrder/2; i++) { Lpq[2*i+2] = -Lpq[2*i+0] - LspVect[i] - LspVect[LpcOrder-1-i]; Lpq[2*i+3] = Lpq[2*i+1] - LspVect[i] + LspVect[LpcOrder-1-i]; } Lpq[LpcOrder+0] *= 0.5f; Lpq[LpcOrder+1] *= 0.5f;
// Do first evaluation
k = 0; LspCnt = 0; PrevVal = Polynomial(Lpq,0);
for (i=1; i < CosineTableSize/2; i++) { // Evaluate the polynomial
CurrVal = Polynomial(&Lpq[k],i);
// Test for sign change
if ((asint(CurrVal) ^ asint(PrevVal)) < 0) { AbsPrev = (float)fabs(PrevVal); AbsCurr = (float)fabs(CurrVal);
LspVect[LspCnt++] = (i-1 + AbsPrev/(AbsPrev+AbsCurr));
// Check if all found
if (LspCnt == LpcOrder) break;
// Switch the pointer, evaluate again
k ^= 1; CurrVal = Polynomial(&Lpq[k],i); } PrevVal = CurrVal; }
// Check if all Lsp found
if (LspCnt != LpcOrder) { for (j=0; j < LpcOrder; j++) LspVect[j] = PrevLsp[j]; } return; } //--------------------------------------------------------------
Word32 Lsp_Qnt(float *CurrLsp, float *PrevLsp, int UseMMX) { int i;
float Wvect[LpcOrder]; float Min,Tmp;
// Compute the weighting vector
Wvect[0] = 1.0f/(CurrLsp[1] - CurrLsp[0]); Wvect[LpcOrder-1] = 1.0f/(CurrLsp[LpcOrder-1] - CurrLsp[LpcOrder-2]);
for (i=1; i < LpcOrder-1; i++) { Min = CurrLsp[i+1] - CurrLsp[i]; Tmp = CurrLsp[i] - CurrLsp[i-1]; if (Tmp < Min) Min = Tmp;
if (Min > 0.0f) Wvect[i] = 1.0f/Min; else Wvect[i] = 1.0f; }
// Generate predicted vector as (DC-removed-Curr) - b*(DC-removed-Prev)
CurrLsp[0] = (CurrLsp[0] - LspDcTable[0]) - LspPred0*(PrevLsp[0] - LspDcTable[0]); CurrLsp[1] = (CurrLsp[1] - LspDcTable[1]) - LspPred0*(PrevLsp[1] - LspDcTable[1]); CurrLsp[2] = (CurrLsp[2] - LspDcTable[2]) - LspPred0*(PrevLsp[2] - LspDcTable[2]); CurrLsp[3] = (CurrLsp[3] - LspDcTable[3]) - LspPred0*(PrevLsp[3] - LspDcTable[3]); CurrLsp[4] = (CurrLsp[4] - LspDcTable[4]) - LspPred0*(PrevLsp[4] - LspDcTable[4]); CurrLsp[5] = (CurrLsp[5] - LspDcTable[5]) - LspPred0*(PrevLsp[5] - LspDcTable[5]); CurrLsp[6] = (CurrLsp[6] - LspDcTable[6]) - LspPred0*(PrevLsp[6] - LspDcTable[6]); CurrLsp[7] = (CurrLsp[7] - LspDcTable[7]) - LspPred0*(PrevLsp[7] - LspDcTable[7]); CurrLsp[8] = (CurrLsp[8] - LspDcTable[8]) - LspPred0*(PrevLsp[8] - LspDcTable[8]); CurrLsp[9] = (CurrLsp[9] - LspDcTable[9]) - LspPred0*(PrevLsp[9] - LspDcTable[9]);
// Do the SVQ
#if COMPILE_MMX
if (UseMMX) return Svq_Int(CurrLsp, Wvect); else #endif
return Lsp_Svq(CurrLsp, Wvect); }
//--------------------------------------------------------------
Word32 Lsp_Svq(float *Lsp, float *Wvect) { int i,k;
Word32 Rez; int Indx,Start,Dim; float *LspQntPnt; float Max,Err,lsp0,lsp1,lsp2,lsp3,w0,w1,w2,w3; float LspTemp[LpcOrder];
for (i=0; i<LpcOrder; i++) LspTemp[i] = 2.0f*Lsp[i]; Rez = (Word32) 0;
// For each of the 3 bands
for (k=0; k < LspQntBands; k++) {
// Initialize the search
Max = 0.0f; //-1.0f;
Indx = 0; LspQntPnt = BandQntTable[k]; Start = BandInfoTable[k][0]; Dim = BandInfoTable[k][1];
lsp0 = LspTemp[Start+0]; lsp1 = LspTemp[Start+1]; lsp2 = LspTemp[Start+2]; w0 = Wvect[Start+0]; w1 = Wvect[Start+1]; w2 = Wvect[Start+2]; if (k < 2) { for (i=0; i < LspCbSize; i++) { Err = (lsp0 - LspQntPnt[0])*w0*LspQntPnt[0] + (lsp1 - LspQntPnt[1])*w1*LspQntPnt[1] + (lsp2 - LspQntPnt[2])*w2*LspQntPnt[2];
LspQntPnt += 3;
if (asint(Err) > asint(Max)) { Max = Err; Indx = i; } } } else { lsp3 = LspTemp[Start+3]; w3 = Wvect[Start+3]; for (i=0; i < LspCbSize; i++) { Err = (lsp0 - LspQntPnt[0])*w0*LspQntPnt[0] + (lsp1 - LspQntPnt[1])*w1*LspQntPnt[1] + (lsp2 - LspQntPnt[2])*w2*LspQntPnt[2] + (lsp3 - LspQntPnt[3])*w3*LspQntPnt[3];
LspQntPnt += 4;
if (asint(Err) > asint(Max)) { Max = Err; Indx = i; } } } Rez = (Rez << 8) | Indx; }
return Rez; }
//--------------------------------------------------------------
Flag Lsp_Inq(float *Lsp, float *PrevLsp, Word32 LspId, int Crc) { int i,j;
float *LspQntPnt; float Lprd,Scon,Tmpf,Scon2; int Tmp; Flag Test;
if (Crc == 0) { Scon = 2.0f; Lprd = LspPred0; } else { LspId = (Word32) 0; Scon = 4.0f; Lprd = LspPred1; } Scon2 = Scon - 0.03125f;
// Reconstruct the LSP vector
for (i=LspQntBands-1; i >= 0; i--) { Tmp = LspId & (Word32) 0x000000ff; LspId >>= 8;
LspQntPnt = BandQntTable[i];
for (j=0; j < BandInfoTable[i][1]; j++) Lsp[BandInfoTable[i][0] + j] = LspQntPnt[Tmp*BandInfoTable[i][1] + j]; }
// Add predicted vector and DC to decoded vector
for (j=0; j < LpcOrder; j++) Lsp[j] = Lsp[j] + (PrevLsp[j] - LspDcTable[j])*Lprd + LspDcTable[j];
// Perform the stability check
for (i=0; i < LpcOrder; i++) {
// Test the first and last one
if (Lsp[0] < 3.0) Lsp[0] = 3.0f;
if (Lsp[LpcOrder-1] > 252.0f) Lsp[LpcOrder-1] = 252.0f;
// Test the others
for (j=1; j < LpcOrder; j++) { Tmpf = Scon + Lsp[j-1] - Lsp[j]; if (Tmpf > 0) { Tmpf *= 0.5f; Lsp[j-1] -= Tmpf; Lsp[j] += Tmpf; } } // Test if stable
Test = False; for (j=1; j < LpcOrder; j++) if ((Lsp[j] - Lsp[j-1]) < Scon2) Test = True;
if (Test == False) break; } if (Test == True) for (j=0; j < LpcOrder; j++) Lsp[j] = PrevLsp[j]; return Test; }
//--------------------------------------------------------------
void Lsp_Int(float *QntLpc, float *CurrLsp, float *PrevLsp) { int i,j;
float *Dpnt; float Fac[4] = {0.25f, 0.5f, 0.75f, 1.0f};
Dpnt = QntLpc; for (i=0; i < SubFrames; i++) { // Interpolate
for (j=0; j < LpcOrder; j++) Dpnt[j] = (1.0f - Fac[i])*PrevLsp[j] + Fac[i]*CurrLsp[j];
// Convert to Lpc
LsptoA(Dpnt); Dpnt += LpcOrder; }
// Copy the Lsp vector
for (i=0; i < LpcOrder; i++) PrevLsp[i] = CurrLsp[i]; }
//--------------------------------------------------------------
void LsptoA(float *Lsp) { int i,j;
float P[LpcOrder/2+1]; float Q[LpcOrder/2+1]; float Fac[(LpcOrder/2)-2] = {1.0f,0.5f,0.25f};
// Convert Lsp's to cosines
for (i=0; i < LpcOrder; i++) { j = MyFloor(Lsp[i]); Lsp[i] = -(CosineTable[j] + (CosineTable[j+1]-CosineTable[j])*(Lsp[i]-j)); }
// Init P and Q. Note that P,Q * 2^26 correspond to fixed-point code
P[0] = 0.5f; P[1] = Lsp[0] + Lsp[2]; P[2] = 1.0f + 2.0f*Lsp[0]*Lsp[2];
Q[0] = 0.5f; Q[1] = Lsp[1] + Lsp[3]; Q[2] = 1.0f + 2.0f*Lsp[1]*Lsp[3];
// Compute all the others
for (i=2; i < LpcOrder/2; i++) { P[i+1] = P[i-1] + P[i]*Lsp[2*i+0]; Q[i+1] = Q[i-1] + Q[i]*Lsp[2*i+1];
// All update
for (j=i; j >= 2; j--) { P[j] = P[j-1]*Lsp[2*i+0] + 0.5f*(P[j]+P[j-2]); Q[j] = Q[j-1]*Lsp[2*i+1] + 0.5f*(Q[j]+Q[j-2]); }
// Update PQ[01]
P[0] = P[0]*0.5f; Q[0] = Q[0]*0.5f;
P[1] = (P[1] + Lsp[2*i+0]*Fac[i-2])*0.5f; Q[1] = (Q[1] + Lsp[2*i+1]*Fac[i-2])*0.5f; }
// Convert to Lpc
for (i=0; i < LpcOrder/2; i++) { Lsp[i] = (-P[i] - P[i+1] + Q[i] - Q[i+1])*8.0f; Lsp[LpcOrder-1-i] = (-P[i] - P[i+1] - Q[i] + Q[i+1])*8.0f; } }
|