Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

1009 lines
24 KiB

//cb53.c - 5.3 rate codebook code
#include "opt.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <memory.h>
#include "typedef.h"
#include "cst_lbc.h"
#include "tab_lbc.h"
#include "util_lbc.h"
#include "exc_lbc.h"
#include "timer.h"
#include "mmxutil.h"
void fourPulseFlt (float *rr, float *Dn, float thres, int ip[], int *shiftPtr);
//--------------------------------------------------------
// Search budget carried over between calls of fourPulseFlt(): that routine
// adds it to max_time on entry and stores the unused remainder back on exit.
int extra;

// Put the carried-over search budget back to its starting value.
void reset_max_time(void)
{
    const int initial_extra = 120;

    extra = initial_extra;
}
//------------------------------------------------------------
// ACELP_LBC_code - algebraic codebook encoding of one subframe.
//
//   X[]       target vector
//   h[]       impulse response; when T0 < SubFrLen-2 it is modified in place
//             to include the fixed-gain pitch contribution
//   T0        pitch lag used for that contribution
//   code[]    output: innovation scaled by the quantized gain (SubFrLen samples)
//   ind_gain  output: gain index returned by G_code()
//   shift     output: codeword shift, filled in by D4i64_LBC()
//   sign      output: pulse signs, filled in by D4i64_LBC()
//   gain_T0   fixed pitch gain applied at lag T0
//   flags     passed through to D4i64_LBC()
//
// Returns the codebook index produced by D4i64_LBC().
int ACELP_LBC_code(float X[], float h[], int T0, float code[],
int *ind_gain, int *shift, int *sign, float gain_T0, int flags)
{
    float target_corr[SubFrLen2];
    float pulses[SubFrLen2];
    float work[DIM_RR];
    float quant_gain;
    int n, cb_index;
    const int with_pitch = (T0 < SubFrLen-2);

    // Fold the fixed-gain pitch contribution into h[]. The update is
    // recursive: h[n-T0] may already have been updated.
    if (with_pitch)
    {
        n = T0;
        while (n < SubFrLen)
        {
            h[n] += gain_T0*h[n-T0];
            n++;
        }
    }

    // Correlations of h[] with itself and with the target.
    Cor_h(h, work);
    Cor_h_X(h, X, target_corr);

    // Codebook search. The same buffer is handed over both as the
    // correlation table and as the filtered-codeword output, so after this
    // call work[] holds the filtered codeword.
    cb_index = D4i64_LBC(target_corr, work, h, pulses, work, shift, sign, flags);

    // Quantize the innovation gain against the filtered codeword.
    *ind_gain = G_code(X, work, &quant_gain);

    // Scale the pulses, then add the pitch contribution to code[].
    n = 0;
    while (n < SubFrLen)
    {
        code[n] = pulses[n]*quant_gain;
        n++;
    }
    if (with_pitch)
    {
        n = T0;
        while (n < SubFrLen)
        {
            code[n] += code[n-T0]*gain_T0;
            n++;
        }
    }

    return cb_index;
}
//---------------------------------------------------------------
// Cor_h - fill rr[] with the correlations of the impulse response that the
// codebook search reads.
//
//   H[]   impulse response, SubFrLen samples (read only)
//   rr[]  output. Layout, in order: four vectors of NB_POS entries
//         (i0i0, i1i1, i2i2, i3i3) followed by six tables of MSIZE entries
//         (i0i1, i0i2, i0i3, i1i2, i1i3, i2i3).
//
// NOTE(review): the backward walks below step by NB_POS+1 and NB_POS, which
// looks like a diagonal walk over NB_POS x NB_POS tables (MSIZE ==
// NB_POS*NB_POS) -- confirm against the header that defines these constants.
void Cor_h(float *H, float *rr)
{
// Compute correlations of h[] needed for the codebook search.
// h[] :Impulse response.
// rr[] :Correlations.
float *rri0i0, *rri1i1, *rri2i2, *rri3i3;
float *rri0i1, *rri0i2, *rri0i3;
float *rri1i2, *rri1i3, *rri2i3;
float *p0, *p1, *p2, *p3;
float cor, *h2;
int i, k, m, t;
float h[SubFrLen2];
// Local copy of the response delayed by four samples; the first four
// entries are zero.
for(i=0; i<SubFrLen; i++)
h[i+4] = H[i];
for(i=0; i<4; i++)
h[i] = 0.0f;
// Init pointers
rri0i0 = rr;
rri1i1 = rri0i0 + NB_POS;
rri2i2 = rri1i1 + NB_POS;
rri3i3 = rri2i2 + NB_POS;
rri0i1 = rri3i3 + NB_POS;
rri0i2 = rri0i1 + MSIZE;
rri0i3 = rri0i2 + MSIZE;
rri1i2 = rri0i3 + MSIZE;
rri1i3 = rri1i2 + MSIZE;
rri2i3 = rri1i3 + MSIZE;
// Compute rri0i0[], rri1i1[], rri2i2[] and rri3i3[]
// cor is a running sum of squares over h[], two samples at a time; each
// vector is filled from its last entry down to its first and stores half
// of the running sum.
cor = 0.0f;
m = 0;
for(i=NB_POS-1; i>=0; i--)
{
cor += h[m+0]*h[m+0] + h[m+1]*h[m+1]; rri3i3[i] = cor*0.5f;
cor += h[m+2]*h[m+2] + h[m+3]*h[m+3]; rri2i2[i] = cor*0.5f;
cor += h[m+4]*h[m+4] + h[m+5]*h[m+5]; rri1i1[i] = cor*0.5f;
cor += h[m+6]*h[m+6] + h[m+7]*h[m+7]; rri0i0[i] = cor*0.5f;
m += 8;
}
// Compute elements of: rri0i1[], rri0i3[], rri1i2[] and rri2i3[]
// Products of h[] with h[] advanced by 2 samples (h2 = h+2). Every pass of
// the k loop restarts the running sum, writes backwards from the current
// start pointers in steps of NB_POS+1, then advances h2 by STEP and moves
// the start pointers for the next pass.
h2 = h+2;
p3 = rri2i3 + MSIZE-1;
p2 = rri1i2 + MSIZE-1;
p1 = rri0i1 + MSIZE-1;
p0 = rri0i3 + MSIZE-2;
for (k=0; k<NB_POS; k++)
{
cor = 0.0f;
m = 0;
t = 0;
for(i=k+1; i<NB_POS; i++)
{
cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
cor += h[m+6]*h2[m+6] + h[m+7]*h2[m+7]; p0[t] = cor;
t -= (NB_POS+1);
m += 8;
}
// Last, partial group of this pass: p0 gets no entry here.
cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
h2 += STEP;
p3 -= NB_POS;
p2 -= NB_POS;
p1 -= NB_POS;
p0 -= 1;
}
// Compute elements of: rri0i2[], rri1i3[]
// Same scheme with h[] advanced by 4 samples. Each of the two tables is
// written through two pointers (p3/p1 into rri1i3, p2/p0 into rri0i2).
h2 = h+4;
p3 = rri1i3 + MSIZE-1;
p2 = rri0i2 + MSIZE-1;
p1 = rri1i3 + MSIZE-2;
p0 = rri0i2 + MSIZE-2;
for (k=0; k<NB_POS; k++)
{
cor = 0.0f;
m = 0;
t = 0;
for(i=k+1; i<NB_POS; i++)
{
cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
cor += h[m+6]*h2[m+6] + h[m+7]*h2[m+7]; p0[t] = cor;
t -= (NB_POS+1);
m += 8;
}
// Last, partial group of this pass: only p3 and p2 get an entry.
cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
h2 += STEP;
p3 -= NB_POS;
p2 -= NB_POS;
p1 -= 1;
p0 -= 1;
}
// Compute elements of: rri0i1[], rri0i3[], rri1i2[] and rri2i3[]
// Same four tables as the first pass over them, now with h[] advanced by
// 6 samples and with different start offsets, so other entries are written.
h2 = h+6;
p3 = rri0i3 + MSIZE-1;
p2 = rri2i3 + MSIZE-2;
p1 = rri1i2 + MSIZE-2;
p0 = rri0i1 + MSIZE-2;
for (k=0; k<NB_POS; k++)
{
cor = 0.0f;
m = 0;
t = 0;
for(i=k+1; i<NB_POS; i++)
{
cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
cor += h[m+2]*h2[m+2] + h[m+3]*h2[m+3]; p2[t] = cor;
cor += h[m+4]*h2[m+4] + h[m+5]*h2[m+5]; p1[t] = cor;
cor += h[m+6]*h2[m+6] + h[m+7]*h2[m+7]; p0[t] = cor;
t -= (NB_POS+1);
m += 8;
}
// Last, partial group of this pass: only p3 gets an entry.
cor += h[m+0]*h2[m+0] + h[m+1]*h2[m+1]; p3[t] = cor;
h2 += STEP;
p3 -= NB_POS;
p2 -= 1;
p1 -= 1;
p0 -= 1;
}
return;
}
//---------------------------------------------------------------------------
// Cor_h_X - correlate the target vector with the impulse response.
//
//   h[]  impulse response
//   X[]  target vector
//   D[]  output: for every offset n in 0..SubFrLen-1, the DotProd() of
//        X[n..] with h[0..] taken over the SubFrLen-n remaining samples
void Cor_h_X(float h[],float X[],float D[])
{
    int n = 0;

    while (n < SubFrLen)
    {
        D[n] = DotProd(&X[n],h,(SubFrLen-n));
        n++;
    }
}
//-------------------------------------------------------------------------
// Find_Pulse4 - try the eight candidate slots of the fourth pulse and keep
// the best one.
//
//   Dn          not used by this routine (kept for interface compatibility)
//   rri3i3      8 energy terms of the fourth pulse itself
//   ptr_ri0i3   8 cross terms between pulse 0 and the fourth pulse
//   ptr_ri1i3   8 cross terms between pulse 1 and the fourth pulse
//   ptr_ri2i3   8 cross terms between pulse 2 and the fourth pulse
//   ptr         correlation value of the first candidate; later candidates
//               are read STEP floats apart
//   ps2         correlation accumulated over the first three pulses
//   alp2        energy accumulated over the first three pulses
//   psbest      in/out: squared correlation of the best candidate so far
//   abest       in/out: energy of the best candidate so far
//
// A candidate k replaces the current best when
//     corr_k^2 * (*abest) > (*psbest) * energy_k
// i.e. its corr^2/energy ratio is larger, compared without dividing.
//
// Returns the index (0..7) of the last candidate that improved on the
// incoming best, or -1 when none did (*psbest and *abest are then unchanged).
//
// The return type is spelled out: the definition used to rely on implicit
// int, which is no longer part of C since C99.
int Find_Pulse4(float *Dn,float *rri3i3,float *ptr_ri0i3,float *ptr_ri1i3,
float *ptr_ri2i3,float *ptr, float ps2,float alp2,float *psbest,float *abest)
{
    int k, bestk;
    float ps3;
    float a[16];    // a[0..7]: energies, a[8..15]: squared correlations

    (void)Dn;

    for (k=0; k<8; k++)
    {
        ps3 = ps2 + *ptr;
        a[k] = alp2 + rri3i3[k] + ptr_ri0i3[k] + ptr_ri1i3[k] + ptr_ri2i3[k];
        a[k+8] = ps3 * ps3;
        ptr += STEP;
    }

    bestk = -1;
    for (k=0; k<8; k++)
    {
        if ((a[k+8] * (*abest)) > ((*psbest) * a[k]))
        {
            *psbest = a[k+8];
            *abest = a[k];
            bestk = k;
        }
    }
    return bestk;
}
//-------------------------------------------------------------------------
// Helper for D4i64_LBC: negate *corr when the two pulse-sign words differ.
// The sign words are either 0 or have only the top bit set, so "differ"
// means exactly one of the two pulses is negative. Negating the float is the
// portable way to flip its sign bit.
static void d4i64_apply_sign(float *corr, int sign_a, int sign_b)
{
    if ((sign_a ^ sign_b) != 0)
        *corr = -*corr;
}

// Helper for D4i64_LBC: search threshold for one pulse grid.
// start is 0 for the unshifted grid and 1 for the grid shifted by one sample.
// The threshold lies between the mean and the maximum of the three-pulse
// correlation sums; the SC_THRES flag selects the weighting.
static float d4i64_threshold(const float Dn[], int start, int flags)
{
    float max0 = Dn[start];
    float max1 = Dn[start+2];
    float max2 = Dn[start+4];
    float means = 0.0f;
    int i;

    // Maximum of Dn on each of the first three pulse tracks
    for (i=start+8; i < SubFrLen; i+=STEP)
    {
        if (Dn[i] > max0) max0 = Dn[i];
        if (Dn[i+2] > max1) max1 = Dn[i+2];
        if (Dn[i+4] > max2) max2 = Dn[i+4];
    }
    max0 = max0 + max1 + max2;

    // Mean of Dn[i0]+Dn[i1]+Dn[i2] over the grid
    for (i=start; i < SubFrLen; i+=STEP)
        means += Dn[i+4] + Dn[i+2] + Dn[i];
    means *= 0.125f;

    if (flags & SC_THRES)
        return means*0.25f + max0*0.75f;
    return means + (max0-means)*0.5f;
}

// Helper for D4i64_LBC: add (sgn > 0) or subtract (otherwise) the impulse
// response h[], delayed by pos samples, to/from y[0..SubFrLen-1].
static void d4i64_add_response(float y[], const float h[], int pos, int sgn)
{
    int i, j;

    if (sgn > 0)
    {
        for (i=pos, j=0; i<SubFrLen; i++, j++)
            y[i] = y[i] + h[j];
    }
    else
    {
        for (i=pos, j=0; i<SubFrLen; i++, j++)
            y[i] = y[i] - h[j];
    }
}

//-------------------------------------------------------------------------
// routine D4i64_LBC
// ~~~~~~~~~
// Algebraic codebook for LBC.
// -> 17 bits; 4 pulses in a frame of 60 samples
//
// The code length is 60, containing 4 nonzero pulses i0, i1, i2, i3.
// Each pulse can have 8 possible positions (positive or negative):
//
// i0 (+-1) : 0, 8, 16, 24, 32, 40, 48, 56
// i1 (+-1) : 2, 10, 18, 26, 34, 42, 50, 58
// i2 (+-1) : 4, 12, 20, 28, 36, 44, 52, (60)
// i3 (+-1) : 6, 14, 22, 30, 38, 46, 54, (62)
//
// All the pulses can be shifted by one.
// The last position of the last 2 pulses falls outside the
// frame and signifies that the pulse is not present.
//
// Input arguments:
//
// Dn[] Correlation between target vector and impulse response h[].
//      Modified in place: zero-extended up to SubFrLen2 and made
//      sign-adjusted pair by pair.
// rr[] Correlations of impulse response h[], as laid out by Cor_h().
//      The six cross-correlation tables are modified in place to take
//      the chosen pulse signs into account.
// h[] Impulse response of filters
// flags SC_THRES selects the 0.25*mean + 0.75*max search threshold;
//       otherwise the midpoint between mean and max is used.
//
// Output arguments:
//
// cod[] Selected algebraic codeword
// y[] Filtered codeword. It is written only after the search has finished
//     reading rr[].
// code_shift Shift of the codeword
// sign Signs of the 4 pulses (bit k set when pulse k is positive).
//
// return: Index of selected codevector (position part: 3 bits per pulse)
//
// The threshold controls whether a section of the innovative
// codebook should be searched or not.
//
// Portability note: the sign adjustment of rr[] used to be written as
// "(int)*p++ = asint(*p) ^ s0 ^ s1", which assigns through a cast (not an
// lvalue in ISO C) and reads and increments p without sequencing. It is now
// a conditional negation, which flips the same sign bit.
//--------------------------------------------------------------------
int D4i64_LBC(float Dn[], float rr[], float h[], float cod[],
float y[], int *code_shift, int *sign, int flags)
{
    int ip[4];
    int i0, i1, i2, i3, ip0, ip1, ip2, ip3;
    int i;
    int shif;
    float thres, thres_shifted;
    float *rri0i1,*rri0i2,*rri0i3;
    float *rri1i2,*rri1i3,*rri2i3;
    float *ptr_ri0i1,*ptr_ri0i2,*ptr_ri0i3;
    float *ptr_ri1i2,*ptr_ri1i3,*ptr_ri2i3;
    int p_sign[SubFrLen2/2];

    // Init pointers: the cross-correlation tables follow the four
    // NB_POS-long energy vectors at the start of rr[].
    rri0i1 = rr + 4*NB_POS;
    rri0i2 = rri0i1 + MSIZE;
    rri0i3 = rri0i2 + MSIZE;
    rri1i2 = rri0i3 + MSIZE;
    rri1i3 = rri1i2 + MSIZE;
    rri2i3 = rri1i3 + MSIZE;

    // Extend the backward filtered target vector by zeros
    for (i=SubFrLen; i < SubFrLen2; i++)
        Dn[i] = 0.0f;

    // Choose the sign of the impulse: one sign per pair of adjacent
    // samples (position and its shifted-by-one twin). Negative pairs are
    // flipped so that the search only sees the magnitude.
    for (i=0; i<SubFrLen; i+=2)
    {
        if ((Dn[i] + Dn[i+1]) >= 0.0f)
        {
            p_sign[i/2] = 0x00000000;
        }
        else
        {
            p_sign[i/2] = 0x80000000;
            Dn[i] = -Dn[i];
            Dn[i+1] = -Dn[i+1];
        }
    }
    // The two out-of-frame pairs (pulse absent) count as positive.
    p_sign[30] = p_sign[31] = 0x00000000;

    // Compute the search threshold after three pulses, for the unshifted
    // and the shifted grid, and keep the larger of the two.
    thres = d4i64_threshold(Dn, 0, flags);
    thres_shifted = d4i64_threshold(Dn, 1, flags);
    if (thres_shifted > thres) thres = thres_shifted;

    // Modification of rrixiy[] to take signs into account.
    ptr_ri0i1 = rri0i1;
    ptr_ri0i2 = rri0i2;
    ptr_ri0i3 = rri0i3;
    for (i0=0; i0<SubFrLen/2; i0+=STEP/2)
    {
        for (i1=2/2; i1<SubFrLen/2; i1+=STEP/2)
        {
            d4i64_apply_sign(ptr_ri0i1++, p_sign[i0], p_sign[i1]);
            d4i64_apply_sign(ptr_ri0i2++, p_sign[i0], p_sign[i1+1]);
            d4i64_apply_sign(ptr_ri0i3++, p_sign[i0], p_sign[i1+2]);
        }
    }
    ptr_ri1i2 = rri1i2;
    ptr_ri1i3 = rri1i3;
    for (i1=2/2; i1<SubFrLen/2; i1+=STEP/2)
    {
        for (i2=4/2; i2<SubFrLen2/2; i2+=STEP/2)
        {
            d4i64_apply_sign(ptr_ri1i2++, p_sign[i1], p_sign[i2]);
            d4i64_apply_sign(ptr_ri1i3++, p_sign[i1], p_sign[i2+1]);
        }
    }
    ptr_ri2i3 = rri2i3;
    for (i2=4/2; i2<SubFrLen2/2; i2+=STEP/2)
    {
        for (i3=6/2; i3<SubFrLen2/2; i3+=STEP/2)
            d4i64_apply_sign(ptr_ri2i3++, p_sign[i2], p_sign[i3]);
    }

    // Four-pulse search
    fourPulseFlt(rr, Dn, thres, ip, code_shift);
    ip0 = ip[0];
    ip1 = ip[1];
    ip2 = ip[2];
    ip3 = ip[3];
    shif = *code_shift;

    // Set the sign of impulses (looked up before the shift is applied)
    i0 = (p_sign[(ip0 >> 1)]>=0?1:-1);
    i1 = (p_sign[(ip1 >> 1)]>=0?1:-1);
    i2 = (p_sign[(ip2 >> 1)]>=0?1:-1);
    i3 = (p_sign[(ip3 >> 1)]>=0?1:-1);

    // Find the codeword corresponding to the selected positions
    for (i=0; i<SubFrLen; i++)
        cod[i] = 0.0f;
    if (shif > 0)
    {
        ip0++;
        ip1++;
        ip2++;
        ip3++;
    }
    cod[ip0] = (float)i0;
    cod[ip1] = (float)i1;
    // The last two pulses may fall outside the frame (pulse absent).
    if (ip2<SubFrLen)
        cod[ip2] = (float)i2;
    if (ip3<SubFrLen)
        cod[ip3] = (float)i3;

    // find the filtered codeword
    for (i=0; i < SubFrLen; i++)
        y[i] = 0.0f;
    d4i64_add_response(y, h, ip0, i0);
    d4i64_add_response(y, h, ip1, i1);
    if (ip2<SubFrLen)
        d4i64_add_response(y, h, ip2, i2);
    if (ip3<SubFrLen)
        d4i64_add_response(y, h, ip3, i3);

    // find codebook index; 17-bit address
    *code_shift = shif;
    *sign = 0;
    if (i0 > 0) *sign += 1;
    if (i1 > 0) *sign += 2;
    if (i2 > 0) *sign += 4;
    if (i3 > 0) *sign += 8;
    i = ((ip3 >> 3) << 9) + ((ip2 >> 3) << 6) + ((ip1 >> 3) << 3) + (ip0 >> 3);
    return i;
}
//--------------------------------------------------------------------
// G_code - quantize the gain of the innovation.
//
//   X[]     target vector
//   Y[]     filtered codeword
//   gain_q  output: the FcbkGainTable[] entry that was selected
//
// Returns the index of the selected entry. When <X,Y> is not positive,
// entry 0 is selected. Otherwise the entry closest to 0.5*<X,Y>/<Y,Y> is
// chosen (ties go to the lowest index); a zero <Y,Y> is treated as a
// target gain of zero.
int G_code(float X[], float Y[], float *gain_q)
{
    float cross, energy, wanted;
    float err, best_err;
    int lev;
    int best = 0;

    // Scalar product <X[],Y[]>
    cross = DotProd(X,Y,SubFrLen);
    if (cross <= 0)
    {
        *gain_q = FcbkGainTable[best];
        return best;
    }

    // Scalar product <Y[],Y[]>
    energy = DotProd(Y,Y,SubFrLen);
    wanted = (energy != 0.0f) ? cross/energy * 0.5f : 0.0f;

    // Nearest-neighbour search over the gain table
    best_err = (float)fabs(wanted - FcbkGainTable[0]);
    lev = 1;
    while (lev < NumOfGainLev)
    {
        err = (float)fabs(wanted - FcbkGainTable[lev]);
        if (err < best_err)
        {
            best_err = err;
            best = lev;
        }
        lev++;
    }

    *gain_q = FcbkGainTable[best];
    return best;
}
//-------------------------------------------------------------------
// Search the optimum positions of the four pulses which maximize
// square(correlation) / energy
// The search is performed in four nested loops. At each loop, one
// pulse contribution is added to the correlation and energy.
//
// The fourth loop is entered only if the correlation due to the
// contribution of the first three pulses exceeds the preset
// threshold.
//
// Arguments:
//   rr[]     correlation tables (four NB_POS-long vectors followed by six
//            MSIZE-long tables); read only here
//   Dn[]     correlation values indexed by pulse position; read up to
//            index SubFrLen2-1 region (see the dmax loop)
//   thres    threshold that gates the fourth-pulse search
//   ip[]     output: the four selected pulse positions (unshifted)
//   shifPtr  output: 0, or 1 when the shifted grid was selected
//
// The number of fourth-pulse searches is limited by a budget of
// max_time + extra; the unused part is stored back into the global "extra"
// on exit, so it carries over to the next call.
//
// NOTE(review): asint() comes from a project header not shown here; it is
// presumably a reinterpretation of the float's bits as an int. If so, the
// integer comparisons below order values like float comparisons only when
// both operands are non-negative -- confirm.
//-------------------------------------------------------------------
void fourPulseFlt (float *rr, float *Dn, float thres, int ip[], int *shifPtr){
// Default values
int ip0 = 0;
int ip1 = 2;
int ip2 = 4;
int ip3 = 6;
int shif = 0;
int i0, i1, i2;
int k, time;
int shift, bestk, lasti2, inc;
// Best candidate so far: psc is its squared correlation, alpha its energy.
float psc = 0.0f;
float alpha = 1.0f;
float ps0, ps1, ps2, alp0;
float alp1, alp2;
float ps0a, ps1a, ps2a;
float *ptr_ri0i0,*ptr_ri1i1,*ptr_ri2i2;
float *ptr_ri0i1,*ptr_ri0i2,*ptr_ri0i3;
float *ptr_ri1i2,*ptr_ri1i3,*ptr_ri2i3;
float *rri0i0,*rri1i1,*rri2i2,*rri3i3;
float *rri0i1,*rri0i2,*rri0i3;
float *rri1i2,*rri1i3,*rri2i3;
// a[0..7]: energies of the 8 fourth-pulse candidates,
// a[8..15]: their squared correlations.
float a[16];
float t1,t2,*pntr;
float dmax4, dmax5, dmax2, dmax3; //used for bypass
#if !OPT_PULSE4
int i3;
float ps3;
#endif
// Search budget for this call (decremented once per fourth-pulse search).
time = max_time + extra;
// Four loops to search innovation code.
// Init. pointers that depend on first loop
rri0i0 = rr;
rri1i1 = rri0i0 + NB_POS;
rri2i2 = rri1i1 + NB_POS;
rri3i3 = rri2i2 + NB_POS;
rri0i1 = rri3i3 + NB_POS;
rri0i2 = rri0i1 + MSIZE;
rri0i3 = rri0i2 + MSIZE;
rri1i2 = rri0i3 + MSIZE;
rri1i3 = rri1i2 + MSIZE;
rri2i3 = rri1i3 + MSIZE;
ptr_ri0i0 = rri0i0;
ptr_ri0i1 = rri0i1;
ptr_ri0i2 = rri0i2;
ptr_ri0i3 = rri0i3;
// Compute the Dn max's
// dmax2/dmax4: maxima of Dn on the second/third pulse positions,
// dmax3/dmax5: the same one sample later (shifted grid). They give an
// upper bound on what the inner loops can add, used to skip them.
dmax2 = dmax3 = dmax4 = dmax5 = -1000000.0f; //i.e., large negative number
for (k = 2; k<SubFrLen2; k+=STEP)
{
if (Dn[k] > dmax2) dmax2 = Dn[k];
if (Dn[k+1] > dmax3) dmax3 = Dn[k+1];
if (Dn[k+2] > dmax4) dmax4 = Dn[k+2];
if (Dn[k+3] > dmax5) dmax5 = Dn[k+3];
}
// first pulse loop
for (i0=0; i0 < SubFrLen; i0 +=STEP)
{
// ps0 / ps0a: correlation for the unshifted / shifted grid.
ps0 = Dn[i0];
ps0a = Dn[i0+1];
alp0 = *ptr_ri0i0++;
// Init. pointers that depend on second loop
ptr_ri1i1 = rri1i1;
ptr_ri1i2 = rri1i2;
ptr_ri1i3 = rri1i3;
// Bypass: even with the best second and third pulses neither grid can
// reach the threshold, so skip the second loop. ptr_ri0i1 is advanced by
// the NB_POS entries the skipped loop would have consumed.
ps1 = ps0 + dmax2 + dmax4;
ps1a = ps0a + dmax3 + dmax5;
if (asint(ps1) < asint(thres) && asint(ps1a) < asint(thres))
{
ptr_ri0i1 += NB_POS;
goto skipsecond;
}
// second pulse loop
for (i1=2; i1 < SubFrLen; i1 +=STEP)
{
ps1 = ps0 + Dn[i1];
ps1a = ps0a + Dn[i1+1];
alp1 = alp0 + *ptr_ri1i1++ + *ptr_ri0i1++;
// Init. pointers that depend on third loop
ptr_ri2i2 = rri2i2;
ptr_ri2i3 = rri2i3;
lasti2 = 4;
// Bypass of the third loop, same idea as above.
ps2 = ps1 + dmax4;
ps2a = ps1a + dmax5;
if (asint(ps2) < asint(thres) && asint(ps2a) < asint(thres))
{
// 68 makes (i2 - lasti2) >> 3 equal 8 at skipthird.
// NOTE(review): that matches the value i2 has after a complete third
// loop only if STEP is 8 and SubFrLen2 is 64 -- confirm.
i2 = 68;
goto skipthird;
}
// third pulse loop
for (i2 = 4; i2 < SubFrLen2; i2 +=STEP)
{
ps2 = ps1 + Dn[i2];
ps2a = ps1a + Dn[i2+1];
// Threshold test and 4th pulse loop. Since the probability of
// entering this is low, we cram as much of the 3rd-pulse-loop
// logic inside the threshold test. So the computation of shift,
// the choice of ps2 vs ps2a, the computation of alp2, and the
// incrementing of the 02,12,22 pointers are all done there.
if (asint(ps2) > asint(thres) || asint(ps2a) > asint(thres))
{
// Pick the grid with the larger correlation.
shift = 0;
if(asint(ps2a) > asint(ps2))
{
shift = 1;
ps2 = ps2a;
}
// Catch up the i2-indexed pointers: one entry per STEP-sized step of i2
// since they were last advanced.
inc = (i2 - lasti2) >> 3;
lasti2 = i2;
ptr_ri0i2 += inc;
ptr_ri1i2 += inc;
ptr_ri2i2 += inc;
alp2 = alp1 + *ptr_ri2i2 + *ptr_ri0i2 + *ptr_ri1i2;
// First fourth-pulse candidate on the selected grid.
pntr = &Dn[6+shift];
// Fill a[]: the inline-assembly variant is meant to produce the same
// values as the C loops in the #else branch.
// NOTE(review): equivalence of the two variants was not verified here.
#if OPT_PULSE4
ASM
{
push esi;
push ebx;
mov esi,pntr;
;// First half of first loop
fld DP [esi+4*8*0];
fld DP [esi+4*8*1];
fld DP [esi+4*8*2];
fld DP [esi+4*8*3];
fxch ST(3);
fadd ps2;
fxch ST(2);
fadd ps2;
fxch ST(1);
fadd ps2;
fxch ST(3);
fadd ps2;
fxch ST(2);
fmul ST,ST(0);
fxch ST(1);
fmul ST,ST(0);
fxch ST(3);
fmul ST,ST(0);
fxch ST(2);
fmul ST,ST(0);
fxch ST(1);
fstp a[4*8];
fxch ST(2);
fstp a[4*9];
fstp a[4*10];
fstp a[4*11];
;// Second half of first loop
fld DP [esi+4*8*4];
fld DP [esi+4*8*5];
fld DP [esi+4*8*6];
fld DP [esi+4*8*7];
fxch ST(3);
fadd ps2;
fxch ST(2);
fadd ps2;
fxch ST(1);
fadd ps2;
fxch ST(3);
fadd ps2;
fxch ST(2);
fmul ST,ST(0);
fxch ST(1);
fmul ST,ST(0);
fxch ST(3);
fmul ST,ST(0);
fxch ST(2);
fmul ST,ST(0);
fxch ST(1);
fstp a[4*12];
fxch ST(2);
fstp a[4*13];
fstp a[4*14];
fstp a[4*15];
;// First half of second loop
mov eax,rri3i3;
mov ebx,ptr_ri0i3;
mov ecx,ptr_ri1i3;
mov edx,ptr_ri2i3;
fld alp2;
fld alp2;
fld alp2;
fld alp2;
fxch ST(3);
fadd DP [eax+4*0];
fxch ST(2);
fadd DP [eax+4*1];
fxch ST(1);
fadd DP [eax+4*2];
fxch ST(3);
fadd DP [eax+4*3];
fxch ST(2);
fadd DP [ebx+4*0];
fxch ST(1);
fadd DP [ebx+4*1];
fxch ST(3);
fadd DP [ebx+4*2];
fxch ST(2);
fadd DP [ebx+4*3];
fxch ST(1);
fadd DP [ecx+4*0];
fxch ST(3);
fadd DP [ecx+4*1];
fxch ST(2);
fadd DP [ecx+4*2];
fxch ST(1);
fadd DP [ecx+4*3];
fxch ST(3);
fadd DP [edx+4*0];
fxch ST(2);
fadd DP [edx+4*1];
fxch ST(1);
fadd DP [edx+4*2];
fxch ST(3);
fadd DP [edx+4*3];
fxch ST(2);
fstp a[4*0];
fstp a[4*1];
fxch ST(1);
fstp a[4*2];
fstp a[4*3];
;// Second half of second loop
fld alp2;
fld alp2;
fld alp2;
fld alp2;
fxch ST(3);
fadd DP [eax+4*4];
fxch ST(2);
fadd DP [eax+4*5];
fxch ST(1);
fadd DP [eax+4*6];
fxch ST(3);
fadd DP [eax+4*7];
fxch ST(2);
fadd DP [ebx+4*4];
fxch ST(1);
fadd DP [ebx+4*5];
fxch ST(3);
fadd DP [ebx+4*6];
fxch ST(2);
fadd DP [ebx+4*7];
fxch ST(1);
fadd DP [ecx+4*4];
fxch ST(3);
fadd DP [ecx+4*5];
fxch ST(2);
fadd DP [ecx+4*6];
fxch ST(1);
fadd DP [ecx+4*7];
fxch ST(3);
fadd DP [edx+4*4];
fxch ST(2);
fadd DP [edx+4*5];
fxch ST(1);
fadd DP [edx+4*6];
fxch ST(3);
fadd DP [edx+4*7];
fxch ST(2);
fstp a[4*4];
fstp a[4*5];
fxch ST(1);
fstp a[4*6];
fstp a[4*7];
pop ebx;
pop esi;
}
#else
// Squared total correlation for each of the 8 fourth-pulse candidates.
for (k=0; k<8; k++)
{
ps3 = ps2 + *pntr;
pntr += STEP;
a[k+8] = ps3 * ps3;
}
// Total energy for each of the 8 fourth-pulse candidates.
for (k=0; k<8; k++)
a[k] = alp2 + rri3i3[k] + ptr_ri0i3[k] + ptr_ri1i3[k] + ptr_ri2i3[k];
#endif
// Keep candidate k when corr^2 * alpha > psc * energy, i.e. when its
// corr^2/energy ratio beats the best so far, compared without dividing.
bestk = -1;
for (k=0; k<8; k++)
{
t1 = a[k+8] * alpha;
t2 = psc * a[k];
if (asint(t1) > asint(t2))
{
psc = a[k+8];
alpha = a[k];
bestk = k;
}
}
if (bestk >= 0)
{
ip0 = i0;
ip1 = i1;
ip2 = i2;
ip3 = 6 + (bestk << 3);
shif = shift;
//#define t32 4294967296.0f
// printf(" %3d %3d %3d %3d %d %f %f %f\n",ip0,ip1,ip2,ip3,shift,psc/thres/thres,alpha/thres,(float)psc/(float)alpha/thres);
}
// One fourth-pulse search consumed; stop the whole search when the budget
// is used up.
time--;
if(time <= 0)
goto end_search;
}
ptr_ri2i3 += NB_POS;
}
skipthird:
// Advance the i2-indexed pointers over whatever part of the third loop
// has not been accounted for yet.
inc = (i2 - lasti2) >> 3;
ptr_ri0i2 += inc;
ptr_ri1i2 += inc;
ptr_ri2i2 += inc;
// end of for i2 =
// ptr_ri0i2 depends on i0 and i2 only, so it is rewound for the next i1.
ptr_ri0i2 -= NB_POS;
ptr_ri1i3 += NB_POS;
}
skipsecond:
// end of for i1 =
ptr_ri0i2 += NB_POS;
ptr_ri0i3 += NB_POS;
}
// end of for i0 =
end_search:
// Carry the unused budget over to the next call.
extra = time;
ip[0] = ip0;
ip[1] = ip1;
ip[2] = ip2;
ip[3] = ip3;
*shifPtr = shif;
return;
}