// SAC MMx utilities #include #include "mmxutil.h" #include "opt.h" #define I2FTEST 0 #if I2FTEST #include "stdio.h" #endif //------------------------------------------------------ int IsMMX() // does the processor I'm running have MMX(tm) technology? { int retu; #ifdef _ALPHA_ return 0; #endif #ifdef _X86_ __asm { push ebx pushfd pop edx mov eax,edx xor edx,200000h push edx popfd pushfd pop edx // // DON'T do this. This clears EAX, but the code is relying // on edx being 0 in the bail out case!!! // // -mikeg // // xor eax,edx // // xor edx,eax //This is the right way je no_cpuid mov eax,1 _emit 0x0f //CPUID magic incantation _emit 0xa2 and edx,000800000h shr edx,23 no_cpuid: mov retu,edx pop ebx } return(retu); #endif } //------------------------------------------------------ /* The following 4 routines make an 8-byte-aligned 'output' array from an 'input' array with various alignments. MakeAlignedN assumes that 'input' starts on an address equal to N mod 8. For now we only handle even N. */ //------------------------------------------------------ void MakeAligned0(void *input, void *output, int numbytes) { memcpy(output,input,numbytes); } //------------------------------------------------------ void MakeAligned2(void *input, void *output, int numbytes) { memcpy(output,input,numbytes); } //------------------------------------------------------ void MakeAligned4(void *input, void *output, int numbytes) { memcpy(output,input,numbytes); } //------------------------------------------------------ void MakeAligned6(void *input, void *output, int numbytes) { memcpy(output,input,numbytes); } //------------------------------------------------------ int FloatToShortScaled(float *input, short *output, int len, int guard) { int max; /* Convert an array of floats to an array of shorts with dynamic scaling. If guard=0 the array is scaled so that the largest power of 2 contained in the input comes out as 16384, which means all values fit in 16 bits without overflow. If guard>0 the outputs are shifted an extra 'guard' bits to the right. */ max = FloatMaxExp(input, len); ScaleFloatToShort(input, output, len, max + guard); return max; } int FloatToIntScaled(float *input, int *output, int len, int guard) { int max; /* Convert an array of floats to an array of shorts with dynamic scaling. If guard=0 the array is scaled so that the largest power of 2 contained in the input comes out as 2^30, which means all values fit in 32 bits without overflow. If guard>0 the outputs are shifted an extra 'guard' bits to the right. */ max = FloatMaxExp(input, len); ScaleFloatToInt(input, output, len, max + guard); return max; } int FloatMaxExp(float *input, int len) { int max; #if ASM_FTOSS ASM { mov esi,input; xor eax,eax; mov ebx,len; xor edi,edi; // max loop2: mov ecx,DP[esi+4*eax]; mov edx,DP[esi+4*eax+4]; and ecx,07f800000h; and edx,07f800000h; cmp edi,ecx; jge skip1; mov edi,ecx; skip1: cmp edi,edx; jge skip2; mov edi,edx; skip2: add eax,2; cmp eax,ebx; jl loop2; mov max,edi; } #else int exp,i; max = 0; for (i=0; i max) max = exp; } #endif return max >> 23; } void ScaleFloatToShort(float *input, short *output, int len, int newmax) { int i; float scale; /* If max exponent is 14, we want a scale factor of 1, since then values will be at most +/- 32727. So scale factor multiplier should be 2^(14 - max - guard). But 'max' has the exponent bias built in, so we must add BIAS once to the exponent to get a "real" exponent. But then we want a FP exponent that has bias, so we need to add BIAS again! So we get 2^(2*BIAS+14 - max - guard). 2*BIAS+14 is 254 + 14 = 252+12, so it's 0x86000000 (first 9 bits 1 0000 1100) */ i = 0x86000000 - (newmax << 23); scale = (*(float *)&i); #if ASM_FTOSS ASM { mov esi,input; mov edi,output; xor eax,eax; mov ebx,len; loop1: fld DP[esi+4*eax]; fmul scale; fld DP[esi+4*eax+4]; fmul scale; fxch(1); fistp WP[edi+2*eax]; fistp WP[edi+2*eax+2]; add eax,2; cmp eax,ebx; jl loop1; } #else for (i=0; i0; i-=2) { x = in[i]; y = in[i+1]; out[2*(i+1)] = y; out[2*(i+1)+1] = (y<<16 | x>>16); x = in[i-1]; y = in[i]; out[2*i] = y; out[2*i+1] = (y<<16 | x>>16); } //odd ends for (i++; i>=0; i--) { x = (i>0)?in[i-1]:0; y = in[i]; out[2*i] = y; out[2*i+1] = (y<<16 | x>>16); } return; } void ShortToFloatScale(short *x, float scale, int N, float *y) { /* short i; float yy[100]; for (i=0; i