|
|
// Mix.cpp
// Copyright (c) Microsoft Corporation 1996, 1998
// Mix engines for MSSynth
#ifdef DMSYNTH_MINIPORT
#include "common.h"
#define STR_MODULENAME "DMusicMix:"
#else
#include "simple.h"
#include <mmsystem.h>
#include "synth.h"
#endif
///////////////////////////////////////////////////////
// Modifications
// member m_nChannels => parameter dwBufferCount
//
// Changed number of arguments into Filtered mixers
//
// Remove range checking after filter
#pragma warning(disable : 4101 4102 4146)
#ifdef _ALPHA_
extern "C" { int __ADAWI(short, short *); }; #pragma intrinsic(__ADAWI)
#define ALPHA_OVERFLOW 2
#define ALPHA_NEGATIVE 8
#else // !_ALPHA_
// TODO -- overflow detection for ia64 (+ axp64?)
#endif // !_ALPHA_
#ifdef DMSYNTH_MINIPORT
#pragma code_seg("PAGE")
#endif // DMSYNTH_MINIPORT
#define USE_MMX
#define USE_MMX_FILTERED
#ifdef i386 // {
DWORD CDigitalAudio::MixMulti8( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI, dwJ; DWORD dwPosition; long lMInterp; long lM; long lA;//, lB;
DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; char * pcWave = (char *) m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; } #if 1 // {
DWORD l_nChannels = dwBufferCount; #if 1 // {
DWORD a; DWORD One_Channel_1, One_Channel_2; // Code address locations.
#ifdef USE_MMX // {
typedef __int64 QWORD; QWORD OneMask = 0x0000000010001000; QWORD fffMask = 0x00000fff00000fff; QWORD ffffMask = 0x0000ffff0000ffff; DWORD UseMmx; DWORD MmxVolume[2]; int Use_MMX = m_sfMMXEnabled;
_asm { lea edi, $L43865
// Turned off
cmp Use_MMX, 0 je AssignMmxLabel
// != 2 channels
mov esi, DWORD PTR l_nChannels cmp esi, 2 jne AssignMmxLabel
// Ok, init and use MMX
lea edi, UseMmxLabel
pxor mm0, mm0 movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
AssignMmxLabel: mov DWORD PTR UseMmx, edi
} #endif // }
_asm { mov edi, DWORD PTR l_nChannels
cmp edi, 8 jna Start1
lea esi, $L44008 jmp Do_One_Channel_2
// Put this code more than 127 bytes away from the references.
overflow_x: js overflow_y mov WORD PTR [esi+ebx*2], 0x8000 jmp edi
overflow_y: mov WORD PTR [esi+ebx*2], 0x7fff jmp edi
Start1: test edi, edi jne Start2
lea esi, $L43860 jmp Do_One_Channel_2
Start2: lea eax, $L43851 lea edx, $L43853
sub edx, eax mov esi, 8
sub esi, edi imul esi, edx add esi, eax
Do_One_Channel_2: mov DWORD PTR One_Channel_1, esi
// Create second jump table location.
lea esi, $L43876 lea ecx, $L43880
sub ecx, esi
push ecx // Span between branches.
mov eax, 8 sub eax, DWORD PTR l_nChannels
jge Start3 lea ecx, $L44009 jmp Done_Do_Channel_2
Start3: cmp eax, 8 jne Start4
lea ecx, $L43866 jmp Done_Do_Channel_2
Start4: imul ecx, eax add ecx, esi
Done_Do_Channel_2: mov DWORD PTR One_Channel_2, ecx
mov ecx, DWORD PTR dwLength xor ebx, ebx // dwI
test ecx, ecx jbe Exit_$L43841
mov ecx, DWORD PTR ppBuffer sub ecx, 4
// ecx == ppBuffer
// ebx == dwI
// edi == l_nChannels
$L44021:
mov edx, DWORD PTR pfSamplePos cmp edx, DWORD PTR pfSampleLength jl SHORT $L43842
mov eax, DWORD PTR pfLoopLength test eax, eax je Exit_$L43841
sub edx, eax mov DWORD PTR pfSamplePos, edx
$L43842: mov edx, DWORD PTR dwIncDelta mov eax, DWORD PTR pfPFract
dec edx
mov DWORD PTR dwIncDelta, edx jne $L43860
mov edx, DWORD PTR dwDeltaPeriod mov esi, DWORD PTR pfDeltaPitch
mov DWORD PTR dwIncDelta, edx add eax, esi
mov DWORD PTR pfPFract, eax
sar eax, 8 mov DWORD PTR pfPitch, eax
mov esi, DWORD PTR vfDeltaVolume jmp One_Channel_1
// ONE_CHANNEL
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
$L44008:
mov DWORD PTR dwI, ebx lea ebx, DWORD PTR [edi*4-4] add edi, -8 ; fffffff8H $L43849:
lea eax, DWORD PTR vfVFract[ebx] mov ecx, DWORD PTR [esi+ebx] sub ebx, 4 add DWORD PTR [eax], ecx mov eax, DWORD PTR [eax] sar eax, 8 mov DWORD PTR vfVolume[ebx+4], eax dec edi jne SHORT $L43849
mov edi, DWORD PTR l_nChannels mov ecx, DWORD PTR ppBuffer
mov ebx, DWORD PTR dwI sub ecx, 4 } #define ONE_CHANNEL_VOLUME(dwJ) \
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \ _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \ _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { mov eax, DWORD PTR vfVFract[0] }; \ _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \ _asm { mov DWORD PTR vfVFract[0], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
$L43851: ONE_CHANNEL_VOLUME(8) $L43853: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43860: _asm { ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
mov esi, DWORD PTR pfPitch mov eax, DWORD PTR pfSampleLength
dec esi sub eax, DWORD PTR pfSamplePos
add eax, esi cdq idiv DWORD PTR pfPitch
mov edx, DWORD PTR dwLength sub edx, ebx
cmp edx, eax jae SHORT $L43863 mov eax, edx
$L43863: mov edx, DWORD PTR dwIncDelta cmp edx, eax jae SHORT $L43864 mov eax, edx
$L43864:
; 309 : ; 310 : for (a += dwI; dwI < a; dwI++)
inc edx
sub edx, eax add eax, ebx
mov DWORD PTR dwIncDelta, edx cmp ebx, eax
mov DWORD PTR a, eax jae $L43867
#ifdef USE_MMX // {
// Try to handle two positions at once.
lea edx, [eax-3] cmp ebx, edx jge $L43865
jmp UseMmx
UseMmxLabel: // Ok, there are at least two samples to handle.
movd mm1, DWORD PTR pfPitch psllq mm1, 32 // Pitch, 0
movd mm2, DWORD PTR pfSamplePos punpckldq mm2, mm2 // SamplePos, SamplePos
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
punpckhdq mm1, mm1 // Pitch, Pitch
pslld mm1, 1 // Pitch * 2, Pitch * 2
mov eax, DWORD PTR pcWave #if 0
movq mm4, QWORD PTR vfVolume pand mm4, QWORD PTR ffffMask movq mm5, mm4 pslld mm4, 16 por mm4, mm5 psllw mm4, 3 movq QWORD PTR MmxVolume, mm4 #endif
TwoAtATime:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch;
movq mm4, mm2 psrad mm4, 12 // dwPosition + Pitch, dwPosition
; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
movd esi, mm4 // dwPosition
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
// movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
// Instead for byte codes
mov si, WORD PTR [eax+esi] movd mm6, esi punpcklbw mm5, mm6 psraw mm5, 8 movd esi, mm4 // movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
// Instead for byte codes
mov si, WORD PTR [eax+esi] movd mm6, esi punpcklbw mm4, mm6 psraw mm4, 8 // This code could be combined with code above, a bit.
punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
movq mm4, mm2 pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
packssdw mm4, mm0 movq mm6, mm3 psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
punpcklwd mm6, mm4 paddd mm2, mm1 // Next iteration
pmaddwd mm6, mm5 #if 1
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
psrad mm6, 12 // lMIntrep2, lMInterp
// pand mm6, QWORD PTR ffffMask
// pand mm5, QWORD PTR ffffMask // 16 bits only.
movq mm4, mm5 mov esi, DWORD PTR [ecx+4]
punpckldq mm4, mm4 pmaddwd mm4, mm6 psrad mm4, 5 packssdw mm4, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm4 movd DWORD PTR [esi+ebx*2], mm7
// CHANNEL 2
punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
mov esi, DWORD PTR [ecx+8]
pmaddwd mm5, mm6 psrad mm5, 5 packssdw mm5, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm5 movd DWORD PTR [esi+ebx*2], mm7
#else // There is noise here, probably due to the signed nature of the multiply.
psrad mm6, 12 // lMIntrep2, lMInterp
movq mm5, QWORD PTR MmxVolume packssdw mm6, mm0 punpckldq mm6, mm6 pmulhw mm6, mm5 mov esi, DWORD PTR [ecx+4] movd mm7, DWORD PTR [esi+ebx*2] mov esi, DWORD PTR [ecx+8] movd mm4, DWORD PTR [esi+ebx*2] punpckldq mm4, mm7 paddsw mm4, mm6 movd DWORD PTR [esi+ebx*2], mm4 punpckhdq mm4, mm4 mov esi, DWORD PTR [ecx+4] movd DWORD PTR [esi+ebx*2], mm4
#endif
add ebx, 2
cmp ebx, edx jb TwoAtATime
movd DWORD PTR pfSamplePos, mm2 #endif // }
$L43865:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch; ; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
mov esi, DWORD PTR pfPitch mov edx, DWORD PTR pfSamplePos
mov eax, DWORD PTR pcWave mov edi, edx
add esi, edx and edi, 4095
sar edx, 12 mov DWORD PTR pfSamplePos, esi
movsx esi, BYTE PTR [eax+edx] movsx eax, BYTE PTR [eax+edx+1]
sub eax, esi
imul eax, edi
sar eax, 12 mov edi, One_Channel_2
// ebx, ecx, edx are used in switch branches
add eax, esi // lMInterp
jmp edi
// ONE_CHANNEL
// lM = lMInterp * vfVolume[dwJ - 1];
// lM >>= 5;
// ppBuffer[dwJ - 1][dwI] += (short) lM;
$L44009:
; 342 : default: ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
mov edi, DWORD PTR l_nChannels
// ecx ppBuffer
// eax lMInterp
// edi counter
// ebx dwI
$L43874: mov edx, DWORD PTR vfVolume[edi*4-4] mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
imul edx, eax sar edx, 5 add WORD PTR [esi+ebx*2], dx
jno no_overflow mov WORD PTR [esi+ebx*2], 0x7fff js no_overflow mov WORD PTR [esi+ebx*2], 0x8000
no_overflow: dec edi cmp edi, 8 jne SHORT $L43874
lea edi, $L43876 }
#define ONE_CHANNEL_VOLUME(dwJ) \
_asm { lea edx, vfVolume } \ _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \ _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 5 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { lea edx, vfVolume } \ _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \ _asm { mov esi, DWORD PTR [ecx + 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 5 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
$L43876: ONE_CHANNEL_VOLUME(8); $L43880: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43866: _asm { mov eax, DWORD PTR a inc ebx
cmp ebx, eax jb $L43865
mov edi, DWORD PTR l_nChannels $L43867: cmp ebx, DWORD PTR dwLength jb $L44021 Exit_$L43841: pop eax mov DWORD PTR dwI, ebx
#ifdef USE_MMX
mov edi, UseMmx cmp edi, UseMmxLabel jne NoMmxCleanupLabel
emms NoMmxCleanupLabel: #endif
} #else // }{
for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8;
#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1]; \ vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
switch (l_nChannels) { default: for (dwJ = l_nChannels; dwJ > 8; dwJ--) { ONE_CHANNEL_VOLUME(dwJ); } case 8: ONE_CHANNEL_VOLUME(8); case 7: ONE_CHANNEL_VOLUME(7); case 6: ONE_CHANNEL_VOLUME(6); case 5: ONE_CHANNEL_VOLUME(5); case 4: ONE_CHANNEL_VOLUME(4); case 3: ONE_CHANNEL_VOLUME(3); case 2: ONE_CHANNEL_VOLUME(2); case 1: ONE_CHANNEL_VOLUME(1); case 0:; } #undef ONE_CHANNEL_VOLUME
#else
for (dwJ = 0; dwJ < l_nChannels; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; } #endif
}
#if 1 // {
DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch; DWORD b = dwLength - dwI;
if (b < a) a = b; if (dwIncDelta < a) a = dwIncDelta;
dwIncDelta -= a - 1; a += dwI;
for (; dwI < a; dwI++) { dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA; #if 1 // {
#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 5; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ long b = ppBuffer[dwJ - 1][dwI]; \ if ((short)b != b) { \ if ((long)b < 0) b = 0x8000; \ else b = 0x7fff; \ ppBuffer[dwJ - 1][dwI] = (short) b; \ } \ } #else
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 5; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ } #endif
switch (l_nChannels) { default: for (dwJ = l_nChannels; dwJ > 8; dwJ--) { ONE_CHANNEL_VOLUME(dwJ); } case 8: ONE_CHANNEL_VOLUME(8); case 7: ONE_CHANNEL_VOLUME(7); case 6: ONE_CHANNEL_VOLUME(6); case 5: ONE_CHANNEL_VOLUME(5); case 4: ONE_CHANNEL_VOLUME(4); case 3: ONE_CHANNEL_VOLUME(3); case 2: ONE_CHANNEL_VOLUME(2); case 1: ONE_CHANNEL_VOLUME(1); case 0:; } #undef ONE_CHANNEL_VOLUME
#else // }{
for (dwJ = 0; dwJ < l_nChannels; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 5; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} #endif // }
} #else // }{
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA; #if 1
#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 5; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ long b = ppBuffer[dwJ - 1][dwI]; \ if ((short)b != b) { \ if ((long)b < 0) b = 0x8000; \ else b = 0x7fff; \ ppBuffer[dwJ - 1][dwI] = (short) b; \ } \ } #else
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 5; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ } #endif
switch (l_nChannels) { default: for (dwJ = l_nChannels; dwJ > 8; dwJ--) { ONE_CHANNEL_VOLUME(dwJ); } case 8: ONE_CHANNEL_VOLUME(8); case 7: ONE_CHANNEL_VOLUME(7); case 6: ONE_CHANNEL_VOLUME(6); case 5: ONE_CHANNEL_VOLUME(5); case 4: ONE_CHANNEL_VOLUME(4); case 3: ONE_CHANNEL_VOLUME(3); case 2: ONE_CHANNEL_VOLUME(2); case 1: ONE_CHANNEL_VOLUME(1); case 0:; } #undef ONE_CHANNEL_VOLUME
#else
for (dwJ = 0; dwJ < l_nChannels; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 5; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} #endif
dwI++; #endif // }
} #endif // }
#else // }{
for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; } }
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lMInterp = pcWave[dwPosition]; // pcWave
lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 5;
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} dwI++; } #endif // }
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
return (dwI); } DWORD CDigitalAudio::MixMulti8Filter( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength, COEFF cfdK, COEFF cfdB1, COEFF cfdB2) { DWORD dwI, dwJ; DWORD dwPosition; long lMInterp; long lM; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; char * pcWave = (char *) m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; COEFF cfK = m_cfLastK; COEFF cfB1 = m_cfLastB1; COEFF cfB2 = m_cfLastB2;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
DWORD dMM6[2];
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
#if 1 // {
DWORD l_nChannels = dwBufferCount; DWORD a; DWORD One_Channel_1, One_Channel_2; // Code address locations.
long l_lPrevPrevSample = m_lPrevPrevSample, l_lPrevSample = m_lPrevSample;
#ifdef USE_MMX_FILTERED // {
typedef __int64 QWORD; QWORD OneMask = 0x0000000010001000; QWORD fffMask = 0x00000fff00000fff; QWORD ffffMask = 0x0000ffff0000ffff; DWORD UseMmx; DWORD MmxVolume[2]; int Use_MMX = m_sfMMXEnabled;
_asm { lea edi, $L43865
// Turned off
cmp Use_MMX, 0 je AssignMmxLabel
// != 2 channels
mov esi, DWORD PTR l_nChannels cmp esi, 2 jne AssignMmxLabel
// Ok, init and use MMX
lea edi, UseMmxLabel
pxor mm0, mm0 movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
AssignMmxLabel: mov DWORD PTR UseMmx, edi
} #endif // }
_asm { mov edi, DWORD PTR l_nChannels
cmp edi, 8 jna Start1
lea esi, $L44008 jmp Do_One_Channel_2
// Put this code more than 127 bytes away from the references.
overflow_x: js overflow_y mov WORD PTR [esi+ebx*2], 0x8000 jmp edi
overflow_y: mov WORD PTR [esi+ebx*2], 0x7fff jmp edi
Start1: test edi, edi jne Start2
lea esi, $L43860 jmp Do_One_Channel_2
Start2: lea eax, $L43851 lea edx, $L43853
sub edx, eax mov esi, 8
sub esi, edi imul esi, edx add esi, eax
Do_One_Channel_2: mov DWORD PTR One_Channel_1, esi
// Create second jump table location.
lea esi, $L43876 lea ecx, $L43880
sub ecx, esi
push ecx // Span between branches.
mov eax, 8 sub eax, DWORD PTR l_nChannels
jge Start3 lea ecx, $L44009 jmp Done_Do_Channel_2
Start3: cmp eax, 8 jne Start4
lea ecx, $L43866 jmp Done_Do_Channel_2
Start4: imul ecx, eax add ecx, esi
Done_Do_Channel_2: mov DWORD PTR One_Channel_2, ecx
mov ecx, DWORD PTR dwLength xor ebx, ebx // dwI
test ecx, ecx jbe Exit_$L43841
mov ecx, DWORD PTR ppBuffer sub ecx, 4
// ecx == ppBuffer
// ebx == dwI
// edi == l_nChannels
$L44021:
mov edx, DWORD PTR pfSamplePos cmp edx, DWORD PTR pfSampleLength jl SHORT $L43842
mov eax, DWORD PTR pfLoopLength test eax, eax je Exit_$L43841
sub edx, eax mov DWORD PTR pfSamplePos, edx
$L43842: mov edx, DWORD PTR dwIncDelta mov eax, DWORD PTR pfPFract
dec edx
mov DWORD PTR dwIncDelta, edx jne $L43860
mov edx, DWORD PTR dwDeltaPeriod mov esi, DWORD PTR pfDeltaPitch
mov DWORD PTR dwIncDelta, edx add eax, esi
mov DWORD PTR pfPFract, eax
sar eax, 8 mov DWORD PTR pfPitch, eax
mov esi, DWORD PTR vfDeltaVolume jmp One_Channel_1
// ONE_CHANNEL
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
$L44008:
mov DWORD PTR dwI, ebx lea ebx, DWORD PTR [edi*4-4] add edi, -8 ; fffffff8H $L43849:
lea eax, DWORD PTR vfVFract[ebx] mov ecx, DWORD PTR [esi+ebx] sub ebx, 4 add DWORD PTR [eax], ecx mov eax, DWORD PTR [eax] sar eax, 8 mov DWORD PTR vfVolume[ebx+4], eax dec edi jne SHORT $L43849
mov edi, DWORD PTR l_nChannels mov ecx, DWORD PTR ppBuffer
mov ebx, DWORD PTR dwI sub ecx, 4 } #define ONE_CHANNEL_VOLUME(dwJ) \
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \ _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \ _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { mov eax, DWORD PTR vfVFract[0] }; \ _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \ _asm { mov DWORD PTR vfVFract[0], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
$L43851: ONE_CHANNEL_VOLUME(8) $L43853: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
_asm { // cfK += cfdK;
// cfB1 += cfdB1;
// cfB2 += cfdB2;
mov eax, DWORD PTR cfdK mov edx, DWORD PTR cfdB1 mov esi, DWORD PTR cfdB2 add DWORD PTR cfK, eax
add DWORD PTR cfB1, edx add DWORD PTR cfB2, esi
$L43860: ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
mov esi, DWORD PTR pfPitch mov eax, DWORD PTR pfSampleLength
dec esi sub eax, DWORD PTR pfSamplePos
add eax, esi cdq idiv DWORD PTR pfPitch
mov edx, DWORD PTR dwLength sub edx, ebx
cmp edx, eax jae SHORT $L43863 mov eax, edx
$L43863: mov edx, DWORD PTR dwIncDelta cmp edx, eax jae SHORT $L43864 mov eax, edx
$L43864:
; 309 : ; 310 : for (a += dwI; dwI < a; dwI++)
inc edx
sub edx, eax add eax, ebx
mov DWORD PTR dwIncDelta, edx cmp ebx, eax
mov DWORD PTR a, eax jae $L43867
#ifdef USE_MMX_FILTERED // {
// Try to handle two positions at once.
lea edx, [eax-3] cmp ebx, edx jge $L43865
jmp UseMmx
UseMmxLabel: // Ok, there are at least two samples to handle.
movd mm1, DWORD PTR pfPitch psllq mm1, 32 // Pitch, 0
movd mm2, DWORD PTR pfSamplePos punpckldq mm2, mm2 // SamplePos, SamplePos
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
punpckhdq mm1, mm1 // Pitch, Pitch
pslld mm1, 1 // Pitch * 2, Pitch * 2
mov eax, DWORD PTR pcWave #if 0
movq mm4, QWORD PTR vfVolume pand mm4, QWORD PTR ffffMask movq mm5, mm4 pslld mm4, 16 por mm4, mm5 psllw mm4, 3 movq QWORD PTR MmxVolume, mm4 #endif
TwoAtATime:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch;
movq mm4, mm2 psrad mm4, 12 // dwPosition + Pitch, dwPosition
; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
movd esi, mm4 // dwPosition
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
// movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
// Instead for byte codes
mov si, WORD PTR [eax+esi] movd mm6, esi punpcklbw mm5, mm6 psraw mm5, 8 movd esi, mm4 // movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
// Instead for byte codes
mov si, WORD PTR [eax+esi] movd mm6, esi punpcklbw mm4, mm6 psraw mm4, 8 // This code could be combined with code above, a bit.
punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
movq mm4, mm2 pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
packssdw mm4, mm0 movq mm6, mm3 psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
punpcklwd mm6, mm4 paddd mm2, mm1 // Next iteration
pmaddwd mm6, mm5 #if 1
psrad mm6, 12 // lMIntrep2, lMInterp
#if 1
// eax, ebx, ecx, edx, esi are used. edi is free...
push eax push ecx push edx
movq QWORD PTR dMM6, mm6
mov eax, DWORD PTR dMM6 imul DWORD PTR cfK // edx:eax
mov ecx, eax mov eax, DWORD PTR l_lPrevPrevSample
mov edi, edx // esi:ecx
imul DWORD PTR cfB2
sub ecx, eax mov eax, DWORD PTR l_lPrevSample
sbb edi, edx mov DWORD PTR l_lPrevPrevSample, eax
imul DWORD PTR cfB1
add eax, ecx adc edx, edi
//>>>>> MOD:PETCHEY
// shld eax, edx, 2
//>>>>> should be
shld edx, eax, 2 mov eax, edx
mov DWORD PTR dMM6, eax mov DWORD PTR l_lPrevSample, eax
// 2nd sample
mov eax, DWORD PTR dMM6+4 imul DWORD PTR cfK // edx:eax
mov ecx, eax mov eax, DWORD PTR l_lPrevPrevSample
mov edi, edx // esi:ecx
imul DWORD PTR cfB2
sub ecx, eax mov eax, DWORD PTR l_lPrevSample
sbb edi, edx mov DWORD PTR l_lPrevPrevSample, eax
imul DWORD PTR cfB1
add eax, ecx adc edx, edi
//>>>>> MOD:PETCHEY
// shld eax, edx, 2
//>>>>> should be
shld edx, eax, 2 mov eax, edx
mov DWORD PTR dMM6+4, eax mov DWORD PTR l_lPrevSample, eax
movq mm6, QWORD PTR dMM6
pop edx pop ecx pop eax #endif
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
// pand mm6, QWORD PTR ffffMask
// packssdw mm6, mm0 // Saturate to 16 bits, instead.
// punpcklwd mm6, mm0
// pand mm5, QWORD PTR ffffMask // 16 bits only.
movq mm4, mm5 mov esi, DWORD PTR [ecx+4]
punpckldq mm4, mm4 pmaddwd mm4, mm6 psrad mm4, 5 packssdw mm4, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm4 movd DWORD PTR [esi+ebx*2], mm7
// CHANNEL 2
punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
mov esi, DWORD PTR [ecx+8]
pmaddwd mm5, mm6 psrad mm5, 5 packssdw mm5, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm5 movd DWORD PTR [esi+ebx*2], mm7
#else // There is noise here, probably due to the signed nature of the multiply.
psrad mm6, 12 // lMIntrep2, lMInterp
movq mm5, QWORD PTR MmxVolume packssdw mm6, mm0 punpckldq mm6, mm6 pmulhw mm6, mm5 mov esi, DWORD PTR [ecx+4] movd mm7, DWORD PTR [esi+ebx*2] mov esi, DWORD PTR [ecx+8] movd mm4, DWORD PTR [esi+ebx*2] punpckldq mm4, mm7 paddsw mm4, mm6 movd DWORD PTR [esi+ebx*2], mm4 punpckhdq mm4, mm4 mov esi, DWORD PTR [ecx+4] movd DWORD PTR [esi+ebx*2], mm4
#endif
add ebx, 2
cmp ebx, edx jb TwoAtATime
movd DWORD PTR pfSamplePos, mm2 #endif // }
$L43865:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch; ; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
mov esi, DWORD PTR pfPitch mov edx, DWORD PTR pfSamplePos
mov eax, DWORD PTR pcWave mov edi, edx
add esi, edx and edi, 4095
sar edx, 12 mov DWORD PTR pfSamplePos, esi
movsx esi, BYTE PTR [eax+edx] movsx eax, BYTE PTR [eax+edx+1]
sub eax, esi
imul eax, edi
sar eax, 12 mov edi, One_Channel_2
// ebx, ecx, edx are used in switch branches
add eax, esi // lMInterp
// lMInterp =
// MulDiv(lMInterp, cfK, (1 << 30))
// - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30))
// + MulDiv(m_lPrevSample, cfB1, (1 << 30))
push ecx imul DWORD PTR cfK // edx:eax
mov ecx, eax mov eax, DWORD PTR l_lPrevPrevSample
mov esi, edx // esi:ecx
imul DWORD PTR cfB2
sub ecx, eax mov eax, DWORD PTR l_lPrevSample
sbb esi, edx mov DWORD PTR l_lPrevPrevSample, eax
imul DWORD PTR cfB1
add eax, ecx // esi:eax
adc esi, edx
pop ecx // shrd eax, esi, 30
//>>>>> MOD:PETCHEY
// shld eax, esi, 2
//>>>>> should be
shld esi, eax, 2 mov eax, esi
//>>>>>>>>>>>> removed dp
#if 0
// if (lMInterp < -32767) lMInterp = -32767;
// else if (lMInterp > 32767) lMInterp = 32767;
cmp eax, -32767 jl Less_than cmp eax, 32767 jg Greater_than #endif
// m_lPrevPrevSample = m_lPrevSample;
// m_lPrevSample = lMInterp;
mov DWORD PTR l_lPrevSample, eax jmp edi
Less_than: mov eax, -32767 mov DWORD PTR l_lPrevSample, eax jmp edi
Greater_than: mov eax, 32767 mov DWORD PTR l_lPrevSample, eax jmp edi
// ONE_CHANNEL
// lM = lMInterp * vfVolume[dwJ - 1];
// lM >>= 5;
// ppBuffer[dwJ - 1][dwI] += (short) lM;
$L44009:
; 342 : default: ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
mov edi, DWORD PTR l_nChannels
// ecx ppBuffer
// eax lMInterp
// edi counter
// ebx dwI
$L43874: mov edx, DWORD PTR vfVolume[edi*4-4] mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
imul edx, eax sar edx, 5 add WORD PTR [esi+ebx*2], dx
jno no_overflow mov WORD PTR [esi+ebx*2], 0x7fff js no_overflow mov WORD PTR [esi+ebx*2], 0x8000
no_overflow: dec edi cmp edi, 8 jne SHORT $L43874
lea edi, $L43876 }
#define ONE_CHANNEL_VOLUME(dwJ) \
_asm { lea edx, vfVolume } \ _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \ _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 5 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { lea edx, vfVolume } \ _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \ _asm { mov esi, DWORD PTR [ecx + 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 5 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
$L43876: ONE_CHANNEL_VOLUME(8); $L43880: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43866: _asm { mov eax, DWORD PTR a inc ebx
cmp ebx, eax jb $L43865
mov edi, DWORD PTR l_nChannels $L43867: cmp ebx, DWORD PTR dwLength jb $L44021 Exit_$L43841: pop eax mov DWORD PTR dwI, ebx
#ifdef USE_MMX_FILTERED
mov edi, UseMmx cmp edi, UseMmxLabel jne NoMmxCleanupLabel
emms NoMmxCleanupLabel: #endif
} m_lPrevPrevSample = l_lPrevPrevSample; m_lPrevSample = l_lPrevSample; #else // }{
for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; }
cfK += cfdK; cfB1 += cfdB1; cfB2 += cfdB2; } dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lMInterp = pcWave[dwPosition]; // pcWave
lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
// Filter
//
lMInterp = MulDiv(lMInterp, cfK, (1 << 30)) - MulDiv(m_lPrevSample, cfB1, (1 << 30)) + MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
m_lPrevPrevSample = m_lPrevSample; m_lPrevSample = lMInterp;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 5;
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} dwI++; } #endif // }
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
return (dwI); }
#if 0
DWORD CDigitalAudio::MixMulti16( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI, dwJ; DWORD dwPosition; long lA;//, lB;
long lM; long lMInterp; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; } }
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 13; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; #endif
no_oflow: ; } dwI++; } m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; } return (dwI); } #else
DWORD CDigitalAudio::MixMulti16( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI, dwJ; DWORD dwPosition; long lA;//, lB;
long lM; long lMInterp; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
#if 1 // {
DWORD l_nChannels = dwBufferCount; DWORD a; DWORD One_Channel_1, One_Channel_2; // Code address locations.
#ifdef USE_MMX // {
typedef __int64 QWORD; QWORD OneMask = 0x0000000010001000; QWORD fffMask = 0x00000fff00000fff; QWORD ffffMask = 0x0000ffff0000ffff; DWORD UseMmx; DWORD MmxVolume[2]; int Use_MMX = m_sfMMXEnabled;
_asm { lea edi, $L43865
// Turned off
cmp Use_MMX, 0 je AssignMMXLabel
// != 2 channels
mov esi, DWORD PTR l_nChannels cmp esi, 2 jne AssignMmxLabel
// Ok, init and use MMX
lea edi, UseMmxLabel
pxor mm0, mm0 movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
AssignMmxLabel: mov DWORD PTR UseMmx, edi
} #endif // }
_asm { mov edi, DWORD PTR l_nChannels
cmp edi, 8 jna Start1
lea esi, $L44008 jmp Do_One_Channel_2
// Put this code more than 127 bytes away from the references.
overflow_x: js overflow_y mov WORD PTR [esi+ebx*2], 0x8000 jmp edi
overflow_y: mov WORD PTR [esi+ebx*2], 0x7fff jmp edi
Start1: test edi, edi jne Start2
lea esi, $L43860 jmp Do_One_Channel_2
Start2: lea eax, $L43851 lea edx, $L43853
sub edx, eax mov esi, 8
sub esi, edi imul esi, edx add esi, eax
Do_One_Channel_2: mov DWORD PTR One_Channel_1, esi
// Create second jump table location.
lea esi, $L43876 lea ecx, $L43880
sub ecx, esi
push ecx // Span between branches.
mov eax, 8 sub eax, DWORD PTR l_nChannels
jge Start3 lea ecx, $L44009 jmp Done_Do_Channel_2
Start3: cmp eax, 8 jne Start4
lea ecx, $L43866 jmp Done_Do_Channel_2
Start4: imul ecx, eax add ecx, esi
Done_Do_Channel_2: mov DWORD PTR One_Channel_2, ecx
mov ecx, DWORD PTR dwLength xor ebx, ebx // dwI
test ecx, ecx jbe Exit_$L43841
mov ecx, DWORD PTR ppBuffer sub ecx, 4
// ecx == ppBuffer
// ebx == dwI
// edi == l_nChannels
$L44021:
mov edx, DWORD PTR pfSamplePos cmp edx, DWORD PTR pfSampleLength jl SHORT $L43842
mov eax, DWORD PTR pfLoopLength test eax, eax je Exit_$L43841
sub edx, eax mov DWORD PTR pfSamplePos, edx
$L43842: mov edx, DWORD PTR dwIncDelta mov eax, DWORD PTR pfPFract
dec edx
mov DWORD PTR dwIncDelta, edx jne $L43860
mov edx, DWORD PTR dwDeltaPeriod mov esi, DWORD PTR pfDeltaPitch
mov DWORD PTR dwIncDelta, edx add eax, esi
mov DWORD PTR pfPFract, eax
sar eax, 8 mov DWORD PTR pfPitch, eax
mov esi, DWORD PTR vfDeltaVolume jmp One_Channel_1
// ONE_CHANNEL
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
$L44008:
mov DWORD PTR dwI, ebx lea ebx, DWORD PTR [edi*4-4] add edi, -8 ; fffffff8H $L43849:
lea eax, DWORD PTR vfVFract[ebx] mov ecx, DWORD PTR [esi+ebx] sub ebx, 4 add DWORD PTR [eax], ecx mov eax, DWORD PTR [eax] sar eax, 8 mov DWORD PTR vfVolume[ebx+4], eax dec edi jne SHORT $L43849
mov edi, DWORD PTR l_nChannels mov ecx, DWORD PTR ppBuffer
mov ebx, DWORD PTR dwI sub ecx, 4 } #define ONE_CHANNEL_VOLUME(dwJ) \
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \ _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \ _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { mov eax, DWORD PTR vfVFract[0] }; \ _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \ _asm { mov DWORD PTR vfVFract[0], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm { mov DWORD PTR [edx], eax };
$L43851: ONE_CHANNEL_VOLUME(8) $L43853: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43860: _asm { ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
mov esi, DWORD PTR pfPitch mov eax, DWORD PTR pfSampleLength
dec esi sub eax, DWORD PTR pfSamplePos
add eax, esi cdq idiv DWORD PTR pfPitch
mov edx, DWORD PTR dwLength sub edx, ebx
cmp edx, eax jae SHORT $L43863 mov eax, edx
$L43863: mov edx, DWORD PTR dwIncDelta cmp edx, eax jae SHORT $L43864 mov eax, edx
$L43864:
; 309 : ; 310 : for (a += dwI; dwI < a; dwI++)
inc edx
sub edx, eax add eax, ebx
mov DWORD PTR dwIncDelta, edx cmp ebx, eax
mov DWORD PTR a, eax jae $L43867
#ifdef USE_MMX // {
// Try to handle two positions at once.
lea edx, [eax-3] cmp ebx, edx jge $L43865
jmp UseMmx
UseMmxLabel: // Ok, there are at least two samples to handle.
movd mm1, DWORD PTR pfPitch psllq mm1, 32 // Pitch, 0
movd mm2, DWORD PTR pfSamplePos punpckldq mm2, mm2 // SamplePos, SamplePos
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
punpckhdq mm1, mm1 // Pitch, Pitch
pslld mm1, 1 // Pitch * 2, Pitch * 2
mov eax, DWORD PTR pcWave #if 0
movq mm4, QWORD PTR vfVolume pand mm4, QWORD PTR ffffMask movq mm5, mm4 pslld mm4, 16 por mm4, mm5 psllw mm4, 3 movq QWORD PTR MmxVolume, mm4 #endif
TwoAtATime:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch;
movq mm4, mm2 psrad mm4, 12 // dwPosition + Pitch, dwPosition
; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
movd esi, mm4 // dwPosition
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
// Instead for byte codes
// mov si, WORD PTR [eax+esi]
// movd mm6, esi
// punpcklbw mm5, mm6
// psarw mm5, 8
movd esi, mm4 movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
// Instead for byte codes
// mov si, WORD PTR [eax+esi]
// movd mm6, esi
// punpcklbw mm4, mm6
// psarw mm4, 8
// This code could be combined with code above, a bit.
punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
movq mm4, mm2 pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
packssdw mm4, mm0 movq mm6, mm3 psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
punpcklwd mm6, mm4 paddd mm2, mm1 // Next iteration
pmaddwd mm6, mm5 #if 1
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
psrad mm6, 12 // lMIntrep2, lMInterp
// pand mm6, QWORD PTR ffffMask
// pand mm5, QWORD PTR ffffMask // 16 bits only.
movq mm4, mm5 mov esi, DWORD PTR [ecx+4]
punpckldq mm4, mm4 pmaddwd mm4, mm6 psrad mm4, 13 packssdw mm4, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm4 movd DWORD PTR [esi+ebx*2], mm7
// CHANNEL 2
punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
mov esi, DWORD PTR [ecx+8]
pmaddwd mm5, mm6 psrad mm5, 13 packssdw mm5, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm5 movd DWORD PTR [esi+ebx*2], mm7
#else // There is noise here, probably due to the signed nature of the multiply.
psrad mm6, 12 // lMIntrep2, lMInterp
movq mm5, QWORD PTR MmxVolume packssdw mm6, mm0 punpckldq mm6, mm6 pmulhw mm6, mm5 mov esi, DWORD PTR [ecx+4] movd mm7, DWORD PTR [esi+ebx*2] mov esi, DWORD PTR [ecx+8] movd mm4, DWORD PTR [esi+ebx*2] punpckldq mm4, mm7 paddsw mm4, mm6 movd DWORD PTR [esi+ebx*2], mm4 punpckhdq mm4, mm4 mov esi, DWORD PTR [ecx+4] movd DWORD PTR [esi+ebx*2], mm4
#endif
add ebx, 2
cmp ebx, edx jb TwoAtATime
movd DWORD PTR pfSamplePos, mm2 #endif // }
$L43865:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch; ; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
mov esi, DWORD PTR pfPitch mov edx, DWORD PTR pfSamplePos
mov eax, DWORD PTR pcWave mov edi, edx
add esi, edx and edi, 4095
sar edx, 12 mov DWORD PTR pfSamplePos, esi
movsx esi, WORD PTR [eax+edx*2] movsx eax, WORD PTR [eax+edx*2+2]
sub eax, esi
imul eax, edi
sar eax, 12 mov edi, One_Channel_2
// ebx, ecx, edx are used in switch branches
add eax, esi // lMInterp
jmp edi
// ONE_CHANNEL
// lM = lMInterp * vfVolume[dwJ - 1];
// lM >>= 13;
// ppBuffer[dwJ - 1][dwI] += (short) lM;
$L44009:
; 342 : default: ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
mov edi, DWORD PTR l_nChannels
// ecx ppBuffer
// eax lMInterp
// edi counter
// ebx dwI
$L43874: mov edx, DWORD PTR vfVolume[edi*4-4] mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
imul edx, eax sar edx, 13 add WORD PTR [esi+ebx*2], dx
jno no_overflow mov WORD PTR [esi+ebx*2], 0x7fff js no_overflow mov WORD PTR [esi+ebx*2], 0x8000
no_overflow: dec edi cmp edi, 8 jne SHORT $L43874
lea edi, $L43876 }
#define ONE_CHANNEL_VOLUME(dwJ) \
_asm { lea edx, vfVolume } \ _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \ _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 13 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { lea edx, vfVolume } \ _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \ _asm { mov esi, DWORD PTR [ecx + 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 13 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
$L43876: ONE_CHANNEL_VOLUME(8); $L43880: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43866: _asm { mov eax, DWORD PTR a inc ebx
cmp ebx, eax jb $L43865
mov edi, DWORD PTR l_nChannels $L43867: cmp ebx, DWORD PTR dwLength jb $L44021 Exit_$L43841: pop eax mov DWORD PTR dwI, ebx
#ifdef USE_MMX
mov edi, UseMmx cmp edi, UseMmxLabel jne NoMmxCleanupLabel
emms NoMmxCleanupLabel: #endif
} #else // }{
for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8;
#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1]; \ vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
switch (l_nChannels) { default: for (dwJ = l_nChannels; dwJ > 8; dwJ--) { ONE_CHANNEL_VOLUME(dwJ); } case 8: ONE_CHANNEL_VOLUME(8); case 7: ONE_CHANNEL_VOLUME(7); case 6: ONE_CHANNEL_VOLUME(6); case 5: ONE_CHANNEL_VOLUME(5); case 4: ONE_CHANNEL_VOLUME(4); case 3: ONE_CHANNEL_VOLUME(3); case 2: ONE_CHANNEL_VOLUME(2); case 1: ONE_CHANNEL_VOLUME(1); case 0:; } #undef ONE_CHANNEL_VOLUME
#else
for (dwJ = 0; dwJ < l_nChannels; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; } #endif
}
#if 1 // {
DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch; DWORD b = dwLength - dwI;
if (b < a) a = b; if (dwIncDelta < a) a = dwIncDelta;
dwIncDelta -= a - 1; a += dwI;
for (; dwI < a; dwI++) { dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA; #if 1 // {
#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 13; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ long b = ppBuffer[dwJ - 1][dwI]; \ if ((short)b != b) { \ if ((long)b < 0) b = 0x8000; \ else b = 0x7fff; \ ppBuffer[dwJ - 1][dwI] = (short) b; \ } \ } #else
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 13; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ } #endif
switch (l_nChannels) { default: for (dwJ = l_nChannels; dwJ > 8; dwJ--) { ONE_CHANNEL_VOLUME(dwJ); } case 8: ONE_CHANNEL_VOLUME(8); case 7: ONE_CHANNEL_VOLUME(7); case 6: ONE_CHANNEL_VOLUME(6); case 5: ONE_CHANNEL_VOLUME(5); case 4: ONE_CHANNEL_VOLUME(4); case 3: ONE_CHANNEL_VOLUME(3); case 2: ONE_CHANNEL_VOLUME(2); case 1: ONE_CHANNEL_VOLUME(1); case 0:; } #undef ONE_CHANNEL_VOLUME
#else // }{
for (dwJ = 0; dwJ < l_nChannels; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 13; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} #endif // }
} #else // }{
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA; #if 1
#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 13; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ long b = ppBuffer[dwJ - 1][dwI]; \ if ((short)b != b) { \ if ((long)b < 0) b = 0x8000; \ else b = 0x7fff; \ ppBuffer[dwJ - 1][dwI] = (short) b; \ } \ } #else
#define ONE_CHANNEL_VOLUME(dwJ) \
{ \ lM = lMInterp * vfVolume[dwJ - 1]; \ lM >>= 13; \ ppBuffer[dwJ - 1][dwI] += (short) lM;\ } #endif
switch (l_nChannels) { default: for (dwJ = l_nChannels; dwJ > 8; dwJ--) { ONE_CHANNEL_VOLUME(dwJ); } case 8: ONE_CHANNEL_VOLUME(8); case 7: ONE_CHANNEL_VOLUME(7); case 6: ONE_CHANNEL_VOLUME(6); case 5: ONE_CHANNEL_VOLUME(5); case 4: ONE_CHANNEL_VOLUME(4); case 3: ONE_CHANNEL_VOLUME(3); case 2: ONE_CHANNEL_VOLUME(2); case 1: ONE_CHANNEL_VOLUME(1); case 0:; } #undef ONE_CHANNEL_VOLUME
#else
for (dwJ = 0; dwJ < l_nChannels; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 13; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} #endif
dwI++; #endif // }
} #endif // }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
return (dwI); } #endif
DWORD CDigitalAudio::MixMulti16Filter( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength, COEFF cfdK, COEFF cfdB1, COEFF cfdB2) { DWORD dwI, dwJ; DWORD dwPosition; long lA;//, lB;
long lM; long lMInterp; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; COEFF cfK = m_cfLastK; COEFF cfB1 = m_cfLastB1; COEFF cfB2 = m_cfLastB2; DWORD dMM6[2]; // Handle filter...
DWORD dMM4[2]; // Handle filter...
DWORD dMM5[2]; // Handle filter...
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
#if 1 // {
DWORD l_nChannels = dwBufferCount; DWORD a; DWORD One_Channel_1, One_Channel_2; // Code address locations.
long l_lPrevPrevSample = m_lPrevPrevSample, l_lPrevSample = m_lPrevSample;
#ifdef USE_MMX_FILTERED // {
typedef __int64 QWORD; QWORD OneMask = 0x0000000010001000; QWORD fffMask = 0x00000fff00000fff; QWORD ffffMask = 0x0000ffff0000ffff; DWORD UseMmx; DWORD MmxVolume[2]; int Use_MMX = m_sfMMXEnabled;
_asm { lea edi, $L43865
// Turned off
cmp Use_MMX, 0 je AssignMMXLabel
// != 2 channels
mov esi, DWORD PTR l_nChannels cmp esi, 2 jne AssignMmxLabel
// Ok, init and use MMX
lea edi, UseMmxLabel
pxor mm0, mm0 movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
AssignMmxLabel: mov DWORD PTR UseMmx, edi } #endif // }
_asm { mov edi, DWORD PTR l_nChannels
cmp edi, 8 jna Start1
lea esi, $L44008 jmp Do_One_Channel_2
// Put this code more than 127 bytes away from the references.
overflow_x: js overflow_y mov WORD PTR [esi+ebx*2], 0x8000 jmp edi
overflow_y: mov WORD PTR [esi+ebx*2], 0x7fff jmp edi
Start1: test edi, edi jne Start2
lea esi, $L43860 jmp Do_One_Channel_2
Start2: lea eax, $L43851 lea edx, $L43853
sub edx, eax mov esi, 8
sub esi, edi imul esi, edx add esi, eax
Do_One_Channel_2: mov DWORD PTR One_Channel_1, esi
// Create second jump table location.
lea esi, $L43876 lea ecx, $L43880
sub ecx, esi
push ecx // Span between branches.
mov eax, 8 sub eax, DWORD PTR l_nChannels
jge Start3 lea ecx, $L44009 jmp Done_Do_Channel_2
Start3: cmp eax, 8 jne Start4
lea ecx, $L43866 jmp Done_Do_Channel_2
Start4: imul ecx, eax add ecx, esi
Done_Do_Channel_2: mov DWORD PTR One_Channel_2, ecx
mov ecx, DWORD PTR dwLength xor ebx, ebx // dwI
test ecx, ecx jbe Exit_$L43841
mov ecx, DWORD PTR ppBuffer sub ecx, 4
// ecx == ppBuffer - 4
// ebx == dwI
// edi == l_nChannels
$L44021:
mov edx, DWORD PTR pfSamplePos cmp edx, DWORD PTR pfSampleLength jl SHORT $L43842
mov eax, DWORD PTR pfLoopLength test eax, eax je Exit_$L43841
sub edx, eax mov DWORD PTR pfSamplePos, edx
$L43842: mov edx, DWORD PTR dwIncDelta mov eax, DWORD PTR pfPFract
dec edx
mov DWORD PTR dwIncDelta, edx jne $L43860
mov edx, DWORD PTR dwDeltaPeriod mov esi, DWORD PTR pfDeltaPitch
mov DWORD PTR dwIncDelta, edx add eax, esi
mov DWORD PTR pfPFract, eax
sar eax, 8 mov DWORD PTR pfPitch, eax
mov esi, DWORD PTR vfDeltaVolume jmp One_Channel_1
// ONE_CHANNEL
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
$L44008:
mov DWORD PTR dwI, ebx lea ebx, DWORD PTR [edi*4-4] add edi, -8 ; fffffff8H $L43849:
lea eax, DWORD PTR vfVFract[ebx] mov ecx, DWORD PTR [esi+ebx] sub ebx, 4 add DWORD PTR [eax], ecx mov eax, DWORD PTR [eax] sar eax, 8 mov DWORD PTR vfVolume[ebx+4], eax dec edi jne SHORT $L43849
mov edi, DWORD PTR l_nChannels mov ecx, DWORD PTR ppBuffer
mov ebx, DWORD PTR dwI sub ecx, 4 } #define ONE_CHANNEL_VOLUME(dwJ) \
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \ _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \ _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { mov eax, DWORD PTR vfVFract[0] }; \ _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \ _asm { mov DWORD PTR vfVFract[0], eax }; \ _asm { sar eax, 8 }; \ _asm { lea edx, vfVolume }; \ _asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
$L43851: ONE_CHANNEL_VOLUME(8) $L43853: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
_asm { // cfK += cfdK;
// cfB1 += cfdB1;
// cfB2 += cfdB2;
mov eax, DWORD PTR cfdK mov edx, DWORD PTR cfdB1 mov esi, DWORD PTR cfdB2 add DWORD PTR cfK, eax
add DWORD PTR cfB1, edx add DWORD PTR cfB2, esi
$L43860: ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
mov esi, DWORD PTR pfPitch mov eax, DWORD PTR pfSampleLength
dec esi sub eax, DWORD PTR pfSamplePos
add eax, esi cdq idiv DWORD PTR pfPitch
mov edx, DWORD PTR dwLength sub edx, ebx
cmp edx, eax jae SHORT $L43863 mov eax, edx
$L43863: mov edx, DWORD PTR dwIncDelta cmp edx, eax jae SHORT $L43864 mov eax, edx
$L43864:
; 309 : ; 310 : for (a += dwI; dwI < a; dwI++)
inc edx
sub edx, eax add eax, ebx
mov DWORD PTR dwIncDelta, edx cmp ebx, eax
mov DWORD PTR a, eax jae $L43867
#ifdef USE_MMX_FILTERED // {
// Try to handle two positions at once.
lea edx, [eax-3] cmp ebx, edx jge $L43865
jmp UseMmx
UseMmxLabel: // Ok, there are at least two samples to handle.
movd mm1, DWORD PTR pfPitch psllq mm1, 32 // Pitch, 0
movd mm2, DWORD PTR pfSamplePos punpckldq mm2, mm2 // SamplePos, SamplePos
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
punpckhdq mm1, mm1 // Pitch, Pitch
pslld mm1, 1 // Pitch * 2, Pitch * 2
mov eax, DWORD PTR pcWave #if 0
movq mm4, QWORD PTR vfVolume pand mm4, QWORD PTR ffffMask movq mm5, mm4 pslld mm4, 16 por mm4, mm5 psllw mm4, 3 movq QWORD PTR MmxVolume, mm4 #endif
TwoAtATime:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch;
movq mm4, mm2 psrad mm4, 12 // dwPosition + Pitch, dwPosition
; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
movd esi, mm4 // dwPosition
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
// Instead for byte codes
// mov si, WORD PTR [eax+esi]
// movd mm6, esi
// punpcklbw mm5, mm6
// psarw mm5, 8
movd esi, mm4 movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
// Instead for byte codes
// mov si, WORD PTR [eax+esi]
// movd mm6, esi
// punpcklbw mm4, mm6
// psarw mm4, 8
// This code could be combined with code above, a bit.
punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
movq mm4, mm2 pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
packssdw mm4, mm0 movq mm6, mm3 psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
punpcklwd mm6, mm4 paddd mm2, mm1 // Next iteration
pmaddwd mm6, mm5 #if 1 // {
psrad mm6, 12 // lMIntrep2, lMInterp
#if 1 // {
// eax, ebx, ecx, edx, esi are used. edi is free...
push eax push ecx push edx
movq QWORD PTR dMM6, mm6
mov eax, DWORD PTR dMM6 imul DWORD PTR cfK // edx:eax
mov ecx, eax mov eax, DWORD PTR l_lPrevPrevSample
mov edi, edx // esi:ecx
imul DWORD PTR cfB2
sub ecx, eax mov eax, DWORD PTR l_lPrevSample
sbb edi, edx mov DWORD PTR l_lPrevPrevSample, eax
imul DWORD PTR cfB1
add eax, ecx adc edx, edi
//>>>>> MOD:PETCHEY
// shld eax, edx, 2
//>>>>> should be
shld edx, eax, 2 mov eax, edx
mov DWORD PTR dMM6, eax mov DWORD PTR l_lPrevSample, eax
// 2nd sample
mov eax, DWORD PTR dMM6+4 imul DWORD PTR cfK // edx:eax
mov ecx, eax mov eax, DWORD PTR l_lPrevPrevSample
mov edi, edx // esi:ecx
imul DWORD PTR cfB2
sub ecx, eax mov eax, DWORD PTR l_lPrevSample
sbb edi, edx mov DWORD PTR l_lPrevPrevSample, eax
imul DWORD PTR cfB1
add eax, ecx adc edx, edi
//>>>>> MOD:PETCHEY
// shld eax, edx, 2
//>>>>> should be
shld edx, eax, 2 mov eax, edx
mov DWORD PTR dMM6+4, eax mov DWORD PTR l_lPrevSample, eax
movq mm6, QWORD PTR dMM6
pop edx pop ecx pop eax #endif // }
#define DO_32BIT_MULTIPLY
#ifndef DO_32BIT_MULTIPLY
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
// pand mm5, QWORD PTR ffffMask // 16 bits only.
#endif
// pand mm6, QWORD PTR ffffMask
#ifndef DO_32BIT_MULTIPLY
movq mm4, mm5 #endif
mov esi, DWORD PTR [ecx+4]
#ifndef DO_32BIT_MULTIPLY
punpckldq mm4, mm4 #endif
#ifdef DO_32BIT_MULTIPLY
mov edi, DWORD PTR vfVolume imul edi, DWORD PTR dMM6 sar edi, 13 mov DWORD PTR dMM4, edi
mov edi, DWORD PTR vfVolume imul edi, DWORD PTR dMM6+4 sar edi, 13 mov DWORD PTR dMM4+4, edi
movq mm4, QWORD PTR dMM4 #else
pmaddwd mm4, mm6 psrad mm4, 13 #endif
packssdw mm4, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm4 movd DWORD PTR [esi+ebx*2], mm7
// CHANNEL 2
#ifndef DO_32BIT_MULTIPLY
punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
#endif
mov esi, DWORD PTR [ecx+8]
#ifdef DO_32BIT_MULTIPLY
mov edi, DWORD PTR vfVolume+4 imul edi, DWORD PTR dMM6 sar edi, 13 mov DWORD PTR dMM5, edi
mov edi, DWORD PTR vfVolume+4 imul edi, DWORD PTR dMM6+4 sar edi, 13 mov DWORD PTR dMM5+4, edi
movq mm5, QWORD PTR dMM5 #else
pmaddwd mm5, mm6 psrad mm5, 13 #endif
packssdw mm5, mm0
movd mm7, DWORD PTR [esi+ebx*2] paddsw mm7, mm5 movd DWORD PTR [esi+ebx*2], mm7
#else // }{ There is noise here, probably due to the signed nature of the multiply.
// NOTE the filter is NOT implemented here....
psrad mm6, 12 // lMIntrep2, lMInterp
movq mm5, QWORD PTR MmxVolume packssdw mm6, mm0 punpckldq mm6, mm6 pmulhw mm6, mm5 mov esi, DWORD PTR [ecx+4] movd mm7, DWORD PTR [esi+ebx*2] mov esi, DWORD PTR [ecx+8] movd mm4, DWORD PTR [esi+ebx*2] punpckldq mm4, mm7 paddsw mm4, mm6 movd DWORD PTR [esi+ebx*2], mm4 punpckhdq mm4, mm4 mov esi, DWORD PTR [ecx+4] movd DWORD PTR [esi+ebx*2], mm4
#endif // }
add ebx, 2
cmp ebx, edx jb TwoAtATime
movd DWORD PTR pfSamplePos, mm2 #endif // }
$L43865:
; dwPosition = pfSamplePos >> 12; ; dwFract = pfSamplePos & 0xFFF; ; pfSamplePos += pfPitch; ; lA = (long) pcWave[dwPosition]; ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
mov esi, DWORD PTR pfPitch mov edx, DWORD PTR pfSamplePos
mov eax, DWORD PTR pcWave mov edi, edx
add esi, edx and edi, 4095
sar edx, 12 mov DWORD PTR pfSamplePos, esi
movsx esi, WORD PTR [eax+edx*2] movsx eax, WORD PTR [eax+edx*2+2]
sub eax, esi
imul eax, edi
sar eax, 12 mov edi, One_Channel_2
// ebx, ecx, edx are used in switch branches
add eax, esi // lMInterp
#if 1
// lMInterp =
// MulDiv(lMInterp, cfK, (1 << 30))
// - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30))
// + MulDiv(m_lPrevSample, cfB1, (1 << 30))
push ecx imul DWORD PTR cfK // edx:eax
mov ecx, eax mov eax, DWORD PTR l_lPrevPrevSample
mov esi, edx // esi:ecx
imul DWORD PTR cfB2
sub ecx, eax mov eax, DWORD PTR l_lPrevSample
sbb esi, edx mov DWORD PTR l_lPrevPrevSample, eax
imul DWORD PTR cfB1
add eax, ecx // adc esi, edx
adc edx, esi
pop ecx // shrd eax, edx, 30
// mov esi,0x40000000
// idiv esi
//>>>>> MOD:PETCHEY
// shld eax, edx, 2
//>>>>> should be
shld edx, eax, 2 mov eax, edx #endif
//>>>>>>>>>>>> removed dp
#if 0
// if (lMInterp < -32767) lMInterp = -32767;
// else if (lMInterp > 32767) lMInterp = 32767;
cmp eax, -32767 jl Less_than cmp eax, 32767 jg Greater_than #endif
// m_lPrevPrevSample = m_lPrevSample;
// m_lPrevSample = lMInterp;
mov DWORD PTR l_lPrevSample, eax jmp edi
//>>>>>>>>>>>> removed dp
#if 0
Less_than: mov eax, -32767 mov DWORD PTR l_lPrevSample, eax jmp edi
Greater_than: mov eax, 32767 mov DWORD PTR l_lPrevSample, eax jmp edi #endif
// ONE_CHANNEL
// lM = lMInterp * vfVolume[dwJ - 1];
// lM >>= 13;
// ppBuffer[dwJ - 1][dwI] += (short) lM;
$L44009:
; 342 : default: ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
mov edi, DWORD PTR l_nChannels
// ecx ppBuffer
// eax lMInterp
// edi counter
// ebx dwI
$L43874: mov edx, DWORD PTR vfVolume[edi*4-4] mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
imul edx, eax sar edx, 13 add WORD PTR [esi+ebx*2], dx
jno no_overflow mov WORD PTR [esi+ebx*2], 0x7fff js no_overflow mov WORD PTR [esi+ebx*2], 0x8000
no_overflow: dec edi cmp edi, 8 jne SHORT $L43874
lea edi, $L43876 }
#define ONE_CHANNEL_VOLUME(dwJ) \
_asm { lea edx, vfVolume } \ _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \ _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 13 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
//-------------------------------------------------------------------------
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
// This lovely hack makes sure that all the instructions
// are the same length for the case (dwJ - 1) == 0. Code depends on this
// by calculating instruction offsets based on having 8 identical blocks.
//
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
//
//-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
_asm { lea edx, vfVolume } \ _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \ _asm { mov esi, DWORD PTR [ecx + 4] } \ _asm { imul edx, eax } \ _asm { sar edx, 13 } \ _asm { add edi, [esp] } \ \ _asm { add WORD PTR [esi+ebx*2], dx } \ _asm { jo FAR overflow_x }
$L43876: ONE_CHANNEL_VOLUME(8); $L43880: ONE_CHANNEL_VOLUME(7); ONE_CHANNEL_VOLUME(6); ONE_CHANNEL_VOLUME(5); ONE_CHANNEL_VOLUME(4); ONE_CHANNEL_VOLUME(3); ONE_CHANNEL_VOLUME(2); ONE_CHANNEL_VOLUME_1; #undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43866: _asm { mov eax, DWORD PTR a inc ebx
cmp ebx, eax jb $L43865
mov edi, DWORD PTR l_nChannels $L43867: cmp ebx, DWORD PTR dwLength jb $L44021 Exit_$L43841: pop eax mov DWORD PTR dwI, ebx
#ifdef USE_MMX_FILTERED
mov edi, UseMmx cmp edi, UseMmxLabel jne NoMmxCleanupLabel
emms
NoMmxCleanupLabel: #endif
}
m_lPrevPrevSample = l_lPrevPrevSample; m_lPrevSample = l_lPrevSample; #else // }{
for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; }
cfK += cfdK; cfB1 += cfdB1; cfB2 += cfdB2; }
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
// Filter
//
// z = k*s - b1*z1 - b2*b2
// We store the negative of b1 in the table, so we flip the sign again by
// adding here
//
lMInterp = MulDiv(lMInterp, cfK, (1 << 30)) + MulDiv(m_lPrevSample, cfB1, (1 << 30)) - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
//>>>>>>>>>>>> removed dp
#if 0
if (lMInterp < -32767) lMInterp = -32767; else if (lMInterp > 32767) lMInterp = 32767; #endif
m_lPrevPrevSample = m_lPrevSample; m_lPrevSample = lMInterp;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 13; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; _asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
} dwI++; } #endif // }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
m_cfLastK = cfK; m_cfLastB1 = cfB1; m_cfLastB2 = cfB2;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
return (dwI); }
#else // }{ all assembly code
DWORD CDigitalAudio::MixMulti8( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI, dwJ; DWORD dwPosition; long lMInterp; long lM; long lA;//, lB;
DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; char * pcWave = (char *) m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; } for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; } }
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch; lMInterp = pcWave[dwPosition]; // pcWave
lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 5;
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; #ifdef i386
_asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
#endif
} dwI++; }
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
return (dwI); } DWORD CDigitalAudio::MixMulti8Filter( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength, COEFF cfdK, COEFF cfdB1, COEFF cfdB2) { DWORD dwI, dwJ; DWORD dwPosition; long lMInterp; long lM; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; char * pcWave = (char *) m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; COEFF cfK = m_cfLastK; COEFF cfB1 = m_cfLastB1; COEFF cfB2 = m_cfLastB2;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
DWORD dMM6[2];
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
for (dwI = 0; dwI < dwLength; ) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; }
cfK += cfdK; cfB1 += cfdB1; cfB2 += cfdB2; } dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lMInterp = pcWave[dwPosition]; // pcWave
lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
// Filter
//
lMInterp = MulDiv(lMInterp, cfK, (1 << 30)) - MulDiv(m_lPrevSample, cfB1, (1 << 30)) + MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
m_lPrevPrevSample = m_lPrevSample; m_lPrevSample = lMInterp;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 5;
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; #ifdef i386
_asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
#endif
} dwI++; }
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
return (dwI); }
DWORD CDigitalAudio::MixMulti16( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength) { DWORD dwI = 0; DWORD dwJ = 0; DWORD dwPosition = 0; long lA = 0;//, lB;
long lM = 0; long lMInterp = 0; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8;
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; } }
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 13; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; #ifdef i386
_asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
#endif
} dwI++; } m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; } return (dwI); }
DWORD CDigitalAudio::MixMulti16Filter( short *ppBuffer[], DWORD dwBufferCount, DWORD dwLength, DWORD dwDeltaPeriod, VFRACT vfDeltaVolume[], VFRACT vfLastVolume[], PFRACT pfDeltaPitch, PFRACT pfSampleLength, PFRACT pfLoopLength, COEFF cfdK, COEFF cfdB1, COEFF cfdB2) { DWORD dwI, dwJ; DWORD dwPosition; long lA;//, lB;
long lM; long lMInterp; DWORD dwIncDelta = dwDeltaPeriod; VFRACT dwFract; short * pcWave = m_pnWave; PFRACT pfSamplePos = m_pfLastSample; PFRACT pfPitch = m_pfLastPitch; PFRACT pfPFract = pfPitch << 8; COEFF cfK = m_cfLastK; COEFF cfB1 = m_cfLastB1; COEFF cfB2 = m_cfLastB2; DWORD dMM6[2]; // Handle filter...
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
for (dwI = 0; dwI < dwBufferCount; dwI++) { vfVolume[dwI] = vfLastVolume[dwI]; vfVFract[dwI] = vfVolume[dwI] << 8; }
for (dwI = 0; dwI < dwLength;) { if (pfSamplePos >= pfSampleLength) { if (pfLoopLength) pfSamplePos -= pfLoopLength; else break; } dwIncDelta--; if (!dwIncDelta) { dwIncDelta = dwDeltaPeriod; pfPFract += pfDeltaPitch; pfPitch = pfPFract >> 8; for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfVFract[dwJ] += vfDeltaVolume[dwJ]; vfVolume[dwJ] = vfVFract[dwJ] >> 8; }
cfK += cfdK; cfB1 += cfdB1; cfB2 += cfdB2; }
dwPosition = pfSamplePos >> 12; dwFract = pfSamplePos & 0xFFF; pfSamplePos += pfPitch;
lA = (long) pcWave[dwPosition]; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
// Filter
//
// z = k*s - b1*z1 - b2*b2
// We store the negative of b1 in the table, so we flip the sign again by
// adding here
//
lMInterp = MulDiv(lMInterp, cfK, (1 << 30)) + MulDiv(m_lPrevSample, cfB1, (1 << 30)) - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
//>>>>>>>>>>>> removed dp
#if 0
if (lMInterp < -32767) lMInterp = -32767; else if (lMInterp > 32767) lMInterp = 32767; #endif
m_lPrevPrevSample = m_lPrevSample; m_lPrevSample = lMInterp;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { lM = lMInterp * vfVolume[dwJ]; lM >>= 13; // Signal bumps up to 12 bits.
// Keep this around so we can use it to generate new assembly code (see below...)
#if 1
{ long x = ppBuffer[dwJ][dwI]; x += lM;
if (x != (short)x) { if (x > 32767) x = 32767; else x = -32768; }
ppBuffer[dwJ][dwI] = (short)x; } #else
ppBuffer[dwJ][dwI] += (short) lM; #ifdef i386
_asm{jno no_oflow} ppBuffer[dwJ][dwI] = 0x7fff; _asm{js no_oflow} ppBuffer[dwJ][dwI] = (short) 0x8000; no_oflow: ; #endif
#endif
} dwI++; }
m_pfLastPitch = pfPitch; m_pfLastSample = pfSamplePos;
m_cfLastK = cfK; m_cfLastB1 = cfB1; m_cfLastB2 = cfB2;
for (dwJ = 0; dwJ < dwBufferCount; dwJ++) { vfLastVolume[dwJ] = vfVolume[dwJ]; }
return (dwI); }
#endif // }
|