mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3999 lines
84 KiB
3999 lines
84 KiB
// Mix.cpp
|
|
// Copyright (c) Microsoft Corporation 1996, 1998
|
|
// Mix engines for MSSynth
|
|
|
|
#ifdef DMSYNTH_MINIPORT
|
|
#include "common.h"
|
|
#define STR_MODULENAME "DMusicMix:"
|
|
#else
|
|
#include "simple.h"
|
|
#include <mmsystem.h>
|
|
#include "synth.h"
|
|
#endif
|
|
|
|
///////////////////////////////////////////////////////
|
|
// Modifications
|
|
// member m_nChannels => parameter dwBufferCount
|
|
//
|
|
// Changed number of arguments into Filtered mixers
|
|
//
|
|
// Remove range checking after filter
|
|
|
|
// Build configuration for the mix engines in this file.
//
// Silence MSVC warnings C4101 (unreferenced local variable), C4102
// (unreferenced label) and C4146 (unary minus applied to an unsigned type).
#pragma warning(disable : 4101 4102 4146)
|
|


// Alpha builds: declare __ADAWI and ask the compiler to treat it as an intrinsic.
#ifdef _ALPHA_
|
|


extern "C" {
|
|
int __ADAWI(short, short *);
|
|
};
|
|
#pragma intrinsic(__ADAWI)
|
|


// NOTE(review): presumably bit masks tested against the __ADAWI return value
// to detect overflow / negative results -- confirm against the users of these
// macros, which are not in this part of the file.
#define ALPHA_OVERFLOW 2
|
|
#define ALPHA_NEGATIVE 8
|
|


#else // !_ALPHA_
|
|
// TODO -- overflow detection for ia64 (+ axp64?)
|
|
#endif // !_ALPHA_
|
|
// Miniport builds: place the code that follows in the section named "PAGE".
#ifdef DMSYNTH_MINIPORT
|
|
#pragma code_seg("PAGE")
|
|
#endif // DMSYNTH_MINIPORT
|
|


// USE_MMX enables the MMX two-samples-at-a-time path inside MixMulti8;
// USE_MMX_FILTERED enables the corresponding path inside MixMulti8Filter.
#define USE_MMX
|
|
#define USE_MMX_FILTERED
|
|
|
|
#ifdef i386 // {
|
|
// CDigitalAudio::MixMulti8
//
// Mixes the 8-bit wave at m_pnWave (read as signed chars) into dwBufferCount
// 16-bit output buffers. Each output sample is a linear interpolation between
// two neighbouring wave bytes, scaled by that buffer's current volume, shifted
// right by 5 and added to the buffer with saturation to 0x7fff / 0x8000.
//
// Parameters:
//   ppBuffer       Array of dwBufferCount pointers to 16-bit buffers that are
//                  accumulated into in place.
//   dwBufferCount  Number of buffers (channels). The local volume arrays are
//                  sized MAX_DAUD_CHAN and are indexed up to dwBufferCount
//                  without a check here -- assumes the caller guarantees
//                  dwBufferCount <= MAX_DAUD_CHAN (TODO confirm).
//   dwLength       Maximum number of output samples to produce.
//   dwDeltaPeriod  Number of samples between pitch/volume ramp steps.
//   vfDeltaVolume  Per-buffer amount added to the volume (held shifted left
//                  by 8) at each ramp step.
//   vfLastVolume   In: starting volume per buffer. Out: volume at exit.
//   pfDeltaPitch   Amount added to the pitch (held shifted left by 8) at each
//                  ramp step.
//   pfSampleLength Sample position (12 fractional bits) at which the wave
//                  ends or loops.
//   pfLoopLength   If nonzero, subtracted from the position whenever it
//                  reaches pfSampleLength; if zero, mixing stops there.
//
// Returns the number of output samples written (dwI). Also stores the final
// pitch and sample position in m_pfLastPitch and m_pfLastSample.
//
// The code that is actually compiled is the inline-assembly version selected
// by the two nested "#if 1" directives below. The C loops in the "#else"
// branches express the same algorithm and are not compiled.
// NOTE(review): pfPitch is used as a divisor (idiv) below; assumes it is
// never zero -- confirm against callers.
DWORD CDigitalAudio::MixMulti8(
|
|
short *ppBuffer[],
|
|
DWORD dwBufferCount,
|
|
DWORD dwLength,
|
|
DWORD dwDeltaPeriod,
|
|
VFRACT vfDeltaVolume[],
|
|
VFRACT vfLastVolume[],
|
|
PFRACT pfDeltaPitch,
|
|
PFRACT pfSampleLength,
|
|
PFRACT pfLoopLength)
|
|
{
|
|
DWORD dwI, dwJ;
|
|
DWORD dwPosition;
|
|
long lMInterp;
|
|
long lM;
|
|
long lA;//, lB;
|
|
DWORD dwIncDelta = dwDeltaPeriod;
|
|
VFRACT dwFract;
|
|
char * pcWave = (char *) m_pnWave;
|
|
PFRACT pfSamplePos = m_pfLastSample;
|
|
PFRACT pfPitch = m_pfLastPitch;
|
|
// pfPFract keeps the pitch with 8 extra low bits so pfDeltaPitch can
// accumulate in sub-unit steps; pfPitch is refreshed as pfPFract >> 8.
PFRACT pfPFract = pfPitch << 8;
|
|


VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
|
|
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
|
|


// Seed the working volumes from the caller's values; vfVFract is the same
// volume shifted left by 8 for the ramp accumulation.
for (dwI = 0; dwI < dwBufferCount; dwI++)
|
|
{
|
|
vfVolume[dwI] = vfLastVolume[dwI];
|
|
vfVFract[dwI] = vfVolume[dwI] << 8;
|
|
}
|
|


#if 1 // {
|
|
DWORD l_nChannels = dwBufferCount;
|
|
#if 1 // {
|
|
DWORD a;
|
|
DWORD One_Channel_1, One_Channel_2; // Code address locations.
|
|
#ifdef USE_MMX // {
|
|
typedef __int64 QWORD;
|
|
QWORD OneMask = 0x0000000010001000;
|
|
QWORD fffMask = 0x00000fff00000fff;
|
|
QWORD ffffMask = 0x0000ffff0000ffff;
|
|
DWORD UseMmx;
|
|
DWORD MmxVolume[2];
|
|
int Use_MMX = m_sfMMXEnabled;
|
|


// Pick the target of the later indirect "jmp UseMmx": the MMX loop at
// UseMmxLabel when m_sfMMXEnabled is nonzero and there are exactly two
// channels, otherwise the scalar loop at $L43865. When MMX is chosen,
// mm0 is cleared and mm3 is loaded with OneMask for use by that loop.
_asm {
|
|
lea edi, $L43865
|
|


// Turned off
|
|
cmp Use_MMX, 0
|
|
je AssignMmxLabel
|
|


// != 2 channels
|
|
mov esi, DWORD PTR l_nChannels
|
|
cmp esi, 2
|
|
jne AssignMmxLabel
|
|


// Ok, init and use MMX
|
|


lea edi, UseMmxLabel
|
|


pxor mm0, mm0
|
|
movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
|
|


AssignMmxLabel:
|
|
mov DWORD PTR UseMmx, edi
|
|


}
|
|
#endif // }
|
|


// Compute One_Channel_1, the entry address into the unrolled volume-ramp
// blocks ($L43851 onward):
//   more than 8 channels -> $L44008 (loop for the channels above 8, which
//                           then falls into the unrolled blocks)
//   0 channels           -> $L43860 (skip the ramp blocks entirely)
//   1..8 channels        -> $L43851 + (8 - n) * ($L43853 - $L43851)
_asm {
|
|
mov edi, DWORD PTR l_nChannels
|
|


cmp edi, 8
|
|
jna Start1
|
|


lea esi, $L44008
|
|
jmp Do_One_Channel_2
|
|


// Put this code more than 127 bytes away from the references.
|
|


// Saturation handler reached through "jo overflow_x" in the per-channel
// mix blocks. The flags still come from the 16-bit add: sign set means the
// sum wrapped to negative, so store 0x7fff; otherwise store 0x8000. edi was
// already advanced to the next block before the add, so "jmp edi" resumes
// with the following channel.
overflow_x:
|
|
js overflow_y
|
|
mov WORD PTR [esi+ebx*2], 0x8000
|
|
jmp edi
|
|


overflow_y:
|
|
mov WORD PTR [esi+ebx*2], 0x7fff
|
|
jmp edi
|
|


Start1:
|
|
test edi, edi
|
|
jne Start2
|
|


lea esi, $L43860
|
|
jmp Do_One_Channel_2
|
|


Start2:
|
|
lea eax, $L43851
|
|
lea edx, $L43853
|
|


sub edx, eax
|
|
mov esi, 8
|
|


sub esi, edi
|
|
imul esi, edx
|
|
add esi, eax
|
|


Do_One_Channel_2:
|
|
mov DWORD PTR One_Channel_1, esi
|
|


// Create second jump table location.
|
|
// One_Channel_2 is the entry address into the unrolled per-channel mix
// blocks ($L43876 onward), chosen the same way as One_Channel_1:
// $L44009 for more than 8 channels, $L43866 for 0 channels, otherwise
// $L43876 + (8 - n) * block size. The block size is pushed here and stays
// on the stack: each mix block reads it through [esp], and it is popped at
// Exit_$L43841.
|
|
lea esi, $L43876
|
|
lea ecx, $L43880
|
|


sub ecx, esi
|
|


push ecx // Span between branches.
|
|


mov eax, 8
|
|
sub eax, DWORD PTR l_nChannels
|
|


jge Start3
|
|


lea ecx, $L44009
|
|
jmp Done_Do_Channel_2
|
|


Start3:
|
|
cmp eax, 8
|
|
jne Start4
|
|


lea ecx, $L43866
|
|
jmp Done_Do_Channel_2
|
|


Start4:
|
|
imul ecx, eax
|
|
add ecx, esi
|
|


Done_Do_Channel_2:
|
|
mov DWORD PTR One_Channel_2, ecx
|
|




// ebx is the output sample index (dwI). Leave immediately if dwLength is 0.
mov ecx, DWORD PTR dwLength
|
|
xor ebx, ebx // dwI
|
|


test ecx, ecx
|
|
jbe Exit_$L43841
|
|


// ecx is biased by -4 bytes so that [ecx + dwJ*4] addresses ppBuffer[dwJ - 1].
mov ecx, DWORD PTR ppBuffer
|
|
sub ecx, 4
|
|


// ecx == ppBuffer
|
|
// ebx == dwI
|
|
// edi == l_nChannels
|
|
// Top of the outer loop. If the position has reached pfSampleLength, wrap
// it back by pfLoopLength, or leave the loop when pfLoopLength is zero.
$L44021:
|
|


mov edx, DWORD PTR pfSamplePos
|
|
cmp edx, DWORD PTR pfSampleLength
|
|
jl SHORT $L43842
|
|


mov eax, DWORD PTR pfLoopLength
|
|
test eax, eax
|
|
je Exit_$L43841
|
|


sub edx, eax
|
|
mov DWORD PTR pfSamplePos, edx
|
|


// Count down dwIncDelta. The "jne" below tests the result of "dec edx"
// (the intervening mov does not change flags). When the counter reaches
// zero it is reloaded from dwDeltaPeriod, the pitch is stepped, and control
// jumps through One_Channel_1 to step the per-channel volumes.
$L43842:
|
|
mov edx, DWORD PTR dwIncDelta
|
|
mov eax, DWORD PTR pfPFract
|
|


dec edx
|
|


mov DWORD PTR dwIncDelta, edx
|
|
jne $L43860
|
|


mov edx, DWORD PTR dwDeltaPeriod
|
|
mov esi, DWORD PTR pfDeltaPitch
|
|


mov DWORD PTR dwIncDelta, edx
|
|
add eax, esi
|
|


mov DWORD PTR pfPFract, eax
|
|


sar eax, 8
|
|
mov DWORD PTR pfPitch, eax
|
|


// esi = vfDeltaVolume for the ramp blocks; indirect jump via One_Channel_1.
mov esi, DWORD PTR vfDeltaVolume
|
|
jmp One_Channel_1
|
|


// ONE_CHANNEL
|
|
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
|
|
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
|
|


// More than 8 channels: step the volumes of channels n down to 9 in a loop
// (ebx is borrowed as a byte offset, so dwI is saved and restored), then
// fall through into the unrolled blocks for channels 8..1.
$L44008:
|
|


mov DWORD PTR dwI, ebx
|
|
lea ebx, DWORD PTR [edi*4-4]
|
|
add edi, -8 ; fffffff8H
|
|
$L43849:
|
|


lea eax, DWORD PTR vfVFract[ebx]
|
|
mov ecx, DWORD PTR [esi+ebx]
|
|
sub ebx, 4
|
|
add DWORD PTR [eax], ecx
|
|
mov eax, DWORD PTR [eax]
|
|
sar eax, 8
|
|
mov DWORD PTR vfVolume[ebx+4], eax
|
|
dec edi
|
|
jne SHORT $L43849
|
|


mov edi, DWORD PTR l_nChannels
|
|
mov ecx, DWORD PTR ppBuffer
|
|


mov ebx, DWORD PTR dwI
|
|
sub ecx, 4
|
|
}
|
|
// One unrolled volume-ramp block for channel dwJ (esi = vfDeltaVolume).
// One_Channel_1 is computed from the distance between the first two
// expansions, so every expansion has to assemble to the same length.
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
|
|
_asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
|
|
_asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
|
|
_asm { sar eax, 8 }; \
|
|
_asm { lea edx, vfVolume }; \
|
|
_asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
|
|


//-------------------------------------------------------------------------
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
// This lovely hack makes sure that all the instructions
|
|
// are the same length for the case (dwJ - 1) == 0. Code depends on this
|
|
// by calculating instruction offsets based on having 8 identical blocks.
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
// The _emit byte sequences below hand-encode the two zero-offset memory
// operands with an explicit one-byte displacement of 0
// (03 46 00 = add eax, [esi+0]; 89 42 00 = mov [edx+0], eax).
#define ONE_CHANNEL_VOLUME_1 \
|
|
_asm { mov eax, DWORD PTR vfVFract[0] }; \
|
|
_asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
|
|
_asm { mov DWORD PTR vfVFract[0], eax }; \
|
|
_asm { sar eax, 8 }; \
|
|
_asm { lea edx, vfVolume }; \
|
|
_asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
|
|


$L43851:
|
|
ONE_CHANNEL_VOLUME(8)
|
|
$L43853:
|
|
ONE_CHANNEL_VOLUME(7);
|
|
ONE_CHANNEL_VOLUME(6);
|
|
ONE_CHANNEL_VOLUME(5);
|
|
ONE_CHANNEL_VOLUME(4);
|
|
ONE_CHANNEL_VOLUME(3);
|
|
ONE_CHANNEL_VOLUME(2);
|
|
ONE_CHANNEL_VOLUME_1;
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#undef ONE_CHANNEL_VOLUME_1
|
|
// Work out how many samples ("a") can be produced in one uninterrupted run:
// the number of pitch steps left before pfSampleLength, limited by the
// output samples still wanted (dwLength - dwI) and by dwIncDelta. dwIncDelta
// is then set to dwIncDelta + 1 - run length and "a" becomes the end index
// dwI + run length.
$L43860:
|
|
_asm {
|
|
; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
|
|


mov esi, DWORD PTR pfPitch
|
|
mov eax, DWORD PTR pfSampleLength
|
|


dec esi
|
|
sub eax, DWORD PTR pfSamplePos
|
|


add eax, esi
|
|
cdq
|
|
idiv DWORD PTR pfPitch
|
|


mov edx, DWORD PTR dwLength
|
|
sub edx, ebx
|
|


cmp edx, eax
|
|
jae SHORT $L43863
|
|
mov eax, edx
|
|


$L43863:
|
|
mov edx, DWORD PTR dwIncDelta
|
|
cmp edx, eax
|
|
jae SHORT $L43864
|
|
mov eax, edx
|
|


$L43864:
|
|


; 309 :
|
|
; 310 : for (a += dwI; dwI < a; dwI++)
|
|


inc edx
|
|


sub edx, eax
|
|
add eax, ebx
|
|


mov DWORD PTR dwIncDelta, edx
|
|
cmp ebx, eax
|
|


mov DWORD PTR a, eax
|
|
jae $L43867
|
|


#ifdef USE_MMX // {
|
|
// Try to handle two positions at once.
|
|
// edx = a - 3. If dwI is not below that (signed compare) go straight to
// the scalar loop; otherwise jump through UseMmx, which holds either
// UseMmxLabel or $L43865 as selected at function entry.
|
|
lea edx, [eax-3]
|
|
cmp ebx, edx
|
|
jge $L43865
|
|


jmp UseMmx
|
|


UseMmxLabel:
|
|
// Ok, there are at least two samples to handle.
|
|
// mm2 = positions of the next two samples, mm1 = 2 * pitch in both halves,
// eax = pcWave.
|
|
movd mm1, DWORD PTR pfPitch
|
|
psllq mm1, 32 // Pitch, 0
|
|
movd mm2, DWORD PTR pfSamplePos
|
|
punpckldq mm2, mm2 // SamplePos, SamplePos
|
|
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
|
|
punpckhdq mm1, mm1 // Pitch, Pitch
|
|
pslld mm1, 1 // Pitch * 2, Pitch * 2
|
|


mov eax, DWORD PTR pcWave
|
|
#if 0
|
|
movq mm4, QWORD PTR vfVolume
|
|
pand mm4, QWORD PTR ffffMask
|
|
movq mm5, mm4
|
|
pslld mm4, 16
|
|
por mm4, mm5
|
|
psllw mm4, 3
|
|
movq QWORD PTR MmxVolume, mm4
|
|
#endif
|
|


// Each pass interpolates two consecutive output samples and adds them,
// scaled by the two channel volumes, to ppBuffer[0] and ppBuffer[1].
TwoAtATime:
|
|


; dwPosition = pfSamplePos >> 12;
|
|
; dwFract = pfSamplePos & 0xFFF;
|
|
; pfSamplePos += pfPitch;
|
|


movq mm4, mm2
|
|
psrad mm4, 12 // dwPosition + Pitch, dwPosition
|
|


; lA = (long) pcWave[dwPosition];
|
|
; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
|
|


movd esi, mm4 // dwPosition
|
|
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
|
|
// movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
|
|
// Instead for byte codes
|
|
// Load two adjacent wave bytes, place each in the high byte of a word
// (punpcklbw) and sign-extend with the arithmetic shift by 8.
mov si, WORD PTR [eax+esi]
|
|
movd mm6, esi
|
|
punpcklbw mm5, mm6
|
|
psraw mm5, 8
|
|
movd esi, mm4
|
|
// movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
|
|
// Instead for byte codes
|
|
mov si, WORD PTR [eax+esi]
|
|
movd mm6, esi
|
|
punpcklbw mm4, mm6
|
|
psraw mm4, 8
|
|
// This code could be combined with code above, a bit.
|
|


punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
|
|
movq mm4, mm2
|
|
pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
|
|
packssdw mm4, mm0
|
|
movq mm6, mm3
|
|
psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
|
|
punpcklwd mm6, mm4
|
|
paddd mm2, mm1 // Next iteration
|
|
pmaddwd mm6, mm5
|
|
#if 1
|
|
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
|
|
psrad mm6, 12 // lMIntrep2, lMInterp
|
|
// pand mm6, QWORD PTR ffffMask
|
|
// pand mm5, QWORD PTR ffffMask // 16 bits only.
|
|


// CHANNEL 1: esi = ppBuffer[0]; the two results are packed to 16 bits and
// added to the buffer with signed saturation (paddsw).
movq mm4, mm5
|
|
mov esi, DWORD PTR [ecx+4]
|
|


punpckldq mm4, mm4
|
|
pmaddwd mm4, mm6
|
|
psrad mm4, 5
|
|
packssdw mm4, mm0
|
|


movd mm7, DWORD PTR [esi+ebx*2]
|
|
paddsw mm7, mm4
|
|
movd DWORD PTR [esi+ebx*2], mm7
|
|


// CHANNEL 2
|
|


punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
|
|
mov esi, DWORD PTR [ecx+8]
|
|


pmaddwd mm5, mm6
|
|
psrad mm5, 5
|
|
packssdw mm5, mm0
|
|


movd mm7, DWORD PTR [esi+ebx*2]
|
|
paddsw mm7, mm5
|
|
movd DWORD PTR [esi+ebx*2], mm7
|
|


#else // There is noise here, probably due to the signed nature of the multiply.
|
|
psrad mm6, 12 // lMIntrep2, lMInterp
|
|
movq mm5, QWORD PTR MmxVolume
|
|
packssdw mm6, mm0
|
|
punpckldq mm6, mm6
|
|
pmulhw mm6, mm5
|
|
mov esi, DWORD PTR [ecx+4]
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
mov esi, DWORD PTR [ecx+8]
|
|
movd mm4, DWORD PTR [esi+ebx*2]
|
|
punpckldq mm4, mm7
|
|
paddsw mm4, mm6
|
|
movd DWORD PTR [esi+ebx*2], mm4
|
|
punpckhdq mm4, mm4
|
|
mov esi, DWORD PTR [ecx+4]
|
|
movd DWORD PTR [esi+ebx*2], mm4
|
|


#endif
|
|


// Advance dwI by two and repeat while dwI < a - 3 (edx).
add ebx, 2
|
|


cmp ebx, edx
|
|
jb TwoAtATime
|
|


// Store the low half of mm2 (position of the next unprocessed sample) back
// to pfSamplePos; the scalar loop below finishes the rest of the run.
movd DWORD PTR pfSamplePos, mm2
|
|
#endif // }
|
|


// Scalar inner loop: one output sample per pass.
$L43865:
|
|


; dwPosition = pfSamplePos >> 12;
|
|
; dwFract = pfSamplePos & 0xFFF;
|
|
; pfSamplePos += pfPitch;
|
|
; lA = (long) pcWave[dwPosition];
|
|
; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
|
|


mov esi, DWORD PTR pfPitch
|
|
mov edx, DWORD PTR pfSamplePos
|
|


mov eax, DWORD PTR pcWave
|
|
mov edi, edx
|
|


add esi, edx
|
|
and edi, 4095
|
|


sar edx, 12
|
|
mov DWORD PTR pfSamplePos, esi
|
|


movsx esi, BYTE PTR [eax+edx]
|
|
movsx eax, BYTE PTR [eax+edx+1]
|
|


sub eax, esi
|
|


imul eax, edi
|
|


sar eax, 12
|
|
mov edi, One_Channel_2
|
|


// ebx, ecx, edx are used in switch branches
|
|


// eax = interpolated sample; edi = entry into the per-channel mix blocks.
add eax, esi // lMInterp
|
|
jmp edi
|
|


// ONE_CHANNEL
|
|
// lM = lMInterp * vfVolume[dwJ - 1];
|
|
// lM >>= 5;
|
|
// ppBuffer[dwJ - 1][dwI] += (short) lM;
|
|


// More than 8 channels: mix channels n down to 9 in a loop with inline
// saturation, then set edi to $L43876 and fall through into the unrolled
// blocks for channels 8..1.
$L44009:
|
|


; 342 : default:
|
|
; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
|
|


mov edi, DWORD PTR l_nChannels
|
|


// ecx ppBuffer
|
|
// eax lMInterp
|
|
// edi counter
|
|
// ebx dwI
|
|


$L43874:
|
|
mov edx, DWORD PTR vfVolume[edi*4-4]
|
|
mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
|
|


imul edx, eax
|
|
sar edx, 5
|
|
add WORD PTR [esi+ebx*2], dx
|
|


// On overflow store 0x7fff first; the "js" still sees the sign of the add
// (mov leaves flags unchanged), so 0x8000 is stored instead only when the
// wrapped sum was non-negative.
jno no_overflow
|
|
mov WORD PTR [esi+ebx*2], 0x7fff
|
|
js no_overflow
|
|
mov WORD PTR [esi+ebx*2], 0x8000
|
|


no_overflow:
|
|
dec edi
|
|
cmp edi, 8
|
|
jne SHORT $L43874
|
|


lea edi, $L43876
|
|
}
|
|


// One unrolled mix block for channel dwJ: eax = interpolated sample,
// ecx = ppBuffer - 4 bytes, ebx = dwI. edi is advanced by the block size kept
// at [esp] before the 16-bit add so that overflow_x can resume at the next
// block with "jmp edi". One_Channel_2 is computed from the distance between
// the first two expansions, so every expansion has to assemble to the same
// length.
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
_asm { lea edx, vfVolume } \
|
|
_asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
|
|
_asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
|
|
_asm { imul edx, eax } \
|
|
_asm { sar edx, 5 } \
|
|
_asm { add edi, [esp] } \
|
|
\
|
|
_asm { add WORD PTR [esi+ebx*2], dx } \
|
|
_asm { jo FAR overflow_x }
|
|


//-------------------------------------------------------------------------
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
// This lovely hack makes sure that all the instructions
|
|
// are the same length for the case (dwJ - 1) == 0. Code depends on this
|
|
// by calculating instruction offsets based on having 8 identical blocks.
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
// The _emit bytes 8B 52 00 hand-encode "mov edx, [edx+0]" with an explicit
// one-byte displacement of 0.
#define ONE_CHANNEL_VOLUME_1 \
|
|
_asm { lea edx, vfVolume } \
|
|
_asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
|
|
_asm { mov esi, DWORD PTR [ecx + 4] } \
|
|
_asm { imul edx, eax } \
|
|
_asm { sar edx, 5 } \
|
|
_asm { add edi, [esp] } \
|
|
\
|
|
_asm { add WORD PTR [esi+ebx*2], dx } \
|
|
_asm { jo FAR overflow_x }
|
|


$L43876:
|
|
ONE_CHANNEL_VOLUME(8);
|
|
$L43880:
|
|
ONE_CHANNEL_VOLUME(7);
|
|
ONE_CHANNEL_VOLUME(6);
|
|
ONE_CHANNEL_VOLUME(5);
|
|
ONE_CHANNEL_VOLUME(4);
|
|
ONE_CHANNEL_VOLUME(3);
|
|
ONE_CHANNEL_VOLUME(2);
|
|
ONE_CHANNEL_VOLUME_1;
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#undef ONE_CHANNEL_VOLUME_1
|
|
// End of one sample: advance dwI and repeat the scalar loop until the end of
// this run ("a"), then return to the outer loop while dwI < dwLength.
$L43866:
|
|
_asm {
|
|
mov eax, DWORD PTR a
|
|
inc ebx
|
|


cmp ebx, eax
|
|
jb $L43865
|
|


mov edi, DWORD PTR l_nChannels
|
|
$L43867:
|
|
cmp ebx, DWORD PTR dwLength
|
|
jb $L44021
|
|
// Common exit: discard the block size pushed during setup and publish dwI.
Exit_$L43841:
|
|
pop eax
|
|
mov DWORD PTR dwI, ebx
|
|


#ifdef USE_MMX
|
|
// Issue emms only when the MMX path was selected at entry.
// NOTE(review): UseMmx holds the address taken with "lea edi, UseMmxLabel";
// confirm that the assembler treats the bare UseMmxLabel operand in this cmp
// as that address rather than as the bytes stored at the label.
mov edi, UseMmx
|
|
cmp edi, UseMmxLabel
|
|
jne NoMmxCleanupLabel
|
|


emms
|
|
NoMmxCleanupLabel:
|
|
#endif
|
|
}
|
|
// C version of the assembly above, with the same run-length batching. It is
// in the #else branch of the inner "#if 1" and is therefore not compiled.
#else // }{
|
|
for (dwI = 0; dwI < dwLength;)
|
|
{
|
|
if (pfSamplePos >= pfSampleLength)
|
|
{
|
|
if (pfLoopLength)
|
|
pfSamplePos -= pfLoopLength;
|
|
else
|
|
break;
|
|
}
|
|
dwIncDelta--;
|
|
if (!dwIncDelta)
|
|
{
|
|
dwIncDelta = dwDeltaPeriod;
|
|
pfPFract += pfDeltaPitch;
|
|
pfPitch = pfPFract >> 8;
|
|


#if 1
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1]; \
|
|
vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
|
|


// The cases fall through on purpose: entering at case n handles channels n..1.
switch (l_nChannels)
|
|
{
|
|
default:
|
|
for (dwJ = l_nChannels; dwJ > 8; dwJ--)
|
|
{
|
|
ONE_CHANNEL_VOLUME(dwJ);
|
|
}
|
|
case 8: ONE_CHANNEL_VOLUME(8);
|
|
case 7: ONE_CHANNEL_VOLUME(7);
|
|
case 6: ONE_CHANNEL_VOLUME(6);
|
|
case 5: ONE_CHANNEL_VOLUME(5);
|
|
case 4: ONE_CHANNEL_VOLUME(4);
|
|
case 3: ONE_CHANNEL_VOLUME(3);
|
|
case 2: ONE_CHANNEL_VOLUME(2);
|
|
case 1: ONE_CHANNEL_VOLUME(1);
|
|
case 0:;
|
|
}
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#else
|
|
for (dwJ = 0; dwJ < l_nChannels; dwJ++)
|
|
{
|
|
vfVFract[dwJ] += vfDeltaVolume[dwJ];
|
|
vfVolume[dwJ] = vfVFract[dwJ] >> 8;
|
|
}
|
|
#endif
|
|
}
|
|


#if 1 // {
|
|
// a = samples until the end of the wave, limited by the remaining output
// length and by dwIncDelta.
DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
|
|
DWORD b = dwLength - dwI;
|
|


if (b < a) a = b;
|
|
if (dwIncDelta < a) a = dwIncDelta;
|
|


dwIncDelta -= a - 1;
|
|
a += dwI;
|
|


for (; dwI < a; dwI++)
|
|
{
|
|
dwPosition = pfSamplePos >> 12;
|
|
dwFract = pfSamplePos & 0xFFF;
|
|
pfSamplePos += pfPitch;
|
|


lA = (long) pcWave[dwPosition];
|
|
lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
|
|
#if 1 // {
|
|
#if 1
|
|
// NOTE(review): in this macro b is re-read from a short after the 16-bit
// "+=", so "(short)b != b" cannot be true as written and the clamp never
// runs; the overflow-checked forms are the ones that widen to long before
// adding (see the "long x" variants further down).
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
{ \
|
|
lM = lMInterp * vfVolume[dwJ - 1]; \
|
|
lM >>= 5; \
|
|
ppBuffer[dwJ - 1][dwI] += (short) lM;\
|
|
long b = ppBuffer[dwJ - 1][dwI]; \
|
|
if ((short)b != b) { \
|
|
if ((long)b < 0) b = 0x8000; \
|
|
else b = 0x7fff; \
|
|
ppBuffer[dwJ - 1][dwI] = (short) b; \
|
|
} \
|
|
}
|
|
#else
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
{ \
|
|
lM = lMInterp * vfVolume[dwJ - 1]; \
|
|
lM >>= 5; \
|
|
ppBuffer[dwJ - 1][dwI] += (short) lM;\
|
|
}
|
|
#endif
|
|
switch (l_nChannels)
|
|
{
|
|
default:
|
|
for (dwJ = l_nChannels; dwJ > 8; dwJ--)
|
|
{
|
|
ONE_CHANNEL_VOLUME(dwJ);
|
|
}
|
|
case 8: ONE_CHANNEL_VOLUME(8);
|
|
case 7: ONE_CHANNEL_VOLUME(7);
|
|
case 6: ONE_CHANNEL_VOLUME(6);
|
|
case 5: ONE_CHANNEL_VOLUME(5);
|
|
case 4: ONE_CHANNEL_VOLUME(4);
|
|
case 3: ONE_CHANNEL_VOLUME(3);
|
|
case 2: ONE_CHANNEL_VOLUME(2);
|
|
case 1: ONE_CHANNEL_VOLUME(1);
|
|
case 0:;
|
|
}
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#else // }{
|
|
for (dwJ = 0; dwJ < l_nChannels; dwJ++)
|
|
{
|
|
lM = lMInterp * vfVolume[dwJ];
|
|
lM >>= 5; // Signal bumps up to 12 bits.
|
|


// Keep this around so we can use it to generate new assembly code (see below...)
|
|
#if 1
|
|
{
|
|
long x = ppBuffer[dwJ][dwI];
|
|


x += lM;
|
|


if (x != (short)x) {
|
|
if (x > 32767) x = 32767;
|
|
else x = -32768;
|
|
}
|
|


ppBuffer[dwJ][dwI] = (short)x;
|
|
}
|
|
#else
|
|
ppBuffer[dwJ][dwI] += (short) lM;
|
|
_asm{jno no_oflow}
|
|
ppBuffer[dwJ][dwI] = 0x7fff;
|
|
_asm{js no_oflow}
|
|
ppBuffer[dwJ][dwI] = (short) 0x8000;
|
|
no_oflow: ;
|
|
#endif
|
|
}
|
|
#endif // }
|
|
}
|
|
#else // }{
|
|
dwPosition = pfSamplePos >> 12;
|
|
dwFract = pfSamplePos & 0xFFF;
|
|
pfSamplePos += pfPitch;
|
|


lA = (long) pcWave[dwPosition];
|
|
lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
|
|
#if 1
|
|
#if 1
|
|
// NOTE(review): as in the earlier copy of this macro, b is re-read from a
// short, so "(short)b != b" cannot be true as written and the clamp never
// runs.
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
{ \
|
|
lM = lMInterp * vfVolume[dwJ - 1]; \
|
|
lM >>= 5; \
|
|
ppBuffer[dwJ - 1][dwI] += (short) lM;\
|
|
long b = ppBuffer[dwJ - 1][dwI]; \
|
|
if ((short)b != b) { \
|
|
if ((long)b < 0) b = 0x8000; \
|
|
else b = 0x7fff; \
|
|
ppBuffer[dwJ - 1][dwI] = (short) b; \
|
|
} \
|
|
}
|
|
#else
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
{ \
|
|
lM = lMInterp * vfVolume[dwJ - 1]; \
|
|
lM >>= 5; \
|
|
ppBuffer[dwJ - 1][dwI] += (short) lM;\
|
|
}
|
|
#endif
|
|
switch (l_nChannels)
|
|
{
|
|
default:
|
|
for (dwJ = l_nChannels; dwJ > 8; dwJ--)
|
|
{
|
|
ONE_CHANNEL_VOLUME(dwJ);
|
|
}
|
|
case 8: ONE_CHANNEL_VOLUME(8);
|
|
case 7: ONE_CHANNEL_VOLUME(7);
|
|
case 6: ONE_CHANNEL_VOLUME(6);
|
|
case 5: ONE_CHANNEL_VOLUME(5);
|
|
case 4: ONE_CHANNEL_VOLUME(4);
|
|
case 3: ONE_CHANNEL_VOLUME(3);
|
|
case 2: ONE_CHANNEL_VOLUME(2);
|
|
case 1: ONE_CHANNEL_VOLUME(1);
|
|
case 0:;
|
|
}
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#else
|
|
for (dwJ = 0; dwJ < l_nChannels; dwJ++)
|
|
{
|
|
lM = lMInterp * vfVolume[dwJ];
|
|
lM >>= 5; // Signal bumps up to 12 bits.
|
|


// Keep this around so we can use it to generate new assembly code (see below...)
|
|
#if 1
|
|
{
|
|
long x = ppBuffer[dwJ][dwI];
|
|


x += lM;
|
|


if (x != (short)x) {
|
|
if (x > 32767) x = 32767;
|
|
else x = -32768;
|
|
}
|
|


ppBuffer[dwJ][dwI] = (short)x;
|
|
}
|
|
#else
|
|
ppBuffer[dwJ][dwI] += (short) lM;
|
|
_asm{jno no_oflow}
|
|
ppBuffer[dwJ][dwI] = 0x7fff;
|
|
_asm{js no_oflow}
|
|
ppBuffer[dwJ][dwI] = (short) 0x8000;
|
|
no_oflow: ;
|
|
#endif
|
|
}
|
|
#endif
|
|
dwI++;
|
|
#endif // }
|
|
}
|
|
#endif // }
|
|
// Plain one-sample-at-a-time C version. It is in the #else branch of the
// outer "#if 1" and is therefore not compiled.
#else // }{
|
|
for (dwI = 0; dwI < dwLength; )
|
|
{
|
|
if (pfSamplePos >= pfSampleLength)
|
|
{
|
|
if (pfLoopLength)
|
|
pfSamplePos -= pfLoopLength;
|
|
else
|
|
break;
|
|
}
|
|
dwIncDelta--;
|
|
if (!dwIncDelta)
|
|
{
|
|
dwIncDelta = dwDeltaPeriod;
|
|
pfPFract += pfDeltaPitch;
|
|
pfPitch = pfPFract >> 8;
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
vfVFract[dwJ] += vfDeltaVolume[dwJ];
|
|
vfVolume[dwJ] = vfVFract[dwJ] >> 8;
|
|
}
|
|
}
|
|


dwPosition = pfSamplePos >> 12;
|
|
dwFract = pfSamplePos & 0xFFF;
|
|
pfSamplePos += pfPitch;
|
|


lMInterp = pcWave[dwPosition]; // pcWave
|
|
lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
|
|


for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
lM = lMInterp * vfVolume[dwJ];
|
|
lM >>= 5;
|
|


// Keep this around so we can use it to generate new assembly code (see below...)
|
|
#if 1
|
|
{
|
|
long x = ppBuffer[dwJ][dwI];
|
|


x += lM;
|
|


if (x != (short)x) {
|
|
if (x > 32767) x = 32767;
|
|
else x = -32768;
|
|
}
|
|


ppBuffer[dwJ][dwI] = (short)x;
|
|
}
|
|
#else
|
|
ppBuffer[dwJ][dwI] += (short) lM;
|
|
_asm{jno no_oflow}
|
|
ppBuffer[dwJ][dwI] = 0x7fff;
|
|
_asm{js no_oflow}
|
|
ppBuffer[dwJ][dwI] = (short) 0x8000;
|
|
no_oflow: ;
|
|
#endif
|
|
}
|
|
dwI++;
|
|
}
|
|
#endif // }
|
|


// Hand the final volumes, pitch and sample position back to the caller and
// the object so a later call can continue from this state.
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
vfLastVolume[dwJ] = vfVolume[dwJ];
|
|
}
|
|


m_pfLastPitch = pfPitch;
|
|
m_pfLastSample = pfSamplePos;
|
|


return (dwI);
|
|
}
|
|
|
|
DWORD CDigitalAudio::MixMulti8Filter(
|
|
short *ppBuffer[],
|
|
DWORD dwBufferCount,
|
|
DWORD dwLength,
|
|
DWORD dwDeltaPeriod,
|
|
VFRACT vfDeltaVolume[],
|
|
VFRACT vfLastVolume[],
|
|
PFRACT pfDeltaPitch,
|
|
PFRACT pfSampleLength,
|
|
PFRACT pfLoopLength,
|
|
COEFF cfdK,
|
|
COEFF cfdB1,
|
|
COEFF cfdB2)
|
|
{
|
|
DWORD dwI, dwJ;
|
|
DWORD dwPosition;
|
|
long lMInterp;
|
|
long lM;
|
|
DWORD dwIncDelta = dwDeltaPeriod;
|
|
VFRACT dwFract;
|
|
char * pcWave = (char *) m_pnWave;
|
|
PFRACT pfSamplePos = m_pfLastSample;
|
|
PFRACT pfPitch = m_pfLastPitch;
|
|
PFRACT pfPFract = pfPitch << 8;
|
|
COEFF cfK = m_cfLastK;
|
|
COEFF cfB1 = m_cfLastB1;
|
|
COEFF cfB2 = m_cfLastB2;
|
|
|
|
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
|
|
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
|
|
DWORD dMM6[2];
|
|
|
|
for (dwI = 0; dwI < dwBufferCount; dwI++)
|
|
{
|
|
vfVolume[dwI] = vfLastVolume[dwI];
|
|
vfVFract[dwI] = vfVolume[dwI] << 8;
|
|
}
|
|
|
|
#if 1 // {
|
|
DWORD l_nChannels = dwBufferCount;
|
|
DWORD a;
|
|
DWORD One_Channel_1, One_Channel_2; // Code address locations.
|
|
long l_lPrevPrevSample = m_lPrevPrevSample, l_lPrevSample = m_lPrevSample;
|
|
|
|
#ifdef USE_MMX_FILTERED // {
|
|
typedef __int64 QWORD;
|
|
QWORD OneMask = 0x0000000010001000;
|
|
QWORD fffMask = 0x00000fff00000fff;
|
|
QWORD ffffMask = 0x0000ffff0000ffff;
|
|
DWORD UseMmx;
|
|
DWORD MmxVolume[2];
|
|
int Use_MMX = m_sfMMXEnabled;
|
|
|
|
_asm {
|
|
lea edi, $L43865
|
|
|
|
// Turned off
|
|
cmp Use_MMX, 0
|
|
je AssignMmxLabel
|
|
|
|
// != 2 channels
|
|
mov esi, DWORD PTR l_nChannels
|
|
cmp esi, 2
|
|
jne AssignMmxLabel
|
|
|
|
// Ok, init and use MMX
|
|
|
|
lea edi, UseMmxLabel
|
|
|
|
pxor mm0, mm0
|
|
movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
|
|
|
|
AssignMmxLabel:
|
|
mov DWORD PTR UseMmx, edi
|
|
|
|
}
|
|
#endif // }
|
|
|
|
_asm {
|
|
mov edi, DWORD PTR l_nChannels
|
|
|
|
cmp edi, 8
|
|
jna Start1
|
|
|
|
lea esi, $L44008
|
|
jmp Do_One_Channel_2
|
|
|
|
// Put this code more than 127 bytes away from the references.
|
|
|
|
overflow_x:
|
|
js overflow_y
|
|
mov WORD PTR [esi+ebx*2], 0x8000
|
|
jmp edi
|
|
|
|
overflow_y:
|
|
mov WORD PTR [esi+ebx*2], 0x7fff
|
|
jmp edi
|
|
|
|
Start1:
|
|
test edi, edi
|
|
jne Start2
|
|
|
|
lea esi, $L43860
|
|
jmp Do_One_Channel_2
|
|
|
|
Start2:
|
|
lea eax, $L43851
|
|
lea edx, $L43853
|
|
|
|
sub edx, eax
|
|
mov esi, 8
|
|
|
|
sub esi, edi
|
|
imul esi, edx
|
|
add esi, eax
|
|
|
|
Do_One_Channel_2:
|
|
mov DWORD PTR One_Channel_1, esi
|
|
|
|
// Create second jump table location.
|
|
|
|
lea esi, $L43876
|
|
lea ecx, $L43880
|
|
|
|
sub ecx, esi
|
|
|
|
push ecx // Span between branches.
|
|
|
|
mov eax, 8
|
|
sub eax, DWORD PTR l_nChannels
|
|
|
|
jge Start3
|
|
|
|
lea ecx, $L44009
|
|
jmp Done_Do_Channel_2
|
|
|
|
Start3:
|
|
cmp eax, 8
|
|
jne Start4
|
|
|
|
lea ecx, $L43866
|
|
jmp Done_Do_Channel_2
|
|
|
|
Start4:
|
|
imul ecx, eax
|
|
add ecx, esi
|
|
|
|
Done_Do_Channel_2:
|
|
mov DWORD PTR One_Channel_2, ecx
|
|
|
|
|
|
mov ecx, DWORD PTR dwLength
|
|
xor ebx, ebx // dwI
|
|
|
|
test ecx, ecx
|
|
jbe Exit_$L43841
|
|
|
|
mov ecx, DWORD PTR ppBuffer
|
|
sub ecx, 4
|
|
|
|
// ecx == ppBuffer
|
|
// ebx == dwI
|
|
// edi == l_nChannels
|
|
$L44021:
|
|
|
|
mov edx, DWORD PTR pfSamplePos
|
|
cmp edx, DWORD PTR pfSampleLength
|
|
jl SHORT $L43842
|
|
|
|
mov eax, DWORD PTR pfLoopLength
|
|
test eax, eax
|
|
je Exit_$L43841
|
|
|
|
sub edx, eax
|
|
mov DWORD PTR pfSamplePos, edx
|
|
|
|
$L43842:
|
|
mov edx, DWORD PTR dwIncDelta
|
|
mov eax, DWORD PTR pfPFract
|
|
|
|
dec edx
|
|
|
|
mov DWORD PTR dwIncDelta, edx
|
|
jne $L43860
|
|
|
|
mov edx, DWORD PTR dwDeltaPeriod
|
|
mov esi, DWORD PTR pfDeltaPitch
|
|
|
|
mov DWORD PTR dwIncDelta, edx
|
|
add eax, esi
|
|
|
|
mov DWORD PTR pfPFract, eax
|
|
|
|
sar eax, 8
|
|
mov DWORD PTR pfPitch, eax
|
|
|
|
mov esi, DWORD PTR vfDeltaVolume
|
|
jmp One_Channel_1
|
|
|
|
// ONE_CHANNEL
|
|
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
|
|
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
|
|
|
|
$L44008:
|
|
|
|
mov DWORD PTR dwI, ebx
|
|
lea ebx, DWORD PTR [edi*4-4]
|
|
add edi, -8 ; fffffff8H
|
|
$L43849:
|
|
|
|
lea eax, DWORD PTR vfVFract[ebx]
|
|
mov ecx, DWORD PTR [esi+ebx]
|
|
sub ebx, 4
|
|
add DWORD PTR [eax], ecx
|
|
mov eax, DWORD PTR [eax]
|
|
sar eax, 8
|
|
mov DWORD PTR vfVolume[ebx+4], eax
|
|
dec edi
|
|
jne SHORT $L43849
|
|
|
|
mov edi, DWORD PTR l_nChannels
|
|
mov ecx, DWORD PTR ppBuffer
|
|
|
|
mov ebx, DWORD PTR dwI
|
|
sub ecx, 4
|
|
}
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
|
|
_asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
|
|
_asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
|
|
_asm { sar eax, 8 }; \
|
|
_asm { lea edx, vfVolume }; \
|
|
_asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
// This lovely hack makes sure that all the instructions
|
|
// are the same length for the case (dwJ - 1) == 0. Code depends on this
|
|
// by calculating instruction offsets based on having 8 identical blocks.
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
|
|
#define ONE_CHANNEL_VOLUME_1 \
|
|
_asm { mov eax, DWORD PTR vfVFract[0] }; \
|
|
_asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
|
|
_asm { mov DWORD PTR vfVFract[0], eax }; \
|
|
_asm { sar eax, 8 }; \
|
|
_asm { lea edx, vfVolume }; \
|
|
_asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
|
|
|
|
$L43851:
|
|
ONE_CHANNEL_VOLUME(8)
|
|
$L43853:
|
|
ONE_CHANNEL_VOLUME(7);
|
|
ONE_CHANNEL_VOLUME(6);
|
|
ONE_CHANNEL_VOLUME(5);
|
|
ONE_CHANNEL_VOLUME(4);
|
|
ONE_CHANNEL_VOLUME(3);
|
|
ONE_CHANNEL_VOLUME(2);
|
|
ONE_CHANNEL_VOLUME_1;
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#undef ONE_CHANNEL_VOLUME_1
|
|
|
|
_asm {
|
|
// cfK += cfdK;
|
|
// cfB1 += cfdB1;
|
|
// cfB2 += cfdB2;
|
|
|
|
mov eax, DWORD PTR cfdK
|
|
mov edx, DWORD PTR cfdB1
|
|
|
|
mov esi, DWORD PTR cfdB2
|
|
add DWORD PTR cfK, eax
|
|
|
|
add DWORD PTR cfB1, edx
|
|
add DWORD PTR cfB2, esi
|
|
|
|
$L43860:
|
|
; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
|
|
|
|
mov esi, DWORD PTR pfPitch
|
|
mov eax, DWORD PTR pfSampleLength
|
|
|
|
dec esi
|
|
sub eax, DWORD PTR pfSamplePos
|
|
|
|
add eax, esi
|
|
cdq
|
|
idiv DWORD PTR pfPitch
|
|
|
|
mov edx, DWORD PTR dwLength
|
|
sub edx, ebx
|
|
|
|
cmp edx, eax
|
|
jae SHORT $L43863
|
|
mov eax, edx
|
|
|
|
$L43863:
|
|
mov edx, DWORD PTR dwIncDelta
|
|
cmp edx, eax
|
|
jae SHORT $L43864
|
|
mov eax, edx
|
|
|
|
$L43864:
|
|
|
|
; 309 :
|
|
; 310 : for (a += dwI; dwI < a; dwI++)
|
|
|
|
inc edx
|
|
|
|
sub edx, eax
|
|
add eax, ebx
|
|
|
|
mov DWORD PTR dwIncDelta, edx
|
|
cmp ebx, eax
|
|
|
|
mov DWORD PTR a, eax
|
|
jae $L43867
|
|
|
|
#ifdef USE_MMX_FILTERED // {
|
|
// Try to handle two positions at once.
|
|
|
|
lea edx, [eax-3]
|
|
cmp ebx, edx
|
|
jge $L43865
|
|
|
|
jmp UseMmx
|
|
|
|
UseMmxLabel:
|
|
// Ok, there are at least two samples to handle.
|
|
|
|
movd mm1, DWORD PTR pfPitch
|
|
psllq mm1, 32 // Pitch, 0
|
|
movd mm2, DWORD PTR pfSamplePos
|
|
punpckldq mm2, mm2 // SamplePos, SamplePos
|
|
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
|
|
punpckhdq mm1, mm1 // Pitch, Pitch
|
|
pslld mm1, 1 // Pitch * 2, Pitch * 2
|
|
|
|
mov eax, DWORD PTR pcWave
|
|
#if 0
|
|
movq mm4, QWORD PTR vfVolume
|
|
pand mm4, QWORD PTR ffffMask
|
|
movq mm5, mm4
|
|
pslld mm4, 16
|
|
por mm4, mm5
|
|
psllw mm4, 3
|
|
movq QWORD PTR MmxVolume, mm4
|
|
#endif
|
|
|
|
TwoAtATime:
|
|
|
|
; dwPosition = pfSamplePos >> 12;
|
|
; dwFract = pfSamplePos & 0xFFF;
|
|
; pfSamplePos += pfPitch;
|
|
|
|
movq mm4, mm2
|
|
psrad mm4, 12 // dwPosition + Pitch, dwPosition
|
|
|
|
; lA = (long) pcWave[dwPosition];
|
|
; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
|
|
|
|
movd esi, mm4 // dwPosition
|
|
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
|
|
// movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
|
|
// Instead for byte codes
|
|
mov si, WORD PTR [eax+esi]
|
|
movd mm6, esi
|
|
punpcklbw mm5, mm6
|
|
psraw mm5, 8
|
|
movd esi, mm4
|
|
// movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
|
|
// Instead for byte codes
|
|
mov si, WORD PTR [eax+esi]
|
|
movd mm6, esi
|
|
punpcklbw mm4, mm6
|
|
psraw mm4, 8
|
|
// This code could be combined with code above, a bit.
|
|
|
|
punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
|
|
movq mm4, mm2
|
|
pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
|
|
packssdw mm4, mm0
|
|
movq mm6, mm3
|
|
psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
|
|
punpcklwd mm6, mm4
|
|
paddd mm2, mm1 // Next iteration
|
|
pmaddwd mm6, mm5
|
|
#if 1
|
|
psrad mm6, 12 // lMIntrep2, lMInterp
|
|
|
|
#if 1
|
|
// eax, ebx, ecx, edx, esi are used. edi is free...
|
|
push eax
|
|
push ecx
|
|
push edx
|
|
|
|
movq QWORD PTR dMM6, mm6
|
|
|
|
mov eax, DWORD PTR dMM6
|
|
imul DWORD PTR cfK // edx:eax
|
|
|
|
mov ecx, eax
|
|
mov eax, DWORD PTR l_lPrevPrevSample
|
|
|
|
mov edi, edx // esi:ecx
|
|
imul DWORD PTR cfB2
|
|
|
|
sub ecx, eax
|
|
mov eax, DWORD PTR l_lPrevSample
|
|
|
|
sbb edi, edx
|
|
mov DWORD PTR l_lPrevPrevSample, eax
|
|
|
|
imul DWORD PTR cfB1
|
|
|
|
add eax, ecx
|
|
adc edx, edi
|
|
|
|
//>>>>> MOD:PETCHEY
|
|
// shld eax, edx, 2
|
|
//>>>>> should be
|
|
shld edx, eax, 2
|
|
mov eax, edx
|
|
|
|
|
|
mov DWORD PTR dMM6, eax
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
|
|
// 2nd sample
|
|
|
|
mov eax, DWORD PTR dMM6+4
|
|
imul DWORD PTR cfK // edx:eax
|
|
|
|
mov ecx, eax
|
|
mov eax, DWORD PTR l_lPrevPrevSample
|
|
|
|
mov edi, edx // esi:ecx
|
|
imul DWORD PTR cfB2
|
|
|
|
sub ecx, eax
|
|
mov eax, DWORD PTR l_lPrevSample
|
|
|
|
sbb edi, edx
|
|
mov DWORD PTR l_lPrevPrevSample, eax
|
|
|
|
imul DWORD PTR cfB1
|
|
|
|
add eax, ecx
|
|
adc edx, edi
|
|
|
|
//>>>>> MOD:PETCHEY
|
|
// shld eax, edx, 2
|
|
//>>>>> should be
|
|
shld edx, eax, 2
|
|
mov eax, edx
|
|
|
|
mov DWORD PTR dMM6+4, eax
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
|
|
movq mm6, QWORD PTR dMM6
|
|
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
#endif
|
|
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
|
|
|
|
// pand mm6, QWORD PTR ffffMask
|
|
|
|
// packssdw mm6, mm0 // Saturate to 16 bits, instead.
|
|
// punpcklwd mm6, mm0
|
|
|
|
// pand mm5, QWORD PTR ffffMask // 16 bits only.
|
|
|
|
movq mm4, mm5
|
|
mov esi, DWORD PTR [ecx+4]
|
|
|
|
punpckldq mm4, mm4
|
|
pmaddwd mm4, mm6
|
|
psrad mm4, 5
|
|
packssdw mm4, mm0
|
|
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
paddsw mm7, mm4
|
|
movd DWORD PTR [esi+ebx*2], mm7
|
|
|
|
// CHANNEL 2
|
|
|
|
punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
|
|
mov esi, DWORD PTR [ecx+8]
|
|
|
|
pmaddwd mm5, mm6
|
|
psrad mm5, 5
|
|
packssdw mm5, mm0
|
|
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
paddsw mm7, mm5
|
|
movd DWORD PTR [esi+ebx*2], mm7
|
|
|
|
#else // There is noise here, probably due to the signed nature of the multiply.
|
|
psrad mm6, 12 // lMIntrep2, lMInterp
|
|
movq mm5, QWORD PTR MmxVolume
|
|
packssdw mm6, mm0
|
|
punpckldq mm6, mm6
|
|
pmulhw mm6, mm5
|
|
mov esi, DWORD PTR [ecx+4]
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
mov esi, DWORD PTR [ecx+8]
|
|
movd mm4, DWORD PTR [esi+ebx*2]
|
|
punpckldq mm4, mm7
|
|
paddsw mm4, mm6
|
|
movd DWORD PTR [esi+ebx*2], mm4
|
|
punpckhdq mm4, mm4
|
|
mov esi, DWORD PTR [ecx+4]
|
|
movd DWORD PTR [esi+ebx*2], mm4
|
|
|
|
#endif
|
|
|
|
add ebx, 2
|
|
|
|
cmp ebx, edx
|
|
jb TwoAtATime
|
|
|
|
movd DWORD PTR pfSamplePos, mm2
|
|
#endif // }
|
|
|
|
$L43865:
|
|
|
|
; dwPosition = pfSamplePos >> 12;
|
|
; dwFract = pfSamplePos & 0xFFF;
|
|
; pfSamplePos += pfPitch;
|
|
; lA = (long) pcWave[dwPosition];
|
|
; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
|
|
|
|
mov esi, DWORD PTR pfPitch
|
|
mov edx, DWORD PTR pfSamplePos
|
|
|
|
mov eax, DWORD PTR pcWave
|
|
mov edi, edx
|
|
|
|
add esi, edx
|
|
and edi, 4095
|
|
|
|
sar edx, 12
|
|
mov DWORD PTR pfSamplePos, esi
|
|
|
|
movsx esi, BYTE PTR [eax+edx]
|
|
movsx eax, BYTE PTR [eax+edx+1]
|
|
|
|
sub eax, esi
|
|
|
|
imul eax, edi
|
|
|
|
sar eax, 12
|
|
mov edi, One_Channel_2
|
|
|
|
// ebx, ecx, edx are used in switch branches
|
|
|
|
add eax, esi // lMInterp
|
|
|
|
// lMInterp =
|
|
// MulDiv(lMInterp, cfK, (1 << 30))
|
|
// - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30))
|
|
// + MulDiv(m_lPrevSample, cfB1, (1 << 30))
|
|
|
|
push ecx
|
|
imul DWORD PTR cfK // edx:eax
|
|
|
|
mov ecx, eax
|
|
mov eax, DWORD PTR l_lPrevPrevSample
|
|
|
|
mov esi, edx // esi:ecx
|
|
imul DWORD PTR cfB2
|
|
|
|
sub ecx, eax
|
|
mov eax, DWORD PTR l_lPrevSample
|
|
|
|
sbb esi, edx
|
|
mov DWORD PTR l_lPrevPrevSample, eax
|
|
|
|
imul DWORD PTR cfB1
|
|
|
|
add eax, ecx // esi:eax
|
|
adc esi, edx
|
|
|
|
pop ecx
|
|
// shrd eax, esi, 30
|
|
|
|
//>>>>> MOD:PETCHEY
|
|
// shld eax, esi, 2
|
|
//>>>>> should be
|
|
shld esi, eax, 2
|
|
mov eax, esi
|
|
|
|
//>>>>>>>>>>>> removed dp
|
|
#if 0
|
|
// if (lMInterp < -32767) lMInterp = -32767;
|
|
// else if (lMInterp > 32767) lMInterp = 32767;
|
|
|
|
cmp eax, -32767
|
|
jl Less_than
|
|
cmp eax, 32767
|
|
jg Greater_than
|
|
#endif
|
|
|
|
// m_lPrevPrevSample = m_lPrevSample;
|
|
// m_lPrevSample = lMInterp;
|
|
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
jmp edi
|
|
|
|
Less_than:
|
|
mov eax, -32767
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
jmp edi
|
|
|
|
Greater_than:
|
|
mov eax, 32767
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
jmp edi
|
|
|
|
// ONE_CHANNEL
|
|
// lM = lMInterp * vfVolume[dwJ - 1];
|
|
// lM >>= 5;
|
|
// ppBuffer[dwJ - 1][dwI] += (short) lM;
|
|
|
|
$L44009:
|
|
|
|
; 342 : default:
|
|
; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
|
|
|
|
mov edi, DWORD PTR l_nChannels
|
|
|
|
// ecx ppBuffer
|
|
// eax lMInterp
|
|
// edi counter
|
|
// ebx dwI
|
|
|
|
$L43874:
|
|
mov edx, DWORD PTR vfVolume[edi*4-4]
|
|
mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
|
|
|
|
imul edx, eax
|
|
sar edx, 5
|
|
add WORD PTR [esi+ebx*2], dx
|
|
|
|
jno no_overflow
|
|
mov WORD PTR [esi+ebx*2], 0x7fff
|
|
js no_overflow
|
|
mov WORD PTR [esi+ebx*2], 0x8000
|
|
|
|
no_overflow:
|
|
dec edi
|
|
cmp edi, 8
|
|
jne SHORT $L43874
|
|
|
|
lea edi, $L43876
|
|
}
|
|
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
_asm { lea edx, vfVolume } \
|
|
_asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
|
|
_asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
|
|
_asm { imul edx, eax } \
|
|
_asm { sar edx, 5 } \
|
|
_asm { add edi, [esp] } \
|
|
\
|
|
_asm { add WORD PTR [esi+ebx*2], dx } \
|
|
_asm { jo FAR overflow_x }
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
// This lovely hack makes sure that all the instructions
|
|
// are the same length for the case (dwJ - 1) == 0. Code depends on this
|
|
// by calculating instruction offsets based on having 8 identical blocks.
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
#define ONE_CHANNEL_VOLUME_1 \
|
|
_asm { lea edx, vfVolume } \
|
|
_asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
|
|
_asm { mov esi, DWORD PTR [ecx + 4] } \
|
|
_asm { imul edx, eax } \
|
|
_asm { sar edx, 5 } \
|
|
_asm { add edi, [esp] } \
|
|
\
|
|
_asm { add WORD PTR [esi+ebx*2], dx } \
|
|
_asm { jo FAR overflow_x }
|
|
|
|
$L43876:
|
|
ONE_CHANNEL_VOLUME(8);
|
|
$L43880:
|
|
ONE_CHANNEL_VOLUME(7);
|
|
ONE_CHANNEL_VOLUME(6);
|
|
ONE_CHANNEL_VOLUME(5);
|
|
ONE_CHANNEL_VOLUME(4);
|
|
ONE_CHANNEL_VOLUME(3);
|
|
ONE_CHANNEL_VOLUME(2);
|
|
ONE_CHANNEL_VOLUME_1;
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#undef ONE_CHANNEL_VOLUME_1
|
|
$L43866:
|
|
_asm {
|
|
mov eax, DWORD PTR a
|
|
inc ebx
|
|
|
|
cmp ebx, eax
|
|
jb $L43865
|
|
|
|
mov edi, DWORD PTR l_nChannels
|
|
$L43867:
|
|
cmp ebx, DWORD PTR dwLength
|
|
jb $L44021
|
|
Exit_$L43841:
|
|
pop eax
|
|
mov DWORD PTR dwI, ebx
|
|
|
|
#ifdef USE_MMX_FILTERED
|
|
mov edi, UseMmx
|
|
cmp edi, UseMmxLabel
|
|
jne NoMmxCleanupLabel
|
|
|
|
emms
|
|
NoMmxCleanupLabel:
|
|
#endif
|
|
}
|
|
m_lPrevPrevSample = l_lPrevPrevSample;
|
|
m_lPrevSample = l_lPrevSample;
|
|
#else // }{
|
|
for (dwI = 0; dwI < dwLength; )
|
|
{
|
|
if (pfSamplePos >= pfSampleLength)
|
|
{
|
|
if (pfLoopLength)
|
|
pfSamplePos -= pfLoopLength;
|
|
else
|
|
break;
|
|
}
|
|
dwIncDelta--;
|
|
if (!dwIncDelta)
|
|
{
|
|
dwIncDelta = dwDeltaPeriod;
|
|
pfPFract += pfDeltaPitch;
|
|
pfPitch = pfPFract >> 8;
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
vfVFract[dwJ] += vfDeltaVolume[dwJ];
|
|
vfVolume[dwJ] = vfVFract[dwJ] >> 8;
|
|
}
|
|
|
|
cfK += cfdK;
|
|
cfB1 += cfdB1;
|
|
cfB2 += cfdB2;
|
|
}
|
|
|
|
dwPosition = pfSamplePos >> 12;
|
|
dwFract = pfSamplePos & 0xFFF;
|
|
pfSamplePos += pfPitch;
|
|
|
|
lMInterp = pcWave[dwPosition]; // pcWave
|
|
lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
|
|
|
|
// Filter
|
|
//
|
|
lMInterp =
|
|
MulDiv(lMInterp, cfK, (1 << 30))
|
|
- MulDiv(m_lPrevSample, cfB1, (1 << 30))
|
|
+ MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
|
|
|
|
m_lPrevPrevSample = m_lPrevSample;
|
|
m_lPrevSample = lMInterp;
|
|
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
lM = lMInterp * vfVolume[dwJ];
|
|
lM >>= 5;
|
|
|
|
// Keep this around so we can use it to generate new assembly code (see below...)
|
|
#if 1
|
|
{
|
|
long x = ppBuffer[dwJ][dwI];
|
|
|
|
x += lM;
|
|
|
|
if (x != (short)x) {
|
|
if (x > 32767) x = 32767;
|
|
else x = -32768;
|
|
}
|
|
|
|
ppBuffer[dwJ][dwI] = (short)x;
|
|
}
|
|
#else
|
|
ppBuffer[dwJ][dwI] += (short) lM;
|
|
_asm{jno no_oflow}
|
|
ppBuffer[dwJ][dwI] = 0x7fff;
|
|
_asm{js no_oflow}
|
|
ppBuffer[dwJ][dwI] = (short) 0x8000;
|
|
no_oflow: ;
|
|
#endif
|
|
}
|
|
dwI++;
|
|
}
|
|
#endif // }
|
|
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
vfLastVolume[dwJ] = vfVolume[dwJ];
|
|
}
|
|
|
|
m_pfLastPitch = pfPitch;
|
|
m_pfLastSample = pfSamplePos;
|
|
|
|
return (dwI);
|
|
}
|
|
|
|
#if 0
|
|
// Portable C version of the 16-bit multi-buffer mixer (this copy sits in a
// compiled-out preprocessor branch).
//
// Walks the wave in m_pnWave with a fixed-point position (12 fractional
// bits), linearly interpolates between neighbouring samples, scales the
// result by each buffer's volume and adds it, with saturation, into every
// buffer in ppBuffer.  Every dwDeltaPeriod output samples the pitch and the
// per-buffer volumes are stepped by pfDeltaPitch / vfDeltaVolume.
//
//  ppBuffer        one destination buffer per channel
//  dwBufferCount   number of entries in ppBuffer / the volume arrays
//  dwLength        maximum number of samples to produce
//  pfSampleLength  wave position at which the wave ends (or loops)
//  pfLoopLength    amount subtracted from the position when the end is
//                  reached; 0 means "no loop, stop mixing"
//
// Returns the number of samples actually written.  On exit the final pitch
// and wave position are stored in m_pfLastPitch / m_pfLastSample and the
// final volumes are written back to vfLastVolume.
DWORD CDigitalAudio::MixMulti16(
    short *ppBuffer[],
    DWORD dwBufferCount,
    DWORD dwLength,
    DWORD dwDeltaPeriod,
    VFRACT vfDeltaVolume[],
    VFRACT vfLastVolume[],
    PFRACT pfDeltaPitch,
    PFRACT pfSampleLength,
    PFRACT pfLoopLength)
{
    DWORD  dwOut;                            // index of the output sample being produced
    DWORD  dwChan;                           // destination buffer index
    DWORD  dwWaveIndex;                      // integer part of the wave position
    long   lFirst;                           // wave sample at dwWaveIndex
    long   lScaled;                          // interpolated sample after volume scaling
    long   lInterp;                          // interpolated wave sample
    DWORD  dwRampCountdown = dwDeltaPeriod;  // samples left until the next pitch/volume step
    VFRACT vfFraction;                       // fractional part of the wave position
    short *pnSource = m_pnWave;
    PFRACT pfPos = m_pfLastSample;
    PFRACT pfStep = m_pfLastPitch;
    PFRACT pfStepHiRes = pfStep << 8;        // pitch with 8 extra bits for ramping

    VFRACT vfVolume[MAX_DAUD_CHAN];          // current volume per buffer
    VFRACT vfVolumeHiRes[MAX_DAUD_CHAN];     // same, with 8 extra bits for ramping

    // Start every buffer from the volume the previous call finished with.
    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfVolume[dwChan] = vfLastVolume[dwChan];
        vfVolumeHiRes[dwChan] = vfVolume[dwChan] << 8;
    }

    dwOut = 0;
    while (dwOut < dwLength)
    {
        // End of wave: wrap back by the loop length, or stop when not looping.
        if (pfPos >= pfSampleLength)
        {
            if (!pfLoopLength)
                break;
            pfPos -= pfLoopLength;
        }

        // Step the pitch and volume ramps once per delta period.
        dwRampCountdown--;
        if (dwRampCountdown == 0)
        {
            dwRampCountdown = dwDeltaPeriod;
            pfStepHiRes += pfDeltaPitch;
            pfStep = pfStepHiRes >> 8;
            for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
            {
                vfVolumeHiRes[dwChan] += vfDeltaVolume[dwChan];
                vfVolume[dwChan] = vfVolumeHiRes[dwChan] >> 8;
            }
        }

        // Split the position into sample index and 12-bit fraction, then advance.
        dwWaveIndex = pfPos >> 12;
        vfFraction = pfPos & 0xFFF;
        pfPos += pfStep;

        // Linear interpolation between this sample and the next one.
        lFirst = (long) pnSource[dwWaveIndex];
        lInterp = (((pnSource[dwWaveIndex + 1] - lFirst) * vfFraction) >> 12) + lFirst;

        for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
        {
            lScaled = lInterp * vfVolume[dwChan];
            lScaled >>= 13;     // Signal bumps up to 12 bits.

            // Keep this around so we can use it to generate new assembly code (see below...)
#if 1
            {
                // Accumulate in a long and clamp to the 16-bit range.
                long lSum = ppBuffer[dwChan][dwOut];

                lSum += lScaled;

                if (lSum > 32767)
                    lSum = 32767;
                else if (lSum < -32768)
                    lSum = -32768;

                ppBuffer[dwChan][dwOut] = (short)lSum;
            }
#else
            ppBuffer[dwChan][dwOut] += (short) lScaled;
            _asm{jno no_oflow}
            ppBuffer[dwChan][dwOut] = 0x7fff;
            _asm{js no_oflow}
            ppBuffer[dwChan][dwOut] = (short) 0x8000;
#endif
no_oflow: ;
        }
        dwOut++;
    }

    // Hand the final ramp state back for the next call.
    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfLastVolume[dwChan] = vfVolume[dwChan];
    }
    m_pfLastPitch = pfStep;
    m_pfLastSample = pfPos;

    return (dwOut);
}
|
|
#else
|
|
// 16-bit multi-buffer mixer (x86 inline-assembly implementation with an
// optional MMX fast path and a compiled-out C fallback).
//
// Walks the wave in m_pnWave with a fixed-point position (12 fractional
// bits), linearly interpolates between neighbouring samples, multiplies the
// result by each buffer's volume (>> 13) and adds it, with 16-bit
// saturation, into every buffer in ppBuffer.  Every dwDeltaPeriod output
// samples the pitch is stepped by pfDeltaPitch and each volume by its entry
// in vfDeltaVolume (both kept with 8 extra fractional bits).
//
//  ppBuffer        one destination buffer per channel
//  dwBufferCount   number of entries in ppBuffer / the volume arrays
//  dwLength        maximum number of samples to produce
//  pfSampleLength  wave position at which the wave ends (or loops)
//  pfLoopLength    amount subtracted from the position when the end is
//                  reached; 0 means "no loop, stop mixing"
//
// Returns the number of samples actually written.  On exit the final pitch
// and wave position are stored in m_pfLastPitch / m_pfLastSample and the
// final volumes are written back to vfLastVolume.
//
// Implementation notes for the assembly path:
//  * One_Channel_1 / One_Channel_2 hold code addresses computed at entry.
//    They point into two runs of eight equally sized per-channel blocks
//    (volume ramp and mixing respectively) so that a jump lands on the
//    block for channel l_nChannels and falls through down to channel 1.
//    The block size is measured as the distance between the first two
//    labels of each run, which is why the macros must stay the same length.
//  * The mixing block size is pushed on the stack at entry and popped at
//    Exit_$L43841; each mixing block adds it to edi so that the shared
//    overflow handler can resume at the following block with "jmp edi".
//  * The MMX path is taken only when m_sfMMXEnabled is non-zero and exactly
//    two buffers are mixed; it processes two output samples per iteration.
DWORD CDigitalAudio::MixMulti16(
    short *ppBuffer[],
    DWORD dwBufferCount,
    DWORD dwLength,
    DWORD dwDeltaPeriod,
    VFRACT vfDeltaVolume[],
    VFRACT vfLastVolume[],
    PFRACT pfDeltaPitch,
    PFRACT pfSampleLength,
    PFRACT pfLoopLength)
{
    DWORD dwI, dwJ;
    DWORD dwPosition;
    long lA;//, lB;
    long lM;
    long lMInterp;
    DWORD dwIncDelta = dwDeltaPeriod;
    VFRACT dwFract;
    short * pcWave = m_pnWave;
    PFRACT pfSamplePos = m_pfLastSample;
    PFRACT pfPitch = m_pfLastPitch;
    PFRACT pfPFract = pfPitch << 8;

    VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
    VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.


    for (dwI = 0; dwI < dwBufferCount; dwI++)
    {
        vfVolume[dwI] = vfLastVolume[dwI];
        vfVFract[dwI] = vfVolume[dwI] << 8;
    }

    // Declared ahead of the #if because both the assembly path and the
    // C fallback below refer to it.
    DWORD l_nChannels = dwBufferCount;

#if 1 // {
    DWORD a;
    DWORD One_Channel_1, One_Channel_2; // Code address locations.
#ifdef USE_MMX // {
    typedef __int64 QWORD;
    QWORD OneMask = 0x0000000010001000;
    QWORD fffMask = 0x00000fff00000fff;
    QWORD ffffMask = 0x0000ffff0000ffff;
    DWORD UseMmx;
    DWORD MmxVolume[2];
    int Use_MMX = m_sfMMXEnabled;

    // Decide where the "two samples at once" dispatch (jmp UseMmx) goes:
    // the scalar loop by default, the MMX loop when MMX is enabled and
    // exactly two channels are mixed.
    _asm {
        lea edi, $L43865

        // Turned off
        cmp Use_MMX, 0
        je AssignMmxLabel

        // != 2 channels
        mov esi, DWORD PTR l_nChannels
        cmp esi, 2
        jne AssignMmxLabel

        // Ok, init and use MMX
        lea edi, UseMmxLabel

        pxor mm0, mm0
        movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000

    AssignMmxLabel:
        mov DWORD PTR UseMmx, edi

    }
#endif // }

    _asm {
        mov edi, DWORD PTR l_nChannels

        cmp edi, 8
        jna Start1

        lea esi, $L44008
        jmp Do_One_Channel_2

        // Put this code more than 127 bytes away from the references.

        // Shared saturation handler for the per-channel mixing blocks.
        // Entered with the flags of the overflowing 16-bit add; edi holds
        // the address of the block to resume at.
    overflow_x:
        js overflow_y
        mov WORD PTR [esi+ebx*2], 0x8000
        jmp edi

    overflow_y:
        mov WORD PTR [esi+ebx*2], 0x7fff
        jmp edi

    Start1:
        test edi, edi
        jne Start2

        lea esi, $L43860
        jmp Do_One_Channel_2

    Start2:
        // One_Channel_1 = $L43851 + (8 - l_nChannels) * block size
        lea eax, $L43851
        lea edx, $L43853

        sub edx, eax
        mov esi, 8

        sub esi, edi
        imul esi, edx
        add esi, eax

    Do_One_Channel_2:
        mov DWORD PTR One_Channel_1, esi

        // Create second jump table location.

        lea esi, $L43876
        lea ecx, $L43880

        sub ecx, esi

        push ecx // Span between branches.

        mov eax, 8
        sub eax, DWORD PTR l_nChannels

        jge Start3

        lea ecx, $L44009
        jmp Done_Do_Channel_2

    Start3:
        cmp eax, 8
        jne Start4

        lea ecx, $L43866
        jmp Done_Do_Channel_2

    Start4:
        imul ecx, eax
        add ecx, esi

    Done_Do_Channel_2:
        mov DWORD PTR One_Channel_2, ecx


        mov ecx, DWORD PTR dwLength
        xor ebx, ebx // dwI

        test ecx, ecx
        jbe Exit_$L43841

        mov ecx, DWORD PTR ppBuffer
        sub ecx, 4

        // ecx == ppBuffer - 4 (so [ecx + dwJ*4] is ppBuffer[dwJ - 1])
        // ebx == dwI
        // edi == l_nChannels
    $L44021:

        mov edx, DWORD PTR pfSamplePos
        cmp edx, DWORD PTR pfSampleLength
        jl SHORT $L43842

        mov eax, DWORD PTR pfLoopLength
        test eax, eax
        je Exit_$L43841

        sub edx, eax
        mov DWORD PTR pfSamplePos, edx

    $L43842:
        mov edx, DWORD PTR dwIncDelta
        mov eax, DWORD PTR pfPFract

        dec edx

        // The mov below leaves the flags from "dec edx" intact for the jne.
        mov DWORD PTR dwIncDelta, edx
        jne $L43860

        mov edx, DWORD PTR dwDeltaPeriod
        mov esi, DWORD PTR pfDeltaPitch

        mov DWORD PTR dwIncDelta, edx
        add eax, esi

        mov DWORD PTR pfPFract, eax

        sar eax, 8
        mov DWORD PTR pfPitch, eax

        mov esi, DWORD PTR vfDeltaVolume
        jmp One_Channel_1

        // ONE_CHANNEL
        // vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
        // vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;

        // More than 8 channels: ramp channels l_nChannels..9 in a loop,
        // then fall through into the unrolled blocks for channels 8..1.
    $L44008:

        mov DWORD PTR dwI, ebx
        lea ebx, DWORD PTR [edi*4-4]
        add edi, -8 ; fffffff8H
    $L43849:

        lea eax, DWORD PTR vfVFract[ebx]
        mov ecx, DWORD PTR [esi+ebx]
        sub ebx, 4
        add DWORD PTR [eax], ecx
        mov eax, DWORD PTR [eax]
        sar eax, 8
        mov DWORD PTR vfVolume[ebx+4], eax
        dec edi
        jne SHORT $L43849

        mov edi, DWORD PTR l_nChannels
        mov ecx, DWORD PTR ppBuffer

        mov ebx, DWORD PTR dwI
        sub ecx, 4
    }
#define ONE_CHANNEL_VOLUME(dwJ) \
    _asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
    _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
    _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
    _asm { sar eax, 8 }; \
    _asm { lea edx, vfVolume }; \
    _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };

    //-------------------------------------------------------------------------
    //
    // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
    //
    // This lovely hack makes sure that all the instructions
    // are the same length for the case (dwJ - 1) == 0. Code depends on this
    // by calculating instruction offsets based on having 8 identical blocks.
    //
    // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
    //
    //-------------------------------------------------------------------------
#define ONE_CHANNEL_VOLUME_1 \
    _asm { mov eax, DWORD PTR vfVFract[0] }; \
    _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
    _asm { mov DWORD PTR vfVFract[0], eax }; \
    _asm { sar eax, 8 }; \
    _asm { lea edx, vfVolume }; \
    _asm { mov DWORD PTR [edx], eax };

$L43851:
    ONE_CHANNEL_VOLUME(8)
$L43853:
    ONE_CHANNEL_VOLUME(7);
    ONE_CHANNEL_VOLUME(6);
    ONE_CHANNEL_VOLUME(5);
    ONE_CHANNEL_VOLUME(4);
    ONE_CHANNEL_VOLUME(3);
    ONE_CHANNEL_VOLUME(2);
    ONE_CHANNEL_VOLUME_1;
#undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43860:
    _asm {
        ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;

        mov esi, DWORD PTR pfPitch
        mov eax, DWORD PTR pfSampleLength

        dec esi
        sub eax, DWORD PTR pfSamplePos

        add eax, esi
        cdq
        idiv DWORD PTR pfPitch

        // a = min(a, dwLength - dwI)
        mov edx, DWORD PTR dwLength
        sub edx, ebx

        cmp edx, eax
        jae SHORT $L43863
        mov eax, edx

    $L43863:
        // a = min(a, dwIncDelta)
        mov edx, DWORD PTR dwIncDelta
        cmp edx, eax
        jae SHORT $L43864
        mov eax, edx

    $L43864:

        ; 309 :
        ; 310 : for (a += dwI; dwI < a; dwI++)

        // dwIncDelta -= a - 1;  a += dwI;
        inc edx

        sub edx, eax
        add eax, ebx

        mov DWORD PTR dwIncDelta, edx
        cmp ebx, eax

        mov DWORD PTR a, eax
        jae $L43867

#ifdef USE_MMX // {
        // Try to handle two positions at once.

        lea edx, [eax-3]
        cmp ebx, edx
        jge $L43865

        jmp UseMmx

    UseMmxLabel:
        // Ok, there are at least two samples to handle.

        movd mm1, DWORD PTR pfPitch
        psllq mm1, 32 // Pitch, 0
        movd mm2, DWORD PTR pfSamplePos
        punpckldq mm2, mm2 // SamplePos, SamplePos
        paddd mm2, mm1 // SamplePos + Pitch, SamplePos
        punpckhdq mm1, mm1 // Pitch, Pitch
        pslld mm1, 1 // Pitch * 2, Pitch * 2

        mov eax, DWORD PTR pcWave
#if 0
        movq mm4, QWORD PTR vfVolume
        pand mm4, QWORD PTR ffffMask
        movq mm5, mm4
        pslld mm4, 16
        por mm4, mm5
        psllw mm4, 3
        movq QWORD PTR MmxVolume, mm4
#endif

    TwoAtATime:

        ; dwPosition = pfSamplePos >> 12;
        ; dwFract = pfSamplePos & 0xFFF;
        ; pfSamplePos += pfPitch;

        movq mm4, mm2
        psrad mm4, 12 // dwPosition + Pitch, dwPosition

        ; lA = (long) pcWave[dwPosition];
        ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;

        movd esi, mm4 // dwPosition
        punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
        movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
        // Instead for byte codes
        // mov si, WORD PTR [eax+esi]
        // movd mm6, esi
        // punpcklbw mm5, mm6
        // psarw mm5, 8
        movd esi, mm4
        movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
        // Instead for byte codes
        // mov si, WORD PTR [eax+esi]
        // movd mm6, esi
        // punpcklbw mm4, mm6
        // psarw mm4, 8
        // This code could be combined with code above, a bit.

        punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
        movq mm4, mm2
        pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
        packssdw mm4, mm0
        movq mm6, mm3
        psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
        punpcklwd mm6, mm4
        paddd mm2, mm1 // Next iteration
        pmaddwd mm6, mm5
#if 1
        movq mm5, QWORD PTR vfVolume // Volume2, Volume1
        psrad mm6, 12 // lMInterp2, lMInterp
        // pand mm6, QWORD PTR ffffMask
        // pand mm5, QWORD PTR ffffMask // 16 bits only.

        movq mm4, mm5
        mov esi, DWORD PTR [ecx+4]

        punpckldq mm4, mm4
        pmaddwd mm4, mm6
        psrad mm4, 13
        packssdw mm4, mm0

        movd mm7, DWORD PTR [esi+ebx*2]
        paddsw mm7, mm4
        movd DWORD PTR [esi+ebx*2], mm7

        // CHANNEL 2

        punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
        mov esi, DWORD PTR [ecx+8]

        pmaddwd mm5, mm6
        psrad mm5, 13
        packssdw mm5, mm0

        movd mm7, DWORD PTR [esi+ebx*2]
        paddsw mm7, mm5
        movd DWORD PTR [esi+ebx*2], mm7

#else // There is noise here, probably due to the signed nature of the multiply.
        psrad mm6, 12 // lMInterp2, lMInterp
        movq mm5, QWORD PTR MmxVolume
        packssdw mm6, mm0
        punpckldq mm6, mm6
        pmulhw mm6, mm5
        mov esi, DWORD PTR [ecx+4]
        movd mm7, DWORD PTR [esi+ebx*2]
        mov esi, DWORD PTR [ecx+8]
        movd mm4, DWORD PTR [esi+ebx*2]
        punpckldq mm4, mm7
        paddsw mm4, mm6
        movd DWORD PTR [esi+ebx*2], mm4
        punpckhdq mm4, mm4
        mov esi, DWORD PTR [ecx+4]
        movd DWORD PTR [esi+ebx*2], mm4

#endif

        add ebx, 2

        cmp ebx, edx
        jb TwoAtATime

        // Low dword of mm2 is the position of the next sample to mix.
        movd DWORD PTR pfSamplePos, mm2
#endif // }


    $L43865:

        ; dwPosition = pfSamplePos >> 12;
        ; dwFract = pfSamplePos & 0xFFF;
        ; pfSamplePos += pfPitch;
        ; lA = (long) pcWave[dwPosition];
        ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;

        mov esi, DWORD PTR pfPitch
        mov edx, DWORD PTR pfSamplePos

        mov eax, DWORD PTR pcWave
        mov edi, edx

        add esi, edx
        and edi, 4095

        sar edx, 12
        mov DWORD PTR pfSamplePos, esi

        movsx esi, WORD PTR [eax+edx*2]
        movsx eax, WORD PTR [eax+edx*2+2]

        sub eax, esi

        imul eax, edi

        sar eax, 12
        mov edi, One_Channel_2

        // ebx, ecx, edx are used in switch branches

        add eax, esi // lMInterp
        jmp edi

        // ONE_CHANNEL
        // lM = lMInterp * vfVolume[dwJ - 1];
        // lM >>= 13;
        // ppBuffer[dwJ - 1][dwI] += (short) lM;

    $L44009:

        ; 342 : default:
        ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)

        mov edi, DWORD PTR l_nChannels

        // ecx ppBuffer
        // eax lMInterp
        // edi counter
        // ebx dwI

    $L43874:
        mov edx, DWORD PTR vfVolume[edi*4-4]
        mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]

        imul edx, eax
        sar edx, 13
        add WORD PTR [esi+ebx*2], dx

        // Saturate on 16-bit overflow; the movs do not disturb the flags
        // left by the add, so js still tests the sign of the wrapped sum.
        jno no_overflow
        mov WORD PTR [esi+ebx*2], 0x7fff
        js no_overflow
        mov WORD PTR [esi+ebx*2], 0x8000

    no_overflow:
        dec edi
        cmp edi, 8
        jne SHORT $L43874

        lea edi, $L43876
    }

#define ONE_CHANNEL_VOLUME(dwJ) \
    _asm { lea edx, vfVolume } \
    _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
    _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
    _asm { imul edx, eax } \
    _asm { sar edx, 13 } \
    _asm { add edi, [esp] } \
    \
    _asm { add WORD PTR [esi+ebx*2], dx } \
    _asm { jo FAR overflow_x }

    //-------------------------------------------------------------------------
    //
    // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
    //
    // This lovely hack makes sure that all the instructions
    // are the same length for the case (dwJ - 1) == 0. Code depends on this
    // by calculating instruction offsets based on having 8 identical blocks.
    //
    // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
    //
    //-------------------------------------------------------------------------

#define ONE_CHANNEL_VOLUME_1 \
    _asm { lea edx, vfVolume } \
    _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
    _asm { mov esi, DWORD PTR [ecx + 4] } \
    _asm { imul edx, eax } \
    _asm { sar edx, 13 } \
    _asm { add edi, [esp] } \
    \
    _asm { add WORD PTR [esi+ebx*2], dx } \
    _asm { jo FAR overflow_x }

$L43876:
    ONE_CHANNEL_VOLUME(8);
$L43880:
    ONE_CHANNEL_VOLUME(7);
    ONE_CHANNEL_VOLUME(6);
    ONE_CHANNEL_VOLUME(5);
    ONE_CHANNEL_VOLUME(4);
    ONE_CHANNEL_VOLUME(3);
    ONE_CHANNEL_VOLUME(2);
    ONE_CHANNEL_VOLUME_1;
#undef ONE_CHANNEL_VOLUME
#undef ONE_CHANNEL_VOLUME_1
$L43866:
    _asm {
        mov eax, DWORD PTR a
        inc ebx

        cmp ebx, eax
        jb $L43865

        mov edi, DWORD PTR l_nChannels
    $L43867:
        cmp ebx, DWORD PTR dwLength
        jb $L44021
    Exit_$L43841:
        // Discard the block span pushed during setup.
        pop eax
        mov DWORD PTR dwI, ebx

#ifdef USE_MMX
        // Leave MMX state only if the MMX path was selected at entry.
        mov edi, UseMmx
        cmp edi, UseMmxLabel
        jne NoMmxCleanupLabel

        emms
    NoMmxCleanupLabel:
#endif
    }
#else // }{
    // Portable C fallback (not compiled while the #if above is 1).
    for (dwI = 0; dwI < dwLength;)
    {
        if (pfSamplePos >= pfSampleLength)
        {
            if (pfLoopLength)
                pfSamplePos -= pfLoopLength;
            else
                break;
        }
        dwIncDelta--;
        if (!dwIncDelta)
        {
            dwIncDelta = dwDeltaPeriod;
            pfPFract += pfDeltaPitch;
            pfPitch = pfPFract >> 8;

#if 1
#define ONE_CHANNEL_VOLUME(dwJ) \
            vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1]; \
            vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;

            switch (l_nChannels)
            {
            default:
                for (dwJ = l_nChannels; dwJ > 8; dwJ--)
                {
                    ONE_CHANNEL_VOLUME(dwJ);
                }
            case 8: ONE_CHANNEL_VOLUME(8);
            case 7: ONE_CHANNEL_VOLUME(7);
            case 6: ONE_CHANNEL_VOLUME(6);
            case 5: ONE_CHANNEL_VOLUME(5);
            case 4: ONE_CHANNEL_VOLUME(4);
            case 3: ONE_CHANNEL_VOLUME(3);
            case 2: ONE_CHANNEL_VOLUME(2);
            case 1: ONE_CHANNEL_VOLUME(1);
            case 0:;
            }
#undef ONE_CHANNEL_VOLUME
#else
            for (dwJ = 0; dwJ < l_nChannels; dwJ++)
            {
                vfVFract[dwJ] += vfDeltaVolume[dwJ];
                vfVolume[dwJ] = vfVFract[dwJ] >> 8;
            }
#endif
        }

#if 1 // {
        // Number of samples that can be mixed before the wave ends, the
        // output is full, or the next ramp step is due.
        DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
        DWORD b = dwLength - dwI;

        if (b < a) a = b;
        if (dwIncDelta < a) a = dwIncDelta;

        dwIncDelta -= a - 1;
        a += dwI;

        for (; dwI < a; dwI++)
        {
            dwPosition = pfSamplePos >> 12;
            dwFract = pfSamplePos & 0xFFF;
            pfSamplePos += pfPitch;

            lA = (long) pcWave[dwPosition];
            lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
#if 1 // {
#if 1
/* The sum is formed in a long and clamped BEFORE it is stored.  Checking   */
/* the value after it has been written to the short buffer can never detect */
/* overflow, because the stored value has already been truncated.           */
#define ONE_CHANNEL_VOLUME(dwJ) \
            { \
                lM = lMInterp * vfVolume[dwJ - 1]; \
                lM >>= 13; \
                long lSum = (long) ppBuffer[dwJ - 1][dwI] + (short) lM; \
                if (lSum != (short) lSum) { \
                    if (lSum < 0) lSum = -32768; \
                    else lSum = 32767; \
                } \
                ppBuffer[dwJ - 1][dwI] = (short) lSum; \
            }
#else
#define ONE_CHANNEL_VOLUME(dwJ) \
            { \
                lM = lMInterp * vfVolume[dwJ - 1]; \
                lM >>= 13; \
                ppBuffer[dwJ - 1][dwI] += (short) lM;\
            }
#endif
            switch (l_nChannels)
            {
            default:
                for (dwJ = l_nChannels; dwJ > 8; dwJ--)
                {
                    ONE_CHANNEL_VOLUME(dwJ);
                }
            case 8: ONE_CHANNEL_VOLUME(8);
            case 7: ONE_CHANNEL_VOLUME(7);
            case 6: ONE_CHANNEL_VOLUME(6);
            case 5: ONE_CHANNEL_VOLUME(5);
            case 4: ONE_CHANNEL_VOLUME(4);
            case 3: ONE_CHANNEL_VOLUME(3);
            case 2: ONE_CHANNEL_VOLUME(2);
            case 1: ONE_CHANNEL_VOLUME(1);
            case 0:;
            }
#undef ONE_CHANNEL_VOLUME
#else // }{
            for (dwJ = 0; dwJ < l_nChannels; dwJ++)
            {
                lM = lMInterp * vfVolume[dwJ];
                lM >>= 13; // Signal bumps up to 12 bits.

                // Keep this around so we can use it to generate new assembly code (see below...)
#if 1
                {
                    long x = ppBuffer[dwJ][dwI];

                    x += lM;

                    if (x != (short)x) {
                        if (x > 32767) x = 32767;
                        else x = -32768;
                    }

                    ppBuffer[dwJ][dwI] = (short)x;
                }
#else
                ppBuffer[dwJ][dwI] += (short) lM;
                _asm{jno no_oflow}
                ppBuffer[dwJ][dwI] = 0x7fff;
                _asm{js no_oflow}
                ppBuffer[dwJ][dwI] = (short) 0x8000;
no_oflow: ;
#endif
            }
#endif // }
        }
#else // }{
        dwPosition = pfSamplePos >> 12;
        dwFract = pfSamplePos & 0xFFF;
        pfSamplePos += pfPitch;

        lA = (long) pcWave[dwPosition];
        lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
#if 1
#if 1
/* Same clamp-before-store form as the batched variant above. */
#define ONE_CHANNEL_VOLUME(dwJ) \
        { \
            lM = lMInterp * vfVolume[dwJ - 1]; \
            lM >>= 13; \
            long lSum = (long) ppBuffer[dwJ - 1][dwI] + (short) lM; \
            if (lSum != (short) lSum) { \
                if (lSum < 0) lSum = -32768; \
                else lSum = 32767; \
            } \
            ppBuffer[dwJ - 1][dwI] = (short) lSum; \
        }
#else
#define ONE_CHANNEL_VOLUME(dwJ) \
        { \
            lM = lMInterp * vfVolume[dwJ - 1]; \
            lM >>= 13; \
            ppBuffer[dwJ - 1][dwI] += (short) lM;\
        }
#endif
        switch (l_nChannels)
        {
        default:
            for (dwJ = l_nChannels; dwJ > 8; dwJ--)
            {
                ONE_CHANNEL_VOLUME(dwJ);
            }
        case 8: ONE_CHANNEL_VOLUME(8);
        case 7: ONE_CHANNEL_VOLUME(7);
        case 6: ONE_CHANNEL_VOLUME(6);
        case 5: ONE_CHANNEL_VOLUME(5);
        case 4: ONE_CHANNEL_VOLUME(4);
        case 3: ONE_CHANNEL_VOLUME(3);
        case 2: ONE_CHANNEL_VOLUME(2);
        case 1: ONE_CHANNEL_VOLUME(1);
        case 0:;
        }
#undef ONE_CHANNEL_VOLUME
#else
        for (dwJ = 0; dwJ < l_nChannels; dwJ++)
        {
            lM = lMInterp * vfVolume[dwJ];
            lM >>= 13; // Signal bumps up to 12 bits.

            // Keep this around so we can use it to generate new assembly code (see below...)
#if 1
            {
                long x = ppBuffer[dwJ][dwI];

                x += lM;

                if (x != (short)x) {
                    if (x > 32767) x = 32767;
                    else x = -32768;
                }

                ppBuffer[dwJ][dwI] = (short)x;
            }
#else
            ppBuffer[dwJ][dwI] += (short) lM;
            _asm{jno no_oflow}
            ppBuffer[dwJ][dwI] = 0x7fff;
            _asm{js no_oflow}
            ppBuffer[dwJ][dwI] = (short) 0x8000;
no_oflow: ;
#endif
        }
#endif
        dwI++;
#endif // }
    }
#endif // }

    // Hand the final ramp state back for the next call.
    m_pfLastPitch = pfPitch;
    m_pfLastSample = pfSamplePos;

    for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
    {
        vfLastVolume[dwJ] = vfVolume[dwJ];
    }

    return (dwI);
}
|
|
#endif
|
|
|
|
DWORD CDigitalAudio::MixMulti16Filter(
|
|
short *ppBuffer[],
|
|
DWORD dwBufferCount,
|
|
DWORD dwLength,
|
|
DWORD dwDeltaPeriod,
|
|
VFRACT vfDeltaVolume[],
|
|
VFRACT vfLastVolume[],
|
|
PFRACT pfDeltaPitch,
|
|
PFRACT pfSampleLength,
|
|
PFRACT pfLoopLength,
|
|
COEFF cfdK,
|
|
COEFF cfdB1,
|
|
COEFF cfdB2)
|
|
{
|
|
DWORD dwI, dwJ;
|
|
DWORD dwPosition;
|
|
long lA;//, lB;
|
|
long lM;
|
|
long lMInterp;
|
|
DWORD dwIncDelta = dwDeltaPeriod;
|
|
VFRACT dwFract;
|
|
short * pcWave = m_pnWave;
|
|
PFRACT pfSamplePos = m_pfLastSample;
|
|
PFRACT pfPitch = m_pfLastPitch;
|
|
PFRACT pfPFract = pfPitch << 8;
|
|
COEFF cfK = m_cfLastK;
|
|
COEFF cfB1 = m_cfLastB1;
|
|
COEFF cfB2 = m_cfLastB2;
|
|
DWORD dMM6[2]; // Handle filter...
|
|
DWORD dMM4[2]; // Handle filter...
|
|
DWORD dMM5[2]; // Handle filter...
|
|
|
|
VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
|
|
VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
|
|
|
|
for (dwI = 0; dwI < dwBufferCount; dwI++)
|
|
{
|
|
vfVolume[dwI] = vfLastVolume[dwI];
|
|
vfVFract[dwI] = vfVolume[dwI] << 8;
|
|
}
|
|
|
|
#if 1 // {
|
|
DWORD l_nChannels = dwBufferCount;
|
|
DWORD a;
|
|
DWORD One_Channel_1, One_Channel_2; // Code address locations.
|
|
long l_lPrevPrevSample = m_lPrevPrevSample, l_lPrevSample = m_lPrevSample;
|
|
|
|
#ifdef USE_MMX_FILTERED // {
|
|
typedef __int64 QWORD;
|
|
QWORD OneMask = 0x0000000010001000;
|
|
QWORD fffMask = 0x00000fff00000fff;
|
|
QWORD ffffMask = 0x0000ffff0000ffff;
|
|
DWORD UseMmx;
|
|
DWORD MmxVolume[2];
|
|
int Use_MMX = m_sfMMXEnabled;
|
|
|
|
_asm {
|
|
lea edi, $L43865
|
|
|
|
// Turned off
|
|
cmp Use_MMX, 0
|
|
je AssignMMXLabel
|
|
|
|
// != 2 channels
|
|
mov esi, DWORD PTR l_nChannels
|
|
cmp esi, 2
|
|
jne AssignMmxLabel
|
|
|
|
// Ok, init and use MMX
|
|
lea edi, UseMmxLabel
|
|
|
|
pxor mm0, mm0
|
|
movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
|
|
|
|
AssignMmxLabel:
|
|
mov DWORD PTR UseMmx, edi
|
|
}
|
|
#endif // }
|
|
|
|
_asm {
|
|
mov edi, DWORD PTR l_nChannels
|
|
|
|
cmp edi, 8
|
|
jna Start1
|
|
|
|
lea esi, $L44008
|
|
jmp Do_One_Channel_2
|
|
|
|
// Put this code more than 127 bytes away from the references.
|
|
|
|
overflow_x:
|
|
js overflow_y
|
|
mov WORD PTR [esi+ebx*2], 0x8000
|
|
jmp edi
|
|
|
|
overflow_y:
|
|
mov WORD PTR [esi+ebx*2], 0x7fff
|
|
jmp edi
|
|
|
|
Start1:
|
|
test edi, edi
|
|
jne Start2
|
|
|
|
lea esi, $L43860
|
|
jmp Do_One_Channel_2
|
|
|
|
Start2:
|
|
lea eax, $L43851
|
|
lea edx, $L43853
|
|
|
|
sub edx, eax
|
|
mov esi, 8
|
|
|
|
sub esi, edi
|
|
imul esi, edx
|
|
add esi, eax
|
|
|
|
Do_One_Channel_2:
|
|
mov DWORD PTR One_Channel_1, esi
|
|
|
|
// Create second jump table location.
|
|
|
|
lea esi, $L43876
|
|
lea ecx, $L43880
|
|
|
|
sub ecx, esi
|
|
|
|
push ecx // Span between branches.
|
|
|
|
mov eax, 8
|
|
sub eax, DWORD PTR l_nChannels
|
|
|
|
jge Start3
|
|
|
|
lea ecx, $L44009
|
|
jmp Done_Do_Channel_2
|
|
|
|
Start3:
|
|
cmp eax, 8
|
|
jne Start4
|
|
|
|
lea ecx, $L43866
|
|
jmp Done_Do_Channel_2
|
|
|
|
Start4:
|
|
imul ecx, eax
|
|
add ecx, esi
|
|
|
|
Done_Do_Channel_2:
|
|
mov DWORD PTR One_Channel_2, ecx
|
|
|
|
|
|
mov ecx, DWORD PTR dwLength
|
|
xor ebx, ebx // dwI
|
|
|
|
test ecx, ecx
|
|
jbe Exit_$L43841
|
|
|
|
mov ecx, DWORD PTR ppBuffer
|
|
sub ecx, 4
|
|
|
|
// ecx == ppBuffer - 4
|
|
// ebx == dwI
|
|
// edi == l_nChannels
|
|
$L44021:
|
|
|
|
mov edx, DWORD PTR pfSamplePos
|
|
cmp edx, DWORD PTR pfSampleLength
|
|
jl SHORT $L43842
|
|
|
|
mov eax, DWORD PTR pfLoopLength
|
|
test eax, eax
|
|
je Exit_$L43841
|
|
|
|
sub edx, eax
|
|
mov DWORD PTR pfSamplePos, edx
|
|
|
|
$L43842:
|
|
mov edx, DWORD PTR dwIncDelta
|
|
mov eax, DWORD PTR pfPFract
|
|
|
|
dec edx
|
|
|
|
mov DWORD PTR dwIncDelta, edx
|
|
jne $L43860
|
|
|
|
mov edx, DWORD PTR dwDeltaPeriod
|
|
mov esi, DWORD PTR pfDeltaPitch
|
|
|
|
mov DWORD PTR dwIncDelta, edx
|
|
add eax, esi
|
|
|
|
mov DWORD PTR pfPFract, eax
|
|
|
|
sar eax, 8
|
|
mov DWORD PTR pfPitch, eax
|
|
|
|
mov esi, DWORD PTR vfDeltaVolume
|
|
jmp One_Channel_1
|
|
|
|
// ONE_CHANNEL
|
|
// vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
|
|
// vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
|
|
|
|
$L44008:
|
|
|
|
mov DWORD PTR dwI, ebx
|
|
lea ebx, DWORD PTR [edi*4-4]
|
|
add edi, -8 ; fffffff8H
|
|
$L43849:
|
|
|
|
lea eax, DWORD PTR vfVFract[ebx]
|
|
mov ecx, DWORD PTR [esi+ebx]
|
|
sub ebx, 4
|
|
add DWORD PTR [eax], ecx
|
|
mov eax, DWORD PTR [eax]
|
|
sar eax, 8
|
|
mov DWORD PTR vfVolume[ebx+4], eax
|
|
dec edi
|
|
jne SHORT $L43849
|
|
|
|
mov edi, DWORD PTR l_nChannels
|
|
mov ecx, DWORD PTR ppBuffer
|
|
|
|
mov ebx, DWORD PTR dwI
|
|
sub ecx, 4
|
|
}
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
_asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
|
|
_asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
|
|
_asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
|
|
_asm { sar eax, 8 }; \
|
|
_asm { lea edx, vfVolume }; \
|
|
_asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
// This lovely hack makes sure that all the instructions
|
|
// are the same length for the case (dwJ - 1) == 0. Code depends on this
|
|
// by calculating instruction offsets based on having 8 identical blocks.
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
|
|
#define ONE_CHANNEL_VOLUME_1 \
|
|
_asm { mov eax, DWORD PTR vfVFract[0] }; \
|
|
_asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
|
|
_asm { mov DWORD PTR vfVFract[0], eax }; \
|
|
_asm { sar eax, 8 }; \
|
|
_asm { lea edx, vfVolume }; \
|
|
_asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
|
|
|
|
$L43851:
|
|
ONE_CHANNEL_VOLUME(8)
|
|
$L43853:
|
|
ONE_CHANNEL_VOLUME(7);
|
|
ONE_CHANNEL_VOLUME(6);
|
|
ONE_CHANNEL_VOLUME(5);
|
|
ONE_CHANNEL_VOLUME(4);
|
|
ONE_CHANNEL_VOLUME(3);
|
|
ONE_CHANNEL_VOLUME(2);
|
|
ONE_CHANNEL_VOLUME_1;
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#undef ONE_CHANNEL_VOLUME_1
|
|
|
|
_asm {
|
|
// cfK += cfdK;
|
|
// cfB1 += cfdB1;
|
|
// cfB2 += cfdB2;
|
|
|
|
mov eax, DWORD PTR cfdK
|
|
mov edx, DWORD PTR cfdB1
|
|
|
|
mov esi, DWORD PTR cfdB2
|
|
add DWORD PTR cfK, eax
|
|
|
|
add DWORD PTR cfB1, edx
|
|
add DWORD PTR cfB2, esi
|
|
|
|
$L43860:
|
|
; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
|
|
|
|
mov esi, DWORD PTR pfPitch
|
|
mov eax, DWORD PTR pfSampleLength
|
|
|
|
dec esi
|
|
sub eax, DWORD PTR pfSamplePos
|
|
|
|
add eax, esi
|
|
cdq
|
|
idiv DWORD PTR pfPitch
|
|
|
|
mov edx, DWORD PTR dwLength
|
|
sub edx, ebx
|
|
|
|
cmp edx, eax
|
|
jae SHORT $L43863
|
|
mov eax, edx
|
|
|
|
$L43863:
|
|
mov edx, DWORD PTR dwIncDelta
|
|
cmp edx, eax
|
|
jae SHORT $L43864
|
|
mov eax, edx
|
|
|
|
$L43864:
|
|
|
|
; 309 :
|
|
; 310 : for (a += dwI; dwI < a; dwI++)
|
|
|
|
inc edx
|
|
|
|
sub edx, eax
|
|
add eax, ebx
|
|
|
|
mov DWORD PTR dwIncDelta, edx
|
|
cmp ebx, eax
|
|
|
|
mov DWORD PTR a, eax
|
|
jae $L43867
|
|
|
|
#ifdef USE_MMX_FILTERED // {
|
|
// Try to handle two positions at once.
|
|
|
|
lea edx, [eax-3]
|
|
cmp ebx, edx
|
|
jge $L43865
|
|
|
|
jmp UseMmx
|
|
|
|
UseMmxLabel:
|
|
// Ok, there are at least two samples to handle.
|
|
|
|
movd mm1, DWORD PTR pfPitch
|
|
psllq mm1, 32 // Pitch, 0
|
|
movd mm2, DWORD PTR pfSamplePos
|
|
punpckldq mm2, mm2 // SamplePos, SamplePos
|
|
paddd mm2, mm1 // SamplePos + Pitch, SamplePos
|
|
punpckhdq mm1, mm1 // Pitch, Pitch
|
|
pslld mm1, 1 // Pitch * 2, Pitch * 2
|
|
|
|
mov eax, DWORD PTR pcWave
|
|
#if 0
|
|
movq mm4, QWORD PTR vfVolume
|
|
pand mm4, QWORD PTR ffffMask
|
|
movq mm5, mm4
|
|
pslld mm4, 16
|
|
por mm4, mm5
|
|
psllw mm4, 3
|
|
movq QWORD PTR MmxVolume, mm4
|
|
#endif
|
|
|
|
TwoAtATime:
|
|
|
|
; dwPosition = pfSamplePos >> 12;
|
|
; dwFract = pfSamplePos & 0xFFF;
|
|
; pfSamplePos += pfPitch;
|
|
|
|
movq mm4, mm2
|
|
psrad mm4, 12 // dwPosition + Pitch, dwPosition
|
|
|
|
; lA = (long) pcWave[dwPosition];
|
|
; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
|
|
|
|
movd esi, mm4 // dwPosition
|
|
punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
|
|
movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
|
|
// Instead for byte codes
|
|
// mov si, WORD PTR [eax+esi]
|
|
// movd mm6, esi
|
|
// punpcklbw mm5, mm6
|
|
// psarw mm5, 8
|
|
movd esi, mm4
|
|
movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
|
|
// Instead for byte codes
|
|
// mov si, WORD PTR [eax+esi]
|
|
// movd mm6, esi
|
|
// punpcklbw mm4, mm6
|
|
// psarw mm4, 8
|
|
// This code could be combined with code above, a bit.
|
|
|
|
punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
|
|
movq mm4, mm2
|
|
pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
|
|
packssdw mm4, mm0
|
|
movq mm6, mm3
|
|
psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
|
|
punpcklwd mm6, mm4
|
|
paddd mm2, mm1 // Next iteration
|
|
pmaddwd mm6, mm5
|
|
#if 1 // {
|
|
psrad mm6, 12 // lMIntrep2, lMInterp
|
|
|
|
#if 1 // {
|
|
// eax, ebx, ecx, edx, esi are used. edi is free...
|
|
push eax
|
|
push ecx
|
|
push edx
|
|
|
|
movq QWORD PTR dMM6, mm6
|
|
|
|
mov eax, DWORD PTR dMM6
|
|
imul DWORD PTR cfK // edx:eax
|
|
|
|
mov ecx, eax
|
|
mov eax, DWORD PTR l_lPrevPrevSample
|
|
|
|
mov edi, edx // esi:ecx
|
|
imul DWORD PTR cfB2
|
|
|
|
sub ecx, eax
|
|
mov eax, DWORD PTR l_lPrevSample
|
|
|
|
sbb edi, edx
|
|
mov DWORD PTR l_lPrevPrevSample, eax
|
|
|
|
imul DWORD PTR cfB1
|
|
|
|
add eax, ecx
|
|
adc edx, edi
|
|
|
|
//>>>>> MOD:PETCHEY
|
|
// shld eax, edx, 2
|
|
//>>>>> should be
|
|
shld edx, eax, 2
|
|
mov eax, edx
|
|
|
|
mov DWORD PTR dMM6, eax
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
|
|
// 2nd sample
|
|
|
|
mov eax, DWORD PTR dMM6+4
|
|
imul DWORD PTR cfK // edx:eax
|
|
|
|
mov ecx, eax
|
|
mov eax, DWORD PTR l_lPrevPrevSample
|
|
|
|
mov edi, edx // esi:ecx
|
|
imul DWORD PTR cfB2
|
|
|
|
sub ecx, eax
|
|
mov eax, DWORD PTR l_lPrevSample
|
|
|
|
sbb edi, edx
|
|
mov DWORD PTR l_lPrevPrevSample, eax
|
|
|
|
imul DWORD PTR cfB1
|
|
|
|
add eax, ecx
|
|
adc edx, edi
|
|
|
|
//>>>>> MOD:PETCHEY
|
|
// shld eax, edx, 2
|
|
//>>>>> should be
|
|
shld edx, eax, 2
|
|
mov eax, edx
|
|
|
|
mov DWORD PTR dMM6+4, eax
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
|
|
movq mm6, QWORD PTR dMM6
|
|
|
|
pop edx
|
|
pop ecx
|
|
pop eax
|
|
#endif // }
|
|
|
|
#define DO_32BIT_MULTIPLY
|
|
#ifndef DO_32BIT_MULTIPLY
|
|
movq mm5, QWORD PTR vfVolume // Volume2, Volume1
|
|
// pand mm5, QWORD PTR ffffMask // 16 bits only.
|
|
#endif
|
|
|
|
// pand mm6, QWORD PTR ffffMask
|
|
|
|
#ifndef DO_32BIT_MULTIPLY
|
|
movq mm4, mm5
|
|
#endif
|
|
mov esi, DWORD PTR [ecx+4]
|
|
|
|
#ifndef DO_32BIT_MULTIPLY
|
|
punpckldq mm4, mm4
|
|
#endif
|
|
|
|
#ifdef DO_32BIT_MULTIPLY
|
|
mov edi, DWORD PTR vfVolume
|
|
imul edi, DWORD PTR dMM6
|
|
sar edi, 13
|
|
mov DWORD PTR dMM4, edi
|
|
|
|
mov edi, DWORD PTR vfVolume
|
|
imul edi, DWORD PTR dMM6+4
|
|
sar edi, 13
|
|
mov DWORD PTR dMM4+4, edi
|
|
|
|
movq mm4, QWORD PTR dMM4
|
|
#else
|
|
pmaddwd mm4, mm6
|
|
psrad mm4, 13
|
|
#endif
|
|
|
|
packssdw mm4, mm0
|
|
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
paddsw mm7, mm4
|
|
movd DWORD PTR [esi+ebx*2], mm7
|
|
|
|
// CHANNEL 2
|
|
|
|
|
|
#ifndef DO_32BIT_MULTIPLY
|
|
punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
|
|
#endif
|
|
mov esi, DWORD PTR [ecx+8]
|
|
|
|
#ifdef DO_32BIT_MULTIPLY
|
|
mov edi, DWORD PTR vfVolume+4
|
|
imul edi, DWORD PTR dMM6
|
|
sar edi, 13
|
|
mov DWORD PTR dMM5, edi
|
|
|
|
mov edi, DWORD PTR vfVolume+4
|
|
imul edi, DWORD PTR dMM6+4
|
|
sar edi, 13
|
|
mov DWORD PTR dMM5+4, edi
|
|
|
|
movq mm5, QWORD PTR dMM5
|
|
#else
|
|
pmaddwd mm5, mm6
|
|
psrad mm5, 13
|
|
#endif
|
|
packssdw mm5, mm0
|
|
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
paddsw mm7, mm5
|
|
movd DWORD PTR [esi+ebx*2], mm7
|
|
|
|
#else // }{ There is noise here, probably due to the signed nature of the multiply.
|
|
|
|
// NOTE the filter is NOT implemented here....
|
|
|
|
psrad mm6, 12 // lMIntrep2, lMInterp
|
|
movq mm5, QWORD PTR MmxVolume
|
|
packssdw mm6, mm0
|
|
punpckldq mm6, mm6
|
|
pmulhw mm6, mm5
|
|
mov esi, DWORD PTR [ecx+4]
|
|
movd mm7, DWORD PTR [esi+ebx*2]
|
|
mov esi, DWORD PTR [ecx+8]
|
|
movd mm4, DWORD PTR [esi+ebx*2]
|
|
punpckldq mm4, mm7
|
|
paddsw mm4, mm6
|
|
movd DWORD PTR [esi+ebx*2], mm4
|
|
punpckhdq mm4, mm4
|
|
mov esi, DWORD PTR [ecx+4]
|
|
movd DWORD PTR [esi+ebx*2], mm4
|
|
|
|
#endif // }
|
|
|
|
add ebx, 2
|
|
|
|
cmp ebx, edx
|
|
jb TwoAtATime
|
|
|
|
movd DWORD PTR pfSamplePos, mm2
|
|
#endif // }
|
|
|
|
$L43865:
|
|
|
|
; dwPosition = pfSamplePos >> 12;
|
|
; dwFract = pfSamplePos & 0xFFF;
|
|
; pfSamplePos += pfPitch;
|
|
; lA = (long) pcWave[dwPosition];
|
|
; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
|
|
|
|
mov esi, DWORD PTR pfPitch
|
|
mov edx, DWORD PTR pfSamplePos
|
|
|
|
mov eax, DWORD PTR pcWave
|
|
mov edi, edx
|
|
|
|
add esi, edx
|
|
and edi, 4095
|
|
|
|
sar edx, 12
|
|
mov DWORD PTR pfSamplePos, esi
|
|
|
|
movsx esi, WORD PTR [eax+edx*2]
|
|
movsx eax, WORD PTR [eax+edx*2+2]
|
|
|
|
sub eax, esi
|
|
|
|
imul eax, edi
|
|
|
|
sar eax, 12
|
|
mov edi, One_Channel_2
|
|
|
|
// ebx, ecx, edx are used in switch branches
|
|
add eax, esi // lMInterp
|
|
|
|
#if 1
|
|
// lMInterp =
|
|
// MulDiv(lMInterp, cfK, (1 << 30))
|
|
// - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30))
|
|
// + MulDiv(m_lPrevSample, cfB1, (1 << 30))
|
|
|
|
push ecx
|
|
imul DWORD PTR cfK // edx:eax
|
|
|
|
mov ecx, eax
|
|
mov eax, DWORD PTR l_lPrevPrevSample
|
|
|
|
mov esi, edx // esi:ecx
|
|
imul DWORD PTR cfB2
|
|
|
|
sub ecx, eax
|
|
mov eax, DWORD PTR l_lPrevSample
|
|
|
|
sbb esi, edx
|
|
mov DWORD PTR l_lPrevPrevSample, eax
|
|
|
|
imul DWORD PTR cfB1
|
|
|
|
add eax, ecx
|
|
// adc esi, edx
|
|
adc edx, esi
|
|
|
|
pop ecx
|
|
// shrd eax, edx, 30
|
|
// mov esi,0x40000000
|
|
// idiv esi
|
|
|
|
//>>>>> MOD:PETCHEY
|
|
// shld eax, edx, 2
|
|
//>>>>> should be
|
|
shld edx, eax, 2
|
|
mov eax, edx
|
|
#endif
|
|
|
|
//>>>>>>>>>>>> removed dp
|
|
#if 0
|
|
// if (lMInterp < -32767) lMInterp = -32767;
|
|
// else if (lMInterp > 32767) lMInterp = 32767;
|
|
|
|
cmp eax, -32767
|
|
jl Less_than
|
|
cmp eax, 32767
|
|
jg Greater_than
|
|
#endif
|
|
|
|
// m_lPrevPrevSample = m_lPrevSample;
|
|
// m_lPrevSample = lMInterp;
|
|
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
jmp edi
|
|
|
|
//>>>>>>>>>>>> removed dp
|
|
#if 0
|
|
Less_than:
|
|
mov eax, -32767
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
jmp edi
|
|
|
|
Greater_than:
|
|
mov eax, 32767
|
|
mov DWORD PTR l_lPrevSample, eax
|
|
jmp edi
|
|
#endif
|
|
|
|
// ONE_CHANNEL
|
|
// lM = lMInterp * vfVolume[dwJ - 1];
|
|
// lM >>= 13;
|
|
// ppBuffer[dwJ - 1][dwI] += (short) lM;
|
|
|
|
$L44009:
|
|
|
|
; 342 : default:
|
|
; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
|
|
|
|
mov edi, DWORD PTR l_nChannels
|
|
|
|
// ecx ppBuffer
|
|
// eax lMInterp
|
|
// edi counter
|
|
// ebx dwI
|
|
|
|
$L43874:
|
|
mov edx, DWORD PTR vfVolume[edi*4-4]
|
|
mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
|
|
|
|
imul edx, eax
|
|
sar edx, 13
|
|
add WORD PTR [esi+ebx*2], dx
|
|
|
|
jno no_overflow
|
|
mov WORD PTR [esi+ebx*2], 0x7fff
|
|
js no_overflow
|
|
mov WORD PTR [esi+ebx*2], 0x8000
|
|
|
|
no_overflow:
|
|
dec edi
|
|
cmp edi, 8
|
|
jne SHORT $L43874
|
|
|
|
lea edi, $L43876
|
|
}
|
|
|
|
#define ONE_CHANNEL_VOLUME(dwJ) \
|
|
_asm { lea edx, vfVolume } \
|
|
_asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
|
|
_asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
|
|
_asm { imul edx, eax } \
|
|
_asm { sar edx, 13 } \
|
|
_asm { add edi, [esp] } \
|
|
\
|
|
_asm { add WORD PTR [esi+ebx*2], dx } \
|
|
_asm { jo FAR overflow_x }
|
|
|
|
|
|
//-------------------------------------------------------------------------
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
// This lovely hack makes sure that all the instructions
|
|
// are the same length for the case (dwJ - 1) == 0. Code depends on this
|
|
// by calculating instruction offsets based on having 8 identical blocks.
|
|
//
|
|
// ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
|
|
//
|
|
//-------------------------------------------------------------------------
|
|
|
|
#define ONE_CHANNEL_VOLUME_1 \
|
|
_asm { lea edx, vfVolume } \
|
|
_asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
|
|
_asm { mov esi, DWORD PTR [ecx + 4] } \
|
|
_asm { imul edx, eax } \
|
|
_asm { sar edx, 13 } \
|
|
_asm { add edi, [esp] } \
|
|
\
|
|
_asm { add WORD PTR [esi+ebx*2], dx } \
|
|
_asm { jo FAR overflow_x }
|
|
|
|
$L43876:
|
|
ONE_CHANNEL_VOLUME(8);
|
|
$L43880:
|
|
ONE_CHANNEL_VOLUME(7);
|
|
ONE_CHANNEL_VOLUME(6);
|
|
ONE_CHANNEL_VOLUME(5);
|
|
ONE_CHANNEL_VOLUME(4);
|
|
ONE_CHANNEL_VOLUME(3);
|
|
ONE_CHANNEL_VOLUME(2);
|
|
ONE_CHANNEL_VOLUME_1;
|
|
#undef ONE_CHANNEL_VOLUME
|
|
#undef ONE_CHANNEL_VOLUME_1
|
|
$L43866:
|
|
_asm {
|
|
mov eax, DWORD PTR a
|
|
inc ebx
|
|
|
|
cmp ebx, eax
|
|
jb $L43865
|
|
|
|
mov edi, DWORD PTR l_nChannels
|
|
$L43867:
|
|
cmp ebx, DWORD PTR dwLength
|
|
jb $L44021
|
|
Exit_$L43841:
|
|
pop eax
|
|
mov DWORD PTR dwI, ebx
|
|
|
|
#ifdef USE_MMX_FILTERED
|
|
mov edi, UseMmx
|
|
cmp edi, UseMmxLabel
|
|
jne NoMmxCleanupLabel
|
|
|
|
emms
|
|
|
|
NoMmxCleanupLabel:
|
|
#endif
|
|
}
|
|
|
|
m_lPrevPrevSample = l_lPrevPrevSample;
|
|
m_lPrevSample = l_lPrevSample;
|
|
#else // }{
|
|
for (dwI = 0; dwI < dwLength;)
|
|
{
|
|
if (pfSamplePos >= pfSampleLength)
|
|
{
|
|
if (pfLoopLength)
|
|
pfSamplePos -= pfLoopLength;
|
|
else
|
|
break;
|
|
}
|
|
dwIncDelta--;
|
|
if (!dwIncDelta)
|
|
{
|
|
dwIncDelta = dwDeltaPeriod;
|
|
pfPFract += pfDeltaPitch;
|
|
pfPitch = pfPFract >> 8;
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
vfVFract[dwJ] += vfDeltaVolume[dwJ];
|
|
vfVolume[dwJ] = vfVFract[dwJ] >> 8;
|
|
}
|
|
|
|
cfK += cfdK;
|
|
cfB1 += cfdB1;
|
|
cfB2 += cfdB2;
|
|
}
|
|
|
|
dwPosition = pfSamplePos >> 12;
|
|
dwFract = pfSamplePos & 0xFFF;
|
|
pfSamplePos += pfPitch;
|
|
|
|
lA = (long) pcWave[dwPosition];
|
|
lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
|
|
|
|
// Filter
|
|
//
|
|
// z = k*s - b1*z1 - b2*b2
|
|
// We store the negative of b1 in the table, so we flip the sign again by
|
|
// adding here
|
|
//
|
|
lMInterp =
|
|
MulDiv(lMInterp, cfK, (1 << 30))
|
|
+ MulDiv(m_lPrevSample, cfB1, (1 << 30))
|
|
- MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
|
|
|
|
//>>>>>>>>>>>> removed dp
|
|
#if 0
|
|
if (lMInterp < -32767) lMInterp = -32767;
|
|
else if (lMInterp > 32767) lMInterp = 32767;
|
|
#endif
|
|
m_lPrevPrevSample = m_lPrevSample;
|
|
m_lPrevSample = lMInterp;
|
|
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
lM = lMInterp * vfVolume[dwJ];
|
|
lM >>= 13; // Signal bumps up to 12 bits.
|
|
|
|
// Keep this around so we can use it to generate new assembly code (see below...)
|
|
#if 1
|
|
{
|
|
long x = ppBuffer[dwJ][dwI];
|
|
|
|
x += lM;
|
|
|
|
if (x != (short)x) {
|
|
if (x > 32767) x = 32767;
|
|
else x = -32768;
|
|
}
|
|
|
|
ppBuffer[dwJ][dwI] = (short)x;
|
|
}
|
|
#else
|
|
ppBuffer[dwJ][dwI] += (short) lM;
|
|
_asm{jno no_oflow}
|
|
ppBuffer[dwJ][dwI] = 0x7fff;
|
|
_asm{js no_oflow}
|
|
ppBuffer[dwJ][dwI] = (short) 0x8000;
|
|
no_oflow: ;
|
|
#endif
|
|
}
|
|
dwI++;
|
|
}
|
|
#endif // }
|
|
|
|
m_pfLastPitch = pfPitch;
|
|
m_pfLastSample = pfSamplePos;
|
|
|
|
m_cfLastK = cfK;
|
|
m_cfLastB1 = cfB1;
|
|
m_cfLastB2 = cfB2;
|
|
|
|
for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
|
|
{
|
|
vfLastVolume[dwJ] = vfVolume[dwJ];
|
|
}
|
|
|
|
return (dwI);
|
|
}
|
|
|
|
#else // }{ all assembly code
|
|
// MixMulti8 (portable C++ version, built when i386 is not defined)
//
// Resamples the wave at m_pnWave, read through a char pointer, using linear
// interpolation and adds the result, scaled by a per-buffer volume, into
// every output buffer.  Each sum is saturated to the signed 16-bit range.
//
//  ppBuffer        Output buffers; ppBuffer[j][i] is sample i of buffer j.
//  dwBufferCount   Number of output buffers.  It is not range checked here
//                  and indexes local arrays of MAX_DAUD_CHAN entries.
//  dwLength        Maximum number of output samples to produce.
//  dwDeltaPeriod   Initial value of the countdown between ramp steps.
//  vfDeltaVolume   Per-buffer amount added to the high resolution volume
//                  at every ramp step.
//  vfLastVolume    In: starting volume of each buffer.  Out: final volume.
//  pfDeltaPitch    Amount added to the high resolution pitch at every
//                  ramp step.
//  pfSampleLength  Position (12 fractional bits) at which the wave ends
//                  or loops.
//  pfLoopLength    Subtracted from the position once it reaches
//                  pfSampleLength.  Zero means stop mixing there.
//
// Returns the number of samples mixed, which is smaller than dwLength when
// a wave without a loop runs out.  m_pfLastPitch and m_pfLastSample are
// updated so that the next call continues where this one stopped.
DWORD CDigitalAudio::MixMulti8(
    short *ppBuffer[],
    DWORD dwBufferCount,
    DWORD dwLength,
    DWORD dwDeltaPeriod,
    VFRACT vfDeltaVolume[],
    VFRACT vfLastVolume[],
    PFRACT pfDeltaPitch,
    PFRACT pfSampleLength,
    PFRACT pfLoopLength)
{
    char * pcWave = (char *) m_pnWave;
    PFRACT pfSamplePos = m_pfLastSample;
    PFRACT pfPitch = m_pfLastPitch;
    PFRACT pfPFract = pfPitch << 8;         // Pitch with 8 extra bits for ramping.
    DWORD dwIncDelta = dwDeltaPeriod;       // Samples left until the next ramp step.
    DWORD dwChan;
    DWORD dwMixed = 0;

    VFRACT vfVolume[MAX_DAUD_CHAN];         // Working volume of each buffer.
    VFRACT vfVFract[MAX_DAUD_CHAN];         // Same, with 8 extra bits for ramping.

    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfVolume[dwChan] = vfLastVolume[dwChan];
        vfVFract[dwChan] = vfVolume[dwChan] << 8;
    }

    while (dwMixed < dwLength)
    {
        // Past the end of the wave: wrap around the loop, or stop if there is none.
        if (pfSamplePos >= pfSampleLength)
        {
            if (!pfLoopLength)
                break;
            pfSamplePos -= pfLoopLength;
        }

        // Step the pitch and volume ramps each time the countdown expires.
        if (--dwIncDelta == 0)
        {
            dwIncDelta = dwDeltaPeriod;
            pfPFract += pfDeltaPitch;
            pfPitch = pfPFract >> 8;
            for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
            {
                vfVFract[dwChan] += vfDeltaVolume[dwChan];
                vfVolume[dwChan] = vfVFract[dwChan] >> 8;
            }
        }

        // Split the position into a sample index and a 12 bit fraction,
        // then advance it for the next output sample.
        DWORD dwPosition = pfSamplePos >> 12;
        VFRACT dwFract = pfSamplePos & 0xFFF;
        pfSamplePos += pfPitch;

        // Linear interpolation between the two neighbouring wave samples.
        long lThis = pcWave[dwPosition];
        long lNext = pcWave[dwPosition + 1];
        long lMInterp = lThis + (((lNext - lThis) * dwFract) >> 12);

        for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
        {
            short *pnDest = ppBuffer[dwChan] + dwMixed;
            long lSum = *pnDest + ((lMInterp * vfVolume[dwChan]) >> 5);

            // Saturate instead of wrapping around.
            if (lSum > 32767)
                lSum = 32767;
            else if (lSum < -32768)
                lSum = -32768;

            *pnDest = (short) lSum;
        }
        dwMixed++;
    }

    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfLastVolume[dwChan] = vfVolume[dwChan];
    }

    m_pfLastPitch = pfPitch;
    m_pfLastSample = pfSamplePos;

    return (dwMixed);
}
|
|
|
|
// MixMulti8Filter (portable C++ version, built when i386 is not defined)
//
// Same job as MixMulti8, with a two-pole filter applied to every
// interpolated sample before it is scaled and mixed.  The wave at m_pnWave
// is read through a char pointer.
//
//  ppBuffer        Output buffers; ppBuffer[j][i] is sample i of buffer j.
//  dwBufferCount   Number of output buffers.  It is not range checked here
//                  and indexes local arrays of MAX_DAUD_CHAN entries.
//  dwLength        Maximum number of output samples to produce.
//  dwDeltaPeriod   Initial value of the countdown between ramp steps.
//  vfDeltaVolume   Per-buffer amount added to the high resolution volume
//                  at every ramp step.
//  vfLastVolume    In: starting volume of each buffer.  Out: final volume.
//  pfDeltaPitch    Amount added to the high resolution pitch at every
//                  ramp step.
//  pfSampleLength  Position (12 fractional bits) at which the wave ends
//                  or loops.
//  pfLoopLength    Subtracted from the position once it reaches
//                  pfSampleLength.  Zero means stop mixing there.
//  cfdK, cfdB1, cfdB2
//                  Amounts added to the filter coefficients at every
//                  ramp step.
//
// Returns the number of samples mixed, which is smaller than dwLength when
// a wave without a loop runs out.  Updates m_pfLastPitch, m_pfLastSample,
// m_cfLastK, m_cfLastB1, m_cfLastB2, m_lPrevSample and m_lPrevPrevSample.
//
// Two changes bring this routine in line with MixMulti16Filter:
//  - The feedback terms now use the same signs (+B1, -B2).  They were
//    inverted here, although both routines load the same m_cfLast*
//    coefficients.
//  - The ramped coefficients are written back to m_cfLastK / m_cfLastB1 /
//    m_cfLastB2, so the ramp carries over to the next call instead of
//    being dropped.
DWORD CDigitalAudio::MixMulti8Filter(
    short *ppBuffer[],
    DWORD dwBufferCount,
    DWORD dwLength,
    DWORD dwDeltaPeriod,
    VFRACT vfDeltaVolume[],
    VFRACT vfLastVolume[],
    PFRACT pfDeltaPitch,
    PFRACT pfSampleLength,
    PFRACT pfLoopLength,
    COEFF cfdK,
    COEFF cfdB1,
    COEFF cfdB2)
{
    DWORD dwI, dwJ;
    DWORD dwPosition;
    long lMInterp;
    long lM;
    DWORD dwIncDelta = dwDeltaPeriod;       // Samples left until the next ramp step.
    VFRACT dwFract;
    char * pcWave = (char *) m_pnWave;
    PFRACT pfSamplePos = m_pfLastSample;
    PFRACT pfPitch = m_pfLastPitch;
    PFRACT pfPFract = pfPitch << 8;         // Pitch with 8 extra bits for ramping.
    COEFF cfK = m_cfLastK;
    COEFF cfB1 = m_cfLastB1;
    COEFF cfB2 = m_cfLastB2;

    VFRACT vfVolume[MAX_DAUD_CHAN];         // Working volume of each buffer.
    VFRACT vfVFract[MAX_DAUD_CHAN];         // Same, with 8 extra bits for ramping.

    for (dwI = 0; dwI < dwBufferCount; dwI++)
    {
        vfVolume[dwI] = vfLastVolume[dwI];
        vfVFract[dwI] = vfVolume[dwI] << 8;
    }

    for (dwI = 0; dwI < dwLength; )
    {
        // Past the end of the wave: wrap around the loop, or stop if there is none.
        if (pfSamplePos >= pfSampleLength)
        {
            if (pfLoopLength)
                pfSamplePos -= pfLoopLength;
            else
                break;
        }

        // Step the pitch, volume and filter ramps each time the countdown expires.
        dwIncDelta--;
        if (!dwIncDelta)
        {
            dwIncDelta = dwDeltaPeriod;
            pfPFract += pfDeltaPitch;
            pfPitch = pfPFract >> 8;
            for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
            {
                vfVFract[dwJ] += vfDeltaVolume[dwJ];
                vfVolume[dwJ] = vfVFract[dwJ] >> 8;
            }

            cfK += cfdK;
            cfB1 += cfdB1;
            cfB2 += cfdB2;
        }

        // Split the position into a sample index and a 12 bit fraction,
        // then advance it for the next output sample.
        dwPosition = pfSamplePos >> 12;
        dwFract = pfSamplePos & 0xFFF;
        pfSamplePos += pfPitch;

        // Linear interpolation between the two neighbouring wave samples.
        lMInterp = pcWave[dwPosition];
        lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;

        // Filter
        //
        // z = k*s - b1*z1 - b2*z2
        // The b1 coefficient is stored negated, so its term is added here.
        // This matches MixMulti16Filter, which loads the same coefficients.
        //
        lMInterp =
              MulDiv(lMInterp, cfK, (1 << 30))
            + MulDiv(m_lPrevSample, cfB1, (1 << 30))
            - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));

        m_lPrevPrevSample = m_lPrevSample;
        m_lPrevSample = lMInterp;

        for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
        {
            lM = lMInterp * vfVolume[dwJ];
            lM >>= 5;

            long x = ppBuffer[dwJ][dwI];

            x += lM;

            // Saturate instead of wrapping around.
            if (x != (short)x) {
                if (x > 32767) x = 32767;
                else  x = -32768;
            }

            ppBuffer[dwJ][dwI] = (short)x;
        }
        dwI++;
    }

    for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
    {
        vfLastVolume[dwJ] = vfVolume[dwJ];
    }

    m_pfLastPitch = pfPitch;
    m_pfLastSample = pfSamplePos;

    // Keep the ramped coefficients for the next call, as MixMulti16Filter does.
    m_cfLastK = cfK;
    m_cfLastB1 = cfB1;
    m_cfLastB2 = cfB2;

    return (dwI);
}
|
|
|
|
// MixMulti16 (portable C++ version, built when i386 is not defined)
//
// Resamples the 16-bit wave at m_pnWave using linear interpolation and adds
// the result, scaled by a per-buffer volume, into every output buffer.
// Each sum is saturated to the signed 16-bit range.
//
//  ppBuffer        Output buffers; ppBuffer[j][i] is sample i of buffer j.
//  dwBufferCount   Number of output buffers.  It is not range checked here
//                  and indexes local arrays of MAX_DAUD_CHAN entries.
//  dwLength        Maximum number of output samples to produce.
//  dwDeltaPeriod   Initial value of the countdown between ramp steps.
//  vfDeltaVolume   Per-buffer amount added to the high resolution volume
//                  at every ramp step.
//  vfLastVolume    In: starting volume of each buffer.  Out: final volume.
//  pfDeltaPitch    Amount added to the high resolution pitch at every
//                  ramp step.
//  pfSampleLength  Position (12 fractional bits) at which the wave ends
//                  or loops.
//  pfLoopLength    Subtracted from the position once it reaches
//                  pfSampleLength.  Zero means stop mixing there.
//
// Returns the number of samples mixed, which is smaller than dwLength when
// a wave without a loop runs out.  m_pfLastPitch and m_pfLastSample are
// updated so that the next call continues where this one stopped.
DWORD CDigitalAudio::MixMulti16(
    short *ppBuffer[],
    DWORD dwBufferCount,
    DWORD dwLength,
    DWORD dwDeltaPeriod,
    VFRACT vfDeltaVolume[],
    VFRACT vfLastVolume[],
    PFRACT pfDeltaPitch,
    PFRACT pfSampleLength,
    PFRACT pfLoopLength)
{
    short * pcWave = m_pnWave;
    PFRACT pfSamplePos = m_pfLastSample;
    PFRACT pfPitch = m_pfLastPitch;
    PFRACT pfPFract = pfPitch << 8;         // Pitch with 8 extra bits for ramping.
    DWORD dwIncDelta = dwDeltaPeriod;       // Samples left until the next ramp step.
    DWORD dwChan;
    DWORD dwMixed = 0;

    VFRACT vfVolume[MAX_DAUD_CHAN];         // Working volume of each buffer.
    VFRACT vfVFract[MAX_DAUD_CHAN];         // Same, with 8 extra bits for ramping.

    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfVolume[dwChan] = vfLastVolume[dwChan];
        vfVFract[dwChan] = vfVolume[dwChan] << 8;
    }

    while (dwMixed < dwLength)
    {
        // Past the end of the wave: wrap around the loop, or stop if there is none.
        if (pfSamplePos >= pfSampleLength)
        {
            if (!pfLoopLength)
                break;
            pfSamplePos -= pfLoopLength;
        }

        // Step the pitch and volume ramps each time the countdown expires.
        if (--dwIncDelta == 0)
        {
            dwIncDelta = dwDeltaPeriod;
            pfPFract += pfDeltaPitch;
            pfPitch = pfPFract >> 8;
            for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
            {
                vfVFract[dwChan] += vfDeltaVolume[dwChan];
                vfVolume[dwChan] = vfVFract[dwChan] >> 8;
            }
        }

        // Split the position into a sample index and a 12 bit fraction,
        // then advance it for the next output sample.
        DWORD dwPosition = pfSamplePos >> 12;
        VFRACT dwFract = pfSamplePos & 0xFFF;
        pfSamplePos += pfPitch;

        // Linear interpolation between the two neighbouring wave samples.
        long lThis = (long) pcWave[dwPosition];
        long lMInterp = (((pcWave[dwPosition + 1] - lThis) * dwFract) >> 12) + lThis;

        for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
        {
            short *pnDest = ppBuffer[dwChan] + dwMixed;

            // Signal bumps up to 12 bits.
            long lSum = *pnDest + ((lMInterp * vfVolume[dwChan]) >> 13);

            // Saturate instead of wrapping around.
            if (lSum > 32767)
                lSum = 32767;
            else if (lSum < -32768)
                lSum = -32768;

            *pnDest = (short) lSum;
        }
        dwMixed++;
    }

    m_pfLastPitch = pfPitch;
    m_pfLastSample = pfSamplePos;

    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfLastVolume[dwChan] = vfVolume[dwChan];
    }

    return (dwMixed);
}
|
|
|
|
// MixMulti16Filter (portable C++ version, built when i386 is not defined)
//
// Same job as MixMulti16, with a two-pole filter applied to every
// interpolated sample before it is scaled and mixed.
//
//  ppBuffer        Output buffers; ppBuffer[j][i] is sample i of buffer j.
//  dwBufferCount   Number of output buffers.  It is not range checked here
//                  and indexes local arrays of MAX_DAUD_CHAN entries.
//  dwLength        Maximum number of output samples to produce.
//  dwDeltaPeriod   Initial value of the countdown between ramp steps.
//  vfDeltaVolume   Per-buffer amount added to the high resolution volume
//                  at every ramp step.
//  vfLastVolume    In: starting volume of each buffer.  Out: final volume.
//  pfDeltaPitch    Amount added to the high resolution pitch at every
//                  ramp step.
//  pfSampleLength  Position (12 fractional bits) at which the wave ends
//                  or loops.
//  pfLoopLength    Subtracted from the position once it reaches
//                  pfSampleLength.  Zero means stop mixing there.
//  cfdK, cfdB1, cfdB2
//                  Amounts added to the filter coefficients at every
//                  ramp step.
//
// Returns the number of samples mixed, which is smaller than dwLength when
// a wave without a loop runs out.  Updates m_pfLastPitch, m_pfLastSample,
// m_cfLastK, m_cfLastB1, m_cfLastB2, m_lPrevSample and m_lPrevPrevSample.
DWORD CDigitalAudio::MixMulti16Filter(
    short *ppBuffer[],
    DWORD dwBufferCount,
    DWORD dwLength,
    DWORD dwDeltaPeriod,
    VFRACT vfDeltaVolume[],
    VFRACT vfLastVolume[],
    PFRACT pfDeltaPitch,
    PFRACT pfSampleLength,
    PFRACT pfLoopLength,
    COEFF cfdK,
    COEFF cfdB1,
    COEFF cfdB2)
{
    short * pcWave = m_pnWave;
    PFRACT pfSamplePos = m_pfLastSample;
    PFRACT pfPitch = m_pfLastPitch;
    PFRACT pfPFract = pfPitch << 8;         // Pitch with 8 extra bits for ramping.
    COEFF cfK = m_cfLastK;
    COEFF cfB1 = m_cfLastB1;
    COEFF cfB2 = m_cfLastB2;
    DWORD dwIncDelta = dwDeltaPeriod;       // Samples left until the next ramp step.
    DWORD dwChan;
    DWORD dwMixed = 0;

    VFRACT vfVolume[MAX_DAUD_CHAN];         // Working volume of each buffer.
    VFRACT vfVFract[MAX_DAUD_CHAN];         // Same, with 8 extra bits for ramping.

    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfVolume[dwChan] = vfLastVolume[dwChan];
        vfVFract[dwChan] = vfVolume[dwChan] << 8;
    }

    while (dwMixed < dwLength)
    {
        // Past the end of the wave: wrap around the loop, or stop if there is none.
        if (pfSamplePos >= pfSampleLength)
        {
            if (!pfLoopLength)
                break;
            pfSamplePos -= pfLoopLength;
        }

        // Step the pitch, volume and filter ramps each time the countdown expires.
        if (--dwIncDelta == 0)
        {
            dwIncDelta = dwDeltaPeriod;
            pfPFract += pfDeltaPitch;
            pfPitch = pfPFract >> 8;
            for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
            {
                vfVFract[dwChan] += vfDeltaVolume[dwChan];
                vfVolume[dwChan] = vfVFract[dwChan] >> 8;
            }

            cfK += cfdK;
            cfB1 += cfdB1;
            cfB2 += cfdB2;
        }

        // Split the position into a sample index and a 12 bit fraction,
        // then advance it for the next output sample.
        DWORD dwPosition = pfSamplePos >> 12;
        VFRACT dwFract = pfSamplePos & 0xFFF;
        pfSamplePos += pfPitch;

        // Linear interpolation between the two neighbouring wave samples.
        long lThis = (long) pcWave[dwPosition];
        long lMInterp = (((pcWave[dwPosition + 1] - lThis) * dwFract) >> 12) + lThis;

        // Filter
        //
        // z = k*s - b1*z1 - b2*z2
        // We store the negative of b1 in the table, so we flip the sign
        // again by adding here.  The result is not range checked.
        //
        long lFiltered = MulDiv(lMInterp, cfK, (1 << 30));
        lFiltered += MulDiv(m_lPrevSample, cfB1, (1 << 30));
        lFiltered -= MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
        lMInterp = lFiltered;

        m_lPrevPrevSample = m_lPrevSample;
        m_lPrevSample = lMInterp;

        for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
        {
            short *pnDest = ppBuffer[dwChan] + dwMixed;

            // Signal bumps up to 12 bits.
            long lSum = *pnDest + ((lMInterp * vfVolume[dwChan]) >> 13);

            // Saturate instead of wrapping around.
            if (lSum > 32767)
                lSum = 32767;
            else if (lSum < -32768)
                lSum = -32768;

            *pnDest = (short) lSum;
        }
        dwMixed++;
    }

    m_pfLastPitch = pfPitch;
    m_pfLastSample = pfSamplePos;

    m_cfLastK = cfK;
    m_cfLastB1 = cfB1;
    m_cfLastB2 = cfB2;

    for (dwChan = 0; dwChan < dwBufferCount; dwChan++)
    {
        vfLastVolume[dwChan] = vfVolume[dwChan];
    }

    return (dwMixed);
}
|
|
|
|
#endif // }
|