Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3999 lines
88 KiB

  1. // Mix.cpp
  2. // Copyright (c) Microsoft Corporation 1996, 1998
  3. // Mix engines for MSSynth
  4. #ifdef DMSYNTH_MINIPORT
  5. #include "common.h"
  6. #define STR_MODULENAME "DMusicMix:"
  7. #else
  8. #include "simple.h"
  9. #include <mmsystem.h>
  10. #include "synth.h"
  11. #endif
  12. ///////////////////////////////////////////////////////
  13. // Modifications
  14. // member m_nChannels => parameter dwBufferCount
  15. //
  16. // Changed number of arguments into Filtered mixers
  17. //
  18. // Remove range checking after filter
  19. #pragma warning(disable : 4101 4102 4146)
  20. #ifdef _ALPHA_
  21. extern "C" {
  22. int __ADAWI(short, short *);
  23. };
  24. #pragma intrinsic(__ADAWI)
  25. #define ALPHA_OVERFLOW 2
  26. #define ALPHA_NEGATIVE 8
  27. #else // !_ALPHA_
  28. // TODO -- overflow detection for ia64 (+ axp64?)
  29. #endif // !_ALPHA_
  30. #ifdef DMSYNTH_MINIPORT
  31. #pragma code_seg("PAGE")
  32. #endif // DMSYNTH_MINIPORT
  33. #define USE_MMX
  34. #define USE_MMX_FILTERED
  35. #ifdef i386 // {
  // MixMulti8
  //
  // Mixes the 8-bit wave addressed by m_pnWave (read through a char pointer)
  // into dwBufferCount 16-bit output buffers, using linear interpolation and
  // per-channel volume scaling.
  //
  // For each output sample the code:
  //   - splits pfSamplePos into an integer position (>> 12) and a 12-bit
  //     fraction (& 0xFFF) and interpolates between pcWave[pos] and
  //     pcWave[pos + 1];
  //   - multiplies the interpolated value by vfVolume[channel], shifts right
  //     by 5 and adds the result into ppBuffer[channel][dwI], clamping to
  //     0x7fff / 0x8000 when the 16-bit add overflows;
  //   - advances pfSamplePos by pfPitch.
  // Every dwDeltaPeriod samples pfDeltaPitch is added to the pitch accumulator
  // (pitch << 8) and vfDeltaVolume[channel] to each volume accumulator
  // (volume << 8).
  //
  // Parameters:
  //   ppBuffer       - array of dwBufferCount output buffers; samples are added
  //                    to the existing contents.
  //   dwBufferCount  - number of output buffers (channels). Exactly 2 with
  //                    m_sfMMXEnabled non-zero selects the MMX loop; more than
  //                    8 runs a loop for the extra channels before the eight
  //                    unrolled blocks.
  //                    NOTE(review): no check against MAX_DAUD_CHAN is visible
  //                    here although vfVolume/vfVFract have that size -
  //                    presumably the caller guarantees it; confirm.
  //   dwLength       - number of output samples requested.
  //   dwDeltaPeriod  - number of samples between pitch/volume ramp steps.
  //   vfDeltaVolume  - per-channel increment applied to the (volume << 8)
  //                    accumulators at each ramp step.
  //   vfLastVolume   - in: starting volume per channel; out: final volume.
  //   pfDeltaPitch   - increment applied to the (pitch << 8) accumulator at
  //                    each ramp step.
  //   pfSampleLength - position (12 fractional bits) at which the wave ends.
  //   pfLoopLength   - subtracted from the position when the end is reached;
  //                    0 means stop mixing at the end of the wave.
  //
  // Returns the output index at which mixing stopped: dwLength, or less if the
  // wave ended and pfLoopLength is 0. m_pfLastPitch and m_pfLastSample are
  // updated on exit.
  //
  // NOTE(review): pfPitch is used as a divisor (idiv) when computing the run
  // length; a zero pitch would fault - presumably never passed; confirm.
  36. DWORD CDigitalAudio::MixMulti8(
  37. short *ppBuffer[],
  38. DWORD dwBufferCount,
  39. DWORD dwLength,
  40. DWORD dwDeltaPeriod,
  41. VFRACT vfDeltaVolume[],
  42. VFRACT vfLastVolume[],
  43. PFRACT pfDeltaPitch,
  44. PFRACT pfSampleLength,
  45. PFRACT pfLoopLength)
  46. {
  47. DWORD dwI, dwJ;
  48. DWORD dwPosition;
  49. long lMInterp;
  50. long lM;
  51. long lA;//, lB;
  52. DWORD dwIncDelta = dwDeltaPeriod;
  53. VFRACT dwFract;
  54. char * pcWave = (char *) m_pnWave;
  55. PFRACT pfSamplePos = m_pfLastSample;
  56. PFRACT pfPitch = m_pfLastPitch;
  57. PFRACT pfPFract = pfPitch << 8;
  58. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  59. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  60. for (dwI = 0; dwI < dwBufferCount; dwI++)
  61. {
  62. vfVolume[dwI] = vfLastVolume[dwI];
  63. vfVFract[dwI] = vfVolume[dwI] << 8;
  64. }
  // The "#if 1" branches below select the hand-written x86 assembly; the
  // matching "#else" branches hold C versions of the same loop that are
  // compiled out.
  65. #if 1 // {
  66. DWORD l_nChannels = dwBufferCount;
  67. #if 1 // {
  68. DWORD a;
  69. DWORD One_Channel_1, One_Channel_2; // Code address locations.
  70. #ifdef USE_MMX // {
  71. typedef __int64 QWORD;
  72. QWORD OneMask = 0x0000000010001000;
  73. QWORD fffMask = 0x00000fff00000fff;
  74. QWORD ffffMask = 0x0000ffff0000ffff;
  75. DWORD UseMmx;
  76. DWORD MmxVolume[2];
  77. int Use_MMX = m_sfMMXEnabled;
  // Pick the target of the later "jmp UseMmx": the MMX two-samples-at-a-time
  // loop when MMX is enabled and there are exactly two channels, otherwise the
  // scalar loop at $L43865.
  78. _asm {
  79. lea edi, $L43865
  80. // Turned off
  81. cmp Use_MMX, 0
  82. je AssignMmxLabel
  83. // != 2 channels
  84. mov esi, DWORD PTR l_nChannels
  85. cmp esi, 2
  86. jne AssignMmxLabel
  87. // Ok, init and use MMX
  88. lea edi, UseMmxLabel
  89. pxor mm0, mm0
  90. movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
  91. AssignMmxLabel:
  92. mov DWORD PTR UseMmx, edi
  93. }
  94. #endif // }
  95. _asm {
  // One_Channel_1 = entry point into the eight unrolled volume-ramp blocks,
  // offset by (8 - channels) * block size. More than 8 channels enters the
  // loop at $L44008 first; 0 channels skips the blocks ($L43860).
  96. mov edi, DWORD PTR l_nChannels
  97. cmp edi, 8
  98. jna Start1
  99. lea esi, $L44008
  100. jmp Do_One_Channel_2
  101. // Put this code more than 127 bytes away from the references.
  // Reached by "jo" after a 16-bit add into the output buffer overflowed.
  // SF set means the wrapped result is negative, so clamp to 0x7fff;
  // otherwise clamp to 0x8000. Then resume at the address held in edi.
  102. overflow_x:
  103. js overflow_y
  104. mov WORD PTR [esi+ebx*2], 0x8000
  105. jmp edi
  106. overflow_y:
  107. mov WORD PTR [esi+ebx*2], 0x7fff
  108. jmp edi
  109. Start1:
  110. test edi, edi
  111. jne Start2
  112. lea esi, $L43860
  113. jmp Do_One_Channel_2
  114. Start2:
  115. lea eax, $L43851
  116. lea edx, $L43853
  117. sub edx, eax
  118. mov esi, 8
  119. sub esi, edi
  120. imul esi, edx
  121. add esi, eax
  122. Do_One_Channel_2:
  123. mov DWORD PTR One_Channel_1, esi
  124. // Create second jump table location.
  // The span of one mix block is pushed here and stays on the stack until the
  // pop at the exit label; each mix block adds it to edi ("add edi, [esp]") so
  // the overflow handlers can resume with "jmp edi".
  125. lea esi, $L43876
  126. lea ecx, $L43880
  127. sub ecx, esi
  128. push ecx // Span between branches.
  129. mov eax, 8
  130. sub eax, DWORD PTR l_nChannels
  131. jge Start3
  132. lea ecx, $L44009
  133. jmp Done_Do_Channel_2
  134. Start3:
  135. cmp eax, 8
  136. jne Start4
  137. lea ecx, $L43866
  138. jmp Done_Do_Channel_2
  139. Start4:
  140. imul ecx, eax
  141. add ecx, esi
  142. Done_Do_Channel_2:
  143. mov DWORD PTR One_Channel_2, ecx
  144. mov ecx, DWORD PTR dwLength
  145. xor ebx, ebx // dwI
  146. test ecx, ecx
  147. jbe Exit_$L43841
  148. mov ecx, DWORD PTR ppBuffer
  149. sub ecx, 4
  150. // ecx == ppBuffer
  151. // ebx == dwI
  152. // edi == l_nChannels
  // Top of the outer loop: wrap or stop at the end of the wave, then count
  // down to the next pitch/volume ramp step.
  153. $L44021:
  154. mov edx, DWORD PTR pfSamplePos
  155. cmp edx, DWORD PTR pfSampleLength
  156. jl SHORT $L43842
  157. mov eax, DWORD PTR pfLoopLength
  158. test eax, eax
  159. je Exit_$L43841
  160. sub edx, eax
  161. mov DWORD PTR pfSamplePos, edx
  162. $L43842:
  163. mov edx, DWORD PTR dwIncDelta
  164. mov eax, DWORD PTR pfPFract
  165. dec edx
  166. mov DWORD PTR dwIncDelta, edx
  167. jne $L43860
  168. mov edx, DWORD PTR dwDeltaPeriod
  169. mov esi, DWORD PTR pfDeltaPitch
  170. mov DWORD PTR dwIncDelta, edx
  171. add eax, esi
  172. mov DWORD PTR pfPFract, eax
  173. sar eax, 8
  174. mov DWORD PTR pfPitch, eax
  175. mov esi, DWORD PTR vfDeltaVolume
  176. jmp One_Channel_1
  177. // ONE_CHANNEL
  178. // vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
  179. // vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
  // More than 8 channels: ramp channels above 8 in a loop, then fall through
  // into the unrolled blocks for channels 8..1.
  180. $L44008:
  181. mov DWORD PTR dwI, ebx
  182. lea ebx, DWORD PTR [edi*4-4]
  183. add edi, -8 ; fffffff8H
  184. $L43849:
  185. lea eax, DWORD PTR vfVFract[ebx]
  186. mov ecx, DWORD PTR [esi+ebx]
  187. sub ebx, 4
  188. add DWORD PTR [eax], ecx
  189. mov eax, DWORD PTR [eax]
  190. sar eax, 8
  191. mov DWORD PTR vfVolume[ebx+4], eax
  192. dec edi
  193. jne SHORT $L43849
  194. mov edi, DWORD PTR l_nChannels
  195. mov ecx, DWORD PTR ppBuffer
  196. mov ebx, DWORD PTR dwI
  197. sub ecx, 4
  198. }
  // One volume-ramp block per channel; esi holds vfDeltaVolume on entry.
  199. #define ONE_CHANNEL_VOLUME(dwJ) \
  200. _asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
  201. _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
  202. _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
  203. _asm { sar eax, 8 }; \
  204. _asm { lea edx, vfVolume }; \
  205. _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
  206. //-------------------------------------------------------------------------
  207. //
  208. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  209. //
  210. // This lovely hack makes sure that all the instructions
  211. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  212. // by calculating instruction offsets based on having 8 identical blocks.
  213. //
  214. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  215. //
  216. //-------------------------------------------------------------------------
  217. #define ONE_CHANNEL_VOLUME_1 \
  218. _asm { mov eax, DWORD PTR vfVFract[0] }; \
  219. _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
  220. _asm { mov DWORD PTR vfVFract[0], eax }; \
  221. _asm { sar eax, 8 }; \
  222. _asm { lea edx, vfVolume }; \
  223. _asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
  224. $L43851:
  225. ONE_CHANNEL_VOLUME(8)
  226. $L43853:
  227. ONE_CHANNEL_VOLUME(7);
  228. ONE_CHANNEL_VOLUME(6);
  229. ONE_CHANNEL_VOLUME(5);
  230. ONE_CHANNEL_VOLUME(4);
  231. ONE_CHANNEL_VOLUME(3);
  232. ONE_CHANNEL_VOLUME(2);
  233. ONE_CHANNEL_VOLUME_1;
  234. #undef ONE_CHANNEL_VOLUME
  235. #undef ONE_CHANNEL_VOLUME_1
  236. $L43860:
  237. _asm {
  // a = number of samples that can be produced before the end of the wave,
  // the end of the output buffer, or the next ramp step - whichever is
  // smallest. dwIncDelta is reduced by (a - 1) to account for the run.
  238. ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
  239. mov esi, DWORD PTR pfPitch
  240. mov eax, DWORD PTR pfSampleLength
  241. dec esi
  242. sub eax, DWORD PTR pfSamplePos
  243. add eax, esi
  244. cdq
  245. idiv DWORD PTR pfPitch
  246. mov edx, DWORD PTR dwLength
  247. sub edx, ebx
  248. cmp edx, eax
  249. jae SHORT $L43863
  250. mov eax, edx
  251. $L43863:
  252. mov edx, DWORD PTR dwIncDelta
  253. cmp edx, eax
  254. jae SHORT $L43864
  255. mov eax, edx
  256. $L43864:
  257. ; 309 :
  258. ; 310 : for (a += dwI; dwI < a; dwI++)
  259. inc edx
  260. sub edx, eax
  261. add eax, ebx
  262. mov DWORD PTR dwIncDelta, edx
  263. cmp ebx, eax
  264. mov DWORD PTR a, eax
  265. jae $L43867
  266. #ifdef USE_MMX // {
  267. // Try to handle two positions at once.
  // edx = a - 3 is the bound for the two-at-a-time loop; the samples left
  // over after it are handled by the scalar loop at $L43865.
  268. lea edx, [eax-3]
  269. cmp ebx, edx
  270. jge $L43865
  271. jmp UseMmx
  272. UseMmxLabel:
  273. // Ok, there are at least two samples to handle.
  274. movd mm1, DWORD PTR pfPitch
  275. psllq mm1, 32 // Pitch, 0
  276. movd mm2, DWORD PTR pfSamplePos
  277. punpckldq mm2, mm2 // SamplePos, SamplePos
  278. paddd mm2, mm1 // SamplePos + Pitch, SamplePos
  279. punpckhdq mm1, mm1 // Pitch, Pitch
  280. pslld mm1, 1 // Pitch * 2, Pitch * 2
  281. mov eax, DWORD PTR pcWave
  282. #if 0
  283. movq mm4, QWORD PTR vfVolume
  284. pand mm4, QWORD PTR ffffMask
  285. movq mm5, mm4
  286. pslld mm4, 16
  287. por mm4, mm5
  288. psllw mm4, 3
  289. movq QWORD PTR MmxVolume, mm4
  290. #endif
  291. TwoAtATime:
  292. ; dwPosition = pfSamplePos >> 12;
  293. ; dwFract = pfSamplePos & 0xFFF;
  294. ; pfSamplePos += pfPitch;
  295. movq mm4, mm2
  296. psrad mm4, 12 // dwPosition + Pitch, dwPosition
  297. ; lA = (long) pcWave[dwPosition];
  298. ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
  299. movd esi, mm4 // dwPosition
  300. punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
  301. // movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
  302. // Instead for byte codes
  303. mov si, WORD PTR [eax+esi]
  304. movd mm6, esi
  305. punpcklbw mm5, mm6
  306. psraw mm5, 8
  307. movd esi, mm4
  308. // movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
  309. // Instead for byte codes
  310. mov si, WORD PTR [eax+esi]
  311. movd mm6, esi
  312. punpcklbw mm4, mm6
  313. psraw mm4, 8
  314. // This code could be combined with code above, a bit.
  315. punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
  316. movq mm4, mm2
  317. pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
  318. packssdw mm4, mm0
  319. movq mm6, mm3
  320. psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
  321. punpcklwd mm6, mm4
  322. paddd mm2, mm1 // Next iteration
  323. pmaddwd mm6, mm5
  324. #if 1
  325. movq mm5, QWORD PTR vfVolume // Volume2, Volume1
  326. psrad mm6, 12 // lMInterp2, lMInterp
  327. // pand mm6, QWORD PTR ffffMask
  328. // pand mm5, QWORD PTR ffffMask // 16 bits only.
  329. movq mm4, mm5
  330. mov esi, DWORD PTR [ecx+4]
  331. punpckldq mm4, mm4
  332. pmaddwd mm4, mm6
  333. psrad mm4, 5
  334. packssdw mm4, mm0
  335. movd mm7, DWORD PTR [esi+ebx*2]
  336. paddsw mm7, mm4
  337. movd DWORD PTR [esi+ebx*2], mm7
  338. // CHANNEL 2
  339. punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
  340. mov esi, DWORD PTR [ecx+8]
  341. pmaddwd mm5, mm6
  342. psrad mm5, 5
  343. packssdw mm5, mm0
  344. movd mm7, DWORD PTR [esi+ebx*2]
  345. paddsw mm7, mm5
  346. movd DWORD PTR [esi+ebx*2], mm7
  347. #else // There is noise here, probably due to the signed nature of the multiply.
  348. psrad mm6, 12 // lMInterp2, lMInterp
  349. movq mm5, QWORD PTR MmxVolume
  350. packssdw mm6, mm0
  351. punpckldq mm6, mm6
  352. pmulhw mm6, mm5
  353. mov esi, DWORD PTR [ecx+4]
  354. movd mm7, DWORD PTR [esi+ebx*2]
  355. mov esi, DWORD PTR [ecx+8]
  356. movd mm4, DWORD PTR [esi+ebx*2]
  357. punpckldq mm4, mm7
  358. paddsw mm4, mm6
  359. movd DWORD PTR [esi+ebx*2], mm4
  360. punpckhdq mm4, mm4
  361. mov esi, DWORD PTR [ecx+4]
  362. movd DWORD PTR [esi+ebx*2], mm4
  363. #endif
  364. add ebx, 2
  365. cmp ebx, edx
  366. jb TwoAtATime
  367. movd DWORD PTR pfSamplePos, mm2
  368. #endif // }
  // Scalar path: interpolate one sample into eax, then jump into the mix
  // blocks at One_Channel_2 (edi tracks the current block address).
  369. $L43865:
  370. ; dwPosition = pfSamplePos >> 12;
  371. ; dwFract = pfSamplePos & 0xFFF;
  372. ; pfSamplePos += pfPitch;
  373. ; lA = (long) pcWave[dwPosition];
  374. ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  375. mov esi, DWORD PTR pfPitch
  376. mov edx, DWORD PTR pfSamplePos
  377. mov eax, DWORD PTR pcWave
  378. mov edi, edx
  379. add esi, edx
  380. and edi, 4095
  381. sar edx, 12
  382. mov DWORD PTR pfSamplePos, esi
  383. movsx esi, BYTE PTR [eax+edx]
  384. movsx eax, BYTE PTR [eax+edx+1]
  385. sub eax, esi
  386. imul eax, edi
  387. sar eax, 12
  388. mov edi, One_Channel_2
  389. // ebx, ecx, edx are used in switch branches
  390. add eax, esi // lMInterp
  391. jmp edi
  392. // ONE_CHANNEL
  393. // lM = lMInterp * vfVolume[dwJ - 1];
  394. // lM >>= 5;
  395. // ppBuffer[dwJ - 1][dwI] += (short) lM;
  396. $L44009:
  397. ; 342 : default:
  398. ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  399. mov edi, DWORD PTR l_nChannels
  400. // ecx ppBuffer
  401. // eax lMInterp
  402. // edi counter
  403. // ebx dwI
  404. $L43874:
  405. mov edx, DWORD PTR vfVolume[edi*4-4]
  406. mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
  407. imul edx, eax
  408. sar edx, 5
  409. add WORD PTR [esi+ebx*2], dx
  410. jno no_overflow
  411. mov WORD PTR [esi+ebx*2], 0x7fff
  412. js no_overflow
  413. mov WORD PTR [esi+ebx*2], 0x8000
  414. no_overflow:
  415. dec edi
  416. cmp edi, 8
  417. jne SHORT $L43874
  418. lea edi, $L43876
  419. }
  // One mix block per channel: scale eax (lMInterp) by the channel volume,
  // add it into the output buffer and branch to overflow_x on overflow.
  420. #define ONE_CHANNEL_VOLUME(dwJ) \
  421. _asm { lea edx, vfVolume } \
  422. _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
  423. _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
  424. _asm { imul edx, eax } \
  425. _asm { sar edx, 5 } \
  426. _asm { add edi, [esp] } \
  427. \
  428. _asm { add WORD PTR [esi+ebx*2], dx } \
  429. _asm { jo FAR overflow_x }
  430. //-------------------------------------------------------------------------
  431. //
  432. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  433. //
  434. // This lovely hack makes sure that all the instructions
  435. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  436. // by calculating instruction offsets based on having 8 identical blocks.
  437. //
  438. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  439. //
  440. //-------------------------------------------------------------------------
  441. #define ONE_CHANNEL_VOLUME_1 \
  442. _asm { lea edx, vfVolume } \
  443. _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
  444. _asm { mov esi, DWORD PTR [ecx + 4] } \
  445. _asm { imul edx, eax } \
  446. _asm { sar edx, 5 } \
  447. _asm { add edi, [esp] } \
  448. \
  449. _asm { add WORD PTR [esi+ebx*2], dx } \
  450. _asm { jo FAR overflow_x }
  451. $L43876:
  452. ONE_CHANNEL_VOLUME(8);
  453. $L43880:
  454. ONE_CHANNEL_VOLUME(7);
  455. ONE_CHANNEL_VOLUME(6);
  456. ONE_CHANNEL_VOLUME(5);
  457. ONE_CHANNEL_VOLUME(4);
  458. ONE_CHANNEL_VOLUME(3);
  459. ONE_CHANNEL_VOLUME(2);
  460. ONE_CHANNEL_VOLUME_1;
  461. #undef ONE_CHANNEL_VOLUME
  462. #undef ONE_CHANNEL_VOLUME_1
  463. $L43866:
  464. _asm {
  465. mov eax, DWORD PTR a
  466. inc ebx
  467. cmp ebx, eax
  468. jb $L43865
  469. mov edi, DWORD PTR l_nChannels
  470. $L43867:
  471. cmp ebx, DWORD PTR dwLength
  472. jb $L44021
  473. Exit_$L43841:
  // Discard the block span pushed during setup and publish the sample count.
  474. pop eax
  475. mov DWORD PTR dwI, ebx
  476. #ifdef USE_MMX
  477. mov edi, UseMmx
  478. cmp edi, UseMmxLabel
  479. jne NoMmxCleanupLabel
  480. emms
  481. NoMmxCleanupLabel:
  482. #endif
  483. }
  // C version of the loop above; compiled out by the enclosing "#if 1".
  484. #else // }{
  485. for (dwI = 0; dwI < dwLength;)
  486. {
  487. if (pfSamplePos >= pfSampleLength)
  488. {
  489. if (pfLoopLength)
  490. pfSamplePos -= pfLoopLength;
  491. else
  492. break;
  493. }
  494. dwIncDelta--;
  495. if (!dwIncDelta)
  496. {
  497. dwIncDelta = dwDeltaPeriod;
  498. pfPFract += pfDeltaPitch;
  499. pfPitch = pfPFract >> 8;
  500. #if 1
  501. #define ONE_CHANNEL_VOLUME(dwJ) \
  502. vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1]; \
  503. vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
  504. switch (l_nChannels)
  505. {
  506. default:
  507. for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  508. {
  509. ONE_CHANNEL_VOLUME(dwJ);
  510. }
  511. case 8: ONE_CHANNEL_VOLUME(8);
  512. case 7: ONE_CHANNEL_VOLUME(7);
  513. case 6: ONE_CHANNEL_VOLUME(6);
  514. case 5: ONE_CHANNEL_VOLUME(5);
  515. case 4: ONE_CHANNEL_VOLUME(4);
  516. case 3: ONE_CHANNEL_VOLUME(3);
  517. case 2: ONE_CHANNEL_VOLUME(2);
  518. case 1: ONE_CHANNEL_VOLUME(1);
  519. case 0:;
  520. }
  521. #undef ONE_CHANNEL_VOLUME
  522. #else
  523. for (dwJ = 0; dwJ < l_nChannels; dwJ++)
  524. {
  525. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  526. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  527. }
  528. #endif
  529. }
  530. #if 1 // {
  531. DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
  532. DWORD b = dwLength - dwI;
  533. if (b < a) a = b;
  534. if (dwIncDelta < a) a = dwIncDelta;
  535. dwIncDelta -= a - 1;
  536. a += dwI;
  537. for (; dwI < a; dwI++)
  538. {
  539. dwPosition = pfSamplePos >> 12;
  540. dwFract = pfSamplePos & 0xFFF;
  541. pfSamplePos += pfPitch;
  542. lA = (long) pcWave[dwPosition];
  543. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  544. #if 1 // {
  545. #if 1
  546. #define ONE_CHANNEL_VOLUME(dwJ) \
  547. { \
  548. lM = lMInterp * vfVolume[dwJ - 1]; \
  549. lM >>= 5; \
  550. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  551. long b = ppBuffer[dwJ - 1][dwI]; \
  552. if ((short)b != b) { \
  553. if ((long)b < 0) b = 0x8000; \
  554. else b = 0x7fff; \
  555. ppBuffer[dwJ - 1][dwI] = (short) b; \
  556. } \
  557. }
  558. #else
  559. #define ONE_CHANNEL_VOLUME(dwJ) \
  560. { \
  561. lM = lMInterp * vfVolume[dwJ - 1]; \
  562. lM >>= 5; \
  563. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  564. }
  565. #endif
  566. switch (l_nChannels)
  567. {
  568. default:
  569. for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  570. {
  571. ONE_CHANNEL_VOLUME(dwJ);
  572. }
  573. case 8: ONE_CHANNEL_VOLUME(8);
  574. case 7: ONE_CHANNEL_VOLUME(7);
  575. case 6: ONE_CHANNEL_VOLUME(6);
  576. case 5: ONE_CHANNEL_VOLUME(5);
  577. case 4: ONE_CHANNEL_VOLUME(4);
  578. case 3: ONE_CHANNEL_VOLUME(3);
  579. case 2: ONE_CHANNEL_VOLUME(2);
  580. case 1: ONE_CHANNEL_VOLUME(1);
  581. case 0:;
  582. }
  583. #undef ONE_CHANNEL_VOLUME
  584. #else // }{
  585. for (dwJ = 0; dwJ < l_nChannels; dwJ++)
  586. {
  587. lM = lMInterp * vfVolume[dwJ];
  588. lM >>= 5; // Signal bumps up to 12 bits.
  589. // Keep this around so we can use it to generate new assembly code (see below...)
  590. #if 1
  591. {
  592. long x = ppBuffer[dwJ][dwI];
  593. x += lM;
  594. if (x != (short)x) {
  595. if (x > 32767) x = 32767;
  596. else x = -32768;
  597. }
  598. ppBuffer[dwJ][dwI] = (short)x;
  599. }
  600. #else
  601. ppBuffer[dwJ][dwI] += (short) lM;
  602. _asm{jno no_oflow}
  603. ppBuffer[dwJ][dwI] = 0x7fff;
  604. _asm{js no_oflow}
  605. ppBuffer[dwJ][dwI] = (short) 0x8000;
  606. no_oflow: ;
  607. #endif
  608. }
  609. #endif // }
  610. }
  611. #else // }{
  612. dwPosition = pfSamplePos >> 12;
  613. dwFract = pfSamplePos & 0xFFF;
  614. pfSamplePos += pfPitch;
  615. lA = (long) pcWave[dwPosition];
  616. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  617. #if 1
  618. #if 1
  619. #define ONE_CHANNEL_VOLUME(dwJ) \
  620. { \
  621. lM = lMInterp * vfVolume[dwJ - 1]; \
  622. lM >>= 5; \
  623. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  624. long b = ppBuffer[dwJ - 1][dwI]; \
  625. if ((short)b != b) { \
  626. if ((long)b < 0) b = 0x8000; \
  627. else b = 0x7fff; \
  628. ppBuffer[dwJ - 1][dwI] = (short) b; \
  629. } \
  630. }
  631. #else
  632. #define ONE_CHANNEL_VOLUME(dwJ) \
  633. { \
  634. lM = lMInterp * vfVolume[dwJ - 1]; \
  635. lM >>= 5; \
  636. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  637. }
  638. #endif
  639. switch (l_nChannels)
  640. {
  641. default:
  642. for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  643. {
  644. ONE_CHANNEL_VOLUME(dwJ);
  645. }
  646. case 8: ONE_CHANNEL_VOLUME(8);
  647. case 7: ONE_CHANNEL_VOLUME(7);
  648. case 6: ONE_CHANNEL_VOLUME(6);
  649. case 5: ONE_CHANNEL_VOLUME(5);
  650. case 4: ONE_CHANNEL_VOLUME(4);
  651. case 3: ONE_CHANNEL_VOLUME(3);
  652. case 2: ONE_CHANNEL_VOLUME(2);
  653. case 1: ONE_CHANNEL_VOLUME(1);
  654. case 0:;
  655. }
  656. #undef ONE_CHANNEL_VOLUME
  657. #else
  658. for (dwJ = 0; dwJ < l_nChannels; dwJ++)
  659. {
  660. lM = lMInterp * vfVolume[dwJ];
  661. lM >>= 5; // Signal bumps up to 12 bits.
  662. // Keep this around so we can use it to generate new assembly code (see below...)
  663. #if 1
  664. {
  665. long x = ppBuffer[dwJ][dwI];
  666. x += lM;
  667. if (x != (short)x) {
  668. if (x > 32767) x = 32767;
  669. else x = -32768;
  670. }
  671. ppBuffer[dwJ][dwI] = (short)x;
  672. }
  673. #else
  674. ppBuffer[dwJ][dwI] += (short) lM;
  675. _asm{jno no_oflow}
  676. ppBuffer[dwJ][dwI] = 0x7fff;
  677. _asm{js no_oflow}
  678. ppBuffer[dwJ][dwI] = (short) 0x8000;
  679. no_oflow: ;
  680. #endif
  681. }
  682. #endif
  683. dwI++;
  684. #endif // }
  685. }
  686. #endif // }
  // Plain per-sample C loop; compiled out by the outermost "#if 1".
  687. #else // }{
  688. for (dwI = 0; dwI < dwLength; )
  689. {
  690. if (pfSamplePos >= pfSampleLength)
  691. {
  692. if (pfLoopLength)
  693. pfSamplePos -= pfLoopLength;
  694. else
  695. break;
  696. }
  697. dwIncDelta--;
  698. if (!dwIncDelta)
  699. {
  700. dwIncDelta = dwDeltaPeriod;
  701. pfPFract += pfDeltaPitch;
  702. pfPitch = pfPFract >> 8;
  703. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  704. {
  705. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  706. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  707. }
  708. }
  709. dwPosition = pfSamplePos >> 12;
  710. dwFract = pfSamplePos & 0xFFF;
  711. pfSamplePos += pfPitch;
  712. lMInterp = pcWave[dwPosition]; // pcWave
  713. lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
  714. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  715. {
  716. lM = lMInterp * vfVolume[dwJ];
  717. lM >>= 5;
  718. // Keep this around so we can use it to generate new assembly code (see below...)
  719. #if 1
  720. {
  721. long x = ppBuffer[dwJ][dwI];
  722. x += lM;
  723. if (x != (short)x) {
  724. if (x > 32767) x = 32767;
  725. else x = -32768;
  726. }
  727. ppBuffer[dwJ][dwI] = (short)x;
  728. }
  729. #else
  730. ppBuffer[dwJ][dwI] += (short) lM;
  731. _asm{jno no_oflow}
  732. ppBuffer[dwJ][dwI] = 0x7fff;
  733. _asm{js no_oflow}
  734. ppBuffer[dwJ][dwI] = (short) 0x8000;
  735. no_oflow: ;
  736. #endif
  737. }
  738. dwI++;
  739. }
  740. #endif // }
  // Hand the final volumes, pitch and sample position back for the next call.
  741. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  742. {
  743. vfLastVolume[dwJ] = vfVolume[dwJ];
  744. }
  745. m_pfLastPitch = pfPitch;
  746. m_pfLastSample = pfSamplePos;
  747. return (dwI);
  748. }
  749. DWORD CDigitalAudio::MixMulti8Filter(
  750. short *ppBuffer[],
  751. DWORD dwBufferCount,
  752. DWORD dwLength,
  753. DWORD dwDeltaPeriod,
  754. VFRACT vfDeltaVolume[],
  755. VFRACT vfLastVolume[],
  756. PFRACT pfDeltaPitch,
  757. PFRACT pfSampleLength,
  758. PFRACT pfLoopLength,
  759. COEFF cfdK,
  760. COEFF cfdB1,
  761. COEFF cfdB2)
  762. {
  763. DWORD dwI, dwJ;
  764. DWORD dwPosition;
  765. long lMInterp;
  766. long lM;
  767. DWORD dwIncDelta = dwDeltaPeriod;
  768. VFRACT dwFract;
  769. char * pcWave = (char *) m_pnWave;
  770. PFRACT pfSamplePos = m_pfLastSample;
  771. PFRACT pfPitch = m_pfLastPitch;
  772. PFRACT pfPFract = pfPitch << 8;
  773. COEFF cfK = m_cfLastK;
  774. COEFF cfB1 = m_cfLastB1;
  775. COEFF cfB2 = m_cfLastB2;
  776. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  777. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  778. DWORD dMM6[2];
  779. for (dwI = 0; dwI < dwBufferCount; dwI++)
  780. {
  781. vfVolume[dwI] = vfLastVolume[dwI];
  782. vfVFract[dwI] = vfVolume[dwI] << 8;
  783. }
  784. #if 1 // {
  785. DWORD l_nChannels = dwBufferCount;
  786. DWORD a;
  787. DWORD One_Channel_1, One_Channel_2; // Code address locations.
  788. long l_lPrevPrevSample = m_lPrevPrevSample, l_lPrevSample = m_lPrevSample;
  789. #ifdef USE_MMX_FILTERED // {
  790. typedef __int64 QWORD;
  791. QWORD OneMask = 0x0000000010001000;
  792. QWORD fffMask = 0x00000fff00000fff;
  793. QWORD ffffMask = 0x0000ffff0000ffff;
  794. DWORD UseMmx;
  795. DWORD MmxVolume[2];
  796. int Use_MMX = m_sfMMXEnabled;
  797. _asm {
  798. lea edi, $L43865
  799. // Turned off
  800. cmp Use_MMX, 0
  801. je AssignMmxLabel
  802. // != 2 channels
  803. mov esi, DWORD PTR l_nChannels
  804. cmp esi, 2
  805. jne AssignMmxLabel
  806. // Ok, init and use MMX
  807. lea edi, UseMmxLabel
  808. pxor mm0, mm0
  809. movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
  810. AssignMmxLabel:
  811. mov DWORD PTR UseMmx, edi
  812. }
  813. #endif // }
  814. _asm {
  815. mov edi, DWORD PTR l_nChannels
  816. cmp edi, 8
  817. jna Start1
  818. lea esi, $L44008
  819. jmp Do_One_Channel_2
  820. // Put this code more than 127 bytes away from the references.
  821. overflow_x:
  822. js overflow_y
  823. mov WORD PTR [esi+ebx*2], 0x8000
  824. jmp edi
  825. overflow_y:
  826. mov WORD PTR [esi+ebx*2], 0x7fff
  827. jmp edi
  828. Start1:
  829. test edi, edi
  830. jne Start2
  831. lea esi, $L43860
  832. jmp Do_One_Channel_2
  833. Start2:
  834. lea eax, $L43851
  835. lea edx, $L43853
  836. sub edx, eax
  837. mov esi, 8
  838. sub esi, edi
  839. imul esi, edx
  840. add esi, eax
  841. Do_One_Channel_2:
  842. mov DWORD PTR One_Channel_1, esi
  843. // Create second jump table location.
  844. lea esi, $L43876
  845. lea ecx, $L43880
  846. sub ecx, esi
  847. push ecx // Span between branches.
  848. mov eax, 8
  849. sub eax, DWORD PTR l_nChannels
  850. jge Start3
  851. lea ecx, $L44009
  852. jmp Done_Do_Channel_2
  853. Start3:
  854. cmp eax, 8
  855. jne Start4
  856. lea ecx, $L43866
  857. jmp Done_Do_Channel_2
  858. Start4:
  859. imul ecx, eax
  860. add ecx, esi
  861. Done_Do_Channel_2:
  862. mov DWORD PTR One_Channel_2, ecx
  863. mov ecx, DWORD PTR dwLength
  864. xor ebx, ebx // dwI
  865. test ecx, ecx
  866. jbe Exit_$L43841
  867. mov ecx, DWORD PTR ppBuffer
  868. sub ecx, 4
  869. // ecx == ppBuffer
  870. // ebx == dwI
  871. // edi == l_nChannels
  872. $L44021:
  873. mov edx, DWORD PTR pfSamplePos
  874. cmp edx, DWORD PTR pfSampleLength
  875. jl SHORT $L43842
  876. mov eax, DWORD PTR pfLoopLength
  877. test eax, eax
  878. je Exit_$L43841
  879. sub edx, eax
  880. mov DWORD PTR pfSamplePos, edx
  881. $L43842:
  882. mov edx, DWORD PTR dwIncDelta
  883. mov eax, DWORD PTR pfPFract
  884. dec edx
  885. mov DWORD PTR dwIncDelta, edx
  886. jne $L43860
  887. mov edx, DWORD PTR dwDeltaPeriod
  888. mov esi, DWORD PTR pfDeltaPitch
  889. mov DWORD PTR dwIncDelta, edx
  890. add eax, esi
  891. mov DWORD PTR pfPFract, eax
  892. sar eax, 8
  893. mov DWORD PTR pfPitch, eax
  894. mov esi, DWORD PTR vfDeltaVolume
  895. jmp One_Channel_1
  896. // ONE_CHANNEL
  897. // vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
  898. // vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
  899. $L44008:
  900. mov DWORD PTR dwI, ebx
  901. lea ebx, DWORD PTR [edi*4-4]
  902. add edi, -8 ; fffffff8H
  903. $L43849:
  904. lea eax, DWORD PTR vfVFract[ebx]
  905. mov ecx, DWORD PTR [esi+ebx]
  906. sub ebx, 4
  907. add DWORD PTR [eax], ecx
  908. mov eax, DWORD PTR [eax]
  909. sar eax, 8
  910. mov DWORD PTR vfVolume[ebx+4], eax
  911. dec edi
  912. jne SHORT $L43849
  913. mov edi, DWORD PTR l_nChannels
  914. mov ecx, DWORD PTR ppBuffer
  915. mov ebx, DWORD PTR dwI
  916. sub ecx, 4
  917. }
  918. #define ONE_CHANNEL_VOLUME(dwJ) \
  919. _asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
  920. _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
  921. _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
  922. _asm { sar eax, 8 }; \
  923. _asm { lea edx, vfVolume }; \
  924. _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
  925. //-------------------------------------------------------------------------
  926. //
  927. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  928. //
  929. // This lovely hack makes sure that all the instructions
  930. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  931. // by calculating instruction offsets based on having 8 identical blocks.
  932. //
  933. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  934. //
  935. //-------------------------------------------------------------------------
  936. #define ONE_CHANNEL_VOLUME_1 \
  937. _asm { mov eax, DWORD PTR vfVFract[0] }; \
  938. _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
  939. _asm { mov DWORD PTR vfVFract[0], eax }; \
  940. _asm { sar eax, 8 }; \
  941. _asm { lea edx, vfVolume }; \
  942. _asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
  943. $L43851:
  944. ONE_CHANNEL_VOLUME(8)
  945. $L43853:
  946. ONE_CHANNEL_VOLUME(7);
  947. ONE_CHANNEL_VOLUME(6);
  948. ONE_CHANNEL_VOLUME(5);
  949. ONE_CHANNEL_VOLUME(4);
  950. ONE_CHANNEL_VOLUME(3);
  951. ONE_CHANNEL_VOLUME(2);
  952. ONE_CHANNEL_VOLUME_1;
  953. #undef ONE_CHANNEL_VOLUME
  954. #undef ONE_CHANNEL_VOLUME_1
  955. _asm {
  956. // cfK += cfdK;
  957. // cfB1 += cfdB1;
  958. // cfB2 += cfdB2;
  959. mov eax, DWORD PTR cfdK
  960. mov edx, DWORD PTR cfdB1
  961. mov esi, DWORD PTR cfdB2
  962. add DWORD PTR cfK, eax
  963. add DWORD PTR cfB1, edx
  964. add DWORD PTR cfB2, esi
  965. $L43860:
  966. ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
  967. mov esi, DWORD PTR pfPitch
  968. mov eax, DWORD PTR pfSampleLength
  969. dec esi
  970. sub eax, DWORD PTR pfSamplePos
  971. add eax, esi
  972. cdq
  973. idiv DWORD PTR pfPitch
  974. mov edx, DWORD PTR dwLength
  975. sub edx, ebx
  976. cmp edx, eax
  977. jae SHORT $L43863
  978. mov eax, edx
  979. $L43863:
  980. mov edx, DWORD PTR dwIncDelta
  981. cmp edx, eax
  982. jae SHORT $L43864
  983. mov eax, edx
  984. $L43864:
  985. ; 309 :
  986. ; 310 : for (a += dwI; dwI < a; dwI++)
  987. inc edx
  988. sub edx, eax
  989. add eax, ebx
  990. mov DWORD PTR dwIncDelta, edx
  991. cmp ebx, eax
  992. mov DWORD PTR a, eax
  993. jae $L43867
  994. #ifdef USE_MMX_FILTERED // {
  995. // Try to handle two positions at once.
  996. lea edx, [eax-3]
  997. cmp ebx, edx
  998. jge $L43865
  999. jmp UseMmx
  1000. UseMmxLabel:
  1001. // Ok, there are at least two samples to handle.
  1002. movd mm1, DWORD PTR pfPitch
  1003. psllq mm1, 32 // Pitch, 0
  1004. movd mm2, DWORD PTR pfSamplePos
  1005. punpckldq mm2, mm2 // SamplePos, SamplePos
  1006. paddd mm2, mm1 // SamplePos + Pitch, SamplePos
  1007. punpckhdq mm1, mm1 // Pitch, Pitch
  1008. pslld mm1, 1 // Pitch * 2, Pitch * 2
  1009. mov eax, DWORD PTR pcWave
  1010. #if 0
  1011. movq mm4, QWORD PTR vfVolume
  1012. pand mm4, QWORD PTR ffffMask
  1013. movq mm5, mm4
  1014. pslld mm4, 16
  1015. por mm4, mm5
  1016. psllw mm4, 3
  1017. movq QWORD PTR MmxVolume, mm4
  1018. #endif
  1019. TwoAtATime:
  1020. ; dwPosition = pfSamplePos >> 12;
  1021. ; dwFract = pfSamplePos & 0xFFF;
  1022. ; pfSamplePos += pfPitch;
  1023. movq mm4, mm2
  1024. psrad mm4, 12 // dwPosition + Pitch, dwPosition
  1025. ; lA = (long) pcWave[dwPosition];
  1026. ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
  1027. movd esi, mm4 // dwPosition
  1028. punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
  1029. // movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
  1030. // Instead for byte codes
  1031. mov si, WORD PTR [eax+esi]
  1032. movd mm6, esi
  1033. punpcklbw mm5, mm6
  1034. psraw mm5, 8
  1035. movd esi, mm4
  1036. // movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
  1037. // Instead for byte codes
  1038. mov si, WORD PTR [eax+esi]
  1039. movd mm6, esi
  1040. punpcklbw mm4, mm6
  1041. psraw mm4, 8
  1042. // This code could be combined with code above, a bit.
  1043. punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
  1044. movq mm4, mm2
  1045. pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
  1046. packssdw mm4, mm0
  1047. movq mm6, mm3
  1048. psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
  1049. punpcklwd mm6, mm4
  1050. paddd mm2, mm1 // Next iteration
  1051. pmaddwd mm6, mm5
  1052. #if 1
  1053. psrad mm6, 12 // lMIntrep2, lMInterp
  1054. #if 1
  1055. // eax, ebx, ecx, edx, esi are used. edi is free...
  1056. push eax
  1057. push ecx
  1058. push edx
  1059. movq QWORD PTR dMM6, mm6
  1060. mov eax, DWORD PTR dMM6
  1061. imul DWORD PTR cfK // edx:eax
  1062. mov ecx, eax
  1063. mov eax, DWORD PTR l_lPrevPrevSample
  1064. mov edi, edx // esi:ecx
  1065. imul DWORD PTR cfB2
  1066. sub ecx, eax
  1067. mov eax, DWORD PTR l_lPrevSample
  1068. sbb edi, edx
  1069. mov DWORD PTR l_lPrevPrevSample, eax
  1070. imul DWORD PTR cfB1
  1071. add eax, ecx
  1072. adc edx, edi
  1073. //>>>>> MOD:PETCHEY
  1074. // shld eax, edx, 2
  1075. //>>>>> should be
  1076. shld edx, eax, 2
  1077. mov eax, edx
  1078. mov DWORD PTR dMM6, eax
  1079. mov DWORD PTR l_lPrevSample, eax
  1080. // 2nd sample
  1081. mov eax, DWORD PTR dMM6+4
  1082. imul DWORD PTR cfK // edx:eax
  1083. mov ecx, eax
  1084. mov eax, DWORD PTR l_lPrevPrevSample
  1085. mov edi, edx // esi:ecx
  1086. imul DWORD PTR cfB2
  1087. sub ecx, eax
  1088. mov eax, DWORD PTR l_lPrevSample
  1089. sbb edi, edx
  1090. mov DWORD PTR l_lPrevPrevSample, eax
  1091. imul DWORD PTR cfB1
  1092. add eax, ecx
  1093. adc edx, edi
  1094. //>>>>> MOD:PETCHEY
  1095. // shld eax, edx, 2
  1096. //>>>>> should be
  1097. shld edx, eax, 2
  1098. mov eax, edx
  1099. mov DWORD PTR dMM6+4, eax
  1100. mov DWORD PTR l_lPrevSample, eax
  1101. movq mm6, QWORD PTR dMM6
  1102. pop edx
  1103. pop ecx
  1104. pop eax
  1105. #endif
  1106. movq mm5, QWORD PTR vfVolume // Volume2, Volume1
  1107. // pand mm6, QWORD PTR ffffMask
  1108. // packssdw mm6, mm0 // Saturate to 16 bits, instead.
  1109. // punpcklwd mm6, mm0
  1110. // pand mm5, QWORD PTR ffffMask // 16 bits only.
  1111. movq mm4, mm5
  1112. mov esi, DWORD PTR [ecx+4]
  1113. punpckldq mm4, mm4
  1114. pmaddwd mm4, mm6
  1115. psrad mm4, 5
  1116. packssdw mm4, mm0
  1117. movd mm7, DWORD PTR [esi+ebx*2]
  1118. paddsw mm7, mm4
  1119. movd DWORD PTR [esi+ebx*2], mm7
  1120. // CHANNEL 2
  1121. punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
  1122. mov esi, DWORD PTR [ecx+8]
  1123. pmaddwd mm5, mm6
  1124. psrad mm5, 5
  1125. packssdw mm5, mm0
  1126. movd mm7, DWORD PTR [esi+ebx*2]
  1127. paddsw mm7, mm5
  1128. movd DWORD PTR [esi+ebx*2], mm7
  1129. #else // There is noise here, probably due to the signed nature of the multiply.
  1130. psrad mm6, 12 // lMIntrep2, lMInterp
  1131. movq mm5, QWORD PTR MmxVolume
  1132. packssdw mm6, mm0
  1133. punpckldq mm6, mm6
  1134. pmulhw mm6, mm5
  1135. mov esi, DWORD PTR [ecx+4]
  1136. movd mm7, DWORD PTR [esi+ebx*2]
  1137. mov esi, DWORD PTR [ecx+8]
  1138. movd mm4, DWORD PTR [esi+ebx*2]
  1139. punpckldq mm4, mm7
  1140. paddsw mm4, mm6
  1141. movd DWORD PTR [esi+ebx*2], mm4
  1142. punpckhdq mm4, mm4
  1143. mov esi, DWORD PTR [ecx+4]
  1144. movd DWORD PTR [esi+ebx*2], mm4
  1145. #endif
  1146. add ebx, 2
  1147. cmp ebx, edx
  1148. jb TwoAtATime
  1149. movd DWORD PTR pfSamplePos, mm2
  1150. #endif // }
  1151. $L43865:
  1152. ; dwPosition = pfSamplePos >> 12;
  1153. ; dwFract = pfSamplePos & 0xFFF;
  1154. ; pfSamplePos += pfPitch;
  1155. ; lA = (long) pcWave[dwPosition];
  1156. ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  1157. mov esi, DWORD PTR pfPitch
  1158. mov edx, DWORD PTR pfSamplePos
  1159. mov eax, DWORD PTR pcWave
  1160. mov edi, edx
  1161. add esi, edx
  1162. and edi, 4095
  1163. sar edx, 12
  1164. mov DWORD PTR pfSamplePos, esi
  1165. movsx esi, BYTE PTR [eax+edx]
  1166. movsx eax, BYTE PTR [eax+edx+1]
  1167. sub eax, esi
  1168. imul eax, edi
  1169. sar eax, 12
  1170. mov edi, One_Channel_2
  1171. // ebx, ecx, edx are used in switch branches
  1172. add eax, esi // lMInterp
  1173. // lMInterp =
  1174. // MulDiv(lMInterp, cfK, (1 << 30))
  1175. // - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30))
  1176. // + MulDiv(m_lPrevSample, cfB1, (1 << 30))
  1177. push ecx
  1178. imul DWORD PTR cfK // edx:eax
  1179. mov ecx, eax
  1180. mov eax, DWORD PTR l_lPrevPrevSample
  1181. mov esi, edx // esi:ecx
  1182. imul DWORD PTR cfB2
  1183. sub ecx, eax
  1184. mov eax, DWORD PTR l_lPrevSample
  1185. sbb esi, edx
  1186. mov DWORD PTR l_lPrevPrevSample, eax
  1187. imul DWORD PTR cfB1
  1188. add eax, ecx // esi:eax
  1189. adc esi, edx
  1190. pop ecx
  1191. // shrd eax, esi, 30
  1192. //>>>>> MOD:PETCHEY
  1193. // shld eax, esi, 2
  1194. //>>>>> should be
  1195. shld esi, eax, 2
  1196. mov eax, esi
  1197. //>>>>>>>>>>>> removed dp
  1198. #if 0
  1199. // if (lMInterp < -32767) lMInterp = -32767;
  1200. // else if (lMInterp > 32767) lMInterp = 32767;
  1201. cmp eax, -32767
  1202. jl Less_than
  1203. cmp eax, 32767
  1204. jg Greater_than
  1205. #endif
  1206. // m_lPrevPrevSample = m_lPrevSample;
  1207. // m_lPrevSample = lMInterp;
  1208. mov DWORD PTR l_lPrevSample, eax
  1209. jmp edi
  1210. Less_than:
  1211. mov eax, -32767
  1212. mov DWORD PTR l_lPrevSample, eax
  1213. jmp edi
  1214. Greater_than:
  1215. mov eax, 32767
  1216. mov DWORD PTR l_lPrevSample, eax
  1217. jmp edi
  1218. // ONE_CHANNEL
  1219. // lM = lMInterp * vfVolume[dwJ - 1];
  1220. // lM >>= 5;
  1221. // ppBuffer[dwJ - 1][dwI] += (short) lM;
  1222. $L44009:
  1223. ; 342 : default:
  1224. ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  1225. mov edi, DWORD PTR l_nChannels
  1226. // ecx ppBuffer
  1227. // eax lMInterp
  1228. // edi counter
  1229. // ebx dwI
  1230. $L43874:
  1231. mov edx, DWORD PTR vfVolume[edi*4-4]
  1232. mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
  1233. imul edx, eax
  1234. sar edx, 5
  1235. add WORD PTR [esi+ebx*2], dx
  1236. jno no_overflow
  1237. mov WORD PTR [esi+ebx*2], 0x7fff
  1238. js no_overflow
  1239. mov WORD PTR [esi+ebx*2], 0x8000
  1240. no_overflow:
  1241. dec edi
  1242. cmp edi, 8
  1243. jne SHORT $L43874
  1244. lea edi, $L43876
  1245. }
  1246. #define ONE_CHANNEL_VOLUME(dwJ) \
  1247. _asm { lea edx, vfVolume } \
  1248. _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
  1249. _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
  1250. _asm { imul edx, eax } \
  1251. _asm { sar edx, 5 } \
  1252. _asm { add edi, [esp] } \
  1253. \
  1254. _asm { add WORD PTR [esi+ebx*2], dx } \
  1255. _asm { jo FAR overflow_x }
  1256. //-------------------------------------------------------------------------
  1257. //
  1258. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  1259. //
  1260. // This lovely hack makes sure that all the instructions
  1261. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  1262. // by calculating instruction offsets based on having 8 identical blocks.
  1263. //
  1264. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  1265. //
  1266. //-------------------------------------------------------------------------
  1267. #define ONE_CHANNEL_VOLUME_1 \
  1268. _asm { lea edx, vfVolume } \
  1269. _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
  1270. _asm { mov esi, DWORD PTR [ecx + 4] } \
  1271. _asm { imul edx, eax } \
  1272. _asm { sar edx, 5 } \
  1273. _asm { add edi, [esp] } \
  1274. \
  1275. _asm { add WORD PTR [esi+ebx*2], dx } \
  1276. _asm { jo FAR overflow_x }
  1277. $L43876:
  1278. ONE_CHANNEL_VOLUME(8);
  1279. $L43880:
  1280. ONE_CHANNEL_VOLUME(7);
  1281. ONE_CHANNEL_VOLUME(6);
  1282. ONE_CHANNEL_VOLUME(5);
  1283. ONE_CHANNEL_VOLUME(4);
  1284. ONE_CHANNEL_VOLUME(3);
  1285. ONE_CHANNEL_VOLUME(2);
  1286. ONE_CHANNEL_VOLUME_1;
  1287. #undef ONE_CHANNEL_VOLUME
  1288. #undef ONE_CHANNEL_VOLUME_1
  1289. $L43866:
  1290. _asm {
  1291. mov eax, DWORD PTR a
  1292. inc ebx
  1293. cmp ebx, eax
  1294. jb $L43865
  1295. mov edi, DWORD PTR l_nChannels
  1296. $L43867:
  1297. cmp ebx, DWORD PTR dwLength
  1298. jb $L44021
  1299. Exit_$L43841:
  1300. pop eax
  1301. mov DWORD PTR dwI, ebx
  1302. #ifdef USE_MMX_FILTERED
  1303. mov edi, UseMmx
  1304. cmp edi, UseMmxLabel
  1305. jne NoMmxCleanupLabel
  1306. emms
  1307. NoMmxCleanupLabel:
  1308. #endif
  1309. }
  1310. m_lPrevPrevSample = l_lPrevPrevSample;
  1311. m_lPrevSample = l_lPrevSample;
  1312. #else // }{
  1313. for (dwI = 0; dwI < dwLength; )
  1314. {
  1315. if (pfSamplePos >= pfSampleLength)
  1316. {
  1317. if (pfLoopLength)
  1318. pfSamplePos -= pfLoopLength;
  1319. else
  1320. break;
  1321. }
  1322. dwIncDelta--;
  1323. if (!dwIncDelta)
  1324. {
  1325. dwIncDelta = dwDeltaPeriod;
  1326. pfPFract += pfDeltaPitch;
  1327. pfPitch = pfPFract >> 8;
  1328. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  1329. {
  1330. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  1331. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  1332. }
  1333. cfK += cfdK;
  1334. cfB1 += cfdB1;
  1335. cfB2 += cfdB2;
  1336. }
  1337. dwPosition = pfSamplePos >> 12;
  1338. dwFract = pfSamplePos & 0xFFF;
  1339. pfSamplePos += pfPitch;
  1340. lMInterp = pcWave[dwPosition]; // pcWave
  1341. lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
  1342. // Filter
  1343. //
  1344. lMInterp =
  1345. MulDiv(lMInterp, cfK, (1 << 30))
  1346. - MulDiv(m_lPrevSample, cfB1, (1 << 30))
  1347. + MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
  1348. m_lPrevPrevSample = m_lPrevSample;
  1349. m_lPrevSample = lMInterp;
  1350. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  1351. {
  1352. lM = lMInterp * vfVolume[dwJ];
  1353. lM >>= 5;
  1354. // Keep this around so we can use it to generate new assembly code (see below...)
  1355. #if 1
  1356. {
  1357. long x = ppBuffer[dwJ][dwI];
  1358. x += lM;
  1359. if (x != (short)x) {
  1360. if (x > 32767) x = 32767;
  1361. else x = -32768;
  1362. }
  1363. ppBuffer[dwJ][dwI] = (short)x;
  1364. }
  1365. #else
  1366. ppBuffer[dwJ][dwI] += (short) lM;
  1367. _asm{jno no_oflow}
  1368. ppBuffer[dwJ][dwI] = 0x7fff;
  1369. _asm{js no_oflow}
  1370. ppBuffer[dwJ][dwI] = (short) 0x8000;
  1371. no_oflow: ;
  1372. #endif
  1373. }
  1374. dwI++;
  1375. }
  1376. #endif // }
  1377. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  1378. {
  1379. vfLastVolume[dwJ] = vfVolume[dwJ];
  1380. }
  1381. m_pfLastPitch = pfPitch;
  1382. m_pfLastSample = pfSamplePos;
  1383. return (dwI);
  1384. }
  1385. #if 0
  1386. DWORD CDigitalAudio::MixMulti16(
  1387. short *ppBuffer[],
  1388. DWORD dwBufferCount,
  1389. DWORD dwLength,
  1390. DWORD dwDeltaPeriod,
  1391. VFRACT vfDeltaVolume[],
  1392. VFRACT vfLastVolume[],
  1393. PFRACT pfDeltaPitch,
  1394. PFRACT pfSampleLength,
  1395. PFRACT pfLoopLength)
  1396. {
  1397. DWORD dwI, dwJ;
  1398. DWORD dwPosition;
  1399. long lA;//, lB;
  1400. long lM;
  1401. long lMInterp;
  1402. DWORD dwIncDelta = dwDeltaPeriod;
  1403. VFRACT dwFract;
  1404. short * pcWave = m_pnWave;
  1405. PFRACT pfSamplePos = m_pfLastSample;
  1406. PFRACT pfPitch = m_pfLastPitch;
  1407. PFRACT pfPFract = pfPitch << 8;
  1408. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  1409. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  1410. for (dwI = 0; dwI < dwBufferCount; dwI++)
  1411. {
  1412. vfVolume[dwI] = vfLastVolume[dwI];
  1413. vfVFract[dwI] = vfVolume[dwI] << 8;
  1414. }
  1415. for (dwI = 0; dwI < dwLength;)
  1416. {
  1417. if (pfSamplePos >= pfSampleLength)
  1418. {
  1419. if (pfLoopLength)
  1420. pfSamplePos -= pfLoopLength;
  1421. else
  1422. break;
  1423. }
  1424. dwIncDelta--;
  1425. if (!dwIncDelta)
  1426. {
  1427. dwIncDelta = dwDeltaPeriod;
  1428. pfPFract += pfDeltaPitch;
  1429. pfPitch = pfPFract >> 8;
  1430. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  1431. {
  1432. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  1433. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  1434. }
  1435. }
  1436. dwPosition = pfSamplePos >> 12;
  1437. dwFract = pfSamplePos & 0xFFF;
  1438. pfSamplePos += pfPitch;
  1439. lA = (long) pcWave[dwPosition];
  1440. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  1441. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  1442. {
  1443. lM = lMInterp * vfVolume[dwJ];
  1444. lM >>= 13; // Signal bumps up to 12 bits.
  1445. // Keep this around so we can use it to generate new assembly code (see below...)
  1446. #if 1
  1447. {
  1448. long x = ppBuffer[dwJ][dwI];
  1449. x += lM;
  1450. if (x != (short)x) {
  1451. if (x > 32767) x = 32767;
  1452. else x = -32768;
  1453. }
  1454. ppBuffer[dwJ][dwI] = (short)x;
  1455. }
  1456. #else
  1457. ppBuffer[dwJ][dwI] += (short) lM;
  1458. _asm{jno no_oflow}
  1459. ppBuffer[dwJ][dwI] = 0x7fff;
  1460. _asm{js no_oflow}
  1461. ppBuffer[dwJ][dwI] = (short) 0x8000;
  1462. #endif
  1463. no_oflow: ;
  1464. }
  1465. dwI++;
  1466. }
  1467. m_pfLastPitch = pfPitch;
  1468. m_pfLastSample = pfSamplePos;
  1469. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  1470. {
  1471. vfLastVolume[dwJ] = vfVolume[dwJ];
  1472. }
  1473. return (dwI);
  1474. }
  1475. #else
  1476. DWORD CDigitalAudio::MixMulti16(
  1477. short *ppBuffer[],
  1478. DWORD dwBufferCount,
  1479. DWORD dwLength,
  1480. DWORD dwDeltaPeriod,
  1481. VFRACT vfDeltaVolume[],
  1482. VFRACT vfLastVolume[],
  1483. PFRACT pfDeltaPitch,
  1484. PFRACT pfSampleLength,
  1485. PFRACT pfLoopLength)
  1486. {
  1487. DWORD dwI, dwJ;
  1488. DWORD dwPosition;
  1489. long lA;//, lB;
  1490. long lM;
  1491. long lMInterp;
  1492. DWORD dwIncDelta = dwDeltaPeriod;
  1493. VFRACT dwFract;
  1494. short * pcWave = m_pnWave;
  1495. PFRACT pfSamplePos = m_pfLastSample;
  1496. PFRACT pfPitch = m_pfLastPitch;
  1497. PFRACT pfPFract = pfPitch << 8;
  1498. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  1499. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  1500. for (dwI = 0; dwI < dwBufferCount; dwI++)
  1501. {
  1502. vfVolume[dwI] = vfLastVolume[dwI];
  1503. vfVFract[dwI] = vfVolume[dwI] << 8;
  1504. }
  1505. #if 1 // {
  1506. DWORD l_nChannels = dwBufferCount;
  1507. DWORD a;
  1508. DWORD One_Channel_1, One_Channel_2; // Code address locations.
  1509. #ifdef USE_MMX // {
  1510. typedef __int64 QWORD;
  1511. QWORD OneMask = 0x0000000010001000;
  1512. QWORD fffMask = 0x00000fff00000fff;
  1513. QWORD ffffMask = 0x0000ffff0000ffff;
  1514. DWORD UseMmx;
  1515. DWORD MmxVolume[2];
  1516. int Use_MMX = m_sfMMXEnabled;
  1517. _asm {
  1518. lea edi, $L43865
  1519. // Turned off
  1520. cmp Use_MMX, 0
  1521. je AssignMMXLabel
  1522. // != 2 channels
  1523. mov esi, DWORD PTR l_nChannels
  1524. cmp esi, 2
  1525. jne AssignMmxLabel
  1526. // Ok, init and use MMX
  1527. lea edi, UseMmxLabel
  1528. pxor mm0, mm0
  1529. movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
  1530. AssignMmxLabel:
  1531. mov DWORD PTR UseMmx, edi
  1532. }
  1533. #endif // }
  1534. _asm {
  1535. mov edi, DWORD PTR l_nChannels
  1536. cmp edi, 8
  1537. jna Start1
  1538. lea esi, $L44008
  1539. jmp Do_One_Channel_2
  1540. // Put this code more than 127 bytes away from the references.
  1541. overflow_x:
  1542. js overflow_y
  1543. mov WORD PTR [esi+ebx*2], 0x8000
  1544. jmp edi
  1545. overflow_y:
  1546. mov WORD PTR [esi+ebx*2], 0x7fff
  1547. jmp edi
  1548. Start1:
  1549. test edi, edi
  1550. jne Start2
  1551. lea esi, $L43860
  1552. jmp Do_One_Channel_2
  1553. Start2:
  1554. lea eax, $L43851
  1555. lea edx, $L43853
  1556. sub edx, eax
  1557. mov esi, 8
  1558. sub esi, edi
  1559. imul esi, edx
  1560. add esi, eax
  1561. Do_One_Channel_2:
  1562. mov DWORD PTR One_Channel_1, esi
  1563. // Create second jump table location.
  1564. lea esi, $L43876
  1565. lea ecx, $L43880
  1566. sub ecx, esi
  1567. push ecx // Span between branches.
  1568. mov eax, 8
  1569. sub eax, DWORD PTR l_nChannels
  1570. jge Start3
  1571. lea ecx, $L44009
  1572. jmp Done_Do_Channel_2
  1573. Start3:
  1574. cmp eax, 8
  1575. jne Start4
  1576. lea ecx, $L43866
  1577. jmp Done_Do_Channel_2
  1578. Start4:
  1579. imul ecx, eax
  1580. add ecx, esi
  1581. Done_Do_Channel_2:
  1582. mov DWORD PTR One_Channel_2, ecx
  1583. mov ecx, DWORD PTR dwLength
  1584. xor ebx, ebx // dwI
  1585. test ecx, ecx
  1586. jbe Exit_$L43841
  1587. mov ecx, DWORD PTR ppBuffer
  1588. sub ecx, 4
  1589. // ecx == ppBuffer
  1590. // ebx == dwI
  1591. // edi == l_nChannels
  1592. $L44021:
  1593. mov edx, DWORD PTR pfSamplePos
  1594. cmp edx, DWORD PTR pfSampleLength
  1595. jl SHORT $L43842
  1596. mov eax, DWORD PTR pfLoopLength
  1597. test eax, eax
  1598. je Exit_$L43841
  1599. sub edx, eax
  1600. mov DWORD PTR pfSamplePos, edx
  1601. $L43842:
  1602. mov edx, DWORD PTR dwIncDelta
  1603. mov eax, DWORD PTR pfPFract
  1604. dec edx
  1605. mov DWORD PTR dwIncDelta, edx
  1606. jne $L43860
  1607. mov edx, DWORD PTR dwDeltaPeriod
  1608. mov esi, DWORD PTR pfDeltaPitch
  1609. mov DWORD PTR dwIncDelta, edx
  1610. add eax, esi
  1611. mov DWORD PTR pfPFract, eax
  1612. sar eax, 8
  1613. mov DWORD PTR pfPitch, eax
  1614. mov esi, DWORD PTR vfDeltaVolume
  1615. jmp One_Channel_1
  1616. // ONE_CHANNEL
  1617. // vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
  1618. // vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
  1619. $L44008:
  1620. mov DWORD PTR dwI, ebx
  1621. lea ebx, DWORD PTR [edi*4-4]
  1622. add edi, -8 ; fffffff8H
  1623. $L43849:
  1624. lea eax, DWORD PTR vfVFract[ebx]
  1625. mov ecx, DWORD PTR [esi+ebx]
  1626. sub ebx, 4
  1627. add DWORD PTR [eax], ecx
  1628. mov eax, DWORD PTR [eax]
  1629. sar eax, 8
  1630. mov DWORD PTR vfVolume[ebx+4], eax
  1631. dec edi
  1632. jne SHORT $L43849
  1633. mov edi, DWORD PTR l_nChannels
  1634. mov ecx, DWORD PTR ppBuffer
  1635. mov ebx, DWORD PTR dwI
  1636. sub ecx, 4
  1637. }
  1638. #define ONE_CHANNEL_VOLUME(dwJ) \
  1639. _asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
  1640. _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
  1641. _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
  1642. _asm { sar eax, 8 }; \
  1643. _asm { lea edx, vfVolume }; \
  1644. _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
  1645. //-------------------------------------------------------------------------
  1646. //
  1647. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  1648. //
  1649. // This lovely hack makes sure that all the instructions
  1650. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  1651. // by calculating instruction offsets based on having 8 identical blocks.
  1652. //
  1653. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  1654. //
  1655. //-------------------------------------------------------------------------
  1656. #define ONE_CHANNEL_VOLUME_1 \
  1657. _asm { mov eax, DWORD PTR vfVFract[0] }; \
  1658. _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
  1659. _asm { mov DWORD PTR vfVFract[0], eax }; \
  1660. _asm { sar eax, 8 }; \
  1661. _asm { lea edx, vfVolume }; \
  1662. _asm { mov DWORD PTR [edx], eax };
  1663. $L43851:
  1664. ONE_CHANNEL_VOLUME(8)
  1665. $L43853:
  1666. ONE_CHANNEL_VOLUME(7);
  1667. ONE_CHANNEL_VOLUME(6);
  1668. ONE_CHANNEL_VOLUME(5);
  1669. ONE_CHANNEL_VOLUME(4);
  1670. ONE_CHANNEL_VOLUME(3);
  1671. ONE_CHANNEL_VOLUME(2);
  1672. ONE_CHANNEL_VOLUME_1;
  1673. #undef ONE_CHANNEL_VOLUME
  1674. #undef ONE_CHANNEL_VOLUME_1
  1675. $L43860:
  1676. _asm {
  1677. ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
  1678. mov esi, DWORD PTR pfPitch
  1679. mov eax, DWORD PTR pfSampleLength
  1680. dec esi
  1681. sub eax, DWORD PTR pfSamplePos
  1682. add eax, esi
  1683. cdq
  1684. idiv DWORD PTR pfPitch
  1685. mov edx, DWORD PTR dwLength
  1686. sub edx, ebx
  1687. cmp edx, eax
  1688. jae SHORT $L43863
  1689. mov eax, edx
  1690. $L43863:
  1691. mov edx, DWORD PTR dwIncDelta
  1692. cmp edx, eax
  1693. jae SHORT $L43864
  1694. mov eax, edx
  1695. $L43864:
  1696. ; 309 :
  1697. ; 310 : for (a += dwI; dwI < a; dwI++)
  1698. inc edx
  1699. sub edx, eax
  1700. add eax, ebx
  1701. mov DWORD PTR dwIncDelta, edx
  1702. cmp ebx, eax
  1703. mov DWORD PTR a, eax
  1704. jae $L43867
  1705. #ifdef USE_MMX // {
  1706. // Try to handle two positions at once.
  1707. lea edx, [eax-3]
  1708. cmp ebx, edx
  1709. jge $L43865
  1710. jmp UseMmx
  1711. UseMmxLabel:
  1712. // Ok, there are at least two samples to handle.
  1713. movd mm1, DWORD PTR pfPitch
  1714. psllq mm1, 32 // Pitch, 0
  1715. movd mm2, DWORD PTR pfSamplePos
  1716. punpckldq mm2, mm2 // SamplePos, SamplePos
  1717. paddd mm2, mm1 // SamplePos + Pitch, SamplePos
  1718. punpckhdq mm1, mm1 // Pitch, Pitch
  1719. pslld mm1, 1 // Pitch * 2, Pitch * 2
  1720. mov eax, DWORD PTR pcWave
  1721. #if 0
  1722. movq mm4, QWORD PTR vfVolume
  1723. pand mm4, QWORD PTR ffffMask
  1724. movq mm5, mm4
  1725. pslld mm4, 16
  1726. por mm4, mm5
  1727. psllw mm4, 3
  1728. movq QWORD PTR MmxVolume, mm4
  1729. #endif
  1730. TwoAtATime:
  1731. ; dwPosition = pfSamplePos >> 12;
  1732. ; dwFract = pfSamplePos & 0xFFF;
  1733. ; pfSamplePos += pfPitch;
  1734. movq mm4, mm2
  1735. psrad mm4, 12 // dwPosition + Pitch, dwPosition
  1736. ; lA = (long) pcWave[dwPosition];
  1737. ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
  1738. movd esi, mm4 // dwPosition
  1739. punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
  1740. movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
  1741. // Instead for byte codes
  1742. // mov si, WORD PTR [eax+esi]
  1743. // movd mm6, esi
  1744. // punpcklbw mm5, mm6
  1745. // psarw mm5, 8
  1746. movd esi, mm4
  1747. movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
  1748. // Instead for byte codes
  1749. // mov si, WORD PTR [eax+esi]
  1750. // movd mm6, esi
  1751. // punpcklbw mm4, mm6
  1752. // psarw mm4, 8
  1753. // This code could be combined with code above, a bit.
  1754. punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
  1755. movq mm4, mm2
  1756. pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
  1757. packssdw mm4, mm0
  1758. movq mm6, mm3
  1759. psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
  1760. punpcklwd mm6, mm4
  1761. paddd mm2, mm1 // Next iteration
  1762. pmaddwd mm6, mm5
  1763. #if 1
  1764. movq mm5, QWORD PTR vfVolume // Volume2, Volume1
  1765. psrad mm6, 12 // lMIntrep2, lMInterp
  1766. // pand mm6, QWORD PTR ffffMask
  1767. // pand mm5, QWORD PTR ffffMask // 16 bits only.
  1768. movq mm4, mm5
  1769. mov esi, DWORD PTR [ecx+4]
  1770. punpckldq mm4, mm4
  1771. pmaddwd mm4, mm6
  1772. psrad mm4, 13
  1773. packssdw mm4, mm0
  1774. movd mm7, DWORD PTR [esi+ebx*2]
  1775. paddsw mm7, mm4
  1776. movd DWORD PTR [esi+ebx*2], mm7
  1777. // CHANNEL 2
  1778. punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
  1779. mov esi, DWORD PTR [ecx+8]
  1780. pmaddwd mm5, mm6
  1781. psrad mm5, 13
  1782. packssdw mm5, mm0
  1783. movd mm7, DWORD PTR [esi+ebx*2]
  1784. paddsw mm7, mm5
  1785. movd DWORD PTR [esi+ebx*2], mm7
  1786. #else // There is noise here, probably due to the signed nature of the multiply.
  1787. psrad mm6, 12 // lMIntrep2, lMInterp
  1788. movq mm5, QWORD PTR MmxVolume
  1789. packssdw mm6, mm0
  1790. punpckldq mm6, mm6
  1791. pmulhw mm6, mm5
  1792. mov esi, DWORD PTR [ecx+4]
  1793. movd mm7, DWORD PTR [esi+ebx*2]
  1794. mov esi, DWORD PTR [ecx+8]
  1795. movd mm4, DWORD PTR [esi+ebx*2]
  1796. punpckldq mm4, mm7
  1797. paddsw mm4, mm6
  1798. movd DWORD PTR [esi+ebx*2], mm4
  1799. punpckhdq mm4, mm4
  1800. mov esi, DWORD PTR [ecx+4]
  1801. movd DWORD PTR [esi+ebx*2], mm4
  1802. #endif
  1803. add ebx, 2
  1804. cmp ebx, edx
  1805. jb TwoAtATime
  1806. movd DWORD PTR pfSamplePos, mm2
  1807. #endif // }
  1808. $L43865:
  1809. ; dwPosition = pfSamplePos >> 12;
  1810. ; dwFract = pfSamplePos & 0xFFF;
  1811. ; pfSamplePos += pfPitch;
  1812. ; lA = (long) pcWave[dwPosition];
  1813. ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  1814. mov esi, DWORD PTR pfPitch
  1815. mov edx, DWORD PTR pfSamplePos
  1816. mov eax, DWORD PTR pcWave
  1817. mov edi, edx
  1818. add esi, edx
  1819. and edi, 4095
  1820. sar edx, 12
  1821. mov DWORD PTR pfSamplePos, esi
  1822. movsx esi, WORD PTR [eax+edx*2]
  1823. movsx eax, WORD PTR [eax+edx*2+2]
  1824. sub eax, esi
  1825. imul eax, edi
  1826. sar eax, 12
  1827. mov edi, One_Channel_2
  1828. // ebx, ecx, edx are used in switch branches
  1829. add eax, esi // lMInterp
  1830. jmp edi
  1831. // ONE_CHANNEL
  1832. // lM = lMInterp * vfVolume[dwJ - 1];
  1833. // lM >>= 13;
  1834. // ppBuffer[dwJ - 1][dwI] += (short) lM;
  1835. $L44009:
  1836. ; 342 : default:
  1837. ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  1838. mov edi, DWORD PTR l_nChannels
  1839. // ecx ppBuffer
  1840. // eax lMInterp
  1841. // edi counter
  1842. // ebx dwI
  1843. $L43874:
  1844. mov edx, DWORD PTR vfVolume[edi*4-4]
  1845. mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
  1846. imul edx, eax
  1847. sar edx, 13
  1848. add WORD PTR [esi+ebx*2], dx
  1849. jno no_overflow
  1850. mov WORD PTR [esi+ebx*2], 0x7fff
  1851. js no_overflow
  1852. mov WORD PTR [esi+ebx*2], 0x8000
  1853. no_overflow:
  1854. dec edi
  1855. cmp edi, 8
  1856. jne SHORT $L43874
  1857. lea edi, $L43876
  1858. }
  1859. #define ONE_CHANNEL_VOLUME(dwJ) \
  1860. _asm { lea edx, vfVolume } \
  1861. _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
  1862. _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
  1863. _asm { imul edx, eax } \
  1864. _asm { sar edx, 13 } \
  1865. _asm { add edi, [esp] } \
  1866. \
  1867. _asm { add WORD PTR [esi+ebx*2], dx } \
  1868. _asm { jo FAR overflow_x }
  1869. //-------------------------------------------------------------------------
  1870. //
  1871. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  1872. //
  1873. // This lovely hack makes sure that all the instructions
  1874. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  1875. // by calculating instruction offsets based on having 8 identical blocks.
  1876. //
  1877. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  1878. //
  1879. //-------------------------------------------------------------------------
  1880. #define ONE_CHANNEL_VOLUME_1 \
  1881. _asm { lea edx, vfVolume } \
  1882. _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
  1883. _asm { mov esi, DWORD PTR [ecx + 4] } \
  1884. _asm { imul edx, eax } \
  1885. _asm { sar edx, 13 } \
  1886. _asm { add edi, [esp] } \
  1887. \
  1888. _asm { add WORD PTR [esi+ebx*2], dx } \
  1889. _asm { jo FAR overflow_x }
  1890. $L43876:
  1891. ONE_CHANNEL_VOLUME(8);
  1892. $L43880:
  1893. ONE_CHANNEL_VOLUME(7);
  1894. ONE_CHANNEL_VOLUME(6);
  1895. ONE_CHANNEL_VOLUME(5);
  1896. ONE_CHANNEL_VOLUME(4);
  1897. ONE_CHANNEL_VOLUME(3);
  1898. ONE_CHANNEL_VOLUME(2);
  1899. ONE_CHANNEL_VOLUME_1;
  1900. #undef ONE_CHANNEL_VOLUME
  1901. #undef ONE_CHANNEL_VOLUME_1
  1902. $L43866:
  1903. _asm {
  1904. mov eax, DWORD PTR a
  1905. inc ebx
  1906. cmp ebx, eax
  1907. jb $L43865
  1908. mov edi, DWORD PTR l_nChannels
  1909. $L43867:
  1910. cmp ebx, DWORD PTR dwLength
  1911. jb $L44021
  1912. Exit_$L43841:
  1913. pop eax
  1914. mov DWORD PTR dwI, ebx
  1915. #ifdef USE_MMX
  1916. mov edi, UseMmx
  1917. cmp edi, UseMmxLabel
  1918. jne NoMmxCleanupLabel
  1919. emms
  1920. NoMmxCleanupLabel:
  1921. #endif
  1922. }
  1923. #else // }{
  1924. for (dwI = 0; dwI < dwLength;)
  1925. {
  1926. if (pfSamplePos >= pfSampleLength)
  1927. {
  1928. if (pfLoopLength)
  1929. pfSamplePos -= pfLoopLength;
  1930. else
  1931. break;
  1932. }
  1933. dwIncDelta--;
  1934. if (!dwIncDelta)
  1935. {
  1936. dwIncDelta = dwDeltaPeriod;
  1937. pfPFract += pfDeltaPitch;
  1938. pfPitch = pfPFract >> 8;
  1939. #if 1
  1940. #define ONE_CHANNEL_VOLUME(dwJ) \
  1941. vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1]; \
  1942. vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
  1943. switch (l_nChannels)
  1944. {
  1945. default:
  1946. for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  1947. {
  1948. ONE_CHANNEL_VOLUME(dwJ);
  1949. }
  1950. case 8: ONE_CHANNEL_VOLUME(8);
  1951. case 7: ONE_CHANNEL_VOLUME(7);
  1952. case 6: ONE_CHANNEL_VOLUME(6);
  1953. case 5: ONE_CHANNEL_VOLUME(5);
  1954. case 4: ONE_CHANNEL_VOLUME(4);
  1955. case 3: ONE_CHANNEL_VOLUME(3);
  1956. case 2: ONE_CHANNEL_VOLUME(2);
  1957. case 1: ONE_CHANNEL_VOLUME(1);
  1958. case 0:;
  1959. }
  1960. #undef ONE_CHANNEL_VOLUME
  1961. #else
  1962. for (dwJ = 0; dwJ < l_nChannels; dwJ++)
  1963. {
  1964. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  1965. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  1966. }
  1967. #endif
  1968. }
  1969. #if 1 // {
  1970. DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
  1971. DWORD b = dwLength - dwI;
  1972. if (b < a) a = b;
  1973. if (dwIncDelta < a) a = dwIncDelta;
  1974. dwIncDelta -= a - 1;
  1975. a += dwI;
  1976. for (; dwI < a; dwI++)
  1977. {
  1978. dwPosition = pfSamplePos >> 12;
  1979. dwFract = pfSamplePos & 0xFFF;
  1980. pfSamplePos += pfPitch;
  1981. lA = (long) pcWave[dwPosition];
  1982. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  1983. #if 1 // {
  1984. #if 1
  1985. #define ONE_CHANNEL_VOLUME(dwJ) \
  1986. { \
  1987. lM = lMInterp * vfVolume[dwJ - 1]; \
  1988. lM >>= 13; \
  1989. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  1990. long b = ppBuffer[dwJ - 1][dwI]; \
  1991. if ((short)b != b) { \
  1992. if ((long)b < 0) b = 0x8000; \
  1993. else b = 0x7fff; \
  1994. ppBuffer[dwJ - 1][dwI] = (short) b; \
  1995. } \
  1996. }
  1997. #else
  1998. #define ONE_CHANNEL_VOLUME(dwJ) \
  1999. { \
  2000. lM = lMInterp * vfVolume[dwJ - 1]; \
  2001. lM >>= 13; \
  2002. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  2003. }
  2004. #endif
  2005. switch (l_nChannels)
  2006. {
  2007. default:
  2008. for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  2009. {
  2010. ONE_CHANNEL_VOLUME(dwJ);
  2011. }
  2012. case 8: ONE_CHANNEL_VOLUME(8);
  2013. case 7: ONE_CHANNEL_VOLUME(7);
  2014. case 6: ONE_CHANNEL_VOLUME(6);
  2015. case 5: ONE_CHANNEL_VOLUME(5);
  2016. case 4: ONE_CHANNEL_VOLUME(4);
  2017. case 3: ONE_CHANNEL_VOLUME(3);
  2018. case 2: ONE_CHANNEL_VOLUME(2);
  2019. case 1: ONE_CHANNEL_VOLUME(1);
  2020. case 0:;
  2021. }
  2022. #undef ONE_CHANNEL_VOLUME
  2023. #else // }{
  2024. for (dwJ = 0; dwJ < l_nChannels; dwJ++)
  2025. {
  2026. lM = lMInterp * vfVolume[dwJ];
  2027. lM >>= 13; // Signal bumps up to 12 bits.
  2028. // Keep this around so we can use it to generate new assembly code (see below...)
  2029. #if 1
  2030. {
  2031. long x = ppBuffer[dwJ][dwI];
  2032. x += lM;
  2033. if (x != (short)x) {
  2034. if (x > 32767) x = 32767;
  2035. else x = -32768;
  2036. }
  2037. ppBuffer[dwJ][dwI] = (short)x;
  2038. }
  2039. #else
  2040. ppBuffer[dwJ][dwI] += (short) lM;
  2041. _asm{jno no_oflow}
  2042. ppBuffer[dwJ][dwI] = 0x7fff;
  2043. _asm{js no_oflow}
  2044. ppBuffer[dwJ][dwI] = (short) 0x8000;
  2045. no_oflow: ;
  2046. #endif
  2047. }
  2048. #endif // }
  2049. }
  2050. #else // }{
  2051. dwPosition = pfSamplePos >> 12;
  2052. dwFract = pfSamplePos & 0xFFF;
  2053. pfSamplePos += pfPitch;
  2054. lA = (long) pcWave[dwPosition];
  2055. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  2056. #if 1
  2057. #if 1
  2058. #define ONE_CHANNEL_VOLUME(dwJ) \
  2059. { \
  2060. lM = lMInterp * vfVolume[dwJ - 1]; \
  2061. lM >>= 13; \
  2062. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  2063. long b = ppBuffer[dwJ - 1][dwI]; \
  2064. if ((short)b != b) { \
  2065. if ((long)b < 0) b = 0x8000; \
  2066. else b = 0x7fff; \
  2067. ppBuffer[dwJ - 1][dwI] = (short) b; \
  2068. } \
  2069. }
  2070. #else
  2071. #define ONE_CHANNEL_VOLUME(dwJ) \
  2072. { \
  2073. lM = lMInterp * vfVolume[dwJ - 1]; \
  2074. lM >>= 13; \
  2075. ppBuffer[dwJ - 1][dwI] += (short) lM;\
  2076. }
  2077. #endif
  2078. switch (l_nChannels)
  2079. {
  2080. default:
  2081. for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  2082. {
  2083. ONE_CHANNEL_VOLUME(dwJ);
  2084. }
  2085. case 8: ONE_CHANNEL_VOLUME(8);
  2086. case 7: ONE_CHANNEL_VOLUME(7);
  2087. case 6: ONE_CHANNEL_VOLUME(6);
  2088. case 5: ONE_CHANNEL_VOLUME(5);
  2089. case 4: ONE_CHANNEL_VOLUME(4);
  2090. case 3: ONE_CHANNEL_VOLUME(3);
  2091. case 2: ONE_CHANNEL_VOLUME(2);
  2092. case 1: ONE_CHANNEL_VOLUME(1);
  2093. case 0:;
  2094. }
  2095. #undef ONE_CHANNEL_VOLUME
  2096. #else
  2097. for (dwJ = 0; dwJ < l_nChannels; dwJ++)
  2098. {
  2099. lM = lMInterp * vfVolume[dwJ];
  2100. lM >>= 13; // Signal bumps up to 12 bits.
  2101. // Keep this around so we can use it to generate new assembly code (see below...)
  2102. #if 1
  2103. {
  2104. long x = ppBuffer[dwJ][dwI];
  2105. x += lM;
  2106. if (x != (short)x) {
  2107. if (x > 32767) x = 32767;
  2108. else x = -32768;
  2109. }
  2110. ppBuffer[dwJ][dwI] = (short)x;
  2111. }
  2112. #else
  2113. ppBuffer[dwJ][dwI] += (short) lM;
  2114. _asm{jno no_oflow}
  2115. ppBuffer[dwJ][dwI] = 0x7fff;
  2116. _asm{js no_oflow}
  2117. ppBuffer[dwJ][dwI] = (short) 0x8000;
  2118. no_oflow: ;
  2119. #endif
  2120. }
  2121. #endif
  2122. dwI++;
  2123. #endif // }
  2124. }
  2125. #endif // }
  2126. m_pfLastPitch = pfPitch;
  2127. m_pfLastSample = pfSamplePos;
  2128. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2129. {
  2130. vfLastVolume[dwJ] = vfVolume[dwJ];
  2131. }
  2132. return (dwI);
  2133. }
  2134. #endif
  // CDigitalAudio::MixMulti16Filter (x86 build, inline assembly).
  //
  // Mixes up to dwLength samples of the 16-bit wave at m_pnWave into each of the
  // dwBufferCount output buffers. Every output sample is linearly interpolated
  // between two adjacent wave samples, run through the recursive filter
  //     out = (in * cfK - m_lPrevPrevSample * cfB2 + m_lPrevSample * cfB1) >> 30
  // scaled by the per-buffer volume (>> 13) and added to the buffer with
  // saturation to the 16-bit range.
  //
  // ppBuffer            Array of dwBufferCount pointers to 16-bit output buffers.
  // dwBufferCount       Number of entries used in ppBuffer, vfDeltaVolume and vfLastVolume.
  // dwLength            Maximum number of output samples to produce.
  // dwDeltaPeriod       Number of samples between ramp steps.
  // vfDeltaVolume       Per-buffer increment added to the volume (kept << 8) at each ramp step.
  // vfLastVolume        In: starting volume per buffer. Out: volume in effect on return.
  // pfDeltaPitch        Increment added to the pitch (kept << 8) at each ramp step.
  // pfSampleLength      Wave position (12 fractional bits) at which the wave ends or loops.
  // pfLoopLength        Subtracted from the position when it reaches pfSampleLength;
  //                     zero ends the mix at that point.
  // cfdK, cfdB1, cfdB2  Increments added to the filter coefficients at each ramp step.
  //
  // Returns the number of samples mixed into each buffer. Position, pitch, filter
  // coefficients and filter history are stored back in the object on exit.
  2135. DWORD CDigitalAudio::MixMulti16Filter(
  2136. short *ppBuffer[],
  2137. DWORD dwBufferCount,
  2138. DWORD dwLength,
  2139. DWORD dwDeltaPeriod,
  2140. VFRACT vfDeltaVolume[],
  2141. VFRACT vfLastVolume[],
  2142. PFRACT pfDeltaPitch,
  2143. PFRACT pfSampleLength,
  2144. PFRACT pfLoopLength,
  2145. COEFF cfdK,
  2146. COEFF cfdB1,
  2147. COEFF cfdB2)
  2148. {
  2149. DWORD dwI, dwJ;
  2150. DWORD dwPosition;
  2151. long lA;//, lB;
  2152. long lM;
  2153. long lMInterp;
  2154. DWORD dwIncDelta = dwDeltaPeriod;
  2155. VFRACT dwFract;
  2156. short * pcWave = m_pnWave;
  2157. PFRACT pfSamplePos = m_pfLastSample;
  2158. PFRACT pfPitch = m_pfLastPitch;
  2159. PFRACT pfPFract = pfPitch << 8;
  2160. COEFF cfK = m_cfLastK;
  2161. COEFF cfB1 = m_cfLastB1;
  2162. COEFF cfB2 = m_cfLastB2;
  2163. DWORD dMM6[2]; // Handle filter...
  2164. DWORD dMM4[2]; // Handle filter...
  2165. DWORD dMM5[2]; // Handle filter...
  2166. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  2167. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  // Seed the working volumes from the caller; vfVFract holds each volume shifted
  // left by 8 so that the volume ramp can accumulate fractional steps.
  2168. for (dwI = 0; dwI < dwBufferCount; dwI++)
  2169. {
  2170. vfVolume[dwI] = vfLastVolume[dwI];
  2171. vfVFract[dwI] = vfVolume[dwI] << 8;
  2172. }
  2173. #if 1 // {
  2174. DWORD l_nChannels = dwBufferCount;
  2175. DWORD a;
  2176. DWORD One_Channel_1, One_Channel_2; // Code address locations.
  2177. long l_lPrevPrevSample = m_lPrevPrevSample, l_lPrevSample = m_lPrevSample;
  2178. #ifdef USE_MMX_FILTERED // {
  2179. typedef __int64 QWORD;
  2180. QWORD OneMask = 0x0000000010001000;
  2181. QWORD fffMask = 0x00000fff00000fff;
  2182. QWORD ffffMask = 0x0000ffff0000ffff;
  2183. DWORD UseMmx;
  2184. DWORD MmxVolume[2];
  2185. int Use_MMX = m_sfMMXEnabled;
  // Choose the target of the later "jmp UseMmx": the scalar loop at $L43865,
  // unless MMX is enabled and exactly two buffers are being mixed, in which
  // case UseMmxLabel is used and mm0 (zero) / mm3 (OneMask) are preloaded.
  2186. _asm {
  2187. lea edi, $L43865
  2188. // Turned off
  2189. cmp Use_MMX, 0
  2190. je AssignMMXLabel
  2191. // != 2 channels
  2192. mov esi, DWORD PTR l_nChannels
  2193. cmp esi, 2
  2194. jne AssignMmxLabel
  2195. // Ok, init and use MMX
  2196. lea edi, UseMmxLabel
  2197. pxor mm0, mm0
  2198. movq mm3, QWORD PTR OneMask // 0, 0, 0x1000, 0x1000
  2199. AssignMmxLabel:
  2200. mov DWORD PTR UseMmx, edi
  2201. }
  2202. #endif // }
  // Compute One_Channel_1, the entry address into the unrolled volume-ramp
  // blocks ($L43851 onwards), so that only the last l_nChannels blocks execute.
  // More than 8 channels enters at the loop at $L44008; zero channels skips
  // straight to $L43860.
  2203. _asm {
  2204. mov edi, DWORD PTR l_nChannels
  2205. cmp edi, 8
  2206. jna Start1
  2207. lea esi, $L44008
  2208. jmp Do_One_Channel_2
  2209. // Put this code more than 127 bytes away from the references.
  // Saturation handler reached by "jo" from the unrolled mixing blocks.
  // Sign set after the overflowing add means the sum exceeded +32767.
  // edi holds the address of the next mixing block to resume at.
  2210. overflow_x:
  2211. js overflow_y
  2212. mov WORD PTR [esi+ebx*2], 0x8000
  2213. jmp edi
  2214. overflow_y:
  2215. mov WORD PTR [esi+ebx*2], 0x7fff
  2216. jmp edi
  2217. Start1:
  2218. test edi, edi
  2219. jne Start2
  2220. lea esi, $L43860
  2221. jmp Do_One_Channel_2
  2222. Start2:
  2223. lea eax, $L43851
  2224. lea edx, $L43853
  2225. sub edx, eax
  2226. mov esi, 8
  2227. sub esi, edi
  2228. imul esi, edx
  2229. add esi, eax
  2230. Do_One_Channel_2:
  2231. mov DWORD PTR One_Channel_1, esi
  2232. // Create second jump table location.
  // The block size pushed below stays on the stack for the whole mix: the
  // unrolled mixing blocks read it through [esp], and it is popped at
  // Exit_$L43841.
  2233. lea esi, $L43876
  2234. lea ecx, $L43880
  2235. sub ecx, esi
  2236. push ecx // Span between branches.
  2237. mov eax, 8
  2238. sub eax, DWORD PTR l_nChannels
  2239. jge Start3
  2240. lea ecx, $L44009
  2241. jmp Done_Do_Channel_2
  2242. Start3:
  2243. cmp eax, 8
  2244. jne Start4
  2245. lea ecx, $L43866
  2246. jmp Done_Do_Channel_2
  2247. Start4:
  2248. imul ecx, eax
  2249. add ecx, esi
  2250. Done_Do_Channel_2:
  2251. mov DWORD PTR One_Channel_2, ecx
  2252. mov ecx, DWORD PTR dwLength
  2253. xor ebx, ebx // dwI
  2254. test ecx, ecx
  2255. jbe Exit_$L43841
  2256. mov ecx, DWORD PTR ppBuffer
  2257. sub ecx, 4
  2258. // ecx == ppBuffer - 4
  2259. // ebx == dwI
  2260. // edi == l_nChannels
  // Top of the outer loop: wrap by pfLoopLength (or exit when it is zero) once
  // the position reaches pfSampleLength, then count down dwIncDelta and apply
  // the pitch, volume and filter ramps when it reaches zero.
  2261. $L44021:
  2262. mov edx, DWORD PTR pfSamplePos
  2263. cmp edx, DWORD PTR pfSampleLength
  2264. jl SHORT $L43842
  2265. mov eax, DWORD PTR pfLoopLength
  2266. test eax, eax
  2267. je Exit_$L43841
  2268. sub edx, eax
  2269. mov DWORD PTR pfSamplePos, edx
  2270. $L43842:
  2271. mov edx, DWORD PTR dwIncDelta
  2272. mov eax, DWORD PTR pfPFract
  2273. dec edx
  2274. mov DWORD PTR dwIncDelta, edx
  2275. jne $L43860
  2276. mov edx, DWORD PTR dwDeltaPeriod
  2277. mov esi, DWORD PTR pfDeltaPitch
  2278. mov DWORD PTR dwIncDelta, edx
  2279. add eax, esi
  2280. mov DWORD PTR pfPFract, eax
  2281. sar eax, 8
  2282. mov DWORD PTR pfPitch, eax
  2283. mov esi, DWORD PTR vfDeltaVolume
  2284. jmp One_Channel_1
  2285. // ONE_CHANNEL
  2286. // vfVFract[dwJ - 1] += vfDeltaVolume[dwJ - 1];
  2287. // vfVolume[dwJ - 1] = vfVFract [dwJ - 1] >> 8;
  // Loop form of the volume ramp for channels above the eighth; it then falls
  // through into all eight unrolled blocks below.
  2288. $L44008:
  2289. mov DWORD PTR dwI, ebx
  2290. lea ebx, DWORD PTR [edi*4-4]
  2291. add edi, -8 ; fffffff8H
  2292. $L43849:
  2293. lea eax, DWORD PTR vfVFract[ebx]
  2294. mov ecx, DWORD PTR [esi+ebx]
  2295. sub ebx, 4
  2296. add DWORD PTR [eax], ecx
  2297. mov eax, DWORD PTR [eax]
  2298. sar eax, 8
  2299. mov DWORD PTR vfVolume[ebx+4], eax
  2300. dec edi
  2301. jne SHORT $L43849
  2302. mov edi, DWORD PTR l_nChannels
  2303. mov ecx, DWORD PTR ppBuffer
  2304. mov ebx, DWORD PTR dwI
  2305. sub ecx, 4
  2306. }
  2307. #define ONE_CHANNEL_VOLUME(dwJ) \
  2308. _asm { mov eax, DWORD PTR vfVFract[(dwJ-1)*4] }; \
  2309. _asm { add eax, DWORD PTR [esi+(dwJ-1)*4] }; \
  2310. _asm { mov DWORD PTR vfVFract[(dwJ-1)*4], eax }; \
  2311. _asm { sar eax, 8 }; \
  2312. _asm { lea edx, vfVolume }; \
  2313. _asm { mov DWORD PTR [edx + (dwJ-1)*4], eax };
  2314. //-------------------------------------------------------------------------
  2315. //
  2316. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  2317. //
  2318. // This lovely hack makes sure that all the instructions
  2319. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  2320. // by calculating instruction offsets based on having 8 identical blocks.
  2321. //
  2322. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  2323. //
  2324. //-------------------------------------------------------------------------
  2325. #define ONE_CHANNEL_VOLUME_1 \
  2326. _asm { mov eax, DWORD PTR vfVFract[0] }; \
  2327. _asm _emit 0x03 _asm _emit 0x46 _asm _emit 0x00 \
  2328. _asm { mov DWORD PTR vfVFract[0], eax }; \
  2329. _asm { sar eax, 8 }; \
  2330. _asm { lea edx, vfVolume }; \
  2331. _asm _emit 0x89 _asm _emit 0x42 _asm _emit 0x00
  // Unrolled volume-ramp blocks for channels 8 down to 1. The distance between
  // $L43851 and $L43853 is the per-block size used to compute One_Channel_1.
  2332. $L43851:
  2333. ONE_CHANNEL_VOLUME(8)
  2334. $L43853:
  2335. ONE_CHANNEL_VOLUME(7);
  2336. ONE_CHANNEL_VOLUME(6);
  2337. ONE_CHANNEL_VOLUME(5);
  2338. ONE_CHANNEL_VOLUME(4);
  2339. ONE_CHANNEL_VOLUME(3);
  2340. ONE_CHANNEL_VOLUME(2);
  2341. ONE_CHANNEL_VOLUME_1;
  2342. #undef ONE_CHANNEL_VOLUME
  2343. #undef ONE_CHANNEL_VOLUME_1
  2344. _asm {
  2345. // cfK += cfdK;
  2346. // cfB1 += cfdB1;
  2347. // cfB2 += cfdB2;
  2348. mov eax, DWORD PTR cfdK
  2349. mov edx, DWORD PTR cfdB1
  2350. mov esi, DWORD PTR cfdB2
  2351. add DWORD PTR cfK, eax
  2352. add DWORD PTR cfB1, edx
  2353. add DWORD PTR cfB2, esi
  2354. $L43860:
  2355. ; 304 : DWORD a = (pfSampleLength - pfSamplePos + pfPitch - 1) / pfPitch;
  2356. mov esi, DWORD PTR pfPitch
  2357. mov eax, DWORD PTR pfSampleLength
  2358. dec esi
  2359. sub eax, DWORD PTR pfSamplePos
  2360. add eax, esi
  2361. cdq
  2362. idiv DWORD PTR pfPitch
  // Clamp the run length in eax to the samples still to be mixed (dwLength - dwI)
  // and then to dwIncDelta, so the inner loop needs no end-of-wave or ramp checks.
  2363. mov edx, DWORD PTR dwLength
  2364. sub edx, ebx
  2365. cmp edx, eax
  2366. jae SHORT $L43863
  2367. mov eax, edx
  2368. $L43863:
  2369. mov edx, DWORD PTR dwIncDelta
  2370. cmp edx, eax
  2371. jae SHORT $L43864
  2372. mov eax, edx
  2373. $L43864:
  2374. ; 309 :
  2375. ; 310 : for (a += dwI; dwI < a; dwI++)
  // dwIncDelta = dwIncDelta + 1 - run length; a = dwI + run length.
  2376. inc edx
  2377. sub edx, eax
  2378. add eax, ebx
  2379. mov DWORD PTR dwIncDelta, edx
  2380. cmp ebx, eax
  2381. mov DWORD PTR a, eax
  2382. jae $L43867
  2383. #ifdef USE_MMX_FILTERED // {
  2384. // Try to handle two positions at once.
  2385. lea edx, [eax-3]
  2386. cmp ebx, edx
  2387. jge $L43865
  2388. jmp UseMmx
  2389. UseMmxLabel:
  2390. // Ok, there are at least two samples to handle.
  2391. movd mm1, DWORD PTR pfPitch
  2392. psllq mm1, 32 // Pitch, 0
  2393. movd mm2, DWORD PTR pfSamplePos
  2394. punpckldq mm2, mm2 // SamplePos, SamplePos
  2395. paddd mm2, mm1 // SamplePos + Pitch, SamplePos
  2396. punpckhdq mm1, mm1 // Pitch, Pitch
  2397. pslld mm1, 1 // Pitch * 2, Pitch * 2
  2398. mov eax, DWORD PTR pcWave
  2399. #if 0
  2400. movq mm4, QWORD PTR vfVolume
  2401. pand mm4, QWORD PTR ffffMask
  2402. movq mm5, mm4
  2403. pslld mm4, 16
  2404. por mm4, mm5
  2405. psllw mm4, 3
  2406. movq QWORD PTR MmxVolume, mm4
  2407. #endif
  2408. TwoAtATime:
  2409. ; dwPosition = pfSamplePos >> 12;
  2410. ; dwFract = pfSamplePos & 0xFFF;
  2411. ; pfSamplePos += pfPitch;
  2412. movq mm4, mm2
  2413. psrad mm4, 12 // dwPosition + Pitch, dwPosition
  2414. ; lA = (long) pcWave[dwPosition];
  2415. ; lMInterp = (((pcWave[dwPosition+1] - lA) * (dwFract)) >> 12) + lA;
  2416. movd esi, mm4 // dwPosition
  2417. punpckhdq mm4, mm4 // dwPosition ( + Pitch ) = dwPos2
  2418. movd mm5, DWORD PTR [eax+esi*2] // 0, 0, dwPosition + 1, dwPosition
  2419. // Instead for byte codes
  2420. // mov si, WORD PTR [eax+esi]
  2421. // movd mm6, esi
  2422. // punpcklbw mm5, mm6
  2423. // psarw mm5, 8
  2424. movd esi, mm4
  2425. movd mm4, DWORD PTR [eax+esi*2] // 0, 0, dwPos2 + 1, dwPos2
  2426. // Instead for byte codes
  2427. // mov si, WORD PTR [eax+esi]
  2428. // movd mm6, esi
  2429. // punpcklbw mm4, mm6
  2430. // psarw mm4, 8
  2431. // This code could be combined with code above, a bit.
  2432. punpckldq mm5, mm4 // dwPos2 + 1, dwPos2, dwPos1 + 1, dwPos1
  2433. movq mm4, mm2
  2434. pand mm4, QWORD PTR fffMask // dwFract + Pitch, dwFract
  2435. packssdw mm4, mm0
  2436. movq mm6, mm3
  2437. psubw mm6, mm4 // 0, 0, 1000 - dwFract + Pitch, 1000 - dwFract
  2438. punpcklwd mm6, mm4
  2439. paddd mm2, mm1 // Next iteration
  2440. pmaddwd mm6, mm5
  2441. #if 1 // {
  2442. psrad mm6, 12 // lMIntrep2, lMInterp
  2443. #if 1 // {
  2444. // eax, ebx, ecx, edx, esi are used. edi is free...
  // The filter runs in the integer registers: both interpolated samples are
  // spilled to dMM6, filtered one after the other, and reloaded into mm6.
  2445. push eax
  2446. push ecx
  2447. push edx
  2448. movq QWORD PTR dMM6, mm6
  // 1st sample
  2449. mov eax, DWORD PTR dMM6
  2450. imul DWORD PTR cfK // edx:eax
  2451. mov ecx, eax
  2452. mov eax, DWORD PTR l_lPrevPrevSample
  2453. mov edi, edx // esi:ecx
  2454. imul DWORD PTR cfB2
  2455. sub ecx, eax
  2456. mov eax, DWORD PTR l_lPrevSample
  2457. sbb edi, edx
  2458. mov DWORD PTR l_lPrevPrevSample, eax
  2459. imul DWORD PTR cfB1
  2460. add eax, ecx
  2461. adc edx, edi
  2462. //>>>>> MOD:PETCHEY
  2463. // shld eax, edx, 2
  2464. //>>>>> should be
  2465. shld edx, eax, 2
  2466. mov eax, edx
  2467. mov DWORD PTR dMM6, eax
  2468. mov DWORD PTR l_lPrevSample, eax
  2469. // 2nd sample
  2470. mov eax, DWORD PTR dMM6+4
  2471. imul DWORD PTR cfK // edx:eax
  2472. mov ecx, eax
  2473. mov eax, DWORD PTR l_lPrevPrevSample
  2474. mov edi, edx // esi:ecx
  2475. imul DWORD PTR cfB2
  2476. sub ecx, eax
  2477. mov eax, DWORD PTR l_lPrevSample
  2478. sbb edi, edx
  2479. mov DWORD PTR l_lPrevPrevSample, eax
  2480. imul DWORD PTR cfB1
  2481. add eax, ecx
  2482. adc edx, edi
  2483. //>>>>> MOD:PETCHEY
  2484. // shld eax, edx, 2
  2485. //>>>>> should be
  2486. shld edx, eax, 2
  2487. mov eax, edx
  2488. mov DWORD PTR dMM6+4, eax
  2489. mov DWORD PTR l_lPrevSample, eax
  2490. movq mm6, QWORD PTR dMM6
  2491. pop edx
  2492. pop ecx
  2493. pop eax
  2494. #endif // }
  2495. #define DO_32BIT_MULTIPLY
  2496. #ifndef DO_32BIT_MULTIPLY
  2497. movq mm5, QWORD PTR vfVolume // Volume2, Volume1
  2498. // pand mm5, QWORD PTR ffffMask // 16 bits only.
  2499. #endif
  2500. // pand mm6, QWORD PTR ffffMask
  2501. #ifndef DO_32BIT_MULTIPLY
  2502. movq mm4, mm5
  2503. #endif
  // CHANNEL 1: ecx is ppBuffer - 4, so [ecx+4] is ppBuffer[0].
  2504. mov esi, DWORD PTR [ecx+4]
  2505. #ifndef DO_32BIT_MULTIPLY
  2506. punpckldq mm4, mm4
  2507. #endif
  2508. #ifdef DO_32BIT_MULTIPLY
  2509. mov edi, DWORD PTR vfVolume
  2510. imul edi, DWORD PTR dMM6
  2511. sar edi, 13
  2512. mov DWORD PTR dMM4, edi
  2513. mov edi, DWORD PTR vfVolume
  2514. imul edi, DWORD PTR dMM6+4
  2515. sar edi, 13
  2516. mov DWORD PTR dMM4+4, edi
  2517. movq mm4, QWORD PTR dMM4
  2518. #else
  2519. pmaddwd mm4, mm6
  2520. psrad mm4, 13
  2521. #endif
  2522. packssdw mm4, mm0
  2523. movd mm7, DWORD PTR [esi+ebx*2]
  2524. paddsw mm7, mm4
  2525. movd DWORD PTR [esi+ebx*2], mm7
  2526. // CHANNEL 2
  2527. #ifndef DO_32BIT_MULTIPLY
  2528. punpckhdq mm5, mm5 // 0, Volume2, 0, Volume2
  2529. #endif
  2530. mov esi, DWORD PTR [ecx+8]
  2531. #ifdef DO_32BIT_MULTIPLY
  2532. mov edi, DWORD PTR vfVolume+4
  2533. imul edi, DWORD PTR dMM6
  2534. sar edi, 13
  2535. mov DWORD PTR dMM5, edi
  2536. mov edi, DWORD PTR vfVolume+4
  2537. imul edi, DWORD PTR dMM6+4
  2538. sar edi, 13
  2539. mov DWORD PTR dMM5+4, edi
  2540. movq mm5, QWORD PTR dMM5
  2541. #else
  2542. pmaddwd mm5, mm6
  2543. psrad mm5, 13
  2544. #endif
  2545. packssdw mm5, mm0
  2546. movd mm7, DWORD PTR [esi+ebx*2]
  2547. paddsw mm7, mm5
  2548. movd DWORD PTR [esi+ebx*2], mm7
  2549. #else // }{ There is noise here, probably due to the signed nature of the multiply.
  2550. // NOTE the filter is NOT implemented here....
  2551. psrad mm6, 12 // lMIntrep2, lMInterp
  2552. movq mm5, QWORD PTR MmxVolume
  2553. packssdw mm6, mm0
  2554. punpckldq mm6, mm6
  2555. pmulhw mm6, mm5
  2556. mov esi, DWORD PTR [ecx+4]
  2557. movd mm7, DWORD PTR [esi+ebx*2]
  2558. mov esi, DWORD PTR [ecx+8]
  2559. movd mm4, DWORD PTR [esi+ebx*2]
  2560. punpckldq mm4, mm7
  2561. paddsw mm4, mm6
  2562. movd DWORD PTR [esi+ebx*2], mm4
  2563. punpckhdq mm4, mm4
  2564. mov esi, DWORD PTR [ecx+4]
  2565. movd DWORD PTR [esi+ebx*2], mm4
  2566. #endif // }
  // Two output samples done; edx still holds a - 3, the bound set before the
  // jump to UseMmx. The scalar loop below finishes the rest of the run.
  2567. add ebx, 2
  2568. cmp ebx, edx
  2569. jb TwoAtATime
  2570. movd DWORD PTR pfSamplePos, mm2
  2571. #endif // }
  2572. $L43865:
  2573. ; dwPosition = pfSamplePos >> 12;
  2574. ; dwFract = pfSamplePos & 0xFFF;
  2575. ; pfSamplePos += pfPitch;
  2576. ; lA = (long) pcWave[dwPosition];
  2577. ; lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  2578. mov esi, DWORD PTR pfPitch
  2579. mov edx, DWORD PTR pfSamplePos
  2580. mov eax, DWORD PTR pcWave
  2581. mov edi, edx
  2582. add esi, edx
  2583. and edi, 4095
  2584. sar edx, 12
  2585. mov DWORD PTR pfSamplePos, esi
  2586. movsx esi, WORD PTR [eax+edx*2]
  2587. movsx eax, WORD PTR [eax+edx*2+2]
  2588. sub eax, esi
  2589. imul eax, edi
  2590. sar eax, 12
  2591. mov edi, One_Channel_2
  2592. // ebx, ecx, edx are used in switch branches
  2593. add eax, esi // lMInterp
  2594. #if 1
  2595. // lMInterp =
  2596. // MulDiv(lMInterp, cfK, (1 << 30))
  2597. // - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30))
  2598. // + MulDiv(m_lPrevSample, cfB1, (1 << 30))
  2599. push ecx
  2600. imul DWORD PTR cfK // edx:eax
  2601. mov ecx, eax
  2602. mov eax, DWORD PTR l_lPrevPrevSample
  2603. mov esi, edx // esi:ecx
  2604. imul DWORD PTR cfB2
  2605. sub ecx, eax
  2606. mov eax, DWORD PTR l_lPrevSample
  2607. sbb esi, edx
  2608. mov DWORD PTR l_lPrevPrevSample, eax
  2609. imul DWORD PTR cfB1
  2610. add eax, ecx
  2611. // adc esi, edx
  2612. adc edx, esi
  2613. pop ecx
  2614. // shrd eax, edx, 30
  2615. // mov esi,0x40000000
  2616. // idiv esi
  2617. //>>>>> MOD:PETCHEY
  2618. // shld eax, edx, 2
  2619. //>>>>> should be
  // Shifting the top two bits of eax into edx leaves bits 30..61 of the 64-bit
  // sum in edx, i.e. the sum shifted right by 30.
  2620. shld edx, eax, 2
  2621. mov eax, edx
  2622. #endif
  2623. //>>>>>>>>>>>> removed dp
  2624. #if 0
  2625. // if (lMInterp < -32767) lMInterp = -32767;
  2626. // else if (lMInterp > 32767) lMInterp = 32767;
  2627. cmp eax, -32767
  2628. jl Less_than
  2629. cmp eax, 32767
  2630. jg Greater_than
  2631. #endif
  2632. // m_lPrevPrevSample = m_lPrevSample;
  2633. // m_lPrevSample = lMInterp;
  2634. mov DWORD PTR l_lPrevSample, eax
  // edi was loaded with One_Channel_2 above: enter the mixing blocks at the
  // one matching the channel count.
  2635. jmp edi
  2636. //>>>>>>>>>>>> removed dp
  2637. #if 0
  2638. Less_than:
  2639. mov eax, -32767
  2640. mov DWORD PTR l_lPrevSample, eax
  2641. jmp edi
  2642. Greater_than:
  2643. mov eax, 32767
  2644. mov DWORD PTR l_lPrevSample, eax
  2645. jmp edi
  2646. #endif
  2647. // ONE_CHANNEL
  2648. // lM = lMInterp * vfVolume[dwJ - 1];
  2649. // lM >>= 13;
  2650. // ppBuffer[dwJ - 1][dwI] += (short) lM;
  2651. $L44009:
  2652. ; 342 : default:
  2653. ; 343 : for (dwJ = l_nChannels; dwJ > 8; dwJ--)
  2654. mov edi, DWORD PTR l_nChannels
  2655. // ecx ppBuffer
  2656. // eax lMInterp
  2657. // edi counter
  2658. // ebx dwI
  2659. $L43874:
  2660. mov edx, DWORD PTR vfVolume[edi*4-4]
  2661. mov esi, DWORD PTR [ecx+edi*4] // ppBuffer[dwJ - 1]
  2662. imul edx, eax
  2663. sar edx, 13
  2664. add WORD PTR [esi+ebx*2], dx
  // The mov instructions leave the flags from the add intact, so the "js"
  // below still tests the sign of the overflowed sum.
  2665. jno no_overflow
  2666. mov WORD PTR [esi+ebx*2], 0x7fff
  2667. js no_overflow
  2668. mov WORD PTR [esi+ebx*2], 0x8000
  2669. no_overflow:
  2670. dec edi
  2671. cmp edi, 8
  2672. jne SHORT $L43874
  2673. lea edi, $L43876
  2674. }
  2675. #define ONE_CHANNEL_VOLUME(dwJ) \
  2676. _asm { lea edx, vfVolume } \
  2677. _asm { mov edx, DWORD PTR [edx + (dwJ-1) * 4] } \
  2678. _asm { mov esi, DWORD PTR [ecx + (dwJ) * 4] } \
  2679. _asm { imul edx, eax } \
  2680. _asm { sar edx, 13 } \
  2681. _asm { add edi, [esp] } \
  2682. \
  2683. _asm { add WORD PTR [esi+ebx*2], dx } \
  2684. _asm { jo FAR overflow_x }
  2685. //-------------------------------------------------------------------------
  2686. //
  2687. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  2688. //
  2689. // This lovely hack makes sure that all the instructions
  2690. // are the same length for the case (dwJ - 1) == 0. Code depends on this
  2691. // by calculating instruction offsets based on having 8 identical blocks.
  2692. //
  2693. // ***** ***** ***** DO NOT CHANGE THIS! ***** ***** *****
  2694. //
  2695. //-------------------------------------------------------------------------
  2696. #define ONE_CHANNEL_VOLUME_1 \
  2697. _asm { lea edx, vfVolume } \
  2698. _asm _emit 0x8B _asm _emit 0x52 _asm _emit 0x00 \
  2699. _asm { mov esi, DWORD PTR [ecx + 4] } \
  2700. _asm { imul edx, eax } \
  2701. _asm { sar edx, 13 } \
  2702. _asm { add edi, [esp] } \
  2703. \
  2704. _asm { add WORD PTR [esi+ebx*2], dx } \
  2705. _asm { jo FAR overflow_x }
  // Unrolled mixing blocks for channels 8 down to 1. Each block advances edi by
  // the block size held at [esp], so overflow_x can resume at the next block.
  2706. $L43876:
  2707. ONE_CHANNEL_VOLUME(8);
  2708. $L43880:
  2709. ONE_CHANNEL_VOLUME(7);
  2710. ONE_CHANNEL_VOLUME(6);
  2711. ONE_CHANNEL_VOLUME(5);
  2712. ONE_CHANNEL_VOLUME(4);
  2713. ONE_CHANNEL_VOLUME(3);
  2714. ONE_CHANNEL_VOLUME(2);
  2715. ONE_CHANNEL_VOLUME_1;
  2716. #undef ONE_CHANNEL_VOLUME
  2717. #undef ONE_CHANNEL_VOLUME_1
  2718. $L43866:
  2719. _asm {
  2720. mov eax, DWORD PTR a
  2721. inc ebx
  2722. cmp ebx, eax
  2723. jb $L43865
  2724. mov edi, DWORD PTR l_nChannels
  2725. $L43867:
  2726. cmp ebx, DWORD PTR dwLength
  2727. jb $L44021
  2728. Exit_$L43841:
  // Discard the block size pushed while One_Channel_2 was being computed.
  2729. pop eax
  2730. mov DWORD PTR dwI, ebx
  // Execute emms only when the MMX path was selected at the top of the function.
  2731. #ifdef USE_MMX_FILTERED
  2732. mov edi, UseMmx
  2733. cmp edi, UseMmxLabel
  2734. jne NoMmxCleanupLabel
  2735. emms
  2736. NoMmxCleanupLabel:
  2737. #endif
  2738. }
  // Write the filter history kept in locals by the assembly back to the object.
  2739. m_lPrevPrevSample = l_lPrevPrevSample;
  2740. m_lPrevSample = l_lPrevSample;
  // C version of the same loop. It is not compiled while the "#if 1" above is
  // in effect. Unlike the assembly, it updates m_lPrevSample and
  // m_lPrevPrevSample directly.
  2741. #else // }{
  2742. for (dwI = 0; dwI < dwLength;)
  2743. {
  2744. if (pfSamplePos >= pfSampleLength)
  2745. {
  2746. if (pfLoopLength)
  2747. pfSamplePos -= pfLoopLength;
  2748. else
  2749. break;
  2750. }
  2751. dwIncDelta--;
  2752. if (!dwIncDelta)
  2753. {
  2754. dwIncDelta = dwDeltaPeriod;
  2755. pfPFract += pfDeltaPitch;
  2756. pfPitch = pfPFract >> 8;
  2757. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2758. {
  2759. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  2760. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  2761. }
  2762. cfK += cfdK;
  2763. cfB1 += cfdB1;
  2764. cfB2 += cfdB2;
  2765. }
  2766. dwPosition = pfSamplePos >> 12;
  2767. dwFract = pfSamplePos & 0xFFF;
  2768. pfSamplePos += pfPitch;
  2769. lA = (long) pcWave[dwPosition];
  2770. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  2771. // Filter
  2772. //
  2773. // z = k*s - b1*z1 - b2*z2
  2774. // We store the negative of b1 in the table, so we flip the sign again by
  2775. // adding here
  2776. //
  2777. lMInterp =
  2778. MulDiv(lMInterp, cfK, (1 << 30))
  2779. + MulDiv(m_lPrevSample, cfB1, (1 << 30))
  2780. - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
  2781. //>>>>>>>>>>>> removed dp
  2782. #if 0
  2783. if (lMInterp < -32767) lMInterp = -32767;
  2784. else if (lMInterp > 32767) lMInterp = 32767;
  2785. #endif
  2786. m_lPrevPrevSample = m_lPrevSample;
  2787. m_lPrevSample = lMInterp;
  2788. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2789. {
  2790. lM = lMInterp * vfVolume[dwJ];
  2791. lM >>= 13; // Signal bumps up to 12 bits.
  2792. // Keep this around so we can use it to generate new assembly code (see below...)
  2793. #if 1
  2794. {
  2795. long x = ppBuffer[dwJ][dwI];
  2796. x += lM;
  2797. if (x != (short)x) {
  2798. if (x > 32767) x = 32767;
  2799. else x = -32768;
  2800. }
  2801. ppBuffer[dwJ][dwI] = (short)x;
  2802. }
  2803. #else
  2804. ppBuffer[dwJ][dwI] += (short) lM;
  2805. _asm{jno no_oflow}
  2806. ppBuffer[dwJ][dwI] = 0x7fff;
  2807. _asm{js no_oflow}
  2808. ppBuffer[dwJ][dwI] = (short) 0x8000;
  2809. no_oflow: ;
  2810. #endif
  2811. }
  2812. dwI++;
  2813. }
  2814. #endif // }
  // Persist pitch, position, filter coefficients and volumes for the next call.
  2815. m_pfLastPitch = pfPitch;
  2816. m_pfLastSample = pfSamplePos;
  2817. m_cfLastK = cfK;
  2818. m_cfLastB1 = cfB1;
  2819. m_cfLastB2 = cfB2;
  2820. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2821. {
  2822. vfLastVolume[dwJ] = vfVolume[dwJ];
  2823. }
  2824. return (dwI);
  2825. }
  2826. #else // }{ all assembly code
  2827. DWORD CDigitalAudio::MixMulti8(
  2828. short *ppBuffer[],
  2829. DWORD dwBufferCount,
  2830. DWORD dwLength,
  2831. DWORD dwDeltaPeriod,
  2832. VFRACT vfDeltaVolume[],
  2833. VFRACT vfLastVolume[],
  2834. PFRACT pfDeltaPitch,
  2835. PFRACT pfSampleLength,
  2836. PFRACT pfLoopLength)
  2837. {
  2838. DWORD dwI, dwJ;
  2839. DWORD dwPosition;
  2840. long lMInterp;
  2841. long lM;
  2842. long lA;//, lB;
  2843. DWORD dwIncDelta = dwDeltaPeriod;
  2844. VFRACT dwFract;
  2845. char * pcWave = (char *) m_pnWave;
  2846. PFRACT pfSamplePos = m_pfLastSample;
  2847. PFRACT pfPitch = m_pfLastPitch;
  2848. PFRACT pfPFract = pfPitch << 8;
  2849. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  2850. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  2851. for (dwI = 0; dwI < dwBufferCount; dwI++)
  2852. {
  2853. vfVolume[dwI] = vfLastVolume[dwI];
  2854. vfVFract[dwI] = vfVolume[dwI] << 8;
  2855. }
  2856. for (dwI = 0; dwI < dwLength; )
  2857. {
  2858. if (pfSamplePos >= pfSampleLength)
  2859. {
  2860. if (pfLoopLength)
  2861. pfSamplePos -= pfLoopLength;
  2862. else
  2863. break;
  2864. }
  2865. dwIncDelta--;
  2866. if (!dwIncDelta)
  2867. {
  2868. dwIncDelta = dwDeltaPeriod;
  2869. pfPFract += pfDeltaPitch;
  2870. pfPitch = pfPFract >> 8;
  2871. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2872. {
  2873. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  2874. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  2875. }
  2876. }
  2877. dwPosition = pfSamplePos >> 12;
  2878. dwFract = pfSamplePos & 0xFFF;
  2879. pfSamplePos += pfPitch;
  2880. lMInterp = pcWave[dwPosition]; // pcWave
  2881. lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
  2882. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2883. {
  2884. lM = lMInterp * vfVolume[dwJ];
  2885. lM >>= 5;
  2886. // Keep this around so we can use it to generate new assembly code (see below...)
  2887. #if 1
  2888. {
  2889. long x = ppBuffer[dwJ][dwI];
  2890. x += lM;
  2891. if (x != (short)x) {
  2892. if (x > 32767) x = 32767;
  2893. else x = -32768;
  2894. }
  2895. ppBuffer[dwJ][dwI] = (short)x;
  2896. }
  2897. #else
  2898. ppBuffer[dwJ][dwI] += (short) lM;
  2899. #ifdef i386
  2900. _asm{jno no_oflow}
  2901. ppBuffer[dwJ][dwI] = 0x7fff;
  2902. _asm{js no_oflow}
  2903. ppBuffer[dwJ][dwI] = (short) 0x8000;
  2904. no_oflow: ;
  2905. #endif
  2906. #endif
  2907. }
  2908. dwI++;
  2909. }
  2910. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2911. {
  2912. vfLastVolume[dwJ] = vfVolume[dwJ];
  2913. }
  2914. m_pfLastPitch = pfPitch;
  2915. m_pfLastSample = pfSamplePos;
  2916. return (dwI);
  2917. }
  2918. DWORD CDigitalAudio::MixMulti8Filter(
  2919. short *ppBuffer[],
  2920. DWORD dwBufferCount,
  2921. DWORD dwLength,
  2922. DWORD dwDeltaPeriod,
  2923. VFRACT vfDeltaVolume[],
  2924. VFRACT vfLastVolume[],
  2925. PFRACT pfDeltaPitch,
  2926. PFRACT pfSampleLength,
  2927. PFRACT pfLoopLength,
  2928. COEFF cfdK,
  2929. COEFF cfdB1,
  2930. COEFF cfdB2)
  2931. {
  2932. DWORD dwI, dwJ;
  2933. DWORD dwPosition;
  2934. long lMInterp;
  2935. long lM;
  2936. DWORD dwIncDelta = dwDeltaPeriod;
  2937. VFRACT dwFract;
  2938. char * pcWave = (char *) m_pnWave;
  2939. PFRACT pfSamplePos = m_pfLastSample;
  2940. PFRACT pfPitch = m_pfLastPitch;
  2941. PFRACT pfPFract = pfPitch << 8;
  2942. COEFF cfK = m_cfLastK;
  2943. COEFF cfB1 = m_cfLastB1;
  2944. COEFF cfB2 = m_cfLastB2;
  2945. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  2946. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  2947. DWORD dMM6[2];
  2948. for (dwI = 0; dwI < dwBufferCount; dwI++)
  2949. {
  2950. vfVolume[dwI] = vfLastVolume[dwI];
  2951. vfVFract[dwI] = vfVolume[dwI] << 8;
  2952. }
  2953. for (dwI = 0; dwI < dwLength; )
  2954. {
  2955. if (pfSamplePos >= pfSampleLength)
  2956. {
  2957. if (pfLoopLength)
  2958. pfSamplePos -= pfLoopLength;
  2959. else
  2960. break;
  2961. }
  2962. dwIncDelta--;
  2963. if (!dwIncDelta)
  2964. {
  2965. dwIncDelta = dwDeltaPeriod;
  2966. pfPFract += pfDeltaPitch;
  2967. pfPitch = pfPFract >> 8;
  2968. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2969. {
  2970. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  2971. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  2972. }
  2973. cfK += cfdK;
  2974. cfB1 += cfdB1;
  2975. cfB2 += cfdB2;
  2976. }
  2977. dwPosition = pfSamplePos >> 12;
  2978. dwFract = pfSamplePos & 0xFFF;
  2979. pfSamplePos += pfPitch;
  2980. lMInterp = pcWave[dwPosition]; // pcWave
  2981. lMInterp += ((pcWave[dwPosition + 1] - lMInterp) * dwFract) >> 12;
  2982. // Filter
  2983. //
  2984. lMInterp =
  2985. MulDiv(lMInterp, cfK, (1 << 30))
  2986. - MulDiv(m_lPrevSample, cfB1, (1 << 30))
  2987. + MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
  2988. m_lPrevPrevSample = m_lPrevSample;
  2989. m_lPrevSample = lMInterp;
  2990. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  2991. {
  2992. lM = lMInterp * vfVolume[dwJ];
  2993. lM >>= 5;
  2994. // Keep this around so we can use it to generate new assembly code (see below...)
  2995. #if 1
  2996. {
  2997. long x = ppBuffer[dwJ][dwI];
  2998. x += lM;
  2999. if (x != (short)x) {
  3000. if (x > 32767) x = 32767;
  3001. else x = -32768;
  3002. }
  3003. ppBuffer[dwJ][dwI] = (short)x;
  3004. }
  3005. #else
  3006. ppBuffer[dwJ][dwI] += (short) lM;
  3007. #ifdef i386
  3008. _asm{jno no_oflow}
  3009. ppBuffer[dwJ][dwI] = 0x7fff;
  3010. _asm{js no_oflow}
  3011. ppBuffer[dwJ][dwI] = (short) 0x8000;
  3012. no_oflow: ;
  3013. #endif
  3014. #endif
  3015. }
  3016. dwI++;
  3017. }
  3018. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3019. {
  3020. vfLastVolume[dwJ] = vfVolume[dwJ];
  3021. }
  3022. m_pfLastPitch = pfPitch;
  3023. m_pfLastSample = pfSamplePos;
  3024. return (dwI);
  3025. }
  3026. DWORD CDigitalAudio::MixMulti16(
  3027. short *ppBuffer[],
  3028. DWORD dwBufferCount,
  3029. DWORD dwLength,
  3030. DWORD dwDeltaPeriod,
  3031. VFRACT vfDeltaVolume[],
  3032. VFRACT vfLastVolume[],
  3033. PFRACT pfDeltaPitch,
  3034. PFRACT pfSampleLength,
  3035. PFRACT pfLoopLength)
  3036. {
  3037. DWORD dwI = 0;
  3038. DWORD dwJ = 0;
  3039. DWORD dwPosition = 0;
  3040. long lA = 0;//, lB;
  3041. long lM = 0;
  3042. long lMInterp = 0;
  3043. DWORD dwIncDelta = dwDeltaPeriod;
  3044. VFRACT dwFract;
  3045. short * pcWave = m_pnWave;
  3046. PFRACT pfSamplePos = m_pfLastSample;
  3047. PFRACT pfPitch = m_pfLastPitch;
  3048. PFRACT pfPFract = pfPitch << 8;
  3049. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  3050. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  3051. for (dwI = 0; dwI < dwBufferCount; dwI++)
  3052. {
  3053. vfVolume[dwI] = vfLastVolume[dwI];
  3054. vfVFract[dwI] = vfVolume[dwI] << 8;
  3055. }
  3056. for (dwI = 0; dwI < dwLength;)
  3057. {
  3058. if (pfSamplePos >= pfSampleLength)
  3059. {
  3060. if (pfLoopLength)
  3061. pfSamplePos -= pfLoopLength;
  3062. else
  3063. break;
  3064. }
  3065. dwIncDelta--;
  3066. if (!dwIncDelta)
  3067. {
  3068. dwIncDelta = dwDeltaPeriod;
  3069. pfPFract += pfDeltaPitch;
  3070. pfPitch = pfPFract >> 8;
  3071. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3072. {
  3073. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  3074. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  3075. }
  3076. }
  3077. dwPosition = pfSamplePos >> 12;
  3078. dwFract = pfSamplePos & 0xFFF;
  3079. pfSamplePos += pfPitch;
  3080. lA = (long) pcWave[dwPosition];
  3081. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  3082. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3083. {
  3084. lM = lMInterp * vfVolume[dwJ];
  3085. lM >>= 13; // Signal bumps up to 12 bits.
  3086. // Keep this around so we can use it to generate new assembly code (see below...)
  3087. #if 1
  3088. {
  3089. long x = ppBuffer[dwJ][dwI];
  3090. x += lM;
  3091. if (x != (short)x) {
  3092. if (x > 32767) x = 32767;
  3093. else x = -32768;
  3094. }
  3095. ppBuffer[dwJ][dwI] = (short)x;
  3096. }
  3097. #else
  3098. ppBuffer[dwJ][dwI] += (short) lM;
  3099. #ifdef i386
  3100. _asm{jno no_oflow}
  3101. ppBuffer[dwJ][dwI] = 0x7fff;
  3102. _asm{js no_oflow}
  3103. ppBuffer[dwJ][dwI] = (short) 0x8000;
  3104. no_oflow: ;
  3105. #endif
  3106. #endif
  3107. }
  3108. dwI++;
  3109. }
  3110. m_pfLastPitch = pfPitch;
  3111. m_pfLastSample = pfSamplePos;
  3112. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3113. {
  3114. vfLastVolume[dwJ] = vfVolume[dwJ];
  3115. }
  3116. return (dwI);
  3117. }
  3118. DWORD CDigitalAudio::MixMulti16Filter(
  3119. short *ppBuffer[],
  3120. DWORD dwBufferCount,
  3121. DWORD dwLength,
  3122. DWORD dwDeltaPeriod,
  3123. VFRACT vfDeltaVolume[],
  3124. VFRACT vfLastVolume[],
  3125. PFRACT pfDeltaPitch,
  3126. PFRACT pfSampleLength,
  3127. PFRACT pfLoopLength,
  3128. COEFF cfdK,
  3129. COEFF cfdB1,
  3130. COEFF cfdB2)
  3131. {
  3132. DWORD dwI, dwJ;
  3133. DWORD dwPosition;
  3134. long lA;//, lB;
  3135. long lM;
  3136. long lMInterp;
  3137. DWORD dwIncDelta = dwDeltaPeriod;
  3138. VFRACT dwFract;
  3139. short * pcWave = m_pnWave;
  3140. PFRACT pfSamplePos = m_pfLastSample;
  3141. PFRACT pfPitch = m_pfLastPitch;
  3142. PFRACT pfPFract = pfPitch << 8;
  3143. COEFF cfK = m_cfLastK;
  3144. COEFF cfB1 = m_cfLastB1;
  3145. COEFF cfB2 = m_cfLastB2;
  3146. DWORD dMM6[2]; // Handle filter...
  3147. VFRACT vfVolume[MAX_DAUD_CHAN]; // = m_vfLastLVolume;
  3148. VFRACT vfVFract[MAX_DAUD_CHAN]; // = vfVolume << 8; // Keep high res version around.
  3149. for (dwI = 0; dwI < dwBufferCount; dwI++)
  3150. {
  3151. vfVolume[dwI] = vfLastVolume[dwI];
  3152. vfVFract[dwI] = vfVolume[dwI] << 8;
  3153. }
  3154. for (dwI = 0; dwI < dwLength;)
  3155. {
  3156. if (pfSamplePos >= pfSampleLength)
  3157. {
  3158. if (pfLoopLength)
  3159. pfSamplePos -= pfLoopLength;
  3160. else
  3161. break;
  3162. }
  3163. dwIncDelta--;
  3164. if (!dwIncDelta)
  3165. {
  3166. dwIncDelta = dwDeltaPeriod;
  3167. pfPFract += pfDeltaPitch;
  3168. pfPitch = pfPFract >> 8;
  3169. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3170. {
  3171. vfVFract[dwJ] += vfDeltaVolume[dwJ];
  3172. vfVolume[dwJ] = vfVFract[dwJ] >> 8;
  3173. }
  3174. cfK += cfdK;
  3175. cfB1 += cfdB1;
  3176. cfB2 += cfdB2;
  3177. }
  3178. dwPosition = pfSamplePos >> 12;
  3179. dwFract = pfSamplePos & 0xFFF;
  3180. pfSamplePos += pfPitch;
  3181. lA = (long) pcWave[dwPosition];
  3182. lMInterp = (((pcWave[dwPosition+1] - lA) * dwFract) >> 12) + lA;
  3183. // Filter
  3184. //
  3185. // z = k*s - b1*z1 - b2*b2
  3186. // We store the negative of b1 in the table, so we flip the sign again by
  3187. // adding here
  3188. //
  3189. lMInterp =
  3190. MulDiv(lMInterp, cfK, (1 << 30))
  3191. + MulDiv(m_lPrevSample, cfB1, (1 << 30))
  3192. - MulDiv(m_lPrevPrevSample, cfB2, (1 << 30));
  3193. //>>>>>>>>>>>> removed dp
  3194. #if 0
  3195. if (lMInterp < -32767) lMInterp = -32767;
  3196. else if (lMInterp > 32767) lMInterp = 32767;
  3197. #endif
  3198. m_lPrevPrevSample = m_lPrevSample;
  3199. m_lPrevSample = lMInterp;
  3200. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3201. {
  3202. lM = lMInterp * vfVolume[dwJ];
  3203. lM >>= 13; // Signal bumps up to 12 bits.
  3204. // Keep this around so we can use it to generate new assembly code (see below...)
  3205. #if 1
  3206. {
  3207. long x = ppBuffer[dwJ][dwI];
  3208. x += lM;
  3209. if (x != (short)x) {
  3210. if (x > 32767) x = 32767;
  3211. else x = -32768;
  3212. }
  3213. ppBuffer[dwJ][dwI] = (short)x;
  3214. }
  3215. #else
  3216. ppBuffer[dwJ][dwI] += (short) lM;
  3217. #ifdef i386
  3218. _asm{jno no_oflow}
  3219. ppBuffer[dwJ][dwI] = 0x7fff;
  3220. _asm{js no_oflow}
  3221. ppBuffer[dwJ][dwI] = (short) 0x8000;
  3222. no_oflow: ;
  3223. #endif
  3224. #endif
  3225. }
  3226. dwI++;
  3227. }
  3228. m_pfLastPitch = pfPitch;
  3229. m_pfLastSample = pfSamplePos;
  3230. m_cfLastK = cfK;
  3231. m_cfLastB1 = cfB1;
  3232. m_cfLastB2 = cfB2;
  3233. for (dwJ = 0; dwJ < dwBufferCount; dwJ++)
  3234. {
  3235. vfLastVolume[dwJ] = vfVolume[dwJ];
  3236. }
  3237. return (dwI);
  3238. }
  3239. #endif // }