Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

481 lines
16 KiB

  1. //========= Copyright © Valve Corporation, All rights reserved. ============//
  2. #include "snd_ps3_mp3dec.h"
  3. #include "vjobs/root.h"
  4. #include "filesystem_engine.h"
  5. #include "filesystem.h"
  6. #include <cell/mstream.h>
  7. #include "Mp3DecLibPpu.h"
  8. #include "mp3declib.h"
  9. extern IVJobs * g_pVJobs;
  10. Mp3DecMgr g_mp3dec[NUMBER_OF_MP3_DECODER_SLOTS];
  11. void Mp3DecMgr::Init()
  12. {
  13. g_pVJobs->Register( this );
  14. }
  15. #if 0 // def _DEBUG
  16. #undef Assert
  17. #define Assert(X) do{if( !(X) ) {Msg("Assertion failed\n%s:%d\n%s\n", __FILE__, __LINE__, #X); DebuggerBreak();};}while(0)
  18. #endif
  19. void Mp3DecMgr::Shutdown()
  20. {
  21. Finish();
  22. g_pVJobs->Unregister( this );
  23. }
  24. void Mp3DecMgr::OnVjobsInit() // gets called after m_pRoot was created and assigned
  25. {
  26. COMPILE_TIME_ASSERT( !( ( JOBLET_COUNT - 1 ) & JOBLET_COUNT ) ); // JOBLET_COUNT must be a power of 2
  27. V_memset( &m_jobWorker, 0, sizeof( m_jobWorker ) );
  28. V_memset( m_joblets, 0, sizeof( m_joblets ) );
  29. m_nMaxSpuWorkers = 1; // Use only one SPU at a time (that way each job is implicitly dependent on the previous jobs to be completed).
  30. m_jobWorker.header = *m_pRoot->m_pJobMp3Dec;
  31. m_jobWorker.header.sizeScratch = 114 * 1024 / 16;
  32. m_jobWorker.header.sizeInOrInOut = job_mp3dec::IOBUFFER_SIZE;
  33. job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
  34. m_pWorkerParams = pParams;
  35. m_nDecoderSize = 0;
  36. m_nAllocatedNotKicked = 0;
  37. int nError = cellMP3IntegratedDecoderGetRequiredSize( &m_nDecoderSize );
  38. if( nError )
  39. {
  40. Warning( "cannot initialize mp3 decoding, error %d\n", nError );
  41. }
  42. pParams->m_eaDecoder = MemAlloc_AllocAligned( m_nDecoderSize, 128 );
  43. pParams->m_eaJoblets = m_joblets;
  44. cellMP3IntegratedDecoderInit( pParams->m_eaDecoder, m_nDecoderSize );
  45. m_nDecoderCrc = CRC32_ProcessSingleBuffer( pParams->m_eaDecoder, m_nDecoderSize );
  46. if( CommandLine()->FindParm( "-msftest" ) )
  47. {
  48. const char * pTestFiles[] = {
  49. "amb_muffled_lo_mach_14.mp3",
  50. "amb_muffled_lo_mach_15.mp3",
  51. "amb_muffled_lo_mach_17.mp3",
  52. "portal_4000_degrees_kelvin.msf",
  53. "portal_android_hell.msf",
  54. "portal_no_cake_for_you.msf",
  55. "portal_party_escort.msf",
  56. "portal_procedural_jiggle_bone.msf",
  57. "portal_self_esteem_fund.msf",
  58. "portal_still_alive.msf",
  59. "portal_stop_what_you_are_doing.msf",
  60. "portal_subject_name_here.msf",
  61. "portal_taste_of_blood.msf",
  62. "portal_you_cant_escape_you_know.msf",
  63. "portal_youre_not_a_good_person.msf",
  64. "error.mps",
  65. "amb_metal_imp_warehouse_39.mps",
  66. "aa2.msf",
  67. "aa1.msf"
  68. };
  69. for( int i = 0; i < ARRAYSIZE( pTestFiles ); i++ )
  70. {
  71. const char * pTestFile = pTestFiles[i];
  72. //g_pFileSystem->FileExists( pTestFile );
  73. //StartMsfTest( pTestFile, 0 );
  74. //StartMsfTest( pTestFile, "_skip1_noctx.wav", 3 );
  75. StartMsfTest( pTestFile, "_dec.wav", 1 );
  76. //StartMsfTest( pTestFile, "_ctx0.wav", 1 );
  77. }
  78. }
  79. }
  80. // This function skips ID3 tag version2.x
  81. uint8* SkipId3Tag( uint8 * pMp3header )
  82. {
  83. uint32 tmp;
  84. uint32 size=0;
  85. if ( (pMp3header[0]!='I') ||
  86. (pMp3header[1]!='D') ||
  87. (pMp3header[2]!='3')
  88. )
  89. {
  90. Msg( "ID3 tag v2.x not found\n" );
  91. while( *pMp3header != 0xFF || ( pMp3header[1] & 0xE0 ) != 0xE0 )
  92. ++pMp3header; // sync up with mp3 bitstream
  93. return pMp3header; // ID3 tag not found, these are probably MP3 frames going right here
  94. }
  95. for(uint i=0;i<4;i++) {
  96. tmp = (pMp3header[i+6] & 0x7f);
  97. tmp<<=(7*(3-i));
  98. size|=tmp;
  99. }
  100. // skip 10 bytes of the header, and the size is the size of the data after the header (the tag)
  101. return pMp3header + 10 + size;
  102. }
  103. void ValidateMp3( uint8 * pMp3Frames, uint8 * pMp3FramesEnd )
  104. {
  105. uint nPadding[2] = {0,0};
  106. for( uint8* p = pMp3Frames; p < pMp3FramesEnd; )
  107. {
  108. Mp3FrameHeader * pHdr = ( Mp3FrameHeader * )p;
  109. Assert( pHdr->CheckSync() );
  110. Mp3FrameHeader *pNext = ( Mp3FrameHeader * )( p + pHdr->CorrectFrameLength( pMp3FramesEnd ) );
  111. if( uintp( pNext + 1 ) >= uintp( pMp3FramesEnd ) )
  112. break;
  113. Assert( pNext->CheckSync() );
  114. nPadding[pHdr->GetPadding()]++;
  115. p = (uint8*)pNext;
  116. }
  117. Msg( "MP3 validation: %d padded, %d unpadded\n", nPadding[1], nPadding[0] );
  118. }
  119. void Mp3DecMgr::StartMsfTest( const char * pInputFile, const char *pExt, int nMode )
  120. {
  121. FileHandle_t fh = g_pFileSystem->OpenEx( pInputFile, "rb", FSOPEN_NEVERINPACK );
  122. if( fh == FILESYSTEM_INVALID_HANDLE )
  123. return;
  124. CUtlBuffer msf;
  125. if( !g_pFileSystem->ReadToBuffer( fh, msf ) )
  126. {
  127. Warning("Cannot load test msf file\n");
  128. return;
  129. }
  130. g_pFileSystem->Close( fh );
  131. uint8 * pMp3Frames = (uint8*)msf.Base(), *pMp3FramesEnd = pMp3Frames + msf.Size();
  132. {
  133. CellMSMSFHeader * pMsfHeader = (CellMSMSFHeader *)msf.Base();
  134. if( pMsfHeader->header[0] == 'M' && pMsfHeader->header[1] == 'S' && pMsfHeader->header[2] == 'F' )
  135. {
  136. if( pMsfHeader->compressionType != CELL_MS_MP3 )
  137. {
  138. Warning("Invalid compression type %d\n", pMsfHeader->compressionType );
  139. }
  140. // one of the samples comments that 0x10 is the bit responsible for -loop option at the time of compilation. Documentation states "bit 4" , so it means all the bits in documentation are little-endian.
  141. // See MSWrapResource.cpp : 173 (romaji) MSF fairu sakuseiji ni -loop wotsuketakadoukano handan
  142. Msg( "Testing %d-channel MP3 @%dHz, %d loops %s %s %s\n", pMsfHeader->channels, pMsfHeader->sampleRate, pMsfHeader->miscInfo & 0xF, pMsfHeader->miscInfo & 0x10 ? "-loop":"(no -loop)", pMsfHeader->miscInfo & 0x20 ? "VBR":"CBR", pMsfHeader->miscInfo & 0x40 ? "joint stereo":"" );
  143. pMp3Frames = (uint8*)( pMsfHeader + 1 );
  144. }
  145. else
  146. {
  147. pMp3Frames = SkipId3Tag( (uint8*)msf.Base() );
  148. }
  149. }
  150. char outputFile[256];
  151. V_snprintf( outputFile, sizeof( outputFile ), "/app_home/%s", pInputFile );
  152. V_strncpy( V_strrchr( outputFile, '.' ), pExt, sizeof( outputFile ) );
  153. FILE *fOut = ( nMode & 1 ) ? fopen( outputFile, "wb" ) : NULL;
  154. RiffWavHeader hdr;
  155. if( fOut )
  156. {
  157. fwrite( &hdr, 1, sizeof( hdr ), fOut );
  158. }
  159. else
  160. {
  161. V_strcpy( outputFile, "<null>" );
  162. }
  163. while( GetWorkerJobParams()->m_nWorkers > 1 )
  164. sys_timer_usleep(100);
  165. m_nMaxSpuWorkers = 1; // so that context is serialized
  166. Mp3DecContext * pMp3Context = NULL;
  167. pMp3Context = (Mp3DecContext * )MemAlloc_AllocAligned( sizeof( Mp3DecContext ), 128 );
  168. if( pMp3Context ) pMp3Context->Init();
  169. Msg("Decompressing %s into %s\n", pInputFile, outputFile );
  170. // CUtlVector<uint16> wav;
  171. // wav.EnsureCapacity( 32*1024*1024 );
  172. uint nTotalSamples = 0;
  173. CUtlVector<int16> arrWave;
  174. uint nChannelFlags = Mp3DecJoblet::FLAG_STEREO;
  175. ValidateMp3( pMp3Frames, pMp3FramesEnd );
  176. if( nMode == 4 )
  177. return;
  178. Mp3FrameHeader * pMp3FrameHeader = ( Mp3FrameHeader *)pMp3Frames;
  179. uint nSamplingRate = pMp3FrameHeader->GetFrameSamplingRate();
  180. float flBitrateSum = 0;
  181. uint nBitrateFrames = 0;
  182. uint nTickStart = __mftb();
  183. uint nBatchFrames = 1;
  184. EnterWorkerLock();
  185. const uint nMaxSkipFrames = 1;
  186. uint nSkipFrames = 0;
  187. uint8 * pPreviousFrame[nMaxSkipFrames+1];
  188. while( pMp3Frames < pMp3FramesEnd )
  189. {
  190. if( nMode & 2 )
  191. {
  192. const uint nMaxParallelJoblets = JOBLET_COUNT;
  193. arrWave.SetCount( nBatchFrames * nMaxParallelJoblets * 0x901 );
  194. Mp3DecJoblet *pDec[nMaxParallelJoblets];
  195. uint nDecCount = 0;
  196. for( uint i = 0; i < JOBLET_COUNT; ++i )
  197. {
  198. Assert( !m_joblets[i].IsAllocated() );
  199. }
  200. for( nDecCount = 0; nDecCount < nMaxParallelJoblets; ++nDecCount )
  201. {
  202. pPreviousFrame[0] = pMp3Frames;
  203. if( pMp3Frames + 4 >= pMp3FramesEnd )
  204. break;
  205. uint nFrameLength = 0, nBatchedFrames = 0;
  206. uint8 * pLastFrame = pMp3Frames;
  207. while( nBatchedFrames < nBatchFrames )
  208. {
  209. Mp3FrameHeader* pFrame = ( Mp3FrameHeader* )( pMp3Frames + nFrameLength );
  210. if( !pFrame->CheckSync() || pMp3Frames + nFrameLength > pMp3FramesEnd )
  211. break;
  212. pLastFrame = pMp3Frames + nFrameLength;
  213. nFrameLength += pFrame->CorrectFrameLength( pMp3FramesEnd );
  214. flBitrateSum += pFrame->GetBitrateKbps();
  215. nBitrateFrames ++;
  216. nBatchedFrames ++;
  217. }
  218. if( nFrameLength == 0 )
  219. break;
  220. pDec[nDecCount] = NewDecode( nChannelFlags | Mp3DecJoblet::FLAG_LITTLE_ENDIAN | Mp3DecJoblet::FLAG_FULL_MP3_FRAMES_ONLY );
  221. pDec[nDecCount]->m_eaMp3 = pMp3Context ? pMp3Frames : pPreviousFrame[nSkipFrames];
  222. pDec[nDecCount]->m_eaMp3End = pMp3Frames + nFrameLength;
  223. pDec[nDecCount]->m_eaWave = arrWave.Base() + nDecCount * 0x901 * nBatchFrames;
  224. pDec[nDecCount]->m_eaWaveEnd = arrWave.Base() + nDecCount * 0x901 * nBatchFrames + 0x900 * nBatchFrames;
  225. pDec[nDecCount]->m_eaContext = pMp3Context;
  226. pDec[nDecCount]->m_nSkipSamples = pMp3Context ? 0 : nSkipFrames * 0x480;
  227. pMp3Frames += nFrameLength;
  228. KickPending();
  229. for( uint i = nMaxSkipFrames; i-->0; )
  230. pPreviousFrame[i+1] = pPreviousFrame[i];
  231. nSkipFrames = MIN( nMaxSkipFrames, nSkipFrames + 1 );
  232. }
  233. if( nDecCount == 0 )
  234. break;// finished
  235. for( uint i = 0 ; i < nDecCount; ++i )
  236. {
  237. Wait( pDec[i] );
  238. Assert( pDec[i]->m_nFlags & pDec[i]->FLAG_DECODE_COMPLETE );
  239. Assert( pDec[i]->m_eaWavePut == pDec[i]->m_eaWaveEnd || pDec[i]->m_eaWavePut == pDec[i]->m_eaWave || i + 1 == nDecCount );
  240. uint nSamplesDecoded = pDec[i]->m_eaWavePut - pDec[i]->m_eaWave;
  241. Assert( nSamplesDecoded <= 0x900 * nBatchFrames );
  242. nTotalSamples += nSamplesDecoded;
  243. if( fOut )
  244. {
  245. fwrite( pDec[i]->m_eaWave, ( uintp( pDec[i]->m_eaWavePut ) - uintp( pDec[i]->m_eaWave ) ) & -2, 1, fOut );
  246. }
  247. DeleteDecode( pDec[i] );
  248. }
  249. for( uint i = 0; i < JOBLET_COUNT; ++i )
  250. {
  251. Assert( !m_joblets[i].IsAllocated() );
  252. }
  253. }
  254. else
  255. {
  256. Mp3DecJoblet *pDec = NewDecode( nChannelFlags | Mp3DecJoblet::FLAG_LITTLE_ENDIAN /*| Mp3DecJoblet::FLAG_FULL_MP3_FRAMES_ONLY*/ );
  257. arrWave.SetCount( 0x900 );
  258. uint nFrameSize = ((Mp3FrameHeader*)pMp3Frames)->CorrectFrameLength( pMp3FramesEnd );
  259. uint8 * pFrameCopy = new uint8[ nFrameSize ];
  260. V_memcpy( pFrameCopy, pMp3Frames, nFrameSize );
  261. Msg("Decoding %u-byte frame @%p..", nFrameSize, pMp3Frames );
  262. pDec->m_eaMp3 = pFrameCopy;
  263. pDec->m_eaMp3End = pFrameCopy + nFrameSize;
  264. pDec->m_eaWave = arrWave.Base();
  265. pDec->m_eaWaveEnd = arrWave.Base() + arrWave.Count();
  266. pDec->m_eaContext = pMp3Context;
  267. KickPending();
  268. Wait( pDec );
  269. nChannelFlags = pDec->m_nFlags & Mp3DecJoblet::FLAGS_MONO_OR_STEREO; // choose whichever (mono or stereo) the job decoded
  270. uint nSamplesDecoded = pDec->m_eaWavePut - pDec->m_eaWave;
  271. Msg( "%d chan, %d samples\n", nChannelFlags, nSamplesDecoded / nChannelFlags );
  272. nTotalSamples += nSamplesDecoded;
  273. if( fOut )
  274. {
  275. fwrite( arrWave.Base(), ( uintp( pDec->m_eaWavePut ) - uintp( pDec->m_eaWave ) ) & -2, 1, fOut );
  276. }
  277. pMp3Frames += pDec->m_eaMp3Get - pDec->m_eaMp3;
  278. DeleteDecode( pDec );
  279. delete[]pFrameCopy;
  280. if( pDec->m_nFlags & pDec->FLAG_DECODE_ERROR )
  281. {
  282. Warning("Mp3 Decoder Error\n");
  283. break;
  284. }
  285. if( pDec->m_eaWavePut <= pDec->m_eaWave )
  286. {
  287. break; // nothing was decoded
  288. }
  289. }
  290. }
  291. float flBitrate = nBitrateFrames ? flBitrateSum / nBitrateFrames : 0;
  292. LeaveWorkerLock();
  293. if( pMp3Context )
  294. MemAlloc_FreeAligned( pMp3Context );
  295. const char * pszSampleCh = "mono";
  296. uint nChannelCount = 1;
  297. if( nChannelFlags & Mp3DecJoblet::FLAG_STEREO )
  298. {
  299. pszSampleCh = "stereo";
  300. nChannelCount = 2;
  301. }
  302. nTotalSamples /= nChannelCount;
  303. float flSeconds = (nTotalSamples) / float( nSamplingRate );
  304. uint nTicksTotal = __mftb() - nTickStart;
  305. if( fOut )
  306. Msg( "Writing %dHz %.1f second Riff Wave File, %d %s samples\n", nSamplingRate, flSeconds, nTotalSamples, pszSampleCh );
  307. else
  308. {
  309. Msg( "%d %s samples @%dHz @%.1f kbps = %.1f seconds in %.2f ms, ratio = %.2f%%\n", nTotalSamples, pszSampleCh, nSamplingRate, flBitrate, flSeconds, nTicksTotal / 79800.0f, 100 * ( nTicksTotal / 79800000.0f ) / ( flSeconds ) );
  310. }
  311. hdr.Init( nTotalSamples, nChannelCount, 16, nSamplingRate );
  312. if( fOut )
  313. {
  314. fseek( fOut, 0, SEEK_SET );
  315. fwrite( &hdr, 1, sizeof( hdr ), fOut );
  316. fclose( fOut );
  317. }
  318. }
  319. void Mp3DecMgr::OnVjobsShutdown() // gets called before m_pRoot is about to be destructed and NULL'ed
  320. {
  321. Finish();
  322. job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
  323. if( m_nDecoderCrc != CRC32_ProcessSingleBuffer( pParams->m_eaDecoder, m_nDecoderSize ) )
  324. {
  325. Warning( "MP3 Decoder is corrupted; please tell Sergiy\n" );
  326. }
  327. MemAlloc_FreeAligned( pParams->m_eaDecoder );
  328. }
  329. Mp3DecJoblet * Mp3DecMgr::NewDecode( uint nFlags )
  330. {
  331. job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
  332. // there are JOBLET_COUNT joblets in the ring buffer. The first m_nAllocatedNotKicked (counting from m_nGet index)
  333. // are already taken (allocated) and we cannot wait for them or allocate them because they aren't even kicked yet
  334. // So somebody later will kick them, but for now we have to let them be.
  335. // Cycle through the remaining joblets and find one that's free and allocate it (return a pointer to it)
  336. int nSleepCounter = 0;
  337. for( uint i = m_nAllocatedNotKicked; i < JOBLET_COUNT; ++i )
  338. {
  339. // let's try to see if this joblet with this index is available for allocation
  340. uint nTryAllocateIndex = pParams->m_nPut + i;
  341. while( nTryAllocateIndex - pParams->m_nGet >= JOBLET_COUNT )
  342. {
  343. // this joblet is in previous ring of the ring buffer. SPU is working on it. Perhaps it's free,
  344. // but even if it is, we need to let SPU realize that and advance m_nGet pointer.
  345. // this joblet logically is not allocated yet, but it occupies the same space in memory as one of the joblets previously allocated
  346. // in the previous ring of the joblet ring buffer.
  347. // there are probably workers working on this joblet, but by this line they may have exited. If they did
  348. // then the queue must be empty (put == get)
  349. Assert( pParams->m_nWorkers || pParams->m_nPut == pParams->m_nGet );
  350. // at all times, put and get must be within this distance (the size of the ring buffer)
  351. Assert( pParams->m_nPut - pParams->m_nGet <= JOBLET_COUNT );
  352. // wait for SPU to advance get pointer
  353. sys_timer_usleep( 60 );
  354. ++nSleepCounter;
  355. }
  356. // if this joblet is free, we can now use it because SPU is past this point
  357. Mp3DecJoblet *pNextJoblet = &m_joblets[ nTryAllocateIndex & ( JOBLET_COUNT - 1 ) ];
  358. #ifdef _DEBUG
  359. Mp3DecJoblet jobletState;
  360. __sync(); // try to flush pending DMA's, to increase the probability of atomic copy (still not guaranteed, but it's for debugging only)
  361. V_memcpy( &jobletState, pNextJoblet, sizeof( jobletState ) );
  362. #endif
  363. if( !pNextJoblet->IsAllocated() )
  364. {
  365. // we found a joblet that is not allocated and is not worked by SPU. Return it.
  366. V_memset( pNextJoblet, 0, sizeof( *pNextJoblet ) );
  367. pNextJoblet->m_nFlags = nFlags | Mp3DecJoblet::FLAG_ALLOCATED;
  368. m_nAllocatedNotKicked++; // we'll need to kick this joblet
  369. return pNextJoblet;
  370. }
  371. else
  372. {
  373. // we found a joblet that spu finished working on, but it's not free. We must skip it.
  374. m_nAllocatedNotKicked++;
  375. }
  376. }
  377. if ( nSleepCounter >= 8 )
  378. {
  379. // If we had to wait more than 0.5 ms, let's print something...
  380. Warning( " Mp3DecMgr::NewDecode() waited for more than %f\n", (float)nSleepCounter * 0.060f );
  381. }
  382. return NULL;
  383. }
  384. // kick ALL pending allocated not kicked jobs
  385. void Mp3DecMgr::KickPending()
  386. {
  387. if( !m_nAllocatedNotKicked )
  388. return;
  389. job_mp3dec::JobParams_t *pParams = GetWorkerJobParams();
  390. __lwsync(); // order the previous writes with submitting this joblet for processing
  391. uint nNewPut = cellAtomicAdd32( &pParams->m_nPut, m_nAllocatedNotKicked ) + m_nAllocatedNotKicked;
  392. m_nAllocatedNotKicked = 0;
  393. Assert( nNewPut == pParams->m_nPut );
  394. __lwsync(); // order joblet submission with starting another job
  395. uint nWorkersNeeded = MIN( m_nMaxSpuWorkers, ( nNewPut - pParams->m_nGet ) / 8 + 1 );
  396. while( pParams->m_nWorkers < nWorkersNeeded )
  397. {
  398. cellAtomicIncr32( ( uint32* ) &pParams->m_nWorkers );
  399. // spawn another worker
  400. m_pRoot->m_queuePortSound.pushJob( &m_jobWorker.header, sizeof( m_jobWorker ), 0, 0 );
  401. }
  402. }
  403. void Mp3DecMgr::DeleteDecode( Mp3DecJoblet *pJoblet )
  404. {
  405. // free it up!
  406. Wait( pJoblet );
  407. Assert( pJoblet->IsComplete() && pJoblet->IsAllocated() );
  408. pJoblet->m_nFlags = 0; // it's free now, even if it's in the list of joblets to process
  409. }