//========= Copyright © Valve Corporation, All rights reserved. ============// #include "snd_ps3_mp3dec.h" #include "vjobs/root.h" #include "filesystem_engine.h" #include "filesystem.h" #include #include "Mp3DecLibPpu.h" #include "mp3declib.h" extern IVJobs * g_pVJobs; Mp3DecMgr g_mp3dec[NUMBER_OF_MP3_DECODER_SLOTS]; void Mp3DecMgr::Init() { g_pVJobs->Register( this ); } #if 0 // def _DEBUG #undef Assert #define Assert(X) do{if( !(X) ) {Msg("Assertion failed\n%s:%d\n%s\n", __FILE__, __LINE__, #X); DebuggerBreak();};}while(0) #endif void Mp3DecMgr::Shutdown() { Finish(); g_pVJobs->Unregister( this ); } void Mp3DecMgr::OnVjobsInit() // gets called after m_pRoot was created and assigned { COMPILE_TIME_ASSERT( !( ( JOBLET_COUNT - 1 ) & JOBLET_COUNT ) ); // JOBLET_COUNT must be a power of 2 V_memset( &m_jobWorker, 0, sizeof( m_jobWorker ) ); V_memset( m_joblets, 0, sizeof( m_joblets ) ); m_nMaxSpuWorkers = 1; // Use only one SPU at a time (that way each job is implicitly dependent on the previous jobs to be completed). m_jobWorker.header = *m_pRoot->m_pJobMp3Dec; m_jobWorker.header.sizeScratch = 114 * 1024 / 16; m_jobWorker.header.sizeInOrInOut = job_mp3dec::IOBUFFER_SIZE; job_mp3dec::JobParams_t *pParams = GetWorkerJobParams(); m_pWorkerParams = pParams; m_nDecoderSize = 0; m_nAllocatedNotKicked = 0; int nError = cellMP3IntegratedDecoderGetRequiredSize( &m_nDecoderSize ); if( nError ) { Warning( "cannot initialize mp3 decoding, error %d\n", nError ); } pParams->m_eaDecoder = MemAlloc_AllocAligned( m_nDecoderSize, 128 ); pParams->m_eaJoblets = m_joblets; cellMP3IntegratedDecoderInit( pParams->m_eaDecoder, m_nDecoderSize ); m_nDecoderCrc = CRC32_ProcessSingleBuffer( pParams->m_eaDecoder, m_nDecoderSize ); if( CommandLine()->FindParm( "-msftest" ) ) { const char * pTestFiles[] = { "amb_muffled_lo_mach_14.mp3", "amb_muffled_lo_mach_15.mp3", "amb_muffled_lo_mach_17.mp3", "portal_4000_degrees_kelvin.msf", "portal_android_hell.msf", "portal_no_cake_for_you.msf", "portal_party_escort.msf", "portal_procedural_jiggle_bone.msf", "portal_self_esteem_fund.msf", "portal_still_alive.msf", "portal_stop_what_you_are_doing.msf", "portal_subject_name_here.msf", "portal_taste_of_blood.msf", "portal_you_cant_escape_you_know.msf", "portal_youre_not_a_good_person.msf", "error.mps", "amb_metal_imp_warehouse_39.mps", "aa2.msf", "aa1.msf" }; for( int i = 0; i < ARRAYSIZE( pTestFiles ); i++ ) { const char * pTestFile = pTestFiles[i]; //g_pFileSystem->FileExists( pTestFile ); //StartMsfTest( pTestFile, 0 ); //StartMsfTest( pTestFile, "_skip1_noctx.wav", 3 ); StartMsfTest( pTestFile, "_dec.wav", 1 ); //StartMsfTest( pTestFile, "_ctx0.wav", 1 ); } } } // This function skips ID3 tag version2.x uint8* SkipId3Tag( uint8 * pMp3header ) { uint32 tmp; uint32 size=0; if ( (pMp3header[0]!='I') || (pMp3header[1]!='D') || (pMp3header[2]!='3') ) { Msg( "ID3 tag v2.x not found\n" ); while( *pMp3header != 0xFF || ( pMp3header[1] & 0xE0 ) != 0xE0 ) ++pMp3header; // sync up with mp3 bitstream return pMp3header; // ID3 tag not found, these are probably MP3 frames going right here } for(uint i=0;i<4;i++) { tmp = (pMp3header[i+6] & 0x7f); tmp<<=(7*(3-i)); size|=tmp; } // skip 10 bytes of the header, and the size is the size of the data after the header (the tag) return pMp3header + 10 + size; } void ValidateMp3( uint8 * pMp3Frames, uint8 * pMp3FramesEnd ) { uint nPadding[2] = {0,0}; for( uint8* p = pMp3Frames; p < pMp3FramesEnd; ) { Mp3FrameHeader * pHdr = ( Mp3FrameHeader * )p; Assert( pHdr->CheckSync() ); Mp3FrameHeader *pNext = ( Mp3FrameHeader * )( p + pHdr->CorrectFrameLength( pMp3FramesEnd ) ); if( uintp( pNext + 1 ) >= uintp( pMp3FramesEnd ) ) break; Assert( pNext->CheckSync() ); nPadding[pHdr->GetPadding()]++; p = (uint8*)pNext; } Msg( "MP3 validation: %d padded, %d unpadded\n", nPadding[1], nPadding[0] ); } void Mp3DecMgr::StartMsfTest( const char * pInputFile, const char *pExt, int nMode ) { FileHandle_t fh = g_pFileSystem->OpenEx( pInputFile, "rb", FSOPEN_NEVERINPACK ); if( fh == FILESYSTEM_INVALID_HANDLE ) return; CUtlBuffer msf; if( !g_pFileSystem->ReadToBuffer( fh, msf ) ) { Warning("Cannot load test msf file\n"); return; } g_pFileSystem->Close( fh ); uint8 * pMp3Frames = (uint8*)msf.Base(), *pMp3FramesEnd = pMp3Frames + msf.Size(); { CellMSMSFHeader * pMsfHeader = (CellMSMSFHeader *)msf.Base(); if( pMsfHeader->header[0] == 'M' && pMsfHeader->header[1] == 'S' && pMsfHeader->header[2] == 'F' ) { if( pMsfHeader->compressionType != CELL_MS_MP3 ) { Warning("Invalid compression type %d\n", pMsfHeader->compressionType ); } // one of the samples comments that 0x10 is the bit responsible for -loop option at the time of compilation. Documentation states "bit 4" , so it means all the bits in documentation are little-endian. // See MSWrapResource.cpp : 173 (romaji) MSF fairu sakuseiji ni -loop wotsuketakadoukano handan Msg( "Testing %d-channel MP3 @%dHz, %d loops %s %s %s\n", pMsfHeader->channels, pMsfHeader->sampleRate, pMsfHeader->miscInfo & 0xF, pMsfHeader->miscInfo & 0x10 ? "-loop":"(no -loop)", pMsfHeader->miscInfo & 0x20 ? "VBR":"CBR", pMsfHeader->miscInfo & 0x40 ? "joint stereo":"" ); pMp3Frames = (uint8*)( pMsfHeader + 1 ); } else { pMp3Frames = SkipId3Tag( (uint8*)msf.Base() ); } } char outputFile[256]; V_snprintf( outputFile, sizeof( outputFile ), "/app_home/%s", pInputFile ); V_strncpy( V_strrchr( outputFile, '.' ), pExt, sizeof( outputFile ) ); FILE *fOut = ( nMode & 1 ) ? fopen( outputFile, "wb" ) : NULL; RiffWavHeader hdr; if( fOut ) { fwrite( &hdr, 1, sizeof( hdr ), fOut ); } else { V_strcpy( outputFile, "" ); } while( GetWorkerJobParams()->m_nWorkers > 1 ) sys_timer_usleep(100); m_nMaxSpuWorkers = 1; // so that context is serialized Mp3DecContext * pMp3Context = NULL; pMp3Context = (Mp3DecContext * )MemAlloc_AllocAligned( sizeof( Mp3DecContext ), 128 ); if( pMp3Context ) pMp3Context->Init(); Msg("Decompressing %s into %s\n", pInputFile, outputFile ); // CUtlVector wav; // wav.EnsureCapacity( 32*1024*1024 ); uint nTotalSamples = 0; CUtlVector arrWave; uint nChannelFlags = Mp3DecJoblet::FLAG_STEREO; ValidateMp3( pMp3Frames, pMp3FramesEnd ); if( nMode == 4 ) return; Mp3FrameHeader * pMp3FrameHeader = ( Mp3FrameHeader *)pMp3Frames; uint nSamplingRate = pMp3FrameHeader->GetFrameSamplingRate(); float flBitrateSum = 0; uint nBitrateFrames = 0; uint nTickStart = __mftb(); uint nBatchFrames = 1; EnterWorkerLock(); const uint nMaxSkipFrames = 1; uint nSkipFrames = 0; uint8 * pPreviousFrame[nMaxSkipFrames+1]; while( pMp3Frames < pMp3FramesEnd ) { if( nMode & 2 ) { const uint nMaxParallelJoblets = JOBLET_COUNT; arrWave.SetCount( nBatchFrames * nMaxParallelJoblets * 0x901 ); Mp3DecJoblet *pDec[nMaxParallelJoblets]; uint nDecCount = 0; for( uint i = 0; i < JOBLET_COUNT; ++i ) { Assert( !m_joblets[i].IsAllocated() ); } for( nDecCount = 0; nDecCount < nMaxParallelJoblets; ++nDecCount ) { pPreviousFrame[0] = pMp3Frames; if( pMp3Frames + 4 >= pMp3FramesEnd ) break; uint nFrameLength = 0, nBatchedFrames = 0; uint8 * pLastFrame = pMp3Frames; while( nBatchedFrames < nBatchFrames ) { Mp3FrameHeader* pFrame = ( Mp3FrameHeader* )( pMp3Frames + nFrameLength ); if( !pFrame->CheckSync() || pMp3Frames + nFrameLength > pMp3FramesEnd ) break; pLastFrame = pMp3Frames + nFrameLength; nFrameLength += pFrame->CorrectFrameLength( pMp3FramesEnd ); flBitrateSum += pFrame->GetBitrateKbps(); nBitrateFrames ++; nBatchedFrames ++; } if( nFrameLength == 0 ) break; pDec[nDecCount] = NewDecode( nChannelFlags | Mp3DecJoblet::FLAG_LITTLE_ENDIAN | Mp3DecJoblet::FLAG_FULL_MP3_FRAMES_ONLY ); pDec[nDecCount]->m_eaMp3 = pMp3Context ? pMp3Frames : pPreviousFrame[nSkipFrames]; pDec[nDecCount]->m_eaMp3End = pMp3Frames + nFrameLength; pDec[nDecCount]->m_eaWave = arrWave.Base() + nDecCount * 0x901 * nBatchFrames; pDec[nDecCount]->m_eaWaveEnd = arrWave.Base() + nDecCount * 0x901 * nBatchFrames + 0x900 * nBatchFrames; pDec[nDecCount]->m_eaContext = pMp3Context; pDec[nDecCount]->m_nSkipSamples = pMp3Context ? 0 : nSkipFrames * 0x480; pMp3Frames += nFrameLength; KickPending(); for( uint i = nMaxSkipFrames; i-->0; ) pPreviousFrame[i+1] = pPreviousFrame[i]; nSkipFrames = MIN( nMaxSkipFrames, nSkipFrames + 1 ); } if( nDecCount == 0 ) break;// finished for( uint i = 0 ; i < nDecCount; ++i ) { Wait( pDec[i] ); Assert( pDec[i]->m_nFlags & pDec[i]->FLAG_DECODE_COMPLETE ); Assert( pDec[i]->m_eaWavePut == pDec[i]->m_eaWaveEnd || pDec[i]->m_eaWavePut == pDec[i]->m_eaWave || i + 1 == nDecCount ); uint nSamplesDecoded = pDec[i]->m_eaWavePut - pDec[i]->m_eaWave; Assert( nSamplesDecoded <= 0x900 * nBatchFrames ); nTotalSamples += nSamplesDecoded; if( fOut ) { fwrite( pDec[i]->m_eaWave, ( uintp( pDec[i]->m_eaWavePut ) - uintp( pDec[i]->m_eaWave ) ) & -2, 1, fOut ); } DeleteDecode( pDec[i] ); } for( uint i = 0; i < JOBLET_COUNT; ++i ) { Assert( !m_joblets[i].IsAllocated() ); } } else { Mp3DecJoblet *pDec = NewDecode( nChannelFlags | Mp3DecJoblet::FLAG_LITTLE_ENDIAN /*| Mp3DecJoblet::FLAG_FULL_MP3_FRAMES_ONLY*/ ); arrWave.SetCount( 0x900 ); uint nFrameSize = ((Mp3FrameHeader*)pMp3Frames)->CorrectFrameLength( pMp3FramesEnd ); uint8 * pFrameCopy = new uint8[ nFrameSize ]; V_memcpy( pFrameCopy, pMp3Frames, nFrameSize ); Msg("Decoding %u-byte frame @%p..", nFrameSize, pMp3Frames ); pDec->m_eaMp3 = pFrameCopy; pDec->m_eaMp3End = pFrameCopy + nFrameSize; pDec->m_eaWave = arrWave.Base(); pDec->m_eaWaveEnd = arrWave.Base() + arrWave.Count(); pDec->m_eaContext = pMp3Context; KickPending(); Wait( pDec ); nChannelFlags = pDec->m_nFlags & Mp3DecJoblet::FLAGS_MONO_OR_STEREO; // choose whichever (mono or stereo) the job decoded uint nSamplesDecoded = pDec->m_eaWavePut - pDec->m_eaWave; Msg( "%d chan, %d samples\n", nChannelFlags, nSamplesDecoded / nChannelFlags ); nTotalSamples += nSamplesDecoded; if( fOut ) { fwrite( arrWave.Base(), ( uintp( pDec->m_eaWavePut ) - uintp( pDec->m_eaWave ) ) & -2, 1, fOut ); } pMp3Frames += pDec->m_eaMp3Get - pDec->m_eaMp3; DeleteDecode( pDec ); delete[]pFrameCopy; if( pDec->m_nFlags & pDec->FLAG_DECODE_ERROR ) { Warning("Mp3 Decoder Error\n"); break; } if( pDec->m_eaWavePut <= pDec->m_eaWave ) { break; // nothing was decoded } } } float flBitrate = nBitrateFrames ? flBitrateSum / nBitrateFrames : 0; LeaveWorkerLock(); if( pMp3Context ) MemAlloc_FreeAligned( pMp3Context ); const char * pszSampleCh = "mono"; uint nChannelCount = 1; if( nChannelFlags & Mp3DecJoblet::FLAG_STEREO ) { pszSampleCh = "stereo"; nChannelCount = 2; } nTotalSamples /= nChannelCount; float flSeconds = (nTotalSamples) / float( nSamplingRate ); uint nTicksTotal = __mftb() - nTickStart; if( fOut ) Msg( "Writing %dHz %.1f second Riff Wave File, %d %s samples\n", nSamplingRate, flSeconds, nTotalSamples, pszSampleCh ); else { Msg( "%d %s samples @%dHz @%.1f kbps = %.1f seconds in %.2f ms, ratio = %.2f%%\n", nTotalSamples, pszSampleCh, nSamplingRate, flBitrate, flSeconds, nTicksTotal / 79800.0f, 100 * ( nTicksTotal / 79800000.0f ) / ( flSeconds ) ); } hdr.Init( nTotalSamples, nChannelCount, 16, nSamplingRate ); if( fOut ) { fseek( fOut, 0, SEEK_SET ); fwrite( &hdr, 1, sizeof( hdr ), fOut ); fclose( fOut ); } } void Mp3DecMgr::OnVjobsShutdown() // gets called before m_pRoot is about to be destructed and NULL'ed { Finish(); job_mp3dec::JobParams_t *pParams = GetWorkerJobParams(); if( m_nDecoderCrc != CRC32_ProcessSingleBuffer( pParams->m_eaDecoder, m_nDecoderSize ) ) { Warning( "MP3 Decoder is corrupted; please tell Sergiy\n" ); } MemAlloc_FreeAligned( pParams->m_eaDecoder ); } Mp3DecJoblet * Mp3DecMgr::NewDecode( uint nFlags ) { job_mp3dec::JobParams_t *pParams = GetWorkerJobParams(); // there are JOBLET_COUNT joblets in the ring buffer. The first m_nAllocatedNotKicked (counting from m_nGet index) // are already taken (allocated) and we cannot wait for them or allocate them because they aren't even kicked yet // So somebody later will kick them, but for now we have to let them be. // Cycle through the remaining joblets and find one that's free and allocate it (return a pointer to it) int nSleepCounter = 0; for( uint i = m_nAllocatedNotKicked; i < JOBLET_COUNT; ++i ) { // let's try to see if this joblet with this index is available for allocation uint nTryAllocateIndex = pParams->m_nPut + i; while( nTryAllocateIndex - pParams->m_nGet >= JOBLET_COUNT ) { // this joblet is in previous ring of the ring buffer. SPU is working on it. Perhaps it's free, // but even if it is, we need to let SPU realize that and advance m_nGet pointer. // this joblet logically is not allocated yet, but it occupies the same space in memory as one of the joblets previously allocated // in the previous ring of the joblet ring buffer. // there are probably workers working on this joblet, but by this line they may have exited. If they did // then the queue must be empty (put == get) Assert( pParams->m_nWorkers || pParams->m_nPut == pParams->m_nGet ); // at all times, put and get must be within this distance (the size of the ring buffer) Assert( pParams->m_nPut - pParams->m_nGet <= JOBLET_COUNT ); // wait for SPU to advance get pointer sys_timer_usleep( 60 ); ++nSleepCounter; } // if this joblet is free, we can now use it because SPU is past this point Mp3DecJoblet *pNextJoblet = &m_joblets[ nTryAllocateIndex & ( JOBLET_COUNT - 1 ) ]; #ifdef _DEBUG Mp3DecJoblet jobletState; __sync(); // try to flush pending DMA's, to increase the probability of atomic copy (still not guaranteed, but it's for debugging only) V_memcpy( &jobletState, pNextJoblet, sizeof( jobletState ) ); #endif if( !pNextJoblet->IsAllocated() ) { // we found a joblet that is not allocated and is not worked by SPU. Return it. V_memset( pNextJoblet, 0, sizeof( *pNextJoblet ) ); pNextJoblet->m_nFlags = nFlags | Mp3DecJoblet::FLAG_ALLOCATED; m_nAllocatedNotKicked++; // we'll need to kick this joblet return pNextJoblet; } else { // we found a joblet that spu finished working on, but it's not free. We must skip it. m_nAllocatedNotKicked++; } } if ( nSleepCounter >= 8 ) { // If we had to wait more than 0.5 ms, let's print something... Warning( " Mp3DecMgr::NewDecode() waited for more than %f\n", (float)nSleepCounter * 0.060f ); } return NULL; } // kick ALL pending allocated not kicked jobs void Mp3DecMgr::KickPending() { if( !m_nAllocatedNotKicked ) return; job_mp3dec::JobParams_t *pParams = GetWorkerJobParams(); __lwsync(); // order the previous writes with submitting this joblet for processing uint nNewPut = cellAtomicAdd32( &pParams->m_nPut, m_nAllocatedNotKicked ) + m_nAllocatedNotKicked; m_nAllocatedNotKicked = 0; Assert( nNewPut == pParams->m_nPut ); __lwsync(); // order joblet submission with starting another job uint nWorkersNeeded = MIN( m_nMaxSpuWorkers, ( nNewPut - pParams->m_nGet ) / 8 + 1 ); while( pParams->m_nWorkers < nWorkersNeeded ) { cellAtomicIncr32( ( uint32* ) &pParams->m_nWorkers ); // spawn another worker m_pRoot->m_queuePortSound.pushJob( &m_jobWorker.header, sizeof( m_jobWorker ), 0, 0 ); } } void Mp3DecMgr::DeleteDecode( Mp3DecJoblet *pJoblet ) { // free it up! Wait( pJoblet ); Assert( pJoblet->IsComplete() && pJoblet->IsAllocated() ); pJoblet->m_nFlags = 0; // it's free now, even if it's in the list of joblets to process }