//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
// SPU-side DMA helpers: SPU_memcpy, DmaCheckAlignment and the SpuMgr DMA get/put methods.
//
//==================================================================================================

//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------

// NOTE(review): the bare #include directives below have no header name in this copy of the
// file (the <...> names look like they were stripped). Restore them from version control
// before building.
#include "SpuMgr_spu.h"
#include
#ifndef _CERT
#include
#endif
#include
#include

//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------

// singleton instance
SpuMgr gSpuMgr __attribute__((aligned(128)));

// Carries the bytes that precede an unaligned destination inside its 16B line
// from one chunk of SPU_memcpy to the next.
unsigned char gUnalignedMem[16] __attribute__((aligned(16)));

// Copy request descriptor, fetched by DMA at the start of SPU_memcpy.
MemCpyHeader gMemCpyHeader __attribute__((aligned(16)));

//--------------------------------------------------------------------------------------------------
// SPU_memcpy
//
// Services one copy request. The address of a MemCpyHeader is read from the mailbox, the header
// is fetched by DMA, and header.size bytes are copied from header.src to header.dst in chunks of
// at most 8192 bytes, staged through the two caller-supplied buffers.
//
// pBuf1 - staging buffer for the DMA get; receives up to 8192 bytes per chunk
// pBuf2 - staging buffer for the DMA put; up to 8192 + 16 bytes of it are accessed
//
// header.src must be 16B aligned (asserted). header.dst may be unaligned: each put starts at dst
// rounded down to 16B, and the bytes in front of dst within that 16B line come from gUnalignedMem.
// gUnalignedMem is seeded from header.cacheLine - presumably the current contents of the first
// destination line; confirm against the code that fills in the header.
//
// If header.blocking is set, 0 is written to the mailbox once the copy is complete.
//
// NOTE(review): the last put is rounded up to a multiple of 16B, so bytes after dst + size in the
// final 16B line are overwritten with whatever pBuf2 holds there - confirm callers tolerate this.
// NOTE(review): size == 0 reaches DmaGetUNSAFE with a zero size, which DmaCheckAlignment rejects
// in non-_CERT builds.
//--------------------------------------------------------------------------------------------------
void SPU_memcpy( void *pBuf1, void *pBuf2 )
{
	// the mailbox value is used as the effective address of the request header
	uint32_t header;
	gSpuMgr.ReadMailbox( &header );
	gSpuMgr.MemcpyLock();

	// fetch the header on tag 0 and wait with tag mask 0x1
	gSpuMgr.DmaGetUNSAFE( &gMemCpyHeader, header, sizeof( MemCpyHeader ), 0 );
	gSpuMgr.DmaDone( 0x1 );

	DEBUG_ERROR( ( gMemCpyHeader.src & 0xf ) == 0 );

	uint32_t sizeAligned;
	uint32_t sizeAlignedDown;
	uint32_t dstAlignedDown;
	uint32_t offset;

	// seed the leading bytes for the first (possibly unaligned) destination line
	memcpy( gUnalignedMem, gMemCpyHeader.cacheLine, 16 );

	// full 8192-byte chunks
	while ( gMemCpyHeader.size > 8192 )
	{
		sizeAligned = 8192;
		dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
		offset = gMemCpyHeader.dst - dstAlignedDown;	// position of dst inside its 16B line

		gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
		gSpuMgr.DmaDone( 0x1 );

		// build the outgoing block: saved leading bytes first, then the chunk data
		if ( offset )
		{
			memcpy( pBuf2, gUnalignedMem, offset );
		}
		memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, sizeAligned );

		gSpuMgr.DmaSync();
		gSpuMgr.DmaPut( dstAlignedDown, pBuf2, SPUMGR_ALIGN_UP( sizeAligned + offset, 16 ), 0 );
		gSpuMgr.DmaDone( 0x1 );

		// keep the last (partial) 16B line of this block; its first 'offset' bytes become the
		// leading bytes of the next chunk's first destination line
		sizeAlignedDown = SPUMGR_ALIGN_DOWN( sizeAligned + offset, 16 );
		memcpy( gUnalignedMem, (void *) ( (uint32_t) pBuf2 + sizeAlignedDown ), 16 );

		gMemCpyHeader.size -= sizeAligned;
		gMemCpyHeader.dst += 8192;
		gMemCpyHeader.src += 8192;
	}

	// remaining tail (at most 8192 bytes); the get size is rounded up to a multiple of 16B
	sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size, 16 );
	dstAlignedDown = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
	offset = gMemCpyHeader.dst - dstAlignedDown;

	gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );
	gSpuMgr.DmaDone( 0x1 );

	if ( offset )
	{
		memcpy( pBuf2, gUnalignedMem, offset );
	}
	memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, gMemCpyHeader.size );

	// the put size covers the leading bytes plus the data, rounded up to a multiple of 16B
	sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size + offset, 16 );

	gSpuMgr.DmaSync();
	gSpuMgr.DmaPut( dstAlignedDown, pBuf2, sizeAligned, 0 );
	gSpuMgr.DmaDone( 0x1 );

	// signal completion through the mailbox when the request asked for it
	if ( gMemCpyHeader.blocking )
	{
		gSpuMgr.WriteMailbox( 0 );
	}

	gSpuMgr.MemcpyUnlock();
}

//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
//
// Checks restrictions specified in SpuMgr::DmaGet
//
// src, dest - the two addresses of the transfer (local store and effective address)
// size      - transfer size in bytes
//
// Returns non-zero when the transfer is acceptable:
//  - size is a non-zero multiple of 16 and both addresses are non-zero and 16B aligned, or
//  - size is 1, 2, 4 or 8, both addresses are non-zero and aligned to size, and their low
//    4 bits are equal.
// Any other size (including 0) returns 0.
//
// When _CERT is defined no checking is done and 1 is always returned.
//--------------------------------------------------------------------------------------------------
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
{
#if !defined( _CERT )
	uint32_t align = size;	// small transfers must be aligned to their own size
	bool error = false;

	if (size >= 16 && ((size & 0xf) == 0))
	{
		align = 16;
	}
	else if (size == 8 || size == 4 || size == 2 || size == 1)
	{
		// small transfers: low 4 bits of both addresses must match
		error = ((src & 0xF) != (dest & 0xF));
	}
	else
	{
		error = true; // bad size
	}

	return (!error && src && dest && SPUMGR_IS_ALIGNED(src, align) && SPUMGR_IS_ALIGNED(dest, align));
#else // _CERT
	return 1;
#endif // !_CERT
}

//--------------------------------------------------------------------------------------------------
// SpuMgr::Init
//
// Restarts the decrementer, resets the transfer and malloc counters, then reads the two lock
// addresses (m_lockEA, m_memcpyLockEA) from the mailbox, in that order.
// Always returns 0.
//--------------------------------------------------------------------------------------------------
int SpuMgr::Init()
{
	// Start the decrementer since it is possible
	// that it has not been started by default
	const unsigned int kEventDec = 0x20;

	// Disable the decrementer event.
	unsigned int maskEvents = spu_readch(SPU_RdEventStatMask);
	spu_writech(SPU_WrEventMask, maskEvents & ~kEventDec);

	// Acknowledge any pending events and stop the decrementer.
	spu_writech(SPU_WrEventAck, kEventDec);

	// Write the decrementer value to start the decrementer.
	unsigned int decValue = spu_readch(SPU_RdDec);
	spu_writech(SPU_WrDec, decValue);

	// Enable events.
	spu_writech(SPU_WrEventMask, maskEvents | kEventDec);

	// Reset byte count
	ResetBytesTransferred();

	// reset malloc count
	m_mallocCount = 0;

	// Read the effective address of the SPU locks.
	ReadMailbox( &m_lockEA );
	ReadMailbox( &m_memcpyLockEA );

	return 0;
}

//--------------------------------------------------------------------------------------------------
// SpuMgr::Term
//
// Currently does nothing.
//--------------------------------------------------------------------------------------------------
void SpuMgr::Term()
{
}

//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGet
//
// DmaGetSAFE   - alignment and size checking
// DmaGetUNSAFE - no alignment or size checking (but will assert in debug)
// _DmaGet      - handles badly aligned dma's, should be a private member really (doesn't handle small dma's)
//
// DMA restrictions
//    An MFC supports naturally aligned DMA transfer sizes of 1, 2, 4,
//    8, and 16 bytes and multiples of 16 bytes
//    Furthermore, if size is 1, 2, 4, or 8 bytes then lower 4 bits
//    of LS and EA must match
//
// Note:
//    Peak performance is achieved for transfers in which both the EA and
//    the LSA are 128-byte aligned and the size of the transfer is a multiple
//    of 128 bytes.
//-------------------------------------------------------------------------------------------------- void SpuMgr::DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId) { DEBUG_ERROR( ea < 0xd0000000 ); DEBUG_ERROR( ea ); DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size)); // do the dma while (size) { uint32_t dmaSize = 0x4000; dmaSize = (size < dmaSize)? size: dmaSize; size -= dmaSize; // kick off dma spu_mfcdma64( (void*)ls, 0, ea, dmaSize, tagId, MFC_GET_CMD); m_numDMATransfers++; ls = (void*)((uint32_t)ls + dmaSize); ea += dmaSize; } // add up bytes transferred m_bytesRequested += size; m_bytesTransferred += size; } //-------------------------------------------------------------------------------------------------- // SpuMgr::_DmaGet // // Internal function - do not call this directly //-------------------------------------------------------------------------------------------------- void SpuMgr::_DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId) { uint32_t unaligned = false; uint32_t eaAligned = (uint32_t)ea; uint32_t sizeAligned = size; uint32_t lsAligned = (uint32_t)ls; uint32_t sizeOffset = 0; char *pTempBuff = NULL; // check if src is unaligned if (eaAligned & 0xF) { eaAligned = eaAligned & ~0xF; // round down sizeOffset = ea - eaAligned; sizeAligned += sizeOffset; unaligned = true; } // check if size is unaligned if (sizeAligned & 0xF) { sizeAligned = (sizeAligned + 0xF) & ~0xF; // round up unaligned = true; } // if we have adjusted the size, or if ls is unaligned, // we need to alloc temp buffer if (unaligned || (lsAligned & 0xF)) { pTempBuff = (char*)MemAlign(0x10, sizeAligned); lsAligned = (uint32_t)pTempBuff; unaligned = true; } // add up bytes transferred, for informational purposes m_bytesRequested += size; m_bytesTransferred += sizeAligned; // do the dma while (sizeAligned) { uint32_t dmaSize = 0x4000; dmaSize = (sizeAligned < dmaSize)? 
sizeAligned: dmaSize; sizeAligned -= dmaSize; // kick off dma spu_mfcdma64( (void*)lsAligned, 0, eaAligned, dmaSize, tagId, MFC_GET_CMD); m_numDMATransfers++; lsAligned += dmaSize; eaAligned += dmaSize; } if (unaligned) { // block for now till dma done because we do the memcpy right here DmaDone(1 << tagId); // copy data over memcpy(ls, pTempBuff + sizeOffset, size); // free temp buff Free(pTempBuff); } } //-------------------------------------------------------------------------------------------------- // SpuMgr::DmaGetSAFE // // DMA restrictions (look at SpuMgr::DmaGetUNSAFE in this file) are // handled transparently by this function //-------------------------------------------------------------------------------------------------- void SpuMgr::DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId) { DEBUG_ERROR( ea ); if( size < 0x10 ) { // lowest 4 bits of address have to match regardless, & // size can only be 1, 2, 4 or 8 B if( size==0x1 || size==0x2 || size==0x4 || size==0x8 ) { if( ((uint32_t)ls&0xF == ea&0xF) ) { DmaGetUNSAFE(ls,ea,size,tagId); } else { // small get not aligned within a 16B block _DmaGet(ls,ea,size,tagId); } } else { // if < 16B can only get 1,2,4 or 8B _DmaGet(ls,ea,size,tagId); } } else { if( (!(size & 0xF)) && // has to be multiple of 16B, & (((uint32_t)ls&0xF)==0) && // ea and ls have to be 16B aligned ((ea&0xF)==0) ) { // alignment is okay just dma DmaGetUNSAFE(ls,ea,size,tagId); } else { _DmaGet(ls,ea,size,tagId); } } } //-------------------------------------------------------------------------------------------------- // SpuMgr::DmaPut //-------------------------------------------------------------------------------------------------- void SpuMgr::DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId) { DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) ); // valid ea DEBUG_ERROR( (uint32_t)ls < 0x40000 ); // valid ls DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size)); // do the dma while (size) { uint32_t dmaSize = 
0x4000; dmaSize = (size < dmaSize)? size: dmaSize; size -= dmaSize; // initiate dma to ppu spu_mfcdma64( ls, 0, ea, dmaSize, tagId, MFC_PUT_CMD); ls = (void*)((uint32_t)ls + dmaSize); ea += dmaSize; } } //-------------------------------------------------------------------------------------------------- // SpuMgr::DmaSmallPut //-------------------------------------------------------------------------------------------------- void SpuMgr::DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId) { DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) ); // valid ea DEBUG_ERROR( (uint32_t)ls < 0x40000 ); // valid ls DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size)); uint32_t dmaSize = 1; if ((size % 8) == 0) { dmaSize = 8; } else if ((size % 4) == 0) { dmaSize = 4; } else if ((size % 2) == 0) { dmaSize = 2; } while (size) { size -= dmaSize; // initiate dma to ppu spu_mfcdma64( ls, 0, ea, dmaSize, tagId, MFC_PUT_CMD); ls = (void*)((uint32_t)ls + dmaSize); ea += dmaSize; } } //-------------------------------------------------------------------------------------------------- // SpuMgr::DmaGetlist // // Gather data scattered around main mem, MFC will run through the list, and place the elements (based on ea address and size) // contiguously in ls. // // NOTE: if an individual list element size is <16B, the data will still be dma'd but the proceeding element will be placed // on the next 16B boundary. So it is possible to get lots of small elements, but you will be left with gaps in ls. 
// // ls - ls address of where items will be placed (contiguously) // lsList - ls address of actual list // sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements // sizeof(DMAList)) // tagId - works the same way as regular DMA's // // Alignment and Size Restrictions: // -ls and lsList must be 8B aligned // -size must be a multiple of 8B (sizeof(DMAList)) // -no more than 2048 list elements // // light error checking right now //-------------------------------------------------------------------------------------------------- void SpuMgr::DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId) { DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 ); // ls address must be 8B aligned DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 ); // ea so aligned also, due to offset within 16B alignment restrictions DEBUG_ERROR( (sizeList&0x7) == 0 ); // list size is a multiple of 8B DEBUG_ERROR( sizeList<(2048*sizeof(DMAList))); // no more than 2048 list elements // initiate dma list spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_GETL_CMD ); } //-------------------------------------------------------------------------------------------------- // SpuMgr::DmaGPutlist // // Scatter data held contiguously in ls, to main mem // // ls - ls address of where items exist (contiguously) to be scattered back to main mem // lsList - ls address of actual list // sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList)) // tagId - works the same way as regular DMA's // // Alignment and Size Restrictions: // ls and lsList must be 8B aligned, size must be a multiple of 8B (sizeof(DMAList)) // // light error checking right now //-------------------------------------------------------------------------------------------------- void SpuMgr::DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId) { DEBUG_ERROR( 
((uint32_t)pLS_List&0x7) == 0 ); // ls address must be 8B aligned DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 ); // ea so aligned also, due to offset within 16B alignment restrictions DEBUG_ERROR( (sizeList&0x7) == 0 ); // list size is a multiple of 8B DEBUG_ERROR( sizeList<(2048*sizeof(DMAList))); // no more than 2048 list elements // initiate dma list spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_PUTL_CMD ); }