|
|
//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================
//
//
//
//==================================================================================================
//--------------------------------------------------------------------------------------------------
// Headers
//--------------------------------------------------------------------------------------------------
#include "SpuMgr_spu.h"
#include <cell/atomic.h>
#ifndef _CERT
#include <libsn_spu.h>
#endif
#include <stdlib.h>
#include <string.h>
//--------------------------------------------------------------------------------------------------
// Globals
//--------------------------------------------------------------------------------------------------
// singleton instance
SpuMgr gSpuMgr __attribute__((aligned(128)));

// 16-byte scratch line used by SPU_memcpy to carry the partially filled
// destination line from one chunk to the next
unsigned char gUnalignedMem[16] __attribute__((aligned(16)));

// local-store copy of the memcpy request header that SPU_memcpy DMAs in
MemCpyHeader gMemCpyHeader __attribute__((aligned(16)));
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// SPU_memcpy
//
// Services one memcpy request. The effective address of a MemCpyHeader is
// read from the mailbox, the header is DMA'd into gMemCpyHeader, and the data
// is then streamed src -> pBuf1 -> pBuf2 -> dst in chunks of 8192 bytes.
//
// The source must be 16B aligned (asserted in debug); the destination may
// start anywhere inside a 16B line. Every put starts at the destination
// rounded down to 16B, so the leading bytes of that line are supplied from
// gUnalignedMem (seeded from the header's cacheLine, afterwards from the tail
// of the previous chunk).
//
// pBuf1 - local store staging buffer for the gets (receives up to 8192 bytes)
// pBuf2 - local store staging buffer for the puts (holds up to 8192 bytes plus
//         the leading offset, rounded up to 16B)
void SPU_memcpy( void *pBuf1, void *pBuf2 )
{
	// int-typed enumerator, so it behaves exactly like the literal 8192
	enum { kChunkBytes = 8192 };

	// the request arrives as the effective address of its header
	uint32_t headerEA;
	gSpuMgr.ReadMailbox( &headerEA );

	gSpuMgr.MemcpyLock();

	// pull the header into local store and wait for it (tag 0 -> mask 0x1)
	gSpuMgr.DmaGetUNSAFE( &gMemCpyHeader, headerEA, sizeof( MemCpyHeader ), 0 );
	gSpuMgr.DmaDone( 0x1 );

	DEBUG_ERROR( ( gMemCpyHeader.src & 0xf ) == 0 );

	// seed the carry-over line from the header
	memcpy( gUnalignedMem, gMemCpyHeader.cacheLine, 16 );

	// full chunks
	while ( gMemCpyHeader.size > kChunkBytes )
	{
		const uint32_t chunkBytes = kChunkBytes;
		const uint32_t dstLineEA = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
		const uint32_t leadBytes = gMemCpyHeader.dst - dstLineEA;

		// fetch the next chunk of source data
		gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, chunkBytes, 0 );
		gSpuMgr.DmaDone( 0x1 );

		// rebuild the head of the first destination line, then append the chunk
		if ( leadBytes )
		{
			memcpy( pBuf2, gUnalignedMem, leadBytes );
		}
		memcpy( (void *) ( (uint32_t) pBuf2 + leadBytes ), pBuf1, chunkBytes );

		// write whole 16B lines back out
		gSpuMgr.DmaSync();
		gSpuMgr.DmaPut( dstLineEA, pBuf2, SPUMGR_ALIGN_UP( chunkBytes + leadBytes, 16 ), 0 );
		gSpuMgr.DmaDone( 0x1 );

		// remember the last (possibly partial) line for the next pass
		const uint32_t tailLineOffset = SPUMGR_ALIGN_DOWN( chunkBytes + leadBytes, 16 );
		memcpy( gUnalignedMem, (void *) ( (uint32_t) pBuf2 + tailLineOffset ), 16 );

		gMemCpyHeader.size -= chunkBytes;
		gMemCpyHeader.dst += kChunkBytes;
		gMemCpyHeader.src += kChunkBytes;
	}

	// remainder (at most one chunk)
	const uint32_t lastDstLineEA = SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );
	const uint32_t lastLeadBytes = gMemCpyHeader.dst - lastDstLineEA;

	const uint32_t lastGetBytes = SPUMGR_ALIGN_UP( gMemCpyHeader.size, 16 );
	gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, lastGetBytes, 0 );
	gSpuMgr.DmaDone( 0x1 );

	if ( lastLeadBytes )
	{
		memcpy( pBuf2, gUnalignedMem, lastLeadBytes );
	}
	memcpy( (void *) ( (uint32_t) pBuf2 + lastLeadBytes ), pBuf1, gMemCpyHeader.size );

	const uint32_t lastPutBytes = SPUMGR_ALIGN_UP( gMemCpyHeader.size + lastLeadBytes, 16 );
	gSpuMgr.DmaSync();
	gSpuMgr.DmaPut( lastDstLineEA, pBuf2, lastPutBytes, 0 );
	gSpuMgr.DmaDone( 0x1 );

	// a blocking request is answered with a 0 written to the mailbox
	if ( gMemCpyHeader.blocking )
	{
		gSpuMgr.WriteMailbox( 0 );
	}

	gSpuMgr.MemcpyUnlock();
}
//--------------------------------------------------------------------------------------------------
// DmaCheckAlignment
//
// Checks the DMA size/alignment restrictions documented above SpuMgr::DmaGetUNSAFE
//--------------------------------------------------------------------------------------------------
// Returns non-zero when a transfer of 'size' bytes between 'src' and 'dest'
// satisfies the DMA restrictions, zero otherwise:
//   - size is a non-zero multiple of 16: both addresses must be 16B aligned
//   - size is 1, 2, 4 or 8: both addresses must be aligned to 'size' and
//     share the same lower 4 bits
//   - any other size is rejected
// Null addresses are always rejected.
// In _CERT builds the check is compiled out and the function returns 1.
int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
{
#if !defined( _CERT )
	uint32_t align = size;
	bool error = false;

	if (size >= 16 && ((size & 0xf) == 0))
	{
		align = 16;
	}
	else if (size == 8 || size == 4 || size == 2 || size == 1)
	{
		// small transfers: offset within the 16B line must match on both sides
		error = ((src & 0xF) != (dest & 0xF));
	}
	else
	{
		error = true;	// bad size
	}

	return (!error && src && dest && SPUMGR_IS_ALIGNED(src, align) && SPUMGR_IS_ALIGNED(dest, align));
#else // _CERT
	return 1;
#endif // !defined( _CERT )
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Starts the decrementer (it may not be running by default), resets the
// transfer statistics and malloc count, then reads the effective addresses of
// the SPU lock and the memcpy lock from the mailbox, in that order.
// Always returns 0.
int SpuMgr::Init()
{
	// event bit used for the decrementer
	const unsigned int kDecrementerEvent = 0x20;

	// mask out the decrementer event while it is being restarted
	const unsigned int eventMask = spu_readch( SPU_RdEventStatMask );
	spu_writech( SPU_WrEventMask, eventMask & ~kDecrementerEvent );

	// acknowledge any pending events and stop the decrementer
	spu_writech( SPU_WrEventAck, kDecrementerEvent );

	// writing the decrementer value back starts the decrementer
	const unsigned int decrementerValue = spu_readch( SPU_RdDec );
	spu_writech( SPU_WrDec, decrementerValue );

	// re-enable events, now including the decrementer event
	spu_writech( SPU_WrEventMask, eventMask | kDecrementerEvent );

	// statistics start from zero
	ResetBytesTransferred();
	m_mallocCount = 0;

	// effective addresses of the SPU locks
	ReadMailbox( &m_lockEA );
	ReadMailbox( &m_memcpyLockEA );

	return 0;
}
//--------------------------------------------------------------------------------------------------
//
//--------------------------------------------------------------------------------------------------
// Counterpart of Init(); currently there is nothing to tear down.
void SpuMgr::Term()
{
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetUNSAFE
//
// DmaGetSAFE - alignment and size checking
// DmaGetUNSAFE - no alignment or size checking (but will assert in debug)
// _DmaGet - handles badly aligned dma's, should be a private member really (doesn't handle small dma's)
//
// DMA restrictions
// An MFC supports naturally aligned DMA transfer sizes of 1, 2, 4,
// 8, and 16 bytes and multiples of 16 bytes
// Furthermore, if size is 1, 2, 4, or 8 bytes then lower 4 bits
// of LS and EA must match
//
// Note:
// Peak performance is achieved for transfers in which both the EA and
// the LSA are 128-byte aligned and the size of the transfer is a multiple
// of 128 bytes.
//--------------------------------------------------------------------------------------------------
// Issues a get (main memory -> local store) without fixing up alignment or
// size; the restrictions are only asserted in debug builds. The transfer is
// split into MFC commands of at most 0x4000 bytes each. Does not wait for
// completion.
//
// ls    - local store destination
// ea    - effective address of the source (non-null, below 0xd0000000)
// size  - number of bytes to transfer
// tagId - DMA tag the commands are issued under
void SpuMgr::DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( ea < 0xd0000000 );
	DEBUG_ERROR( ea );
	DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

	// add up bytes transferred; this has to happen before the loop below,
	// which counts 'size' down to zero
	m_bytesRequested += size;
	m_bytesTransferred += size;

	// do the dma
	while (size)
	{
		uint32_t dmaSize = 0x4000;
		dmaSize = (size < dmaSize)? size: dmaSize;
		size -= dmaSize;

		// kick off dma
		spu_mfcdma64( (void*)ls, 0, ea, dmaSize, tagId, MFC_GET_CMD);
		m_numDMATransfers++;

		ls = (void*)((uint32_t)ls + dmaSize);
		ea += dmaSize;
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::_DmaGet
//
// Internal function - do not call this directly
//--------------------------------------------------------------------------------------------------
// Get that copes with a badly aligned ea, ls or size. The source address is
// rounded down and the length rounded up to 16B. Whenever any rounding was
// needed, or ls itself is not 16B aligned, the data is fetched into a
// temporary 16B-aligned buffer, the call blocks until the DMA has completed,
// and the requested bytes are then copied to ls. If nothing needed fixing the
// DMA is issued straight into ls and the call does not wait.
void SpuMgr::_DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
	const uint32_t kLineMask = 0xF;

	// source rounded down to 16B; leadBytes is how far ea sits into that line
	uint32_t srcEA = ea & ~kLineMask;
	const uint32_t leadBytes = ea - srcEA;

	// length including the lead-in, rounded up to 16B
	const uint32_t rawBytes = size + leadBytes;
	const uint32_t totalBytes = (rawBytes + kLineMask) & ~kLineMask;

	// a bounce buffer is needed if anything had to be rounded, or ls is unaligned
	uint32_t dstLS = (uint32_t)ls;
	const bool useBounce = (leadBytes != 0) || ((rawBytes & kLineMask) != 0) || ((dstLS & kLineMask) != 0);

	char *pBounce = NULL;
	if (useBounce)
	{
		pBounce = (char*)MemAlign(0x10, totalBytes);
		dstLS = (uint32_t)pBounce;
	}

	// add up bytes transferred, for informational purposes
	m_bytesRequested += size;
	m_bytesTransferred += totalBytes;

	// do the dma, at most 0x4000 bytes per command
	for (uint32_t bytesLeft = totalBytes; bytesLeft != 0; )
	{
		uint32_t dmaSize = 0x4000;
		if (bytesLeft < dmaSize)
		{
			dmaSize = bytesLeft;
		}
		bytesLeft -= dmaSize;

		// kick off dma
		spu_mfcdma64( (void*)dstLS, 0, srcEA, dmaSize, tagId, MFC_GET_CMD);
		m_numDMATransfers++;

		dstLS += dmaSize;
		srcEA += dmaSize;
	}

	if (!useBounce)
	{
		return;
	}

	// block until the dma is done, because the copy out happens right here
	DmaDone(1 << tagId);

	// copy the requested bytes to their real destination
	memcpy(ls, pBounce + leadBytes, size);

	// free temp buff
	Free(pBounce);
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetSAFE
//
// DMA restrictions (look at SpuMgr::DmaGetUNSAFE in this file) are
// handled transparently by this function
//--------------------------------------------------------------------------------------------------
// Get that accepts any alignment and size. Transfers that already satisfy the
// DMA restrictions are issued directly through DmaGetUNSAFE; everything else
// is routed through _DmaGet, which fixes up alignment and size.
//
// ls    - local store destination
// ea    - effective address of the source (non-null)
// size  - number of bytes to transfer
// tagId - DMA tag the commands are issued under
void SpuMgr::DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( ea );

	const uint32_t lsAddr = (uint32_t)ls;
	bool bDirect;

	if( size < 0x10 )
	{
		// if < 16B can only get 1,2,4 or 8B
		const bool bLegalSmallSize = ( size==0x1 || size==0x2 || size==0x4 || size==0x8 );

		// lowest 4 bits of both addresses have to match, and the transfer has
		// to be naturally aligned (ls is then aligned too, since its low 4
		// bits equal those of ea). Each '&' is parenthesised because '=='
		// binds tighter than '&'.
		bDirect = bLegalSmallSize &&
				  ( (lsAddr & 0xF) == (ea & 0xF) ) &&
				  ( (ea & (size - 1)) == 0 );
	}
	else
	{
		bDirect = ( (size & 0xF) == 0 ) &&		// has to be multiple of 16B, &
				  ( (lsAddr & 0xF) == 0 ) &&	// ea and ls have to be 16B aligned
				  ( (ea & 0xF) == 0 );
	}

	if( bDirect )
	{
		// alignment is okay just dma
		DmaGetUNSAFE(ls,ea,size,tagId);
	}
	else
	{
		_DmaGet(ls,ea,size,tagId);
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPut
//--------------------------------------------------------------------------------------------------
// Issues a put (local store -> main memory), split into MFC commands of at
// most 0x4000 bytes each. Does not wait for completion. Address validity and
// DMA alignment restrictions are only asserted in debug builds.
//
// ea    - effective address of the destination
// ls    - local store source
// size  - number of bytes to transfer
// tagId - DMA tag the commands are issued under
void SpuMgr::DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) );				// valid ea
	DEBUG_ERROR( (uint32_t)ls < 0x40000 );					// valid ls
	DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

	const uint32_t kMaxDmaBytes = 0x4000;

	// do the dma
	uint32_t srcLS = (uint32_t)ls;
	for (uint32_t bytesLeft = size; bytesLeft != 0; )
	{
		uint32_t dmaSize = kMaxDmaBytes;
		if (bytesLeft < kMaxDmaBytes)
		{
			dmaSize = bytesLeft;
		}
		bytesLeft -= dmaSize;

		// initiate dma to ppu
		spu_mfcdma64( (void*)srcLS, 0, ea, dmaSize, tagId, MFC_PUT_CMD);

		srcLS += dmaSize;
		ea += dmaSize;
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaSmallPut
//--------------------------------------------------------------------------------------------------
// Issues a put (local store -> main memory) as a series of small commands.
// The command size is the largest of 8, 4, 2 or 1 bytes that divides 'size'
// evenly. Does not wait for completion.
//
// NOTE(review): the debug assert validates the total 'size' with
// DmaCheckAlignment, which only accepts 1, 2, 4, 8 or a multiple of 16 -
// confirm whether it was meant to validate the per-command size instead.
//
// ea    - effective address of the destination
// ls    - local store source
// size  - number of bytes to transfer
// tagId - DMA tag the commands are issued under
void SpuMgr::DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
{
	DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) );				// valid ea
	DEBUG_ERROR( (uint32_t)ls < 0x40000 );					// valid ls
	DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

	// largest of 8/4/2/1 that divides the total size
	uint32_t dmaSize;
	if ((size & 0x7) == 0)
	{
		dmaSize = 8;
	}
	else if ((size & 0x3) == 0)
	{
		dmaSize = 4;
	}
	else if ((size & 0x1) == 0)
	{
		dmaSize = 2;
	}
	else
	{
		dmaSize = 1;
	}

	uint32_t srcLS = (uint32_t)ls;
	for (uint32_t bytesLeft = size; bytesLeft != 0; bytesLeft -= dmaSize)
	{
		// initiate dma to ppu
		spu_mfcdma64( (void*)srcLS, 0, ea, dmaSize, tagId, MFC_PUT_CMD);

		srcLS += dmaSize;
		ea += dmaSize;
	}
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaGetList
//
// Gather data scattered around main mem, MFC will run through the list, and place the elements (based on ea address and size)
// contiguously in ls.
//
// NOTE: if an individual list element size is <16B, the data will still be dma'd but the following element will be placed
// on the next 16B boundary. So it is possible to get lots of small elements, but you will be left with gaps in ls.
//
// ls - ls address of where items will be placed (contiguously)
// lsList - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))
// tagId - works the same way as regular DMA's
//
// Alignment and Size Restrictions:
// -ls and lsList must be 8B aligned
// -size must be a multiple of 8B (sizeof(DMAList))
// -no more than 2048 list elements
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
// Issues a list get (MFC_GETL_CMD). Does not wait for completion. All
// restrictions are only asserted in debug builds.
//
// ls       - local store address the gathered data is written to
// pLS_List - local store address of the DMA list
// sizeList - size of the list in bytes (number of elements * sizeof(DMAList))
// tagId    - DMA tag the command is issued under
void SpuMgr::DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId)
{
	DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 );		// list address must be 8B aligned
	DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 );				// ls so aligned also, due to offset within 16B alignment restrictions
	DEBUG_ERROR( (sizeList&0x7) == 0 );					// list size is a multiple of 8B
	DEBUG_ERROR( sizeList<=(2048*sizeof(DMAList)));		// no more than 2048 list elements (exactly 2048 is allowed)

	// initiate dma list
	spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_GETL_CMD );
}
//--------------------------------------------------------------------------------------------------
// SpuMgr::DmaPutList
//
// Scatter data held contiguously in ls, to main mem
//
// ls - ls address of where items exist (contiguously) to be scattered back to main mem
// lsList - ls address of actual list
// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))
// tagId - works the same way as regular DMA's
//
// Alignment and Size Restrictions:
// ls and lsList must be 8B aligned, size must be a multiple of 8B (sizeof(DMAList))
//
// light error checking right now
//--------------------------------------------------------------------------------------------------
// Issues a list put (MFC_PUTL_CMD). Does not wait for completion. All
// restrictions are only asserted in debug builds.
//
// ls       - local store address of the contiguous data to scatter
// pLS_List - local store address of the DMA list
// sizeList - size of the list in bytes (number of elements * sizeof(DMAList))
// tagId    - DMA tag the command is issued under
void SpuMgr::DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId)
{
	DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 );		// list address must be 8B aligned
	DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 );				// ls so aligned also, due to offset within 16B alignment restrictions
	DEBUG_ERROR( (sizeList&0x7) == 0 );					// list size is a multiple of 8B
	DEBUG_ERROR( sizeList<=(2048*sizeof(DMAList)));		// no more than 2048 list elements (exactly 2048 is allowed)

	// initiate dma list
	spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_PUTL_CMD );
}
|