csgo/cstrike15_src/materialsystem/ps3gcm/SpuMgr_spu.cpp


								//================ Copyright (c) Valve Corporation. All Rights Reserved. ===========================

								//

								//

								//

								//==================================================================================================


								//--------------------------------------------------------------------------------------------------

								// Headers

								//--------------------------------------------------------------------------------------------------


								#include "SpuMgr_spu.h"

								#include <cell/atomic.h>


								#ifndef _CERT

								#include <libsn_spu.h>

								#endif


								#include <stdlib.h>

								#include <string.h>


								//--------------------------------------------------------------------------------------------------

								// Globals

								//--------------------------------------------------------------------------------------------------


								// Singleton SPU manager instance (128-byte aligned).

								SpuMgr gSpuMgr __attribute__((aligned(128)));

								// 16-byte scratch used by SPU_memcpy: seeded from gMemCpyHeader.cacheLine and
								// refreshed after every 8192-byte piece, it supplies the bytes that sit in
								// front of an unaligned destination inside its 16-byte line.
								unsigned char gUnalignedMem[16] __attribute__((aligned(16)));

								// Local-store copy of the request header that SPU_memcpy DMAs in from the
								// effective address it reads from the mailbox.
								MemCpyHeader gMemCpyHeader __attribute__((aligned(16)));


								//--------------------------------------------------------------------------------------------------

								//

								//--------------------------------------------------------------------------------------------------


								// Services one memcpy request.
								//
								// Reads the effective address of a MemCpyHeader from the mailbox, DMAs that
								// header into gMemCpyHeader, then copies gMemCpyHeader.size bytes from
								// gMemCpyHeader.src to gMemCpyHeader.dst, staging the data through local
								// store in pieces of at most 8192 bytes.
								//
								//   pBuf1 - local-store buffer each piece is DMA'd into from src
								//           (up to 8192 bytes are written to it here)
								//   pBuf2 - local-store buffer in which each piece is re-assembled, shifted
								//           by the destination's offset within its 16-byte line, and from
								//           which it is DMA'd out (up to 8192 + 16 bytes are touched here)
								//
								// src is asserted to be 16-byte aligned; dst may be unaligned. The bytes that
								// precede dst inside its first 16-byte line are taken from
								// gMemCpyHeader.cacheLine.
								//
								// NOTE(review): both buffers are handed straight to the DMA helpers, so they
								// presumably must be 16-byte aligned - confirm against the callers.
								void SPU_memcpy( void *pBuf1, void *pBuf2 )

								{

									uint32_t header;


									// effective address of the request header
									gSpuMgr.ReadMailbox( &header );


									// NOTE(review): MemcpyLock/MemcpyUnlock are defined elsewhere; presumably
									// they serialise memcpy requests - confirm.
									gSpuMgr.MemcpyLock();


									// fetch the header on tag 0 and wait for it (0x1 is the mask for tag 0)
									gSpuMgr.DmaGetUNSAFE( &gMemCpyHeader, header, sizeof( MemCpyHeader ), 0 );

									gSpuMgr.DmaDone( 0x1 );


									DEBUG_ERROR( ( gMemCpyHeader.src & 0xf ) == 0 );


									uint32_t sizeAligned;

									uint32_t sizeAlignedDown;

									uint32_t dstAlignedDown;

									uint32_t offset;


									// seed the carry-over line with the header's copy of the first destination line
									memcpy( gUnalignedMem, gMemCpyHeader.cacheLine, 16 );


									// full 8192-byte pieces; the remainder (1..8192 bytes) is handled after the loop
									while ( gMemCpyHeader.size > 8192 )

									{

										sizeAligned		= 8192;

										dstAlignedDown	= SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );

										// number of bytes in front of dst inside its 16-byte line
										offset			= gMemCpyHeader.dst - dstAlignedDown;


										gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );

										gSpuMgr.DmaDone( 0x1 );


										// put the carried-over bytes in front so the aligned put rewrites them unchanged
										if ( offset )

										{

											memcpy( pBuf2, gUnalignedMem, offset );

										}


										memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, sizeAligned );


										// NOTE(review): DmaSync is defined elsewhere; presumably a barrier that
										// makes the local-store writes above visible to the put - confirm.
										gSpuMgr.DmaSync();

										// the put starts at the aligned-down destination and is rounded up to 16 bytes
										gSpuMgr.DmaPut( dstAlignedDown, pBuf2, SPUMGR_ALIGN_UP( sizeAligned + offset, 16 ), 0 );

										gSpuMgr.DmaDone( 0x1 );


										// remember the last (possibly partial) 16-byte line; its leading bytes
										// become the prefix of the next piece
										sizeAlignedDown = SPUMGR_ALIGN_DOWN( sizeAligned + offset, 16 );

										memcpy( gUnalignedMem, (void *) ( (uint32_t) pBuf2 + sizeAlignedDown ), 16 );


										gMemCpyHeader.size -= sizeAligned;


										gMemCpyHeader.dst += 8192;

										gMemCpyHeader.src += 8192;

									}


									// final piece: the get is rounded up to a multiple of 16 bytes
									// NOTE(review): if the request size is 0 this issues a zero-byte get, which
									// DmaCheckAlignment treats as a bad size in non-_CERT builds - confirm
									// callers never send size 0.
									sizeAligned		= SPUMGR_ALIGN_UP( gMemCpyHeader.size, 16 );

									dstAlignedDown	= SPUMGR_ALIGN_DOWN( gMemCpyHeader.dst, 16 );

									offset			= gMemCpyHeader.dst - dstAlignedDown;


									gSpuMgr.DmaGetUNSAFE( pBuf1, gMemCpyHeader.src, sizeAligned, 0 );

									gSpuMgr.DmaDone( 0x1 );


									if ( offset )

									{

										memcpy( pBuf2, gUnalignedMem, offset );

									}


									// only the requested bytes are copied, not the rounded-up get size
									memcpy( (void *) ( (uint32_t) pBuf2 + offset ), pBuf1, gMemCpyHeader.size );


									// NOTE(review): the put below is rounded up to 16 bytes, so up to 15 bytes
									// past dst + size are written from whatever pBuf2 already held there -
									// confirm the requester accounts for this tail.
									sizeAligned = SPUMGR_ALIGN_UP( gMemCpyHeader.size + offset, 16 );


									gSpuMgr.DmaSync();

									gSpuMgr.DmaPut( dstAlignedDown, pBuf2, sizeAligned, 0 );

									gSpuMgr.DmaDone( 0x1 );


									// blocking requests get a 0 written to the mailbox once the last put is done
									if ( gMemCpyHeader.blocking )

									{

										gSpuMgr.WriteMailbox( 0 );

									}


									gSpuMgr.MemcpyUnlock();

								}


								//--------------------------------------------------------------------------------------------------

								// DmaCheckAlignment

								//

								//   	Checks restrictions specified in SpuMgr::DmaGet

								//--------------------------------------------------------------------------------------------------


								// Validates a single DMA request against the MFC size/alignment rules.
								//
								//   src, dest - the two addresses involved in the transfer (either may be
								//               the local-store or the effective address)
								//   size      - transfer size in bytes
								//
								// Returns 1 when the request is legal, 0 otherwise:
								//   * size must be 1, 2, 4, 8 or a non-zero multiple of 16
								//   * neither address may be 0
								//   * for 1/2/4/8-byte transfers the low 4 bits of both addresses must
								//     match and both addresses must be aligned to the transfer size
								//   * for multiples of 16 both addresses must be 16-byte aligned
								// In _CERT builds no checking is done and 1 is always returned.
								int DmaCheckAlignment(uint32_t src, uint32_t dest, uint32_t size)
								{
								#if defined( _CERT )
									return 1;
								#else // !_CERT
									const bool isQuadwordMultiple = (size >= 16) && ((size & 0xf) == 0);
									const bool isSmallTransfer    = (size == 8) || (size == 4) || (size == 2) || (size == 1);

									// anything that is neither a small power of two nor a multiple of 16 is a bad size
									if (!isQuadwordMultiple && !isSmallTransfer)
									{
										return 0;
									}

									// small transfers must sit at the same offset inside a 16-byte line on both sides
									if (isSmallTransfer && ((src & 0xF) != (dest & 0xF)))
									{
										return 0;
									}

									// null addresses are never valid
									if (!src || !dest)
									{
										return 0;
									}

									// small transfers are naturally aligned, everything else is 16-byte aligned
									const uint32_t requiredAlign = isQuadwordMultiple ? 16 : size;

									return SPUMGR_IS_ALIGNED(src, requiredAlign) &&
										   SPUMGR_IS_ALIGNED(dest, requiredAlign);
								#endif // _CERT
								}


								//--------------------------------------------------------------------------------------------------

								//

								//--------------------------------------------------------------------------------------------------


								// Starts the SPU decrementer (it may not be running by default), clears the
								// manager's transfer and malloc counters, and reads the effective addresses
								// of the two SPU locks from the mailbox. Always returns 0.
								int SpuMgr::Init()
								{
									// event bit that belongs to the decrementer
									const unsigned int decrementerEvent = 0x20;

									// Mask the decrementer event while it is being (re)started.
									const unsigned int savedEventMask = spu_readch(SPU_RdEventStatMask);
									spu_writech(SPU_WrEventMask, savedEventMask & ~decrementerEvent);

									// Acknowledging the event clears anything pending and stops the decrementer.
									spu_writech(SPU_WrEventAck, decrementerEvent);

									// Writing a value starts the decrementer; the current count is written back.
									const unsigned int currentCount = spu_readch(SPU_RdDec);
									spu_writech(SPU_WrDec, currentCount);

									// Re-enable events, now including the decrementer event.
									spu_writech(SPU_WrEventMask, savedEventMask | decrementerEvent);

									// Start with clean statistics.
									ResetBytesTransferred();
									m_mallocCount = 0;

									// The lock addresses arrive through the mailbox: the general lock first,
									// then the memcpy lock.
									ReadMailbox( &m_lockEA );
									ReadMailbox( &m_memcpyLockEA );

									return 0;
								}


								//--------------------------------------------------------------------------------------------------

								//

								//--------------------------------------------------------------------------------------------------


								// Shutdown hook for the SPU manager. Currently does nothing.
								void SpuMgr::Term()

								{

								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::DmaGet

								//

								// DmaGet       - alignment and size checking

								// DmaGetUNSAFE - no alignment or size checking (but will assert in debug)

								// _DmaGet  - handles badly aligned dma's, should be a private member really (doesn't handle small dma's)

								//

								// DMA restrictions

								//		An MFC supports naturally aligned DMA transfer sizes of 1, 2, 4,

								//		8, and 16 bytes and multiples of 16 bytes

								//		Furthermore, if size is 1, 2, 4, or 8 bytes then lower 4 bits

								//		of LS and EA must match

								//

								//	Note:

								//		Peak performance is achieved for transfers in which both the EA and

								//		the LSA are 128-byte aligned and the size of the transfer is a multiple

								//		of 128 bytes.

								//--------------------------------------------------------------------------------------------------


								// Issues a DMA get without fixing up alignment or size; the request is only
								// validated by debug assertions.
								//
								//   ls    - local-store destination
								//   ea    - effective (main memory) source address, non-zero and below 0xd0000000
								//   size  - number of bytes; must satisfy DmaCheckAlignment
								//   tagId - MFC tag the transfer(s) are issued on
								//
								// Requests larger than 0x4000 bytes are split into several MFC commands.
								// The call does not wait for completion.
								void SpuMgr::DmaGetUNSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
								{
									DEBUG_ERROR( ea < 0xd0000000 );
									DEBUG_ERROR( ea );
									DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

									// add up bytes transferred
									// This has to happen before the loop: the loop counts size down to
									// zero, so accounting afterwards would always add 0.
									m_bytesRequested   += size;
									m_bytesTransferred += size;

									// do the dma
									while (size)
									{
										uint32_t dmaSize = 0x4000;
										dmaSize = (size < dmaSize)? size: dmaSize;
										size -= dmaSize;

										// kick off dma
										spu_mfcdma64( (void*)ls, 0, ea, dmaSize, tagId, MFC_GET_CMD);
										m_numDMATransfers++;

										ls = (void*)((uint32_t)ls + dmaSize);
										ea += dmaSize;
									}
								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::_DmaGet

								//

								//	Internal function - do not call this directly

								//--------------------------------------------------------------------------------------------------


								// DMA get for requests that break the MFC alignment/size rules.
								//
								// The effective address is rounded down and the size rounded up to 16-byte
								// boundaries. Whenever any rounding was needed, or ls itself is not 16-byte
								// aligned, the data is fetched into a temporary 16-byte aligned buffer, the
								// call blocks until the transfer on tagId has finished, and the requested
								// bytes are then copied to ls. Otherwise the transfer goes straight to ls
								// and the call returns without waiting.
								//
								//   ls    - local-store destination (any alignment)
								//   ea    - effective source address (any alignment)
								//   size  - number of bytes wanted at ls
								//   tagId - MFC tag to issue the transfer(s) on
								void SpuMgr::_DmaGet(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
								{
									const uint32_t kLineMask   = 0xF;		// low bits of a 16-byte line
									const uint32_t kMaxDmaSize = 0x4000;	// largest single transfer issued here

									// bytes between the rounded-down source address and the requested one
									const uint32_t leadingBytes = ea & kLineMask;
									uint32_t srcEA = ea - leadingBytes;

									// total bytes to fetch: leading bytes included, rounded up to 16
									const uint32_t paddedSize = (size + leadingBytes + kLineMask) & ~kLineMask;

									uint32_t dstLS = (uint32_t)ls;

									// a bounce buffer is needed if source, size or destination is off-line
									const bool useBounce = (leadingBytes != 0) ||
														   (((size + leadingBytes) & kLineMask) != 0) ||
														   ((dstLS & kLineMask) != 0);

									char *pBounce = NULL;
									if (useBounce)
									{
										pBounce = (char*)MemAlign(0x10, paddedSize);
										dstLS = (uint32_t)pBounce;
									}

									// statistics: what the caller asked for vs. what actually moves
									m_bytesRequested += size;
									m_bytesTransferred += paddedSize;

									// issue the transfer in pieces of at most kMaxDmaSize
									for (uint32_t remaining = paddedSize; remaining != 0; )
									{
										const uint32_t pieceSize = (remaining < kMaxDmaSize) ? remaining : kMaxDmaSize;

										spu_mfcdma64( (void*)dstLS, 0, srcEA, pieceSize, tagId, MFC_GET_CMD);
										m_numDMATransfers++;

										remaining -= pieceSize;
										dstLS += pieceSize;
										srcEA += pieceSize;
									}

									if (!useBounce)
									{
										return;
									}

									// the copy-out happens right here, so wait for the data first
									DmaDone(1 << tagId);

									// skip the leading padding and hand over exactly what was asked for
									memcpy(ls, pBounce + leadingBytes, size);

									Free(pBounce);
								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::DmaGetSAFE

								//

								//	DMA restrictions (look at SpuMgr::DmaGetUNSAFE in this file) are

								//	handled transparently by this function

								//--------------------------------------------------------------------------------------------------


								// DMA get that accepts any alignment and size.
								//
								// A request that already satisfies the MFC rules is issued directly through
								// DmaGetUNSAFE; everything else is routed through _DmaGet, which fixes up
								// alignment itself.
								//
								//   ls    - local-store destination
								//   ea    - effective source address (must be non-zero)
								//   size  - number of bytes
								//   tagId - MFC tag to issue the transfer(s) on
								//
								// A request can be issued directly when either
								//   * size is 1, 2, 4 or 8, the low 4 bits of ls and ea match, and the
								//     address is naturally aligned to size, or
								//   * size is a multiple of 16 (>= 16) and both ls and ea are 16-byte aligned.
								void SpuMgr::DmaGetSAFE(void *ls, uint32_t ea, uint32_t size, uint32_t tagId)
								{
									DEBUG_ERROR( ea );

									// offsets of both addresses inside their 16-byte line
									const uint32_t lsLow = (uint32_t)ls & 0xF;
									const uint32_t eaLow = ea & 0xF;

									bool canDmaDirectly;

									if( size < 0x10 )
									{
										// if < 16B can only get 1,2,4 or 8B
										const bool legalSmallSize = ( size==0x1 || size==0x2 || size==0x4 || size==0x8 );

										// Lowest 4 bits of both addresses have to match. Each side is
										// masked before comparing: '==' binds tighter than '&', so the
										// unparenthesised form compared 0xF with ea instead.
										// The address must also be naturally aligned to the transfer
										// size; since the low bits match, checking one side is enough.
										canDmaDirectly = legalSmallSize &&
														 ( lsLow == eaLow ) &&
														 ( ( eaLow & ( size - 1 ) ) == 0 );
									}
									else
									{
										canDmaDirectly = ( (size & 0xF) == 0 ) &&	// has to be multiple of 16B, &
														 ( lsLow == 0 ) &&			// ea and ls have to be 16B aligned
														 ( eaLow == 0 );
									}

									if( canDmaDirectly )
									{
										// alignment is okay just dma
										DmaGetUNSAFE(ls,ea,size,tagId);
									}
									else
									{
										// let _DmaGet fix up alignment and size
										_DmaGet(ls,ea,size,tagId);
									}
								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::DmaPut

								//--------------------------------------------------------------------------------------------------


								// Issues a DMA put from local store to an effective address.
								//
								//   ea    - effective destination address, non-zero and below 0xd0000000
								//   ls    - local-store source, below 0x40000
								//   size  - number of bytes; must satisfy DmaCheckAlignment
								//   tagId - MFC tag to issue the transfer(s) on
								//
								// The arguments are only validated by debug assertions. Requests larger
								// than 0x4000 bytes are split into several MFC commands. The call does not
								// wait for completion.
								void SpuMgr::DmaPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
								{
									DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) );	// valid ea
									DEBUG_ERROR( (uint32_t)ls < 0x40000 );		// valid ls
									DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

									const uint32_t kMaxDmaSize = 0x4000;	// largest single transfer issued here
									uint32_t srcLS = (uint32_t)ls;

									for (uint32_t remaining = size; remaining != 0; )
									{
										const uint32_t pieceSize = (remaining < kMaxDmaSize) ? remaining : kMaxDmaSize;

										// initiate dma to ppu
										spu_mfcdma64( (void*)srcLS, 0, ea, pieceSize, tagId, MFC_PUT_CMD);

										remaining -= pieceSize;
										srcLS += pieceSize;
										ea += pieceSize;
									}
								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::DmaSmallPut

								//--------------------------------------------------------------------------------------------------


								// DMA put built from small transfers of one fixed size.
								//
								// The element size is the largest of 8, 4, 2 or 1 that divides size, and
								// size / element-size puts of that width are issued back to back.
								//
								//   ea    - effective destination address, non-zero and below 0xd0000000
								//   ls    - local-store source, below 0x40000
								//   size  - total number of bytes
								//   tagId - MFC tag to issue the transfers on
								//
								// The call does not wait for completion.
								// NOTE(review): the debug check below goes through DmaCheckAlignment,
								// which only accepts sizes of 1, 2, 4, 8 or a multiple of 16, although the
								// loop itself can handle any size - confirm which is intended.
								void SpuMgr::DmaSmallPut(uint32_t ea, void *ls, uint32_t size, uint32_t tagId)
								{
									DEBUG_ERROR( (ea!=0) && (ea<0xd0000000) );	// valid ea
									DEBUG_ERROR( (uint32_t)ls < 0x40000 );		// valid ls
									DEBUG_ERROR(DmaCheckAlignment((uint32_t)ls, ea, size));

									// widest legal small transfer that divides the request evenly
									const uint32_t elemSize = ((size & 7) == 0) ? 8 :
															  ((size & 3) == 0) ? 4 :
															  ((size & 1) == 0) ? 2 : 1;

									uint32_t srcLS = (uint32_t)ls;

									for (uint32_t remaining = size; remaining != 0; remaining -= elemSize)
									{
										// initiate dma to ppu
										spu_mfcdma64( (void*)srcLS, 0, ea, elemSize, tagId, MFC_PUT_CMD);

										srcLS += elemSize;
										ea += elemSize;
									}
								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::DmaGetList

								//

								// Gather data scattered around main mem, MFC will run through the list, and place the elements (based on ea address and size)

								// contiguously in ls.

								//

								// NOTE: if an individual list element size is <16B, the data will still be dma'd but the following element will be placed

								// on the next 16B boundary. So it is possible to get lots of small elements, but you will be left with gaps in ls.

								//

								// ls - ls address of where items will be placed (contiguously)

								// lsList - ls address of actual list

								// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))

								// tagId - works the same way as regular DMA's

								//

								// Alignment and Size Restrictions:

								// -ls and lsList must be 8B aligned

								// -size must be a multiple of 8B (sizeof(DMAList))

								// -no more than 2048 list elements

								//

								// light error checking right now

								//--------------------------------------------------------------------------------------------------


								// Issues a DMA list get: the MFC walks the list and places the listed
								// elements one after another in local store.
								//
								//   ls       - local-store address where the gathered data is placed, 8B aligned
								//   pLS_List - local-store address of the DMA list itself, 8B aligned
								//   sizeList - size of the list in bytes, a multiple of sizeof(DMAList),
								//              at most 2048 elements
								//   tagId    - MFC tag to issue the list command on
								//
								// The arguments are only validated by debug assertions, and the call does
								// not wait for completion.
								void SpuMgr::DmaGetList(void *ls, DMAList *pLS_List, uint32_t sizeList, uint32_t tagId)
								{
									DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 );	// list address must be 8B aligned
									DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 );			// ls so aligned also, due to offset within 16B alignment restrictions
									DEBUG_ERROR( (sizeList&0x7) == 0 );				// list size is a multiple of 8B
									// "no more than 2048" has to accept a list of exactly 2048 elements,
									// hence <= rather than <
									DEBUG_ERROR( sizeList<=(2048*sizeof(DMAList)));	// no more than 2048 list elements

									// initiate dma list
									spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_GETL_CMD );
								}


								//--------------------------------------------------------------------------------------------------

								// SpuMgr::DmaPutList

								//

								// Scatter data held contiguously in ls, to main mem

								//

								//   ls - ls address of where items exist (contiguously) to be scattered back to main mem

								// lsList - ls address of actual list

								// sizeList - size of list in bytes (each list element is 8B (sizeof(DMAList)), so sizeList should be number of list elements * sizeof(DMAList))

								//   tagId - works the same way as regular DMA's

								//

								// Alignment and Size Restrictions:

								// ls and lsList must be 8B aligned, size must be a multiple of 8B (sizeof(DMAList))

								//

								//   light error checking right now

								//--------------------------------------------------------------------------------------------------


								// Issues a DMA list put: data held contiguously in local store is scattered
								// to the main-memory locations named by the list.
								//
								//   ls       - local-store address of the contiguous source data, 8B aligned
								//   pLS_List - local-store address of the DMA list itself, 8B aligned
								//   sizeList - size of the list in bytes, a multiple of sizeof(DMAList),
								//              at most 2048 elements
								//   tagId    - MFC tag to issue the list command on
								//
								// The arguments are only validated by debug assertions, and the call does
								// not wait for completion.
								void SpuMgr::DmaPutList(void *ls, DMAList* pLS_List, uint32_t sizeList, uint32_t tagId)
								{
									DEBUG_ERROR( ((uint32_t)pLS_List&0x7) == 0 );	// list address must be 8B aligned
									DEBUG_ERROR( ((uint32_t)ls&0x7) == 0 );			// ls so aligned also, due to offset within 16B alignment restrictions
									DEBUG_ERROR( (sizeList&0x7) == 0 );				// list size is a multiple of 8B
									// "no more than 2048" has to accept a list of exactly 2048 elements,
									// hence <= rather than <
									DEBUG_ERROR( sizeList<=(2048*sizeof(DMAList)));	// no more than 2048 list elements

									// initiate dma list
									spu_mfcdma64( ls, 0, (uint32_t)pLS_List, sizeList, tagId, MFC_PUTL_CMD );
								}