Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

282 lines
8.8 KiB

//========== Copyright © Valve Corporation, All rights reserved. ========
#include "vjobs/pcring.h"
#include "vjobs/edgegeom_shared.h"
#ifdef SPU
#if EDGEGEOMRING_DEBUG_TRACE
#include "vjobs/edgegeomparams_shared.h" // debug
namespace job_edgegeom{ extern JobParams_t * g_lsJobParams; }
#endif
void CEdgeGeomRing::Test()
{
for( ;; )
{
cellDmaGetllar( this, m_eaThis, 0, 0 );
//if( 0 == __builtin_expect( spu_readch( MFC_RdAtomicStat ), 0 ) )
uint nStatusGetllar = cellDmaWaitAtomicStatus();(void)nStatusGetllar;
#if EDGEGEOMRING_DEBUG_TRACE
m_nUseCounter++;
#endif
cellDmaPutllc( this, m_eaThis, 0, 0 );
uint nStatusPutllc = cellDmaWaitAtomicStatus();
if( 0 == __builtin_expect( nStatusPutllc, 0 ) )
{
break; // succeeded
}
//VjobSpuLog("job_edgegeom Test failed(%d,%d)\n", nStatusGetllar, nStatusPutllc );
}
}
// Four 32-bit values captured from a CEdgeGeomRing while Allocate() is spinning.
// The field order is kept as-is: Allocate() overlays this struct with a vector in a union.
struct ALIGN16 FifoSnapshot_t
{
	uint32 m_nSignal;          // value fetched from the ring label (pRing->m_eaIbvbRingLabel)
	uint32 m_nPut;             // copy of pRing->m_ibvbRing.m_nPut
	uint32 m_nEnd;             // copy of pRing->m_ibvbRing.m_nEnd
	uint32 m_nRingIncarnation; // copy of pRing->m_nRingIncarnation

	// Copies put/end/incarnation out of *pRing, then fetches the current label value
	// with cellDmaGetUint32 into m_nSignal.
	void Snapshot( CEdgeGeomRing * pRing )
	{
		CEdgeGeomRing &ring = *pRing;
		m_nPut = ring.m_ibvbRing.m_nPut;
		m_nEnd = ring.m_ibvbRing.m_nEnd;
		m_nRingIncarnation = ring.m_nRingIncarnation;

		// refresh the signal: the caller is spinning and wants the latest label value
		const uintp eaLabel = uintp( ring.m_eaIbvbRingLabel );
		m_nSignal = cellDmaGetUint32( eaLabel, DMATAG_SYNC, 0, 0 );
	}
}
ALIGN16_POST;
// SPU build: allocates a chunk from m_ibvbRing using a getllar/putllc retry loop on this object.
//
//   pGcmCtx         - GCM context; only used to emit cellGcmSetInvalidateVertexCacheInline when
//                     the allocation wrapped the ring
//   nBytesUnaligned - requested size; passed through AlignValue( ..., 32 ) before allocating
//   nQueueTag       - index into m_ibvbRingSignal[] and m_nMaxJobId[] updated by this allocation
//
// Returns the value of m_ibvbRing.EaPut() taken just before the successful Put().
// The function spins until both PreparePut() and the putllc succeed.
//
// NOTE(review): g_lsJobParams is read unconditionally here, but in this file it is only declared
// under EDGEGEOMRING_DEBUG_TRACE - presumably another header declares it too; confirm.
uintp CEdgeGeomRing::Allocate( CellGcmContextData *pGcmCtx, uint nBytesUnaligned, uint nQueueTag )
{
	// allocate in aligned chunks to make it all aligned
	uint nBytesAligned = AlignValue( nBytesUnaligned, 32 );
	AssertSpuMsg( nBytesAligned <= EDGEGEOMRING_MAX_ALLOCATION, "job_edgegeom allocates %u > %u from edge", nBytesAligned, EDGEGEOMRING_MAX_ALLOCATION );
	uintp eaAllocation = 0;
	SysFifo::PreparePutEnum_t nResult = SysFifo::PUT_PREPARE_FAILED;
	uint nStatusGetllar, nStatusPutllc;
	// nAtomicCollisionEvent counts failed putllc attempts, nWaitRsxSpins counts failed PreparePut attempts;
	// both are folded into the ring's statistics by the attempt that finally commits
	uint nSpins = 0, nAtomicCollisionEvent = 0, nWaitRsxSpins = 0;
	uint nStoredSignal = 0; // always overwritten before the loop exits; initialized to keep the later Assert well-defined
	uint nSpuFlag = 1 << VjobSpuId();
	union
	{
		FifoSnapshot_t fields;
		__vector int vi4;
	}snapshot;
	// start with an all-ones snapshot; the Assert below checks m_nPut is never 0xFFFFFFFF,
	// so the snapshot comparison cannot match on the first iteration
	snapshot.vi4 = (__vector int){-1,-1,-1,-1};
	uint32 nJobId = job_edgegeom::g_lsJobParams->m_nEdgeJobId;
	for(;; nSpins ++)
	{
		cellDmaGetllar( this, m_eaThis, 0, 0 );
		//if( 0 == __builtin_expect( spu_readch( MFC_RdAtomicStat ), 0 ) )
		nStatusGetllar = cellDmaWaitAtomicStatus();
		(void)nStatusGetllar; // not inspected, same as in Test()
		{
			// reservation succeeded
			Assert( m_ibvbRing.m_nPut != 0xFFFFFFFF );
			if( snapshot.fields.m_nPut == m_ibvbRing.m_nPut && snapshot.fields.m_nRingIncarnation == m_nRingIncarnation )
			{
				// the put didn't change, ring incarnation didn't change.
				// Therefore, nobody changed this object - between
				// last getllar, getting signal and this getllar,
				// so it's atomic if we update the signal now.
				m_ibvbRing.NotifySignalSafe( snapshot.fields.m_nSignal );
			}
			nResult = m_ibvbRing.PreparePut( nBytesAligned );
			if( nResult != SysFifo::PUT_PREPARE_FAILED )
			{
				eaAllocation = m_ibvbRing.EaPut();
				m_ibvbRing.Put( nBytesAligned );
				nStoredSignal = m_ibvbRing.GetSignal();
				m_ibvbRingSignal[nQueueTag] = nStoredSignal;
				m_nAtomicCollisionSpins += nAtomicCollisionEvent;
				m_nRsxWaitSpins += nWaitRsxSpins;
				m_nUsedSpus |= nSpuFlag;
				// compare through the signed difference (presumably so a wrapped job id still compares as newer)
				if( ( ( signed int )( nJobId - m_nMaxJobId[nQueueTag] ) ) > 0 )
				{
					m_nMaxJobId[nQueueTag] = nJobId;
				}
				if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
				{
					m_nRingIncarnation++; // we allocated, wrapping
				}
#if EDGEGEOMRING_DEBUG_TRACE
				m_nUseCounter++;
				// EDGEGEOMRING_DEBUG_TRACE must be a power of two for the mask below to wrap the index
				COMPILE_TIME_ASSERT( !( EDGEGEOMRING_DEBUG_TRACE & ( EDGEGEOMRING_DEBUG_TRACE - 1 ) ) );
				m_nNextDebugTrace = ( m_nNextDebugTrace + 1 ) & ( EDGEGEOMRING_DEBUG_TRACE - 1 );
#endif
				cellDmaPutllc( this, m_eaThis, 0, 0 );
				nStatusPutllc = cellDmaWaitAtomicStatus();
				if( 0 == __builtin_expect( nStatusPutllc, 0 ) )
				{
					break; // succeeded
				}
				// The putllc did not go through, so every member update above is thrown away by the
				// next getllar. Remember the collision locally; the attempt that eventually commits
				// adds it to m_nAtomicCollisionSpins (previously this counter was never incremented,
				// so the statistic always stayed at its initial value).
				nAtomicCollisionEvent ++;
			}
			else
			{
				// no room in the ring yet
				nWaitRsxSpins ++;
			}
		}
		// capture put/end/incarnation and the current label value for the check at the top of the next iteration
		snapshot.fields.Snapshot( this );
		if( nSpins == 100000 && !IsCert() )
		{
			// VjobSpuLog( "job_edgegeom Allocate spinning: %d, %d, signal 0x%X;\n", nStatusGetllar, nStatusPutllc, nLastSeenSignal );
			// DebuggerBreak();
		}
	}
	if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
	{
		// need to clear cache
		cellGcmSetInvalidateVertexCacheInline( pGcmCtx );
		//VjobSpuLog( "job_edgegeom Allocate wrapped ring, invalidated vertex cache\n" );
	}
	else
	{
		Assert( nResult == SysFifo::PUT_PREPARED_NOWRAP );
	}
	Assert( nStoredSignal == m_ibvbRing.GetSignal() );
	//VjobSpuLog( "alloc %X, signal %X, prev6 signal:%X, pcring put %x end %x\n", eaAllocation, nStoredSignal, m_ibvbRingSignal[(nTag-1)&3], m_ibvbRing.m_nPut, m_ibvbRing.m_nEnd );
#if EDGEGEOMRING_DEBUG_TRACE
	if( m_eaDebugTrace && m_enableDebugTrace )
	{
		// write one trace record describing this allocation to slot m_nNextDebugTrace of the trace array
		EdgeGeomDebugTrace_t trace;
		trace.m_nAllocResult = (uint8)nResult;
		trace.m_nQueueTag = (uint8)job_edgegeom::g_lsJobParams->m_nQueueTag;
		trace.m_nJobId = job_edgegeom::g_lsJobParams->m_nEdgeJobId;
		trace.m_nPut = m_ibvbRing.m_nPut;
		trace.m_nEnd = m_ibvbRing.m_nEnd;
		trace.m_eaEdgeGeomJts = job_edgegeom::g_lsJobParams->m_eaEdgeGeomJts;
		for( uint i = 0; i < EDGEGEOMRING_JOBQUEUE_TAG_COUNT; ++i )
			trace.m_nTagSignal[i] = m_ibvbRingSignal[i];
		VjobDmaPutf( &trace, uintp( m_eaDebugTrace + m_nNextDebugTrace ), sizeof( trace ), VJOB_IOBUFFER_DMATAG, 0, 0 );
		// wait for the DMA before the stack-local record goes out of scope
		VjobWaitTagStatusAll( 1 << VJOB_IOBUFFER_DMATAG );
	}
#endif
	return eaAllocation;
}
#else
void CEdgeGeomRing::Init( void* eaBuffer, uint nBufferSize, uint nIoOffsetDelta, void * eaLocalBaseAddress, uint nLabel )
{
COMPILE_TIME_ASSERT( sizeof( CEdgeGeomRing_Mutable ) <= 128 ); // we need to fit into 128 bytes so that atomics work
m_ibvbRing.Init( (uintp)eaBuffer, nBufferSize );
m_eaLocalBaseAddress = (uint) eaLocalBaseAddress;
m_nIoOffsetDelta = nIoOffsetDelta;
m_nIbvbRingLabel = nLabel;
m_eaIbvbRingLabel = cellGcmGetLabelAddress( nLabel );
*m_eaIbvbRingLabel = m_ibvbRing.GetSignal();
m_ibvbRingSignal[0] = m_ibvbRing.GetSignal();
for( uint i = 0; i < EDGEGEOMRING_JOBQUEUE_TAG_COUNT; ++i )
{
m_ibvbRingSignal[i] = m_ibvbRingSignal[0];
}
V_memset( m_nMaxJobId, 0xFF, sizeof( m_nMaxJobId ) );
m_eaThis = (uint) this;
m_nDebuggerBreakMask = 0;
m_nAtomicCollisionSpins = 0;
m_nRsxWaitSpins = 0;
m_nRingIncarnation = 0;
#if EDGEGEOMRING_DEBUG_TRACE
m_nUseCounter = 0;
m_eaDebugTrace = NULL;
m_eaDebugTrace = ( EdgeGeomDebugTrace_t* )MemAlloc_AllocAligned( sizeof( EdgeGeomDebugTrace_t ) * EDGEGEOMRING_DEBUG_TRACE, 16 * 16 * 16 );
m_nNextDebugTrace = 0;
m_enableDebugTrace = true;
#endif
}
// Releases the debug trace buffer allocated by Init() (EDGEGEOMRING_DEBUG_TRACE builds only).
// The pointer is cleared after the free so that a second Shutdown() does not free it again
// and so that code testing m_eaDebugTrace no longer sees a stale address.
void CEdgeGeomRing::Shutdown()
{
#if EDGEGEOMRING_DEBUG_TRACE
	if( m_eaDebugTrace )
	{
		MemAlloc_FreeAligned( m_eaDebugTrace );
		m_eaDebugTrace = NULL;
	}
#endif
}
// Stores the ring size and resets the feeder's tag and job counters to zero.
//   nIbvbRingSize - value stored in m_nIbvbRingSize
void CEdgeGeomFeeder::Init( uint nIbvbRingSize )
{
	m_nIbvbRingSize = nIbvbRingSize;

	// start from tag 0 with nothing spawned yet
	m_nJobQueueTag = 0;
	m_nSpawnedJobsWithTag = 0;
	m_nSpawnedJobsWithTagReserveAllocate = 0;
	m_nTotalEdgeGeomJobCounter = 0;
}
// PPU build of Test(): there is no atomic round trip here; it only bumps m_nUseCounter
// when EDGEGEOMRING_DEBUG_TRACE is enabled and is otherwise a no-op.
void CEdgeGeomRing::Test()
{
#if EDGEGEOMRING_DEBUG_TRACE
	++m_nUseCounter;
#endif
}
// PPU build of Allocate(): emulates the SPU getllar/putllc loop with plain memory copies
// from/to m_eaThis. It breaks into the debugger and prints a warning on every call.
//
//   pGcmCtx         - GCM context; used to invalidate the vertex cache when the allocation wrapped
//   nBytesUnaligned - requested size; passed through AlignValue( ..., 32 )
//   nQueueTag       - index into m_ibvbRingSignal[] receiving the post-allocation signal
//
// Returns m_ibvbRing.EaPut() as it was right before Put(). Spins until PreparePut() succeeds.
uintp CEdgeGeomRing::Allocate( CellGcmContextData *pGcmCtx, uint nBytesUnaligned, uint nQueueTag )
{
	// this is not an actively supported and tested code path! It only exists here for single-threaded PPU-on-SPU mode debugging. Bit rot possible!
	DebuggerBreak();
	Warning( "this is not an actively supported and tested code path! It only exists here for single-threaded PPU-on-SPU mode debugging. Bit rot possible\n" );

	// sizes are handed to the ring in 32-aligned chunks
	uint nAlignedSize = AlignValue( nBytesUnaligned, 32 );
	AssertSpuMsg( nAlignedSize <= EDGEGEOMRING_MAX_ALLOCATION, "job_edgegeom allocates %u > %u from edge", nAlignedSize, EDGEGEOMRING_MAX_ALLOCATION );

	// phase 1: spin until the ring reports that the chunk fits
	uint nSeenSignal = m_ibvbRing.GetInvalidSignal();
	SysFifo::PreparePutEnum_t nPrepared = SysFifo::PUT_PREPARE_FAILED;
	for(;;)
	{
		// stands in for the getllar: reload this object from m_eaThis
		V_memcpy( this, (void*)m_eaThis, sizeof( *this ) );

		// only pass a signal on once one has actually been read from the label
		if( nSeenSignal != m_ibvbRing.GetInvalidSignal() )
		{
			m_ibvbRing.NotifySignal( nSeenSignal );
		}

		nPrepared = m_ibvbRing.PreparePut( nAlignedSize );
		if( nPrepared != SysFifo::PUT_PREPARE_FAILED )
		{
			break;
		}

		// still no room: re-read the label before the next attempt
		nSeenSignal = VjobDmaGetUint32( uintp( m_eaIbvbRingLabel ), DMATAG_SYNC, 0, 0 );
	}

	// phase 2: commit the allocation
	if( nPrepared == SysFifo::PUT_PREPARED_WRAPPED )
	{
		// the ring wrapped, so the vertex cache needs to be invalidated
		cellGcmSetInvalidateVertexCacheInline( pGcmCtx );
	}
	uintp eaAllocation = m_ibvbRing.EaPut();
	m_ibvbRing.Put( nAlignedSize );
	m_ibvbRingSignal[nQueueTag] = m_ibvbRing.GetSignal();

	// stands in for the putllc: write this object back to m_eaThis
	V_memcpy( (void*)m_eaThis, this, sizeof( *this ) );
	return eaAllocation;
}
#endif