Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

281 lines
8.8 KiB

  1. //========== Copyright © Valve Corporation, All rights reserved. ========
  2. #include "vjobs/pcring.h"
  3. #include "vjobs/edgegeom_shared.h"
  4. #ifdef SPU
  5. #if EDGEGEOMRING_DEBUG_TRACE
  6. #include "vjobs/edgegeomparams_shared.h" // debug
  7. namespace job_edgegeom{ extern JobParams_t * g_lsJobParams; }
  8. #endif
// Debug/self-test entry point (SPU build): performs one full
// getllar/putllc round-trip on this ring's atomic cache line at m_eaThis,
// retrying until the conditional store commits. In EDGEGEOMRING_DEBUG_TRACE
// builds it also bumps m_nUseCounter, so a successful putllc proves the
// lock-line write-back actually reached main memory.
void CEdgeGeomRing::Test()
{
	for( ;; )
	{
		// Load-and-reserve the line holding this object (local copy = *this).
		cellDmaGetllar( this, m_eaThis, 0, 0 );
		//if( 0 == __builtin_expect( spu_readch( MFC_RdAtomicStat ), 0 ) )
		uint nStatusGetllar = cellDmaWaitAtomicStatus();(void)nStatusGetllar;
#if EDGEGEOMRING_DEBUG_TRACE
		m_nUseCounter++; // mutate something so the store has a visible effect
#endif
		// Conditional store: fails (non-zero status) if anyone else touched
		// the reserved line since our getllar.
		cellDmaPutllc( this, m_eaThis, 0, 0 );
		uint nStatusPutllc = cellDmaWaitAtomicStatus();
		if( 0 == __builtin_expect( nStatusPutllc, 0 ) )
		{
			break; // succeeded
		}
		//VjobSpuLog("job_edgegeom Test failed(%d,%d)\n", nStatusGetllar, nStatusPutllc );
	}
}
// 16-byte-aligned snapshot of the ring state, taken between two getllar
// reservations in Allocate(); its four uint32 fields are also aliased with a
// vector int in Allocate() so the whole snapshot can be invalidated (-1) in
// one store.
struct ALIGN16 FifoSnapshot_t
{
	uint32 m_nSignal;			// RSX label value read at snapshot time
	uint32 m_nPut;				// ring put offset at snapshot time
	uint32 m_nEnd;				// ring end offset at snapshot time
	uint32 m_nRingIncarnation;	// wrap counter at snapshot time
	// Capture put/end/incarnation from the (locally cached) ring object,
	// then fetch the current RSX label with a synchronous DMA read.
	void Snapshot( CEdgeGeomRing * pRing )
	{
		m_nPut = pRing->m_ibvbRing.m_nPut;
		m_nEnd = pRing->m_ibvbRing.m_nEnd;
		m_nRingIncarnation = pRing->m_nRingIncarnation;
		// update the signal, since we're spinning
		m_nSignal = cellDmaGetUint32( uintp( pRing->m_eaIbvbRingLabel ), DMATAG_SYNC, 0, 0 );
	}
}
ALIGN16_POST;
// Allocates nBytesUnaligned bytes (rounded up to a multiple of 32) from the
// shared IB/VB ring (SPU build). Lock-free: the whole update of this object's
// cache line is performed as a getllar/putllc transaction and retried until
// it commits. When the allocation wraps the ring, the RSX vertex cache is
// invalidated through pGcmCtx. Returns the effective address of the
// allocation. nQueueTag indexes the per-queue signal/job-id bookkeeping.
uintp CEdgeGeomRing::Allocate( CellGcmContextData *pGcmCtx, uint nBytesUnaligned, uint nQueueTag )
{
	// allocate in aligned chunks to make it all aligned
	uint nBytesAligned = AlignValue( nBytesUnaligned, 32 );
	AssertSpuMsg( nBytesAligned <= EDGEGEOMRING_MAX_ALLOCATION, "job_edgegeom allocates %u > %u from edge", nBytesAligned, EDGEGEOMRING_MAX_ALLOCATION );
	uintp eaAllocation = 0;
	SysFifo::PreparePutEnum_t nResult = SysFifo::PUT_PREPARE_FAILED;
	uint nStatusGetllar, nStatusPutllc;
	uint nSpins = 0, nAtomicCollisionEvent = 0, nWaitRsxSpins = 0;
	uint nStoredSignal;			// signal recorded for this allocation's queue tag
	uint nSpuFlag = 1 << VjobSpuId();	// this SPU's bit in m_nUsedSpus
	// Snapshot of the ring state from the PREVIOUS spin iteration; aliased
	// with a vector so it can be initialized to all-ones (invalid) at once.
	union
	{
		FifoSnapshot_t fields;
		__vector int vi4;
	}snapshot;
	snapshot.vi4 = (__vector int){-1,-1,-1,-1};
	uint32 nJobId = job_edgegeom::g_lsJobParams->m_nEdgeJobId;
	for(;; nSpins ++)
	{
		// Load-and-reserve this object's line; the member accesses below
		// operate on the fresh local copy.
		cellDmaGetllar( this, m_eaThis, 0, 0 );
		//if( 0 == __builtin_expect( spu_readch( MFC_RdAtomicStat ), 0 ) )
		nStatusGetllar = cellDmaWaitAtomicStatus();
		{
			// reservation succeeded
			Assert( m_ibvbRing.m_nPut != 0xFFFFFFFF );
			if( snapshot.fields.m_nPut == m_ibvbRing.m_nPut && snapshot.fields.m_nRingIncarnation == m_nRingIncarnation )
			{
				// the put didn't change, ring incarnation didn't change.
				// Therefore, nobody changed this object - between
				// last getllar, getting signal and this getllar,
				// so it's atomic if we update the signal now.
				m_ibvbRing.NotifySignalSafe( snapshot.fields.m_nSignal );
			}
			nResult = m_ibvbRing.PreparePut( nBytesAligned );
			if( nResult != SysFifo::PUT_PREPARE_FAILED )
			{
				// Room is available: apply the allocation and all bookkeeping
				// to the local copy, then try to commit it with putllc.
				eaAllocation = m_ibvbRing.EaPut();
				m_ibvbRing.Put( nBytesAligned );
				nStoredSignal = m_ibvbRing.GetSignal();
				m_ibvbRingSignal[nQueueTag] = nStoredSignal;
				m_nAtomicCollisionSpins += nAtomicCollisionEvent;
				m_nRsxWaitSpins += nWaitRsxSpins;
				m_nUsedSpus |= nSpuFlag;
				// Track the max job id seen per queue tag; the signed-difference
				// compare keeps working across uint32 wraparound.
				if( ( ( signed int )( nJobId - m_nMaxJobId[nQueueTag] ) ) > 0 )
				{
					m_nMaxJobId[nQueueTag] = nJobId;
				}
				if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
				{
					m_nRingIncarnation++; // we allocated, wrapping
				}
#if EDGEGEOMRING_DEBUG_TRACE
				m_nUseCounter++;
				// trace count must be a power of two for the mask below
				COMPILE_TIME_ASSERT( !( EDGEGEOMRING_DEBUG_TRACE & ( EDGEGEOMRING_DEBUG_TRACE - 1 ) ) );
				m_nNextDebugTrace = ( m_nNextDebugTrace + 1 ) & ( EDGEGEOMRING_DEBUG_TRACE - 1 );
#endif
				cellDmaPutllc( this, m_eaThis, 0, 0 );
				nStatusPutllc = cellDmaWaitAtomicStatus();
				if( 0 == __builtin_expect( nStatusPutllc, 0 ) )
				{
					break; // succeeded
				}
				// putllc lost the reservation (someone else touched the line):
				// fall through, re-snapshot, and retry the whole transaction.
			}
			else
			{
				// No room yet - RSX hasn't consumed enough of the ring.
				nWaitRsxSpins ++;
			}
		}
		// Refresh the snapshot (including the RSX label) for the next pass.
		snapshot.fields.Snapshot( this );
		if( nSpins == 100000 && !IsCert() )
		{
			// VjobSpuLog( "job_edgegeom Allocate spinning: %d, %d, signal 0x%X;\n", nStatusGetllar, nStatusPutllc, nLastSeenSignal );
			// DebuggerBreak();
		}
	}
	if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
	{
		// need to clear cache
		cellGcmSetInvalidateVertexCacheInline( pGcmCtx );
		//VjobSpuLog( "job_edgegeom Allocate wrapped ring, invalidated vertex cache\n" );
	}
	else
	{
		Assert( nResult == SysFifo::PUT_PREPARED_NOWRAP );
	}
	Assert( nStoredSignal == m_ibvbRing.GetSignal() );
	//VjobSpuLog( "alloc %X, signal %X, prev6 signal:%X, pcring put %x end %x\n", eaAllocation, nStoredSignal, m_ibvbRingSignal[(nTag-1)&3], m_ibvbRing.m_nPut, m_ibvbRing.m_nEnd );
#if EDGEGEOMRING_DEBUG_TRACE
	// Record a trace entry for this allocation into main memory with a
	// fenced put, then wait for the DMA tag to complete.
	if( m_eaDebugTrace && m_enableDebugTrace )
	{
		EdgeGeomDebugTrace_t trace;
		trace.m_nAllocResult = (uint8)nResult;
		trace.m_nQueueTag = (uint8)job_edgegeom::g_lsJobParams->m_nQueueTag;
		trace.m_nJobId = job_edgegeom::g_lsJobParams->m_nEdgeJobId;
		trace.m_nPut = m_ibvbRing.m_nPut;
		trace.m_nEnd = m_ibvbRing.m_nEnd;
		trace.m_eaEdgeGeomJts = job_edgegeom::g_lsJobParams->m_eaEdgeGeomJts;
		for( uint i = 0; i < EDGEGEOMRING_JOBQUEUE_TAG_COUNT; ++i )
			trace.m_nTagSignal[i] = m_ibvbRingSignal[i];
		VjobDmaPutf( &trace, uintp( m_eaDebugTrace + m_nNextDebugTrace ), sizeof( trace ), VJOB_IOBUFFER_DMATAG, 0, 0 );
		VjobWaitTagStatusAll( 1 << VJOB_IOBUFFER_DMATAG );
	}
#endif
	return eaAllocation;
}
  150. #else
  151. void CEdgeGeomRing::Init( void* eaBuffer, uint nBufferSize, uint nIoOffsetDelta, void * eaLocalBaseAddress, uint nLabel )
  152. {
  153. COMPILE_TIME_ASSERT( sizeof( CEdgeGeomRing_Mutable ) <= 128 ); // we need to fit into 128 bytes so that atomics work
  154. m_ibvbRing.Init( (uintp)eaBuffer, nBufferSize );
  155. m_eaLocalBaseAddress = (uint) eaLocalBaseAddress;
  156. m_nIoOffsetDelta = nIoOffsetDelta;
  157. m_nIbvbRingLabel = nLabel;
  158. m_eaIbvbRingLabel = cellGcmGetLabelAddress( nLabel );
  159. *m_eaIbvbRingLabel = m_ibvbRing.GetSignal();
  160. m_ibvbRingSignal[0] = m_ibvbRing.GetSignal();
  161. for( uint i = 0; i < EDGEGEOMRING_JOBQUEUE_TAG_COUNT; ++i )
  162. {
  163. m_ibvbRingSignal[i] = m_ibvbRingSignal[0];
  164. }
  165. V_memset( m_nMaxJobId, 0xFF, sizeof( m_nMaxJobId ) );
  166. m_eaThis = (uint) this;
  167. m_nDebuggerBreakMask = 0;
  168. m_nAtomicCollisionSpins = 0;
  169. m_nRsxWaitSpins = 0;
  170. m_nRingIncarnation = 0;
  171. #if EDGEGEOMRING_DEBUG_TRACE
  172. m_nUseCounter = 0;
  173. m_eaDebugTrace = NULL;
  174. m_eaDebugTrace = ( EdgeGeomDebugTrace_t* )MemAlloc_AllocAligned( sizeof( EdgeGeomDebugTrace_t ) * EDGEGEOMRING_DEBUG_TRACE, 16 * 16 * 16 );
  175. m_nNextDebugTrace = 0;
  176. m_enableDebugTrace = true;
  177. #endif
  178. }
  179. void CEdgeGeomRing::Shutdown()
  180. {
  181. #if EDGEGEOMRING_DEBUG_TRACE
  182. MemAlloc_FreeAligned( m_eaDebugTrace );
  183. #endif
  184. }
  185. void CEdgeGeomFeeder::Init( uint nIbvbRingSize )
  186. {
  187. m_nJobQueueTag = 0;
  188. m_nSpawnedJobsWithTag = 0;
  189. m_nTotalEdgeGeomJobCounter = 0;
  190. m_nSpawnedJobsWithTagReserveAllocate = 0;
  191. m_nIbvbRingSize = nIbvbRingSize;
  192. }
  193. void CEdgeGeomRing::Test()
  194. {
  195. #if EDGEGEOMRING_DEBUG_TRACE
  196. m_nUseCounter++;
  197. #endif
  198. }
// PPU-side emulation of the SPU Allocate() path, used only for
// single-threaded PPU-on-SPU debugging. The SPU getllar/putllc transaction
// is emulated with plain memcpy from/to the master copy at m_eaThis, so this
// is NOT safe if anything else touches the ring concurrently.
uintp CEdgeGeomRing::Allocate( CellGcmContextData *pGcmCtx, uint nBytesUnaligned, uint nQueueTag )
{
	// this is not an actively supported and tested code path! It only exists here for single-threaded PPU-on-SPU mode debugging. Bit rot possible!
	DebuggerBreak(); // this is not an actively supported and tested code path! It only exists here for single-threaded PPU-on-SPU mode debugging. Bit rot possible!
	Warning( "this is not an actively supported and tested code path! It only exists here for single-threaded PPU-on-SPU mode debugging. Bit rot possible\n" );
	// allocate in aligned chunks to make it all aligned
	uint nBytesAligned = AlignValue( nBytesUnaligned, 32 );
	AssertSpuMsg( nBytesAligned <= EDGEGEOMRING_MAX_ALLOCATION, "job_edgegeom allocates %u > %u from edge", nBytesAligned, EDGEGEOMRING_MAX_ALLOCATION );
	// Start with an invalid signal so the first iteration skips NotifySignal.
	uint nLastSeenSignal = m_ibvbRing.GetInvalidSignal();
	uintp eaAllocation = 0;
	for(;;)
	{
		// Refresh this object from the master copy (emulates getllar).
		V_memcpy( this, (void*)m_eaThis, sizeof( *this ) );
		// emulate: reservation succeeded
		if( nLastSeenSignal != m_ibvbRing.GetInvalidSignal() )
		{
			m_ibvbRing.NotifySignal( nLastSeenSignal );
		}
		SysFifo::PreparePutEnum_t nResult = m_ibvbRing.PreparePut( nBytesAligned );
		if( nResult != SysFifo::PUT_PREPARE_FAILED )
		{
			if( nResult == SysFifo::PUT_PREPARED_WRAPPED )
			{
				// need to clear cache
				cellGcmSetInvalidateVertexCacheInline( pGcmCtx );
			}
			eaAllocation = m_ibvbRing.EaPut();
			m_ibvbRing.Put( nBytesAligned );
			m_ibvbRingSignal[nQueueTag] = m_ibvbRing.GetSignal();
			// Write the updated object back to the master copy (emulates putllc).
			V_memcpy( (void*)m_eaThis, this, sizeof( *this ) );
			break; // succeeded
		}
		// update the signal, since we're spinning
		nLastSeenSignal = VjobDmaGetUint32( uintp( m_eaIbvbRingLabel ), DMATAG_SYNC, 0, 0 );
	}
	return eaAllocation;
}
  236. #endif