Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

382 lines
13 KiB

  1. //========== Copyright © Valve Corporation, All rights reserved. ========
  2. #include "vjobchain.h"
  3. #include "vjobutils.h"
  4. #include "vjobs/root.h"
  5. #include "tier1/strtools.h"
  6. #include "tier0/miniprofiler.h"
  7. CMiniProfiler g_mpRun, g_mpJoin, g_mpPush, g_mpPush2;
  8. int VjobChain::Init( VJobsRoot * pRoot, uint nMaxContention, const char* pFormatName, ... )
  9. {
  10. Assert( !( uintp( &m_guard ) & 0x7F ) );
  11. {
  12. va_list args
  13. va_start( args, pFormatName );
  14. V_vsnprintf( m_name, sizeof( m_name ), pFormatName, args );
  15. }
  16. cell::Spurs::JobChainAttribute attr;
  17. uint8_t nVjobChainPriority[8] = {0,12,12,12,12,12,0,0}; // priority lower than the main job queue, in order to yield
  18. attr.initialize( &attr, m_headBlock, 128, 1, nVjobChainPriority, nMaxContention, true, 0,1, false, 256, 1 );
  19. attr.setName( m_name );
  20. m_pLastBlock = NULL; // NOT running by default
  21. CELL_MUST_SUCCEED( JobChain::createWithAttribute( &pRoot->m_spurs, &m_spursJobChain, &attr ) );
  22. V_memset( &m_notifyArea, 0, sizeof( m_notifyArea ) );
  23. V_memset( &m_jobNotify, 0, sizeof( m_jobNotify ) );
  24. m_jobNotify.header = *( pRoot->m_pJobNotify );
  25. m_jobNotify.header.useInOutBuffer = 1;
  26. Assert( !( uint( &m_jobNotify ) & 63 ) ); // should be 64-byte aligned
  27. AddInputDma( &m_jobNotify, sizeof( m_notifyArea ), &m_notifyArea );
  28. m_notifyArea.m_nCopyFrom = 1;
  29. m_notifyArea.m_nCopyTo = 0; // SPU will mark copyTo = 1, PPU will mark it back to 0; at this time, we may actually mark the notify as completed
  30. m_jobNotify.workArea.userData[1] = 0; // function: default
  31. CELL_MUST_SUCCEED( m_guard.initialize( &m_spursJobChain, &m_guard, 1 /*notifyCount*/, 1 /*requestSpuCount(ignored)*/, 1 /*autoReset*/ ) );
  32. m_headBlock[0] = CELL_SPURS_JOB_COMMAND_SYNC; // wait for all previous list commands to finish
  33. m_headBlock[1] = CELL_SPURS_JOB_COMMAND_JOB( &m_jobNotify );
  34. m_headBlock[2] = CELL_SPURS_JOB_COMMAND_GUARD( &m_guard );
  35. m_headBlock[BLOCK_COMMANDS] = ( uint64 )-1ll;
  36. CELL_MUST_SUCCEED( m_spursJobChain.run() );
  37. m_nSpinWaitNotify = 0;
  38. return CELL_OK;
  39. }
  40. int VjobChain::Run()
  41. {
  42. if( !IsRunning() )
  43. {
  44. CMiniProfilerGuard mpg( &g_mpRun );
  45. Assert( m_notifyArea.m_nCopyTo ); // the jobchain must be joined if its not in Running state
  46. m_notifyArea.m_nCopyTo = 0;
  47. m_pLastBlock = m_headBlock;
  48. m_nCurrentBlockCommands = 3; // right after the SYNC-JOB(notify)-GUARD prefix
  49. m_headBlock[m_nCurrentBlockCommands] = CELL_SPURS_JOB_COMMAND_JTS;
  50. // __lwsync(); // make sure we complete sync reset and write JTS before notify - probably not necessary because the guard should have a barrier for sure
  51. m_guard.notify(); // let the jobchain go
  52. return CELL_OK;
  53. }
  54. else
  55. {
  56. return CELL_OK; // it's valid to try to run a running chain in our interface...
  57. }
  58. }
  59. int VjobChain::End( )
  60. {
  61. if( IsRunning() )
  62. {
  63. Assert( m_pLastBlock[m_nCurrentBlockCommands] == CELL_SPURS_JOB_COMMAND_JTS );
  64. Assert( m_notifyArea.m_nCopyTo == 0 ); // make sure we reset sync correctly
  65. m_pLastBlock[m_nCurrentBlockCommands] = CELL_SPURS_JOB_COMMAND_RESET_PC( m_headBlock );
  66. return 0;
  67. }
  68. else
  69. return -1; // you should not end non-running instance
  70. }
  71. void VjobChain::Shutdown()
  72. {
  73. if( IsRunning() )
  74. {
  75. m_pLastBlock[m_nCurrentBlockCommands] = CELL_SPURS_JOB_COMMAND_END;
  76. }
  77. m_spursJobChain.shutdown();
  78. m_spursJobChain.join();
  79. }
  80. int VjobChain::Join()
  81. {
  82. if( IsRunning() )
  83. {
  84. #ifdef _DEBUG
  85. CellSpursJobChainInfo info;
  86. m_spursJobChain.getInfo( &info );
  87. #endif
  88. CMiniProfilerGuard mpg( &g_mpJoin );
  89. // wait for reset sync notification to come through
  90. volatile job_notify::NotifyArea_t *pNotify = &m_notifyArea;
  91. Assert( pNotify->m_nCopyFrom );
  92. while( !pNotify->m_nCopyTo )
  93. {
  94. ++m_nSpinWaitNotify;
  95. }
  96. if( m_nSpinWaitNotify )
  97. {
  98. // <HACK> Sergiy : I'm taking this out for now because jobchain double-buffering is effectively temporarily hosed
  99. // Warning( "VjobChain %s: stall in join, %d spins\n", m_name, m_nSpinWaitNotify );
  100. m_nSpinWaitNotify = 0;
  101. }
  102. // free up the memory of the jobs that are now known to have dispatched
  103. if( m_headBlock != m_pLastBlock )
  104. {
  105. uint64 *pBlock = m_headBlock;
  106. do
  107. {
  108. Assert( pBlock[BLOCK_COMMANDS] == (uint64)-1ll );
  109. uint64 eaNext = pBlock[BLOCK_COMMANDS - 1];
  110. Assert( ( eaNext & 0xFFFFFFFF00000007ull ) == 3 );
  111. pBlock = ( uint64 * )( uintp( eaNext ) & ~7 );
  112. Assert( pBlock[BLOCK_COMMANDS] == (uint64)-1ll );
  113. delete[]pBlock;
  114. }
  115. while( pBlock != m_pLastBlock );
  116. }
  117. m_pLastBlock = NULL; // idle state
  118. return CELL_OK;
  119. }
  120. else
  121. return 0; // valid to join twice;
  122. }
  123. void VjobChain::Push( uint64 nCommand )
  124. {
  125. Assert( IsRunning() );
  126. uint64 * pNextCommand = &m_pLastBlock[m_nCurrentBlockCommands++]; // JTS to patch
  127. if( m_nCurrentBlockCommands < BLOCK_COMMANDS )
  128. {
  129. CMiniProfilerGuard mpg( &g_mpPush );
  130. pNextCommand[1] = CELL_SPURS_JOB_COMMAND_JTS;
  131. __lwsync(); // ordering: create JobHeader, insert next JTS --> patch current JTS with JOB command
  132. *pNextCommand = nCommand;
  133. }
  134. else
  135. {
  136. CMiniProfilerGuard mpg( &g_mpPush2 );
  137. m_pLastBlock = new uint64[BLOCK_COMMANDS+1];
  138. m_pLastBlock[BLOCK_COMMANDS] = ( uint64 )-1ll; // marker
  139. m_pLastBlock[0] = nCommand;
  140. m_pLastBlock[m_nCurrentBlockCommands = 1] = CELL_SPURS_JOB_COMMAND_JTS;
  141. __lwsync(); // ordering: create JobHeader, allocate & reset new segment with JOB command in it --> patch JTS in old segment with NEXT command
  142. *pNextCommand = CELL_SPURS_JOB_COMMAND_NEXT( m_pLastBlock );
  143. }
  144. }
  145. void VjobChain::Push( const uint64 * nCommands, uint nCommandCount )
  146. {
  147. // todo: make it more optimal by removing extra lwsync's
  148. for( uint i = 0; i < nCommandCount; ++i )
  149. {
  150. Push( nCommands[i] );
  151. }
  152. }
  153. int VjobChain2::Init( VJobsRoot * pRoot, uint nMaxContention, const char* pName )
  154. {
  155. m_vjobChainRing = ( VjobChain * )MemAlloc_AllocAligned( VJOB_CHAINS * sizeof( VjobChain ), 128 );
  156. for( uint i = 0; i < 2; ++i )
  157. {
  158. int nError = m_vjobChainRing[i].Init( pRoot, nMaxContention, "%s%d", pName, i );
  159. if( nError )
  160. return nError;
  161. }
  162. m_nCurrentChain = 0;
  163. return 0;
  164. }
  165. void VjobChain2::Begin()
  166. {
  167. m_vjobChainRing[( m_nCurrentChain + 1 ) % VJOB_CHAINS].Join();
  168. VjobChain & jobchain = Jobchain();
  169. jobchain.Join(); // join the jobchain that we'll be using now
  170. jobchain.Run();
  171. }
  172. void VjobChain2::End()
  173. {
  174. VjobChain & jobchain = Jobchain();
  175. jobchain.End();
  176. m_nCurrentChain = ( m_nCurrentChain + 1 ) % VJOB_CHAINS; // swap the job chain
  177. }
  178. void VjobChain2::Shutdown()
  179. {
  180. for( uint i = 0; i < 2; ++i )
  181. {
  182. m_vjobChainRing[i].Shutdown();
  183. }
  184. MemAlloc_FreeAligned( m_vjobChainRing );
  185. }
  186. int VjobChain3::Init( VJobsRoot * pRoot, uint nMaxContention, uint nMinCommandsPerBuffer, uint8_t nVjobChainPriority[8], const char* pName, uint nDmaTags )
  187. {
  188. m_pName = pName;
  189. const uint nSizeOfJobDescriptor = 128, nMaxGrabbedJob = 4;
  190. // we need at least 4 commands
  191. uint nBufferSize = sizeof( VjobBufferHeader_t ) + sizeof( uint64 ) * MAX( nMinCommandsPerBuffer, VjobBuffer_t::VERBATIM_COMMAND_COUNT + 2 ); // +2 is for user's command and JTN
  192. nBufferSize = AlignValue( nBufferSize, 128 );
  193. m_nMaxCommandsPerBuffer = ( nBufferSize - sizeof( VjobBufferHeader_t ) ) / sizeof( uint64 );
  194. uint nAllocationSize = sizeof( cell::Spurs::JobChain ) + nBufferSize * BUFFER_COUNT;
  195. m_pSpursJobChain = ( cell::Spurs::JobChain* )MemAlloc_AllocAligned( nAllocationSize, 128 );
  196. V_memset( m_pSpursJobChain, 0, nAllocationSize );
  197. m_pBuffers[0] = ( VjobBuffer_t * )( m_pSpursJobChain + 1 );
  198. m_nFrontBuffer = 0;
  199. m_pFrontBuffer = m_pBuffers[0];
  200. for( int i = 1; i < BUFFER_COUNT; ++i )
  201. {
  202. m_pBuffers[i] = ( VjobBuffer_t * )( uintp( m_pBuffers[ i - 1 ] ) + nBufferSize );
  203. }
  204. cell::Spurs::JobChainAttribute attr;
  205. attr.initialize( &attr, m_pFrontBuffer->m_spursCommands, nSizeOfJobDescriptor, nMaxGrabbedJob, nVjobChainPriority, nMaxContention, true, nDmaTags, nDmaTags + 1, false, 256, 1 );
  206. attr.setName( pName );
  207. CELL_MUST_SUCCEED( JobChain::createWithAttribute( &pRoot->m_spurs, m_pSpursJobChain, &attr ) );
  208. for( int i = 0; i < BUFFER_COUNT; ++i )
  209. {
  210. Assert( !( uintp( m_pBuffers[i] ) & 0x7F ) );
  211. m_pBuffers[i]->Init( pRoot, m_pSpursJobChain );
  212. }
  213. StartCommandBuffer( 0, CELL_SPURS_JOB_COMMAND_NOP );
  214. CELL_MUST_SUCCEED( m_pSpursJobChain->run() );
  215. #ifdef _DEBUG
  216. sys_timer_usleep( 100 );
  217. #endif
  218. m_nSpinWaitNotify = 0;
  219. return CELL_OK;
  220. }
  221. void VjobBuffer_t::Init( VJobsRoot * pRoot, cell::Spurs::JobChain * pSpursJobChain )
  222. {
  223. m_jobNotify.header = *( pRoot->m_pJobNotify );
  224. m_jobNotify.header.useInOutBuffer = 1;
  225. AddInputDma( &m_jobNotify, sizeof( m_notifyArea ), &m_notifyArea );
  226. m_notifyArea.m_nCopyFrom = 1;
  227. // SPU will mark copyTo = 1, PPU will mark it back to 0; at this time, we may actually mark the notify as completed; 1 means "previous buffer is free"
  228. // Then we'll start command buffer, which will reset the ready flag. But then we run the jobchain, which will run job_notify and set the flag back again, thus starting the ring
  229. m_notifyArea.m_nCopyTo = 1;
  230. m_jobNotify.workArea.userData[1] = 0; // function: default
  231. uint nCommands = 0;
  232. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_SYNC; // wait for all previous list commands to finish
  233. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_JOB( &m_jobNotify );
  234. #ifdef VJOBCHAIN3_GUARD
  235. Assert( !( uintp( &m_guard ) & -128 ) );
  236. CELL_MUST_SUCCEED( m_guard.initialize( pSpursJobChain, &m_guard, 1 /*notifyCount*/, 1 /*requestSpuCount(ignored)*/, 1 /*autoReset*/ ) );
  237. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_GUARD( &m_guard );
  238. #endif
  239. #ifdef _DEBUG
  240. m_jobNotify2.header = *( pRoot->m_pJobNotify );
  241. m_jobNotify2.header.useInOutBuffer = 1;
  242. AddInputDma( &m_jobNotify2, sizeof( m_notifyArea2 ), &m_notifyArea2 );
  243. m_jobNotify2.workArea.userData[1] = 0; // function: default
  244. m_notifyArea2.m_nCopyFrom = 1;
  245. m_notifyArea2.m_nCopyTo = 0; // just for debugging, to see when this job gets executed
  246. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_JOB( &m_jobNotify2 );
  247. #endif
  248. Assert( nCommands == VjobBuffer_t::VERBATIM_COMMAND_COUNT );
  249. m_spursCommands[VjobBuffer_t::VERBATIM_COMMAND_COUNT] = CELL_SPURS_JOB_COMMAND_JTS;
  250. }
  251. void VjobChain3::WaitForEntryNotify( VjobBuffer_t * pBuffer )
  252. {
  253. volatile job_notify::NotifyArea_t *pNotify = &pBuffer->m_notifyArea;
  254. Assert( pNotify->m_nCopyFrom );
  255. while( !pNotify->m_nCopyTo )
  256. {
  257. ++m_nSpinWaitNotify;
  258. sys_timer_usleep( 30 );
  259. }
  260. if( m_nSpinWaitNotify )
  261. {
  262. Warning( "VjobChain %s: stall in WaitForEntryNotify, %d spins\n", m_pName, m_nSpinWaitNotify );
  263. m_nSpinWaitNotify = 0;
  264. }
  265. }
  266. uint64* VjobChain3::SwapCommandBuffer( uint64 nInsertCommand )
  267. {
  268. uint64 * pSpursIsSpinningHere = &m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ];
  269. Assert( m_nFrontBufferCommandCount < m_nMaxCommandsPerBuffer );
  270. uint nNext1Buffer = ( m_nFrontBuffer + 1 ) % BUFFER_COUNT, nNext2Buffer = ( m_nFrontBuffer + 2 ) % BUFFER_COUNT;
  271. VjobBuffer_t * pNext1Buffer = m_pBuffers[ nNext1Buffer ], * pNext2Buffer = m_pBuffers[ nNext2Buffer ];
  272. // before we can declare the next1 buffer "front", we need to make sure it's fully ready to accept commands, i.e. that it was fully read by SPURS
  273. // for that, we check the next2 buffer notification area
  274. WaitForEntryNotify( pNext1Buffer );
  275. WaitForEntryNotify( pNext2Buffer );
  276. // if next2 buffer has been notified, next1 must have been notified long ago
  277. Assert( pNext1Buffer->m_notifyArea.m_nCopyTo );
  278. uint64* pInsertionPoint = StartCommandBuffer( nNext1Buffer, nInsertCommand );
  279. Assert( pNext1Buffer == m_pFrontBuffer );
  280. // implicit lwsync is here
  281. *pSpursIsSpinningHere = CELL_SPURS_JOB_COMMAND_NEXT( pNext1Buffer->m_spursCommands ); // jump to the next buffer
  282. return pInsertionPoint;
  283. }
  284. uint64* VjobChain3::StartCommandBuffer( uint nNext1Buffer, uint64 nInsertCommand )
  285. {
  286. m_nFrontBuffer = nNext1Buffer;
  287. m_pFrontBuffer = m_pBuffers[ nNext1Buffer ];
  288. // the ready marker is presumed to be present; SPURS must have gone through this buffer in the previous ring, otherwise we can't use it
  289. Assert( m_pFrontBuffer->m_notifyArea.m_nCopyTo == 1 );
  290. // reset the ready marker; SPURS didn't get to this buffer yet (we're about to reuse them and we didn't jump to it yet)
  291. m_pFrontBuffer->m_notifyArea.m_nCopyTo = 0;
  292. #ifdef _DEBUG
  293. m_pFrontBuffer->m_notifyArea2.m_nCopyFrom++;
  294. m_pFrontBuffer->m_notifyArea2.m_nCopyTo = 0;
  295. #endif
  296. uint64 * pCommand = &m_pFrontBuffer->m_spursCommands[ VjobBuffer_t::VERBATIM_COMMAND_COUNT ];
  297. *pCommand = nInsertCommand;
  298. m_pFrontBuffer->m_spursCommands[ VjobBuffer_t::VERBATIM_COMMAND_COUNT + 1 ] = CELL_SPURS_JOB_COMMAND_JTS;
  299. m_nFrontBufferCommandCount = VjobBuffer_t::VERBATIM_COMMAND_COUNT + 1;
  300. #ifdef VJOBCHAIN3_GUARD
  301. m_pFrontBuffer->m_guard.notify(); // let the jobchain go through
  302. // implicit lwsync is here
  303. #else
  304. __lwsync();
  305. #endif
  306. return pCommand;
  307. }
  308. void VjobChain3::End()
  309. {
  310. Assert( m_nFrontBufferCommandCount < m_nMaxCommandsPerBuffer );
  311. m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ] = CELL_SPURS_JOB_COMMAND_END;
  312. m_pSpursJobChain->shutdown();
  313. }
  314. void VjobChain3::Join()
  315. {
  316. Assert( m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ] == CELL_SPURS_JOB_COMMAND_END );
  317. m_pSpursJobChain->join();
  318. MemAlloc_FreeAligned( m_pSpursJobChain );
  319. m_pSpursJobChain = NULL;
  320. }