Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

175 lines
7.0 KiB

  1. //========== Copyright © Valve Corporation, All rights reserved. ========
  2. #include "ps3/vjobchain4.h"
  3. vec_uint4 g_cellSpursJts16 = ( vec_uint4 ){ uint( CELL_SPURS_JOB_COMMAND_JTS >> 32 ), uint( CELL_SPURS_JOB_COMMAND_JTS ), uint( CELL_SPURS_JOB_COMMAND_JTS >> 32 ), uint( CELL_SPURS_JOB_COMMAND_JTS ) };
  4. #ifndef SPU
  5. #include "ps3/vjobutils.h"
  6. #include "vjobs/root.h"
  7. #include "tier1/strtools.h"
  8. #include "tier0/miniprofiler.h"
  9. int VjobChain4::Init( VJobsRoot * pRoot, uint nMaxContention, uint nMinCommandsPerBuffer, uint8_t nVjobChainPriority[8], uint nSizeOfJobDescriptor, uint nMaxGrabbedJob, const char* pName, uint nDmaTags )
  10. {
  11. m_pName = pName;
  12. m_eaThis = this;
  13. // we need at least 4 commands
  14. uint nBufferSize = sizeof( VjobChain4BufferHeader_t ) + sizeof( uint64 ) * MAX( nMinCommandsPerBuffer, VjobChain4Buffer_t::VERBATIM_COMMAND_COUNT + 2 ); // +2 is for user's command and JTN
  15. nBufferSize = AlignValue( nBufferSize, 128 );
  16. m_nMaxCommandsPerBuffer = ( nBufferSize - sizeof( VjobChain4BufferHeader_t ) ) / sizeof( uint64 );
  17. uint nAllocationSize = sizeof( cell::Spurs::JobChain ) + nBufferSize * BUFFER_COUNT;
  18. m_pSpursJobChain = ( cell::Spurs::JobChain* )MemAlloc_AllocAligned( nAllocationSize, 128 );
  19. V_memset( m_pSpursJobChain, 0, nAllocationSize );
  20. m_pBuffers[0] = ( VjobChain4Buffer_t * )( m_pSpursJobChain + 1 );
  21. m_nFrontBuffer = 0;
  22. m_pFrontBuffer = m_pBuffers[0];
  23. for( int i = 1; i < BUFFER_COUNT; ++i )
  24. {
  25. m_pBuffers[i] = ( VjobChain4Buffer_t * )( uintp( m_pBuffers[ i - 1 ] ) + nBufferSize );
  26. }
  27. cell::Spurs::JobChainAttribute attr;
  28. attr.initialize( &attr, m_pFrontBuffer->m_spursCommands, nSizeOfJobDescriptor, nMaxGrabbedJob, nVjobChainPriority, nMaxContention, true, nDmaTags, nDmaTags + 1, false, Max<uint>( 256, nSizeOfJobDescriptor ), 1 );
  29. attr.setName( pName );
  30. CELL_MUST_SUCCEED( JobChain::createWithAttribute( &pRoot->m_spurs, m_pSpursJobChain, &attr ) );
  31. for( int i = 0; i < BUFFER_COUNT; ++i )
  32. {
  33. Assert( !( uintp( m_pBuffers[i] ) & 0x7F ) );
  34. m_pBuffers[i]->Init( pRoot, m_pSpursJobChain, m_nMaxCommandsPerBuffer );
  35. }
  36. *StartCommandBuffer( 0 ) = CELL_SPURS_JOB_COMMAND_NOP;
  37. CELL_MUST_SUCCEED( m_pSpursJobChain->run() );
  38. #ifdef _DEBUG
  39. sys_timer_usleep( 100 );
  40. #endif
  41. m_nSpinWaitNotify = 0;
  42. return CELL_OK;
  43. }
  44. void VjobChain4Buffer_t::Init( VJobsRoot * pRoot, cell::Spurs::JobChain * pSpursJobChain, uint nMaxCommandsPerBuffer )
  45. {
  46. Assert( 0 == ( 0x7F & uintp( &m_jobNotify ) ) );
  47. m_jobNotify.header = *( pRoot->m_pJobNotify );
  48. m_jobNotify.header.useInOutBuffer = 1;
  49. AddInputDma( &m_jobNotify, sizeof( m_notifyArea ), &m_notifyArea );
  50. m_notifyArea.m_nCopyFrom = 1;
  51. // SPU will mark copyTo = 1, PPU will mark it back to 0; at this time, we may actually mark the notify as completed; 1 means "previous buffer is free"
  52. // Then we'll start command buffer, which will reset the ready flag. But then we run the jobchain, which will run job_notify and set the flag back again, thus starting the ring
  53. m_notifyArea.m_nCopyTo = 1;
  54. m_jobNotify.workArea.userData[1] = 0; // function: default
  55. uint nCommands = 0;
  56. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_SYNC; // wait for all previous list commands to finish
  57. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_JOB( &m_jobNotify );
  58. Assert( nCommands == VjobChain4Buffer_t::VERBATIM_COMMAND_COUNT );
  59. while( nCommands < nMaxCommandsPerBuffer )
  60. {
  61. m_spursCommands[nCommands++] = CELL_SPURS_JOB_COMMAND_JTS;
  62. }
  63. }
  64. void VjobChain4::End()
  65. {
  66. Assert( m_nFrontBufferCommandCount < m_nMaxCommandsPerBuffer );
  67. m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ] = CELL_SPURS_JOB_COMMAND_END;
  68. m_pSpursJobChain->shutdown();
  69. }
  70. void VjobChain4::Join()
  71. {
  72. Assert( m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ] == CELL_SPURS_JOB_COMMAND_END );
  73. m_pSpursJobChain->join();
  74. MemAlloc_FreeAligned( m_pSpursJobChain );
  75. m_pSpursJobChain = NULL;
  76. }
  77. #endif
  78. void VjobChain4::WaitForEntryNotify( VjobChain4Buffer_t * eaBuffer )
  79. {
  80. volatile job_notify::NotifyArea_t *eaNotify = &eaBuffer->m_notifyArea;
  81. // it doesn't matter what DMA tag we'll use for synchronous DMA get
  82. Assert( VjobDmaGetUint32( (uint)&eaNotify->m_nCopyFrom, DMATAG_SYNC, 0, 0 ) );
  83. while( !VjobDmaGetUint32( (uint)&eaNotify->m_nCopyTo, DMATAG_SYNC, 0, 0 ) )
  84. {
  85. ++m_nSpinWaitNotify;
  86. #ifndef SPU
  87. sys_timer_usleep( 30 );
  88. #endif
  89. }
  90. if( m_nSpinWaitNotify )
  91. {
  92. VjobSpuLog( "VjobChain: stall in WaitForEntryNotify, %d spins\n", m_nSpinWaitNotify );
  93. m_nSpinWaitNotify = 0;
  94. }
  95. }
  96. uint64* VjobChain4::SwapCommandBuffer( )
  97. {
  98. uint64 * eaSpursIsSpinningHere = &m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ];
  99. Assert( m_nFrontBufferCommandCount < m_nMaxCommandsPerBuffer );
  100. uint nNext1Buffer = ( m_nFrontBuffer + 1 ) % BUFFER_COUNT, nNext2Buffer = ( m_nFrontBuffer + 2 ) % BUFFER_COUNT;
  101. VjobChain4Buffer_t * eaNext1Buffer = m_pBuffers[ nNext1Buffer ], * eaNext2Buffer = m_pBuffers[ nNext2Buffer ];
  102. // before we can declare the next1 buffer "front", we need to make sure it's fully ready to accept commands, i.e. that it was fully read by SPURS
  103. // for that, we check the next2 buffer notification area
  104. WaitForEntryNotify( eaNext1Buffer );
  105. WaitForEntryNotify( eaNext2Buffer );
  106. // if next2 buffer has been notified, next1 must have been notified long ago
  107. Assert( VjobDmaGetUint32( (uint)&eaNext1Buffer->m_notifyArea.m_nCopyTo, DMATAG_SYNC, 0, 0 ) );
  108. uint64* pInsertionPoint = StartCommandBuffer( nNext1Buffer );
  109. Assert( eaNext1Buffer == m_pFrontBuffer );
  110. // implicit lwsync is here
  111. VjobDmaPutfUint64( CELL_SPURS_JOB_COMMAND_NEXT( eaNext1Buffer->m_spursCommands ), (uint)eaSpursIsSpinningHere, DMATAG_SYNC ); // jump to the next buffer
  112. return pInsertionPoint;
  113. }
  114. #ifndef SPU
  115. void FillSpursJts( uint64 * eaCommands, uint nBufferCount )
  116. {
  117. for( uint i = 0; i < nBufferCount; ++i )
  118. eaCommands[ i ] = CELL_SPURS_JOB_COMMAND_JTS;
  119. }
  120. #endif
  121. //
  122. // Initializes the buffer BEFORE the jobchain can jump to it. It's important to only jump to the next buffer
  123. // after this function returns (either by inserting NEXT into previous buffer, or by call Run() on the jobchain)
  124. // because this function lacks the necessary synchronization to operate safely on a buffer in-flight
  125. //
  126. uint64* VjobChain4::StartCommandBuffer( uint nNext1Buffer )
  127. {
  128. m_nFrontBuffer = nNext1Buffer;
  129. m_pFrontBuffer = m_pBuffers[ nNext1Buffer ];
  130. // the ready marker is presumed to be present; SPURS must have gone through this buffer in the previous ring, otherwise we can't use it
  131. Assert( VjobDmaGetUint32( (uint)&m_pFrontBuffer->m_notifyArea.m_nCopyTo, DMATAG_SYNC, 0, 0) == 1 );
  132. // reset the ready marker; SPURS didn't get to this buffer yet (we're about to reuse them and we didn't jump to it yet)
  133. VjobDmaPutfUint32( 0, (uint)&m_pFrontBuffer->m_notifyArea.m_nCopyTo, DMATAG_SYNC );
  134. uint64 * eaCommand = &m_pFrontBuffer->m_spursCommands[ VjobChain4Buffer_t::VERBATIM_COMMAND_COUNT ];
  135. //VjobDmaPutfUint64( nInsertCommand, (uint)eaCommand, DMATAG_SYNC );
  136. FillSpursJts( eaCommand, m_nMaxCommandsPerBuffer - VjobChain4Buffer_t::VERBATIM_COMMAND_COUNT );
  137. m_nFrontBufferCommandCount = VjobChain4Buffer_t::VERBATIM_COMMAND_COUNT + 1;
  138. LWSYNC_PPU_ONLY();
  139. return eaCommand;
  140. }