//========== Copyright © Valve Corporation, All rights reserved. ========
#if !defined( VJOBS_CHAINUTILS_HDR ) && defined( _PS3 )
#define VJOBS_CHAINUTILS_HDR

#include "tier0/platform.h"
#include <cell/spurs.h>
#include "ps3/job_notify.h"

struct VJobsRoot;

//
// The chain consists of blocks of commands; the head block has SYNC-JOB(notify)-GUARD sequence
// initially the chain waits on the GUARD , and is ready to "run". "Run" means releasing the guard.
// I'm using GUARD instead of JTS because patching JTS will render it unpatchable until jobchain execution completes;
// and if it's unpatchable, it effectively can't be used as a guard for the next cycle
// Each block refers to the next one by inserting NEXT in the very last slot
// the last block is pointed to by m_pLastBlock, which is NULL initially (before entering the "run" state)
//
//
struct ALIGN128 VjobChain
{
public:
	cell::Spurs::JobChain m_spursJobChain;
	cell::Spurs::JobGuard m_guard;
	CellSpursJob64 m_jobNotify;
	job_notify::NotifyArea_t m_notifyArea;

	enum { BLOCK_COMMANDS = 256 };

	// BLOCK_COMMANDS command slots plus one extra slot
	// (per the header comment above, the very last slot of a block holds NEXT)
	uint64 m_headBlock[BLOCK_COMMANDS+1]; // in one variant, the first entry in this list is used for END command; overwrite it with NOP to release the list
	uint64 * m_pLastBlock;          // NULL until the chain is running; see IsRunning()
	uint m_nCurrentBlockCommands;
	uint m_nSpinWaitNotify;
	char m_name[16];

public:
	// pFormatName is followed by variadic arguments
	// NOTE(review): presumably printf-style formatting into m_name - confirm against the implementation
	int Init( VJobsRoot * pRoot, uint nMaxContention, const char* pFormatName, ... );

	// The chain is considered running as soon as it has a last block
	bool IsRunning()const { return m_pLastBlock != NULL; }

	int Run();
	void Push( uint64 nCommand );
	void Push( const uint64 * nCommands, uint nCommandCount );
	int End( );
	int Join();
	void Shutdown();

	// Direct access to the underlying SPURS job chain
	cell::Spurs::JobChain & Jobchain() { return m_spursJobChain; }
}ALIGN128_POST;


// VjobChain2 hosts 2 jobchains and double-buffers between them
class VjobChain2
{
public:
	int Init( VJobsRoot * pRoot, uint nMaxContention, const char* pName );
	void Begin();
	void End();
	void Shutdown();

	// The chain currently selected by m_nCurrentChain
	VjobChain& Jobchain(){ return m_vjobChainRing[m_nCurrentChain]; }

protected:
	enum{VJOB_CHAINS = 2};
	VjobChain *m_vjobChainRing; // may be more than double-buffered if necessary
	uint m_nCurrentChain;       // index into m_vjobChainRing
};


//#define VJOBCHAIN3_GUARD

// Fixed header part of a VjobChain3 command buffer.
// The guard is present only when VJOBCHAIN3_GUARD is defined;
// the second notify job/area is present only in _DEBUG builds.
struct ALIGN128 VjobBufferHeader_t
{
public:
#ifdef VJOBCHAIN3_GUARD
	cell::Spurs::JobGuard m_guard;
#endif
	CellSpursJob64 m_jobNotify;
	job_notify::NotifyArea_t m_notifyArea;
#ifdef _DEBUG
	CellSpursJob64 m_jobNotify2;
	job_notify::NotifyArea_t m_notifyArea2;
#endif
} ALIGN128_POST;


// A VjobChain3 command buffer: the header followed by the SPURS command list
struct VjobBuffer_t: public VjobBufferHeader_t
{
public:
	enum ConstEnum_t
	{
		// 2 base commands, +1 with VJOBCHAIN3_GUARD, +1 in _DEBUG
		VERBATIM_COMMAND_COUNT = 2 // we employ synchronization scheme: SYNC, JOB(notify), ...
#ifdef VJOBCHAIN3_GUARD
			// we add GUARD, ...
			+ 1
#endif
#ifdef _DEBUG
			+ 1 // we add JOB(notify2), ...
#else
#endif
	};

	uint64 m_spursCommands[16]; // there will be at least verbatim commands, a user command, and a NEXT

	void Init( VJobsRoot * pRoot, cell::Spurs::JobChain * pSpursJobChain );
};


// VjobChain3 has only 1 jobchain but double-buffers it to facilitate continuous wait-free execution
// NOTE(review): BUFFER_COUNT is 4, so "double-buffers" above looks out of date - confirm
class VjobChain3
{
protected:
	enum ConstEnum_t
	{
		BUFFER_COUNT = 4
	};

	cell::Spurs::JobChain *m_pSpursJobChain;
	VjobBuffer_t * m_pBuffers[BUFFER_COUNT];
	VjobBuffer_t * m_pFrontBuffer;
	uint m_nFrontBuffer; // the buffer currently in use
	uint m_nMaxCommandsPerBuffer; // max count of commands fitting into one buffer
	uint m_nFrontBufferCommandCount; // count of commands in the current front buffer
	uint m_nSpinWaitNotify; // did we spin waiting for job_notify ? if we did, we probably need to increase the command buffer size
	uint64 m_nLastCommandPushed; // at the beginning of the scene, it's considered to be synced up
	const char * m_pName;

public:
	int Init( VJobsRoot * pRoot, uint nMaxContention, uint nMinCommandsPerBuffer, uint8_t nVjobChainPriority[8], const char* pName, uint nDmaTags );

	uint64* Push( uint64 nCommand );
	uint64* PushSyncJobSync( uint64 nCommand );
	void PushSync();

	// Shutdown is End() followed by Join()
	void Shutdown(){End();Join();}
	void End();
	void Join();

protected:
	void WaitForEntryNotify( VjobBuffer_t * pBuffer );
	uint64* StartCommandBuffer( uint nNext1Buffer, uint64 nInsertCommand );
	uint64* SwapCommandBuffer( uint64 nInsertCommand );
};


// Appends one command to the chain and records it in m_nLastCommandPushed.
// When the front buffer has reached m_nMaxCommandsPerBuffer - 1 commands, the push is delegated to
// SwapCommandBuffer(); otherwise the command is written in place into the front buffer.
// Returns the insertion point: the slot written here, or whatever SwapCommandBuffer() returned.
inline uint64* VjobChain3::Push( uint64 nCommand )
{
	uint64 * pInsertionPoint;
	if( m_nFrontBufferCommandCount == m_nMaxCommandsPerBuffer - 1 )
	{
		// time to switch the buffer
		pInsertionPoint = SwapCommandBuffer( nCommand );
	}
	else
	{
		// The JTS goes into the slot after the insertion point first, and only then is the command itself stored
		m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount + 1 ] = CELL_SPURS_JOB_COMMAND_JTS;
		__lwsync(); // Important: this sync ensures that both the command header AND JTS are written before SPU sees them
		pInsertionPoint = &m_pFrontBuffer->m_spursCommands[ m_nFrontBufferCommandCount ];
		*pInsertionPoint = nCommand;
		m_nFrontBufferCommandCount ++;
	}
	m_nLastCommandPushed = nCommand;
	return pInsertionPoint;
}


// Pushes a single LWSYNC command
inline void VjobChain3::PushSync()
{
	Push( CELL_SPURS_JOB_COMMAND_LWSYNC );
}


// Pushes nCommand bracketed by LWSYNC commands. The leading LWSYNC is skipped when the last command
// pushed was already an LWSYNC; the trailing one is always pushed.
// Returns the insertion point of nCommand (not of either LWSYNC).
inline uint64* VjobChain3::PushSyncJobSync( uint64 nCommand )
{
	if( m_nLastCommandPushed != CELL_SPURS_JOB_COMMAND_LWSYNC )
	{
		// we need to wait for previous jobs to finish in order to patch the state efficiently
		// todo: double-buffer the states to avoid stalls, but only if we become SPU-bound here
		Push( CELL_SPURS_JOB_COMMAND_LWSYNC );
	}
	uint64 * pInsertionPoint = Push( nCommand );
	// this is instead of stalling successor because I'm not sure if it stalls all logical successors (some of which may be picked up by other SPUs)
	// the SYNC here will ensure completion of the previous job before the new jobs will be pushed
	Push( CELL_SPURS_JOB_COMMAND_LWSYNC );
	return pInsertionPoint;
}

#endif