You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
189 lines
5.5 KiB
189 lines
5.5 KiB
//========== Copyright © Valve Corporation, All rights reserved. ========
|
|
#if !defined( VJOBS_CHAINUTILS_HDR ) && defined( _PS3 )
|
|
#define VJOBS_CHAINUTILS_HDR
|
|
|
|
#include "tier0/platform.h"
|
|
#include <cell/spurs.h>
|
|
#include "ps3/job_notify.h"
|
|
struct VJobsRoot;
|
|
|
|
//
|
|
// The chain consists of blocks of commands; the head block has SYNC-JOB(notify)-GUARD sequence
|
|
// Initially the chain waits on the GUARD and is ready to "run"; "run" means releasing the guard.
|
|
// I'm using GUARD instead of JTS because patching JTS will render it unpatchable until jobchain execution completes;
|
|
// and if it's unpatchable, it effectively can't be used as a guard for the next cycle
|
|
// Each block refers to the next one by inserting NEXT in the very last slot
|
|
// the last block is pointed to by m_pLastBlock, which is NULL initially (before entering the "run" state)
|
|
//
|
|
//
|
|
struct ALIGN128 VjobChain
|
|
{
|
|
public:
|
|
cell::Spurs::JobChain m_spursJobChain;
|
|
cell::Spurs::JobGuard m_guard;
|
|
CellSpursJob64 m_jobNotify;
|
|
job_notify::NotifyArea_t m_notifyArea;
|
|
|
|
enum { BLOCK_COMMANDS = 256 };
|
|
uint64 m_headBlock[BLOCK_COMMANDS+1]; // in one variant, the first entry in this list is used for END command; overwrite it with NOP to release the list
|
|
uint64 * m_pLastBlock;
|
|
uint m_nCurrentBlockCommands;
|
|
uint m_nSpinWaitNotify;
|
|
|
|
char m_name[16];
|
|
public:
|
|
int Init( VJobsRoot * pRoot, uint nMaxContention, const char* pFormatName, ... );
|
|
bool IsRunning()const { return m_pLastBlock != NULL; }
|
|
|
|
int Run();
|
|
void Push( uint64 nCommand );
|
|
void Push( const uint64 * nCommands, uint nCommandCount );
|
|
int End( );
|
|
int Join();
|
|
void Shutdown();
|
|
|
|
JobChain & Jobchain() { return m_spursJobChain; }
|
|
|
|
}ALIGN128_POST;
|
|
|
|
|
|
// VjobChain2 hosts 2 jobchains and double-buffers between them
|
|
class VjobChain2
|
|
{
|
|
public:
|
|
int Init( VJobsRoot * pRoot, uint nMaxContention, const char* pName );
|
|
void Begin();
|
|
void End();
|
|
void Shutdown();
|
|
|
|
VjobChain& Jobchain(){ return m_vjobChainRing[m_nCurrentChain]; }
|
|
|
|
protected:
|
|
enum{VJOB_CHAINS = 2};
|
|
VjobChain *m_vjobChainRing; // may be more than double-buffered if necessary
|
|
uint m_nCurrentChain;
|
|
};
|
|
|
|
|
|
//#define VJOBCHAIN3_GUARD
|
|
|
|
struct ALIGN128 VjobBufferHeader_t
|
|
{
|
|
public:
|
|
#ifdef VJOBCHAIN3_GUARD
|
|
cell::Spurs::JobGuard m_guard;
|
|
#endif
|
|
CellSpursJob64 m_jobNotify;
|
|
job_notify::NotifyArea_t m_notifyArea;
|
|
#ifdef _DEBUG
|
|
CellSpursJob64 m_jobNotify2;
|
|
job_notify::NotifyArea_t m_notifyArea2;
|
|
#endif
|
|
}
|
|
ALIGN128_POST;
|
|
|
|
struct VjobBuffer_t: public VjobBufferHeader_t
|
|
{
|
|
public:
|
|
enum ConstEnum_t
|
|
{
|
|
VERBATIM_COMMAND_COUNT = 2 // we employ syncronization scheme: SYNC, JOB(notify), ...
|
|
#ifdef VJOBCHAIN3_GUARD // we add GUARD, ...
|
|
+ 1
|
|
#endif
|
|
#ifdef _DEBUG
|
|
+ 1 // we add JOB(notify2), ...
|
|
#else
|
|
|
|
#endif
|
|
};
|
|
uint64 m_spursCommands[16]; // there will be at least verbatim commands, a user command, and a NEXT
|
|
|
|
void Init( VJobsRoot * pRoot, cell::Spurs::JobChain * pSpursJobChain );
|
|
};
|
|
|
|
// VjobChain3 has only 1 jobchain but multi-buffers its command list (see BUFFER_COUNT) to facilitate continuous wait-free execution
|
|
class VjobChain3
|
|
{
|
|
protected:
|
|
enum ConstEnum_t {
|
|
BUFFER_COUNT = 4
|
|
};
|
|
cell::Spurs::JobChain *m_pSpursJobChain;
|
|
VjobBuffer_t * m_pBuffers[BUFFER_COUNT];
|
|
VjobBuffer_t * m_pFrontBuffer;
|
|
uint m_nFrontBuffer; // the buffer currently in use
|
|
|
|
uint m_nMaxCommandsPerBuffer; // max count of commands fitting into one buffer
|
|
uint m_nFrontBufferCommandCount; // count of commands in the current front buffer
|
|
uint m_nSpinWaitNotify; // did we spin waiting for job_notify ? if we did, we probably need to increase the command buffer size
|
|
uint64 m_nLastCommandPushed; // at the beginning of the scene, it's considered to be synced up
|
|
|
|
|
|
const char * m_pName;
|
|
|
|
public:
|
|
|
|
int Init( VJobsRoot * pRoot, uint nMaxContention, uint nMinCommandsPerBuffer, uint8_t nVjobChainPriority[8], const char* pName, uint nDmaTags );
|
|
uint64* Push( uint64 nCommand );
|
|
uint64* PushSyncJobSync( uint64 nCommand );
|
|
void PushSync();
|
|
void Shutdown(){End();Join();}
|
|
void End();
|
|
void Join();
|
|
|
|
protected:
|
|
void WaitForEntryNotify( VjobBuffer_t * pBuffer );
|
|
uint64* StartCommandBuffer( uint nNext1Buffer, uint64 nInsertCommand );
|
|
uint64* SwapCommandBuffer( uint64 nInsertCommand );
|
|
};
|
|
|
|
|
|
|
|
// Appends nCommand to the chain and remembers it as the last command pushed.
// Returns the slot the command was written to, or whatever SwapCommandBuffer() returns
// when the front buffer had to be switched.
inline uint64* VjobChain3::Push( uint64 nCommand )
{
	uint64 * pSlot;
	const bool bSwitchBuffer = ( m_nFrontBufferCommandCount == m_nMaxCommandsPerBuffer - 1 );

	if( !bSwitchBuffer )
	{
		uint64 * const pCommands = m_pFrontBuffer->m_spursCommands;
		const uint nSlot = m_nFrontBufferCommandCount;

		// Terminate the list one slot further on first...
		pCommands[ nSlot + 1 ] = CELL_SPURS_JOB_COMMAND_JTS;

		// ...and only then publish the new command.
		// Important: this sync ensures that both the command header AND JTS are written before SPU sees them
		__lwsync();

		pSlot = &pCommands[ nSlot ];
		*pSlot = nCommand;
		m_nFrontBufferCommandCount = nSlot + 1;
	}
	else
	{
		// the front buffer is full: time to switch the buffer; the swap inserts the command for us
		pSlot = SwapCommandBuffer( nCommand );
	}

	m_nLastCommandPushed = nCommand;
	return pSlot;
}
|
|
|
|
|
|
inline void VjobChain3::PushSync()
|
|
{
|
|
Push( CELL_SPURS_JOB_COMMAND_LWSYNC );
|
|
}
|
|
|
|
|
|
// Pushes nCommand with an LWSYNC on either side of it.
// The leading LWSYNC is skipped when the previously pushed command already was one.
// Returns the pointer Push() produced for nCommand itself.
inline uint64* VjobChain3::PushSyncJobSync( uint64 nCommand )
{
	const bool bAlreadySynced = ( m_nLastCommandPushed == CELL_SPURS_JOB_COMMAND_LWSYNC );
	if( !bAlreadySynced )
	{
		// we need to wait for previous jobs to finish in order to patch the state efficiently
		// todo: double-buffer the states to avoid stalls, but only if we become SPU-bound here
		PushSync();
	}

	uint64 * const pJobSlot = Push( nCommand );

	// this is instead of stalling successor because I'm not sure if it stalls all logical successors (some of which may be picked up by other SPUs)
	// the SYNC here will ensure completion of the previous job before the new jobs will be pushed
	PushSync();

	return pJobSlot;
}
|
|
|
|
|
|
|
|
#endif
|