//===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======// // // Purpose: // // $NoKeywords: $ // //===========================================================================// #ifndef DYNAMICVB_H #define DYNAMICVB_H #ifdef _WIN32 #pragma once #endif #include "shaderapidx8_global.h" ///////////////////////////// // D. Sim Dietrich Jr. // sim.dietrich@nvidia.com ////////////////////// // Helper function to unbind an vertex buffer void Unbind( IDirect3DVertexBuffer9 *pVertexBuffer ); #define X360_VERTEX_BUFFER_SIZE_MULTIPLIER 2.0 //minimum of 1, only affects dynamic buffers //#define X360_BLOCK_ON_VB_FLUSH //uncomment to block until all data is consumed when a flush is requested. Otherwise we only block when absolutely necessary //#define SPEW_VERTEX_BUFFER_STALLS //uncomment to allow buffer stall spewing. #define MB (1024.0f*1024.0f) class CVertexBuffer { public: CVertexBuffer( D3DDeviceWrapper * pD3D, VertexFormat_t fmt, DWORD theFVF, int vertexSize, int theVertexCount, const char *pTextureBudgetName, bool bSoftwareVertexProcessing, bool dynamic = false ); #ifdef _GAMECONSOLE CVertexBuffer(); void Init( D3DDeviceWrapper * pD3D, VertexFormat_t fmt, DWORD theFVF, uint8 *pVertexData, int vertexSize, int theVertexCount ); #endif ~CVertexBuffer(); LPDIRECT3DVERTEXBUFFER GetInterface() const { // If this buffer still exists, then Late Creation didn't happen. Best case: we'll render the wrong image. Worst case: Crash. Assert( !m_pSysmemBuffer ); return m_pVB; } // Use at beginning of frame to force a flush of VB contents on first draw void FlushAtFrameStart() { m_bFlush = true; } // lock, unlock unsigned char* Lock( int numVerts, int& baseVertexIndex ); unsigned char* Modify( bool bReadOnly, int firstVertex, int numVerts ); void Unlock( int numVerts ); void HandleLateCreation( ); // Vertex size int VertexSize() const { return m_VertexSize; } // Vertex count int VertexCount() const { return m_VertexCount; } #ifdef _X360 // For some VBs, memory allocation is managed by CGPUBufferAllocator, via ShaderAPI const GPUBufferHandle_t *GetBufferAllocationHandle( void ); void SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle ); bool IsPooled( void ) { creturn m_GPUBufferHandle.IsValid(); } // Expose the data pointer for read-only CPU access to the data // (double-indirection supports relocation of the data by CGPUBufferAllocator) const byte **GetBufferDataPointerAddress( void ); #endif // _X360 static int BufferCount() { #ifdef _DEBUG return s_BufferCount; #else return 0; #endif } // UID unsigned int UID() const { #ifdef RECORDING return m_UID; #else return 0; #endif } void HandlePerFrameTextureStats( int frame ) { #ifdef VPROF_ENABLED if ( m_Frame != frame && !m_bDynamic ) { m_Frame = frame; m_pFrameCounter += m_nBufferSize; } #endif } // Do we have enough room without discarding? bool HasEnoughRoom( int numVertices ) const; // Is this dynamic? bool IsDynamic() const { return m_bDynamic; } bool IsExternal() const { return m_bExternalMemory; } // Block until this part of the vertex buffer is free void BlockUntilUnused( int nBufferSize ); // used to alter the characteristics after creation // allows one dynamic vb to be shared for multiple formats void ChangeConfiguration( int vertexSize, int totalSize ) { Assert( m_bDynamic && !m_bLocked && vertexSize ); m_VertexSize = vertexSize; m_VertexCount = m_nBufferSize / vertexSize; } // Compute the next offset for the next lock int NextLockOffset( ) const; // Returns the allocated size int AllocationSize() const; // Returns the number of vertices we have enough room for int NumVerticesUntilFlush() const { #if defined( _X360 ) if( m_AllocationRing.Count() ) { //Cycle through the ring buffer and see what memory is free now int iNode = m_AllocationRing.Head(); while( m_AllocationRing.IsValidIndex( iNode ) ) { if( Dx9Device()->IsFencePending( m_AllocationRing[iNode].m_Fence ) ) break; iNode = m_AllocationRing.Next( iNode ); } if( m_AllocationRing.IsValidIndex( iNode ) ) { int iEndFreeOffset = m_AllocationRing[iNode].m_iEndOffset; if( iEndFreeOffset < m_Position ) { //Wrapped. Making the arbitrary decision that the return value for this function *should* handle the singe giant allocation case which requires contiguous memory if( iEndFreeOffset > (m_iNextBlockingPosition - m_Position) ) return iEndFreeOffset / m_VertexSize; else return (m_iNextBlockingPosition - m_Position) / m_VertexSize; } } else { //we didn't block on any fence return m_VertexCount; } } return m_VertexCount; #else return (m_nBufferSize - NextLockOffset()) / m_VertexSize; #endif } // Marks a fence indicating when this buffer was used void MarkUsedInRendering() { #ifdef _X360 if ( m_bDynamic && m_pVB ) { Assert( m_AllocationRing.Count() > 0 ); m_AllocationRing[m_AllocationRing.Tail()].m_Fence = Dx9Device()->GetCurrentFence(); } #endif } private: void Create( D3DDeviceWrapper *pD3D ); inline void ReallyUnlock( int unlockBytes ) { #if DX_TO_GL_ABSTRACTION // Knowing how much data was actually written is critical for performance under OpenGL. #if SHADERAPI_NO_D3DDeviceWrapper m_pVB->UnlockActualSize( unlockBytes ); #else Dx9Device()->UnlockActualSize( m_pVB, unlockBytes ); #endif #else unlockBytes; // Unused here #if SHADERAPI_NO_D3DDeviceWrapper m_pVB->Unlock(); #else Dx9Device()->Unlock( m_pVB ); #endif #endif } enum LOCK_FLAGS { LOCKFLAGS_FLUSH = D3DLOCK_NOSYSLOCK | D3DLOCK_DISCARD, #if !defined( _X360 ) LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK | D3DLOCK_NOOVERWRITE #else // X360BUG: forcing all locks to gpu flush, otherwise bizarre mesh corruption on decals // Currently iterating with microsoft 360 support to track source of gpu corruption LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK #endif }; LPDIRECT3DVERTEXBUFFER m_pVB; #ifdef _X360 struct DynamicBufferAllocation_t { DWORD m_Fence; //track whether this memory is safe to use again. int m_iStartOffset; int m_iEndOffset; unsigned int m_iZPassIdx; // The zpass during which this allocation was made }; int m_iNextBlockingPosition; // m_iNextBlockingPosition >= m_Position where another allocation is still in use. unsigned char *m_pAllocatedMemory; int m_iAllocationSize; //Total size of the ring buffer, usually more than what was asked for IDirect3DVertexBuffer9 m_D3DVertexBuffer; //Only need one shared D3D header for our usage patterns. CUtlLinkedList m_AllocationRing; //tracks what chunks of our memory are potentially still in use by D3D #endif VertexFormat_t m_VertexBufferFormat; // yes, Vertex, only used for allocation tracking int m_nBufferSize; int m_Position; int m_VertexCount; int m_VertexSize; DWORD m_TheFVF; byte *m_pSysmemBuffer; int m_nSysmemBufferStartBytes; uint m_nLockCount; unsigned char m_bDynamic : 1; unsigned char m_bLocked : 1; unsigned char m_bFlush : 1; unsigned char m_bExternalMemory : 1; unsigned char m_bSoftwareVertexProcessing : 1; unsigned char m_bLateCreateShouldDiscard : 1; #ifdef VPROF_ENABLED int m_Frame; int *m_pFrameCounter; int *m_pGlobalCounter; #endif #ifdef _DEBUG static int s_BufferCount; #endif #ifdef RECORDING unsigned int m_UID; #endif }; #if defined( _X360 ) #include "UtlMap.h" MEMALLOC_DEFINE_EXTERNAL_TRACKING( XMem_CVertexBuffer ); #endif //----------------------------------------------------------------------------- // constructor, destructor //----------------------------------------------------------------------------- inline CVertexBuffer::CVertexBuffer(D3DDeviceWrapper * pD3D, VertexFormat_t fmt, DWORD theFVF, int vertexSize, int vertexCount, const char *pTextureBudgetName, bool bSoftwareVertexProcessing, bool dynamic ) : m_pVB(0), m_Position(0), m_VertexSize(vertexSize), m_VertexCount(vertexCount), m_bFlush(true), m_bLocked(false), m_bExternalMemory( false ), m_nBufferSize(vertexSize * vertexCount), m_TheFVF( theFVF ), m_bSoftwareVertexProcessing( bSoftwareVertexProcessing ), m_bDynamic(dynamic), m_VertexBufferFormat( fmt ), m_bLateCreateShouldDiscard( false ) #ifdef _X360 ,m_pAllocatedMemory(NULL) ,m_iNextBlockingPosition(0) ,m_iAllocationSize(0) #endif #ifdef VPROF_ENABLED ,m_Frame( -1 ) #endif { MEM_ALLOC_CREDIT_( pTextureBudgetName ); #ifdef RECORDING // assign a UID static unsigned int uid = 0; m_UID = uid++; #endif #ifdef _DEBUG ++s_BufferCount; #endif #ifdef VPROF_ENABLED if ( !m_bDynamic ) { char name[256]; V_strcpy_safe( name, "TexGroup_global_" ); V_strcat_safe( name, pTextureBudgetName, sizeof(name) ); m_pGlobalCounter = g_VProfCurrentProfile.FindOrCreateCounter( name, COUNTER_GROUP_TEXTURE_GLOBAL ); V_strcpy_safe( name, "TexGroup_frame_" ); V_strcat_safe( name, pTextureBudgetName, sizeof(name) ); m_pFrameCounter = g_VProfCurrentProfile.FindOrCreateCounter( name, COUNTER_GROUP_TEXTURE_PER_FRAME ); } else { m_pGlobalCounter = g_VProfCurrentProfile.FindOrCreateCounter( "TexGroup_global_" TEXTURE_GROUP_DYNAMIC_VERTEX_BUFFER, COUNTER_GROUP_TEXTURE_GLOBAL ); } #endif if ( !g_pShaderUtil->IsRenderThreadSafe() ) { m_pSysmemBuffer = ( byte * )malloc( m_nBufferSize ); m_nSysmemBufferStartBytes = 0; } else { m_pSysmemBuffer = NULL; Create( pD3D ); } #ifdef VPROF_ENABLED if ( IsX360() || !m_bDynamic ) { Assert( m_pGlobalCounter ); *m_pGlobalCounter += m_nBufferSize; } #endif } void CVertexBuffer::Create( D3DDeviceWrapper *pD3D ) { D3DVERTEXBUFFER_DESC desc; memset( &desc, 0x00, sizeof( desc ) ); desc.Format = D3DFMT_VERTEXDATA; desc.Size = m_nBufferSize; desc.Type = D3DRTYPE_VERTEXBUFFER; desc.Pool = m_bDynamic ? D3DPOOL_DEFAULT : D3DPOOL_MANAGED; desc.FVF = m_TheFVF; #if defined( IS_WINDOWS_PC ) && defined( SHADERAPIDX9 ) && 0 // this may not be supported on all platforms extern bool g_ShaderDeviceUsingD3D9Ex; if ( g_ShaderDeviceUsingD3D9Ex ) { desc.Pool = D3DPOOL_DEFAULT; } #endif desc.Usage = D3DUSAGE_WRITEONLY; if ( m_bDynamic ) { desc.Usage |= D3DUSAGE_DYNAMIC; // Dynamic meshes should never be compressed (slows down writing to them) Assert( CompressionType( m_TheFVF ) == VERTEX_COMPRESSION_NONE ); } if ( m_bSoftwareVertexProcessing ) { desc.Usage |= D3DUSAGE_SOFTWAREPROCESSING; } #if !defined( _X360 ) RECORD_COMMAND( DX8_CREATE_VERTEX_BUFFER, 6 ); RECORD_INT( m_UID ); RECORD_INT( m_nBufferSize ); RECORD_INT( desc.Usage ); RECORD_INT( desc.FVF ); RECORD_INT( desc.Pool ); RECORD_INT( m_bDynamic ); HRESULT hr = pD3D->CreateVertexBuffer( m_nBufferSize, desc.Usage, desc.FVF, desc.Pool, &m_pVB, NULL ); if ( hr == D3DERR_OUTOFVIDEOMEMORY || hr == E_OUTOFMEMORY ) { // Don't have the memory for this. Try flushing all managed resources // out of vid mem and try again. // FIXME: need to record this pD3D->EvictManagedResources(); pD3D->CreateVertexBuffer( m_nBufferSize, desc.Usage, desc.FVF, desc.Pool, &m_pVB, NULL ); } #ifdef _DEBUG if ( hr != D3D_OK ) { switch ( hr ) { case D3DERR_INVALIDCALL: Assert( !"D3DERR_INVALIDCALL" ); break; case D3DERR_OUTOFVIDEOMEMORY: Assert( !"D3DERR_OUTOFVIDEOMEMORY" ); break; case E_OUTOFMEMORY: Assert( !"E_OUTOFMEMORY" ); break; default: Assert( 0 ); break; } } #endif Assert( m_pVB ); #else // _X360 if ( m_bDynamic ) { m_iAllocationSize = m_nBufferSize * X360_VERTEX_BUFFER_SIZE_MULTIPLIER; Assert( m_iAllocationSize >= m_nBufferSize ); m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationSize, MAXULONG_PTR, 0, PAGE_READWRITE | MEM_LARGE_PAGES | PAGE_WRITECOMBINE ); } else { // Fall back to allocating a standalone VB // NOTE: write-combining (PAGE_WRITECOMBINE) is deliberately not used, since it slows down CPU access to the data (decals+defragmentation) m_iAllocationSize = m_nBufferSize; m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationSize, MAXULONG_PTR, 0, PAGE_READWRITE ); } if ( m_pAllocatedMemory ) { MemAlloc_RegisterExternalAllocation( XMem_CVertexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) ); } else { size_t nUsedMemory, nFreeMemory; g_pMemAlloc->GlobalMemoryStatus( &nUsedMemory, &nFreeMemory ); Warning( "Failed to XPhysicalAlloc %.2fmb %s vertex buffer, with %.2fmb total memory free - memory is either exhausted or fragmented!\n", m_iAllocationSize/MB, m_bDynamic?"dynamic":"static", nFreeMemory/MB ); Assert( m_pAllocatedMemory ); // If this assert fires, we're probably out of memory } m_iNextBlockingPosition = m_iAllocationSize; #endif #ifdef MEASURE_DRIVER_ALLOCATIONS int nMemUsed = 1024; VPROF_INCREMENT_GROUP_COUNTER( "vb count", COUNTER_GROUP_NO_RESET, 1 ); VPROF_INCREMENT_GROUP_COUNTER( "vb driver mem", COUNTER_GROUP_NO_RESET, nMemUsed ); VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, nMemUsed ); #endif // Track VB allocations #if !defined( _X360 ) g_VBAllocTracker->CountVB( m_pVB, m_bDynamic, m_nBufferSize, m_VertexSize, m_VertexBufferFormat ); #else g_VBAllocTracker->CountVB( this, m_bDynamic, m_iAllocationSize, m_VertexSize, m_VertexBufferFormat ); #endif } #ifdef _GAMECONSOLE void *AllocateTempBuffer( size_t nSizeInBytes ); //----------------------------------------------------------------------------- // This variant is for when we already have the data in physical memory //----------------------------------------------------------------------------- inline CVertexBuffer::CVertexBuffer( ) : m_pVB( 0 ), m_Position( 0 ), m_VertexSize( 0 ), m_VertexCount( 0 ), m_bFlush( false ), m_bLocked( false ), m_bExternalMemory( true ), m_nBufferSize( 0 ), m_bDynamic( false ) #ifdef VPROF_ENABLED ,m_Frame( -1 ) #endif { #ifdef _X360 m_iAllocationSize = 0; m_pAllocatedMemory = 0; m_iNextBlockingPosition = 0; #endif } #include "tier0/memdbgoff.h" inline void CVertexBuffer::Init( D3DDeviceWrapper *pD3D, VertexFormat_t fmt, DWORD theFVF, uint8 *pVertexData, int vertexSize, int vertexCount ) { m_nBufferSize = vertexSize * vertexCount; m_Position = m_nBufferSize; m_VertexSize = vertexSize; m_VertexCount = vertexCount; #ifdef _X360 m_iAllocationSize = m_nBufferSize; m_pAllocatedMemory = pVertexData; m_iNextBlockingPosition = m_iAllocationSize; #endif m_pVB = new( AllocateTempBuffer( sizeof( IDirect3DVertexBuffer9 ) ) ) IDirect3DVertexBuffer9; #ifdef _X360 XGSetVertexBufferHeader( m_nBufferSize, 0, 0, 0, m_pVB ); XGOffsetResourceAddress( m_pVB, pVertexData ); #elif _PS3 memset( &m_pVB->m_vtxDesc, 0, sizeof(m_pVB->m_vtxDesc) ); // m_pVB->m_vtxDesc.Format = ?; m_pVB->m_vtxDesc.Type = D3DRTYPE_VERTEXBUFFER; m_pVB->m_vtxDesc.Usage = D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC; m_pVB->m_vtxDesc.Pool = D3DPOOL_DEFAULT; m_pVB->m_vtxDesc.Size = m_nBufferSize; // m_pVB->m_vtxDesc.FVF = theFVF; m_pVB->m_pBuffer = new( AllocateTempBuffer( sizeof( CPs3gcmBuffer ) ) ) CPs3gcmBuffer; m_pVB->m_pBuffer->m_lmBlock.AttachToExternalMemory( kAllocPs3GcmVertexBufferDynamic, ( uintp )pVertexData - ( uintp )g_ps3gcmGlobalState.m_pLocalBaseAddress, m_nBufferSize ); #endif } #include "tier0/memdbgon.h" #endif // _X360 inline CVertexBuffer::~CVertexBuffer() { // Track VB allocations #if !defined( _X360 ) if ( m_pVB != NULL ) { g_VBAllocTracker->UnCountVB( m_pVB ); } #else if ( m_pVB && m_pVB->IsSet( Dx9Device() ) ) { Unbind( m_pVB ); } if ( !m_bExternalMemory ) { g_VBAllocTracker->UnCountVB( m_pAllocatedMemory ); } #endif if ( !m_bExternalMemory ) { #ifdef MEASURE_DRIVER_ALLOCATIONS int nMemUsed = 1024; VPROF_INCREMENT_GROUP_COUNTER( "vb count", COUNTER_GROUP_NO_RESET, -1 ); VPROF_INCREMENT_GROUP_COUNTER( "vb driver mem", COUNTER_GROUP_NO_RESET, -nMemUsed ); VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, -nMemUsed ); #endif #ifdef VPROF_ENABLED if ( IsX360() || !m_bDynamic ) { Assert( m_pGlobalCounter ); *m_pGlobalCounter -= m_nBufferSize; } #endif #ifdef _DEBUG --s_BufferCount; #endif } Unlock( 0 ); if ( m_pSysmemBuffer ) { free( m_pSysmemBuffer ); m_pSysmemBuffer = NULL; } #if !defined( _X360 ) if ( m_pVB && !m_bExternalMemory ) { RECORD_COMMAND( DX8_DESTROY_VERTEX_BUFFER, 1 ); RECORD_INT( m_UID ); #if SHADERAPI_NO_D3DDeviceWrapper m_pVB->Release(); #else Dx9Device()->Release( m_pVB ); #endif } #else if ( m_pAllocatedMemory && !m_bExternalMemory ) { MemAlloc_RegisterExternalDeallocation( XMem_CVertexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) ); XPhysicalFree( m_pAllocatedMemory ); } m_pAllocatedMemory = NULL; m_pVB = NULL; #endif } //----------------------------------------------------------------------------- // Compute the next offset for the next lock //----------------------------------------------------------------------------- inline int CVertexBuffer::NextLockOffset( ) const { #if !defined( _X360 ) int nNextOffset = ( m_Position + m_VertexSize - 1 ) / m_VertexSize; nNextOffset *= m_VertexSize; return nNextOffset; #else return m_Position; //position is already aligned properly on unlocks for 360. #endif } //----------------------------------------------------------------------------- // Do we have enough room without discarding? //----------------------------------------------------------------------------- inline bool CVertexBuffer::HasEnoughRoom( int numVertices ) const { #if defined( _X360 ) return numVertices <= m_VertexCount; //the ring buffer will free room as needed #elif defined( _PS3 ) // Dynamic VBs are used with dynamic IBs that use 16-bit indices, so do not ever let this number exceed 65536 int nMaxVertexCount = m_bDynamic ? MIN( 65536, m_VertexCount ) : m_VertexCount; return ( NextLockOffset() / m_VertexSize + numVertices ) <= nMaxVertexCount; #else return (NextLockOffset() + (numVertices * m_VertexSize)) <= m_nBufferSize; #endif } //----------------------------------------------------------------------------- // Block until this part of the index buffer is free //----------------------------------------------------------------------------- inline void CVertexBuffer::BlockUntilUnused( int nBufferSize ) { Assert( nBufferSize <= m_nBufferSize ); #ifdef _X360 int nLockOffset = NextLockOffset(); Assert( (m_AllocationRing.Count() != 0) || ((m_Position == 0) && (m_iNextBlockingPosition == m_iAllocationSize)) ); if ( (m_iNextBlockingPosition - nLockOffset) >= nBufferSize ) return; Assert( (m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset == 0) || ((m_iNextBlockingPosition == m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset) && (m_Position <= m_iNextBlockingPosition)) ); int iMinBlockPosition = nLockOffset + nBufferSize; if( iMinBlockPosition > m_iAllocationSize ) { //Allocation requires us to wrap iMinBlockPosition = nBufferSize; m_Position = 0; //modify the last allocation so that it uses up the whole tail end of the buffer. Makes other code simpler Assert( m_AllocationRing.Count() != 0 ); m_AllocationRing[m_AllocationRing.Tail()].m_iEndOffset = m_iAllocationSize; //treat all allocations between the current position and the tail end of the ring as freed since they will be before we unblock while( m_AllocationRing.Count() ) { unsigned int head = m_AllocationRing.Head(); if( m_AllocationRing[head].m_iStartOffset == 0 ) break; m_AllocationRing.Remove( head ); } } //now we go through the allocations until we find the last fence we care about. Treat everything up until that fence as freed. DWORD FinalFence = 0; unsigned int iFinalAllocationZPassIdx = 0; while( m_AllocationRing.Count() ) { unsigned int head = m_AllocationRing.Head(); if( m_AllocationRing[head].m_iEndOffset >= iMinBlockPosition ) { //When this frees, we'll finally have enough space for the allocation FinalFence = m_AllocationRing[head].m_Fence; iFinalAllocationZPassIdx = m_AllocationRing[head].m_iZPassIdx; m_iNextBlockingPosition = m_AllocationRing[head].m_iEndOffset; m_AllocationRing.Remove( head ); break; } m_AllocationRing.Remove( head ); } Assert( FinalFence != 0 ); if( Dx9Device()->IsFencePending( FinalFence ) ) { #ifdef SPEW_VERTEX_BUFFER_STALLS float st = Plat_FloatTime(); #endif if ( ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) && ( iFinalAllocationZPassIdx == ShaderAPI()->GetConsoleZPassCounter() ) ) { // We're about to overrun our VB ringbuffer in a single Z prepass. To avoid rendering corruption, close out the // Z prepass and continue. This will reduce early-Z rejection efficiency and could cause a momentary framerate drop, // but it's better than rendering corruption. Warning( "Dynamic VB ring buffer overrun in Z Prepass. Tell Thorsten.\n" ); ShaderAPI()->EndConsoleZPass(); } Dx9Device()->BlockOnFence( FinalFence ); #ifdef SPEW_VERTEX_BUFFER_STALLS float dt = Plat_FloatTime() - st; Warning( "Blocked locking dynamic vertex buffer for %f ms!\n", 1000.0 * dt ); #endif } #endif } //----------------------------------------------------------------------------- // lock, unlock //----------------------------------------------------------------------------- inline unsigned char* CVertexBuffer::Lock( int numVerts, int& baseVertexIndex ) { #if defined( _X360 ) if ( m_pVB && m_pVB->IsSet( Dx9Device() ) ) { Unbind( m_pVB ); } #endif m_nLockCount = numVerts; unsigned char* pLockedData = 0; baseVertexIndex = 0; int nBufferSize = numVerts * m_VertexSize; Assert( IsPC() || ( IsGameConsole() && !m_bLocked ) ); // Ensure there is enough space in the VB for this data if ( numVerts > m_VertexCount ) { Assert( 0 ); return 0; } if ( !IsX360() && !m_pVB && !m_pSysmemBuffer ) return 0; DWORD dwFlags; if ( m_bDynamic ) { dwFlags = LOCKFLAGS_APPEND; #if !defined( _X360 ) // If either the user forced us to flush, // or there is not enough space for the vertex data, // then flush the buffer contents if ( !m_Position || m_bFlush || !HasEnoughRoom(numVerts) ) { if ( m_pSysmemBuffer || !g_pShaderUtil->IsRenderThreadSafe() ) m_bLateCreateShouldDiscard = true; m_bFlush = false; m_Position = 0; dwFlags = LOCKFLAGS_FLUSH; } #else if( m_bFlush ) { # if ( defined( X360_BLOCK_ON_VB_FLUSH ) ) { if( m_AllocationRing.Count() ) { DWORD FinalFence = m_AllocationRing[m_AllocationRing.Tail()].m_Fence; m_AllocationRing.RemoveAll(); m_Position = 0; m_iNextBlockingPosition = m_iAllocationSize; # if ( defined( SPEW_VERTEX_BUFFER_STALLS ) ) if( Dx9Device()->IsFencePending( FinalFence ) ) { float st = Plat_FloatTime(); # endif Dx9Device()->BlockOnFence( FinalFence ); # if ( defined ( SPEW_VERTEX_BUFFER_STALLS ) ) float dt = Plat_FloatTime() - st; Warning( "Blocked FLUSHING dynamic vertex buffer for %f ms!\n", 1000.0 * dt ); } # endif } } # endif m_bFlush = false; } #endif } else { // Since we are a static VB, always lock the beginning of the buffer. dwFlags = D3DLOCK_NOSYSLOCK; m_Position = 0; } if ( IsX360() && m_bDynamic ) { // Block until we have enough room in the buffer, this affects the result of NextLockOffset() in wrap conditions. BlockUntilUnused( nBufferSize ); m_pVB = NULL; } int nLockOffset = NextLockOffset( ); RECORD_COMMAND( DX8_LOCK_VERTEX_BUFFER, 4 ); RECORD_INT( m_UID ); RECORD_INT( nLockOffset ); RECORD_INT( nBufferSize ); RECORD_INT( dwFlags ); #if !defined( _X360 ) // If the caller isn't in the thread that owns the render lock, need to return a system memory pointer--cannot talk to GL from // the non-current thread. if ( !m_pSysmemBuffer && !g_pShaderUtil->IsRenderThreadSafe() ) { m_pSysmemBuffer = ( byte * )malloc( m_nBufferSize ); m_nSysmemBufferStartBytes = nLockOffset; Assert( ( m_nSysmemBufferStartBytes % m_VertexSize ) == 0 ); } if ( m_pSysmemBuffer != NULL ) { // Ensure that we're never moving backwards in a buffer--this code would need to be rewritten if so. // We theorize this can happen if you hit the end of a buffer and then wrap before drawing--but // this would probably break in other places as well. Assert( nLockOffset >= m_nSysmemBufferStartBytes ); pLockedData = m_pSysmemBuffer + nLockOffset; } else { #if SHADERAPI_NO_D3DDeviceWrapper m_pVB->Lock( nLockOffset, nBufferSize, reinterpret_cast< void** >( &pLockedData ), dwFlags ); #else Dx9Device()->Lock( m_pVB, nLockOffset, nBufferSize, reinterpret_cast< void** >( &pLockedData ), dwFlags ); #endif } #else pLockedData = m_pAllocatedMemory + nLockOffset; #endif Assert( pLockedData != 0 ); m_bLocked = true; if ( !IsX360() ) { baseVertexIndex = nLockOffset / m_VertexSize; } else { baseVertexIndex = 0; } return pLockedData; } inline unsigned char* CVertexBuffer::Modify( bool bReadOnly, int firstVertex, int numVerts ) { unsigned char* pLockedData = 0; // D3D still returns a pointer when you call lock with 0 verts, so just in // case it's actually doing something, don't even try to lock the buffer with 0 verts. if ( numVerts == 0 ) return NULL; m_nLockCount = numVerts; // If this hits, m_pSysmemBuffer logic needs to be added to this code. Assert( g_pShaderUtil->IsRenderThreadSafe() ); Assert( !m_pSysmemBuffer ); // if this hits, then we need to add code to handle it Assert( m_pVB && !m_bDynamic ); if ( firstVertex + numVerts > m_VertexCount ) { Assert( 0 ); return NULL; } DWORD dwFlags = D3DLOCK_NOSYSLOCK; if ( bReadOnly ) { dwFlags |= D3DLOCK_READONLY; } RECORD_COMMAND( DX8_LOCK_VERTEX_BUFFER, 4 ); RECORD_INT( m_UID ); RECORD_INT( firstVertex * m_VertexSize ); RECORD_INT( numVerts * m_VertexSize ); RECORD_INT( dwFlags ); // mmw: for forcing all dynamic... LOCKFLAGS_FLUSH ); #if !defined( _X360 ) #if SHADERAPI_NO_D3DDeviceWrapper m_pVB->Lock( firstVertex * m_VertexSize, numVerts * m_VertexSize, reinterpret_cast< void** >( &pLockedData ), dwFlags ); #else Dx9Device()->Lock( m_pVB, firstVertex * m_VertexSize, numVerts * m_VertexSize, reinterpret_cast< void** >( &pLockedData ), dwFlags ); #endif #else if ( m_pVB->IsSet( Dx9Device() ) ) { Unbind( m_pVB ); } pLockedData = m_pAllocatedMemory + (firstVertex * m_VertexSize); #endif m_Position = firstVertex * m_VertexSize; Assert( pLockedData != 0 ); m_bLocked = true; return pLockedData; } inline void CVertexBuffer::Unlock( int numVerts ) { if ( !m_bLocked ) return; if ( !IsX360() && !m_pVB && !m_pSysmemBuffer ) return; int nLockOffset = NextLockOffset(); int nBufferSize = numVerts * m_VertexSize; RECORD_COMMAND( DX8_UNLOCK_VERTEX_BUFFER, 1 ); RECORD_INT( m_UID ); #if !defined( _X360 ) if ( m_pSysmemBuffer != NULL ) { } else { #if DX_TO_GL_ABSTRACTION Assert( numVerts <= (int)m_nLockCount ); int unlockBytes = ( m_bDynamic ? nBufferSize : ( m_nLockCount * m_VertexSize ) ); #else int unlockBytes = 0; #endif ReallyUnlock( unlockBytes ); } m_Position = nLockOffset + nBufferSize; #else if ( m_bDynamic ) { if ( numVerts > 0 ) { DynamicBufferAllocation_t LockData; LockData.m_Fence = Dx9Device()->GetCurrentFence(); //This isn't the correct fence, but it's all we have access to for now and it'll provide marginal safety if something goes really wrong. LockData.m_iStartOffset = nLockOffset; LockData.m_iEndOffset = LockData.m_iStartOffset + nBufferSize; LockData.m_iZPassIdx = ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) ? ShaderAPI()->GetConsoleZPassCounter() : 0; // Round dynamic locks to 4k boundaries for GPU cache reasons LockData.m_iEndOffset = ALIGN_VALUE( LockData.m_iEndOffset, 4096 ); if( LockData.m_iEndOffset > m_iAllocationSize ) LockData.m_iEndOffset = m_iAllocationSize; m_AllocationRing.AddToTail( LockData ); m_Position = LockData.m_iEndOffset; void* pLockedData = m_pAllocatedMemory + LockData.m_iStartOffset; //Always re-use the same vertex buffer header based on the assumption that D3D copies it off in the draw calls. m_pVB = &m_D3DVertexBuffer; XGSetVertexBufferHeader( nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); XGOffsetResourceAddress( m_pVB, pLockedData ); // Invalidate the GPU caches for this memory. Dx9Device()->InvalidateGpuCache( pLockedData, nBufferSize, 0 ); } } else { if ( !m_pVB ) { m_pVB = &m_D3DVertexBuffer; XGSetVertexBufferHeader( m_nBufferSize, 0, D3DPOOL_DEFAULT, 0, m_pVB ); XGOffsetResourceAddress( m_pVB, m_pAllocatedMemory ); } m_Position = nLockOffset + nBufferSize; // Invalidate the GPU caches for this memory. Dx9Device()->InvalidateGpuCache( m_pAllocatedMemory, m_nBufferSize, 0 ); } #endif m_bLocked = false; } inline void CVertexBuffer::HandleLateCreation( ) { if ( !m_pSysmemBuffer ) { return; } if( !m_pVB ) { bool bPrior = g_VBAllocTracker->TrackMeshAllocations( "HandleLateCreation" ); Create( Dx9Device() ); if ( !bPrior ) { g_VBAllocTracker->TrackMeshAllocations( NULL ); } } void* pWritePtr = NULL; int dataToWriteBytes = m_bDynamic ? ( m_Position - m_nSysmemBufferStartBytes ) : ( m_nLockCount * m_VertexSize ); DWORD dwFlags = D3DLOCK_NOSYSLOCK; if ( m_bDynamic ) { dwFlags |= ( m_bLateCreateShouldDiscard ? D3DLOCK_DISCARD : D3DLOCK_NOOVERWRITE ); } // Always clear this. m_bLateCreateShouldDiscard = false; // If we've wrapped might as well transfer the whole VB if (dataToWriteBytes < 1) { dataToWriteBytes = m_VertexCount * VertexSize(); m_nSysmemBufferStartBytes = 0; } // Don't use the Lock function, it does a bunch of stuff we don't want. #if SHADERAPI_NO_D3DDeviceWrapper HRESULT hr = m_pVB->Lock( m_nSysmemBufferStartBytes, dataToWriteBytes, &pWritePtr, dwFlags); #else HRESULT hr = Dx9Device()->Lock( m_pVB, m_nSysmemBufferStartBytes, dataToWriteBytes, &pWritePtr, dwFlags ); #endif // If this fails we're about to crash. Consider skipping the update and leaving // m_pSysmemBuffer around to try again later. (For example in case of device loss) Assert( SUCCEEDED( hr ) ); hr; memcpy( pWritePtr, m_pSysmemBuffer + m_nSysmemBufferStartBytes, dataToWriteBytes ); ReallyUnlock( dataToWriteBytes ); free( m_pSysmemBuffer ); m_pSysmemBuffer = NULL; } // Returns the allocated size inline int CVertexBuffer::AllocationSize() const { #ifdef _X360 return m_iAllocationSize; #else return m_VertexCount * m_VertexSize; #endif } #endif // DYNAMICVB_H