Team Fortress 2 Source Code as of 22 April 2020: dynamicib.h


//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose:
//
//===========================================================================//

#ifndef DYNAMICIB_H
#define DYNAMICIB_H

#ifdef _WIN32
#pragma once
#endif

#include "locald3dtypes.h"
#include "recording.h"
#include "shaderapidx8_global.h"
#include "shaderapidx8.h"
#include "shaderapi/ishaderutil.h"
#include "materialsystem/ivballoctracker.h"
#include "tier1/memstack.h"
#include "gpubufferallocator.h"

/////////////////////////////
// D. Sim Dietrich Jr.
// [email protected]
/////////////////////////////

#ifdef _WIN32
#pragma warning (disable:4189)
#endif

#include "locald3dtypes.h"
#include "tier1/strtools.h"
#include "tier1/utlqueue.h"
#include "tier0/memdbgon.h"

// Helper function to unbind an index buffer
void Unbind( IDirect3DIndexBuffer9 *pIndexBuffer );

#define X360_INDEX_BUFFER_SIZE_MULTIPLIER 4.0	// minimum of 1, only affects dynamic buffers

//#define X360_BLOCK_ON_IB_FLUSH	// uncomment to block until all data is consumed when a flush is requested. Otherwise we only block when absolutely necessary

#define SPEW_INDEX_BUFFER_STALLS	// uncomment to allow buffer stall spewing.
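
//-----------------------------------------------------------------------------
// Added overview comment (not in the original header): CIndexBuffer wraps a
// 16-bit D3D9 index buffer. Static buffers are filled once; dynamic buffers
// are appended to with no-overwrite locks and discarded when full. On X360,
// dynamic buffers instead ring-buffer through CPU-allocated physical memory
// guarded by GPU fences, and static buffers may be pooled via
// CGPUBufferAllocator.
//-----------------------------------------------------------------------------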
class CIndexBuffer
{
public:
	CIndexBuffer( IDirect3DDevice9 *pD3D, int count, bool bSoftwareVertexProcessing, bool dynamic = false );

#ifdef _X360
	CIndexBuffer();
	void Init( IDirect3DDevice9 *pD3D, uint16 *pIndexMemory, int count );
#endif

	int AddRef() { return ++m_nReferenceCount; }

	int Release()
	{
		int retVal = --m_nReferenceCount;
		if ( retVal == 0 )
			delete this;
		return retVal;
	}

	LPDIRECT3DINDEXBUFFER GetInterface() const
	{
		// If this buffer still exists, then Late Creation didn't happen. Best case: we'll render the wrong image. Worst case: Crash.
		Assert( !m_pSysmemBuffer );
		return m_pIB;
	}

	// Use at beginning of frame to force a flush of VB contents on first draw
	void FlushAtFrameStart() { m_bFlush = true; }

	// lock, unlock
	unsigned short *Lock( bool bReadOnly, int numIndices, int &startIndex, int startPosition = -1 );
	void Unlock( int numIndices );
	void HandleLateCreation( );

	// Index position
	int IndexPosition() const { return m_Position; }

	// Index size
	int IndexSize() const { return sizeof(unsigned short); }

	// Index count
	int IndexCount() const { return m_IndexCount; }

#if _X360
	// For some IBs, memory allocation is managed by CGPUBufferAllocator, via ShaderAPI
	const GPUBufferHandle_t *GetBufferAllocationHandle( void );
	void SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle );
	bool IsPooled( void ) { return m_GPUBufferHandle.IsValid(); }
	// Expose the data pointer for read-only CPU access to the data
	// (double-indirection supports relocation of the data by CGPUBufferAllocator)
	const byte **GetBufferDataPointerAddress( void );
#endif // _X360

	// Do we have enough room without discarding?
	bool HasEnoughRoom( int numIndices ) const;

	bool IsDynamic() const { return m_bDynamic; }
	bool IsExternal() const { return m_bExternalMemory; }

	// Block until there's a free portion of the buffer of this size, m_Position will be updated to point at where this section starts
	void BlockUntilUnused( int nAllocationSize );

#ifdef CHECK_INDICES
	void UpdateShadowIndices( unsigned short *pData )
	{
		Assert( m_LockedStartIndex + m_LockedNumIndices <= m_NumIndices );
		memcpy( m_pShadowIndices + m_LockedStartIndex, pData, m_LockedNumIndices * IndexSize() );
	}

	unsigned short GetShadowIndex( int i )
	{
		Assert( i >= 0 && i < (int)m_NumIndices );
		return m_pShadowIndices[i];
	}
#endif

	// UID
	unsigned int UID() const
	{
#ifdef RECORDING
		return m_UID;
#else
		return 0;
#endif
	}

	void HandlePerFrameTextureStats( int frame )
	{
#ifdef VPROF_ENABLED
		if ( m_Frame != frame && !m_bDynamic )
		{
			m_Frame = frame;
			VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_frame_" TEXTURE_GROUP_STATIC_INDEX_BUFFER,
				COUNTER_GROUP_TEXTURE_PER_FRAME, IndexCount() * IndexSize() );
		}
#endif
	}

	static int BufferCount()
	{
#ifdef _DEBUG
		return s_BufferCount;
#else
		return 0;
#endif
	}

	inline int AllocationSize() const;
	inline int AllocationCount() const;

	// Marks a fence indicating when this buffer was used
	void MarkUsedInRendering()
	{
#ifdef _X360
		if ( m_bDynamic && m_pIB )
		{
			Assert( m_AllocationRing.Count() > 0 );
			m_AllocationRing[m_AllocationRing.Tail()].m_Fence = Dx9Device()->GetCurrentFence();
		}
#endif
	}

private:
	void Create( IDirect3DDevice9 *pD3D );

	inline void ReallyUnlock( int unlockBytes )
	{
#if DX_TO_GL_ABSTRACTION
		// Knowing how much data was actually written is critical for performance under OpenGL.
		m_pIB->UnlockActualSize( unlockBytes );
#else
		unlockBytes; // Unused here
		m_pIB->Unlock();
#endif
	}

	enum LOCK_FLAGS
	{
		LOCKFLAGS_FLUSH  = D3DLOCK_NOSYSLOCK | D3DLOCK_DISCARD,
#if !defined( _X360 )
		LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK | D3DLOCK_NOOVERWRITE
#else
		// X360BUG: forcing all locks to gpu flush, otherwise bizarre mesh corruption on decals
		// Currently iterating with microsoft 360 support to track source of gpu corruption
		LOCKFLAGS_APPEND = D3DLOCK_NOSYSLOCK
#endif
	};

	LPDIRECT3DINDEXBUFFER m_pIB;

#ifdef _X360
	struct DynamicBufferAllocation_t
	{
		DWORD m_Fence;				// track whether this memory is safe to use again.
		int m_iStartOffset;
		int m_iEndOffset;
		unsigned int m_iZPassIdx;	// The zpass during which this allocation was made
	};

	int m_iNextBlockingPosition;	// m_iNextBlockingPosition >= m_Position where another allocation is still in use.
	unsigned char *m_pAllocatedMemory;
	int m_iAllocationCount;			// The total number of indices the buffer we allocated can hold. Usually greater than the number of indices asked for
	IDirect3DIndexBuffer9 m_D3DIndexBuffer;	// Only need one shared D3D header for our usage patterns.
	CUtlLinkedList<DynamicBufferAllocation_t> m_AllocationRing;	// tracks what chunks of our memory are potentially still in use by D3D
	GPUBufferHandle_t m_GPUBufferHandle;	// Handle to a memory allocation within a shared physical memory pool (see CGPUBufferAllocator)
#endif

	int m_IndexCount;
	int m_Position;
	byte *m_pSysmemBuffer;
	int m_nSysmemBufferStartBytes;

	unsigned char m_bLocked : 1;
	unsigned char m_bFlush : 1;
	unsigned char m_bDynamic : 1;
	unsigned char m_bExternalMemory : 1;
	unsigned char m_bSoftwareVertexProcessing : 1;
	unsigned char m_bLateCreateShouldDiscard : 1;

#ifdef VPROF_ENABLED
	int m_Frame;
#endif

	CInterlockedInt m_nReferenceCount;

#ifdef _DEBUG
	static int s_BufferCount;
#endif

#ifdef RECORDING
	unsigned int m_UID;
#endif

#if !defined( _X360 )
	//LockedBufferContext m_LockData;
#endif

protected:
#ifdef CHECK_INDICES
	unsigned short *m_pShadowIndices;
	unsigned int m_NumIndices;
#endif

	unsigned int m_LockedStartIndex;
	unsigned int m_LockedNumIndices;

private:
	// Must use reference counting functions above
	~CIndexBuffer();
};
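
// Illustrative usage sketch (added, not part of the original header). It shows
// the lock/unlock pattern for a dynamic buffer; the buffer size and index
// count below are placeholder values.
//
//	CIndexBuffer *pIndexBuffer = new CIndexBuffer( Dx9Device(), 4096, false, true );
//	pIndexBuffer->AddRef();
//
//	int nFirstIndex;
//	unsigned short *pIndices = pIndexBuffer->Lock( false, 128, nFirstIndex );
//	// ... write 128 16-bit indices into pIndices ...
//	pIndexBuffer->Unlock( 128 );
//	pIndexBuffer->MarkUsedInRendering();
//
//	pIndexBuffer->Release();	// deletes the buffer once the reference count hits zero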
#if defined( _X360 )
#include "utlmap.h"
MEMALLOC_DECLARE_EXTERNAL_TRACKING( XMem_CIndexBuffer );
#endif

//-----------------------------------------------------------------------------
// constructor, destructor
//-----------------------------------------------------------------------------
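// Added note (not in the original header): on the PC path the constructor
// either creates the D3D buffer immediately (single-threaded material system,
// main thread) or defers creation by staging writes in m_pSysmemBuffer until
// HandleLateCreation(). On X360 it allocates physical memory directly: a
// write-combined ring buffer for dynamic IBs, a pooled allocation via
// MeshMgr()->AllocatePooledIB() for static IBs when possible, or a standalone
// XPhysicalAlloc() otherwise.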
inline CIndexBuffer::CIndexBuffer( IDirect3DDevice9 *pD3D, int count,
	bool bSoftwareVertexProcessing, bool dynamic ) :
		m_pIB(0),
		m_Position(0),
		m_bFlush(true),
		m_bLocked(false),
		m_bExternalMemory(false),
		m_bDynamic(dynamic),
		m_bSoftwareVertexProcessing( bSoftwareVertexProcessing ),
		m_bLateCreateShouldDiscard( false )
#ifdef _X360
		,m_pAllocatedMemory(NULL)
		,m_iNextBlockingPosition(0)
		,m_iAllocationCount(0)
#endif
#ifdef VPROF_ENABLED
		,m_Frame( -1 )
#endif
		, m_nReferenceCount( 0 )
{
	// For write-combining, ensure we always have locked memory aligned to 4-byte boundaries
	count = ALIGN_VALUE( count, 2 );
	m_IndexCount = count;

	MEM_ALLOC_CREDIT_( m_bDynamic ? ( "D3D: " TEXTURE_GROUP_DYNAMIC_INDEX_BUFFER ) : ( "D3D: " TEXTURE_GROUP_STATIC_INDEX_BUFFER ) );

#ifdef CHECK_INDICES
	m_pShadowIndices = NULL;
#endif

#ifdef RECORDING
	// assign a UID
	static unsigned int uid = 0;
	m_UID = uid++;
#endif

#ifdef _DEBUG
	++s_BufferCount;
#endif

#ifdef CHECK_INDICES
	m_pShadowIndices = new unsigned short[ m_IndexCount ];
	m_NumIndices = m_IndexCount;
#endif
#if !defined( _X360 )
	if ( g_pShaderUtil->GetThreadMode() != MATERIAL_SINGLE_THREADED || !ThreadInMainThread() )
	{
		m_pSysmemBuffer = ( byte * )malloc( count * IndexSize() );
		m_nSysmemBufferStartBytes = 0;
	}
	else
	{
		m_pSysmemBuffer = NULL;
		Create( pD3D );
	}
#else // _X360
	int nBufferSize = (count * IndexSize());
	if ( m_bDynamic )
	{
		m_iAllocationCount = count * X360_INDEX_BUFFER_SIZE_MULTIPLIER;
		Assert( m_iAllocationCount >= count );
		m_iAllocationCount = ALIGN_VALUE( m_iAllocationCount, 2 );
		m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( m_iAllocationCount * IndexSize(), MAXULONG_PTR, 0, PAGE_READWRITE | MEM_LARGE_PAGES | PAGE_WRITECOMBINE );
	}
	else if ( MeshMgr()->AllocatePooledIB( this, nBufferSize, TEXTURE_GROUP_STATIC_INDEX_BUFFER ) )
	{
		// Successfully allocated in a shared ShaderAPI memory pool (SetBufferAllocationHandle will have been called to set the pointer and stream offset)
		m_iAllocationCount = count;
		Assert( m_pAllocatedMemory );
	}
	else
	{
		// Fall back to allocating a standalone IB
		// NOTE: write-combining (PAGE_WRITECOMBINE) is deliberately not used, since it slows down CPU access to the data (decals+defragmentation)
		m_iAllocationCount = count;
		m_pAllocatedMemory = (unsigned char*)XPhysicalAlloc( nBufferSize, MAXULONG_PTR, 0, PAGE_READWRITE );
	}

	if ( m_pAllocatedMemory && !IsPooled() )
	{
		MemAlloc_RegisterExternalAllocation( XMem_CIndexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) );
		if ( !m_bDynamic )
		{
			// Track non-pooled physallocs, to help tune CGPUBufferAllocator usage
			g_SizeIndividualIBPhysAllocs += XPhysicalSize( m_pAllocatedMemory );
			g_NumIndividualIBPhysAllocs++;
		}
	}

	m_iNextBlockingPosition = m_iAllocationCount;
#endif // _X360

#ifdef VPROF_ENABLED
	if ( !m_bDynamic )
	{
		VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_global_" TEXTURE_GROUP_STATIC_INDEX_BUFFER,
			COUNTER_GROUP_TEXTURE_GLOBAL, IndexCount() * IndexSize() );
	}
	else if ( IsX360() )
	{
		VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_global_" TEXTURE_GROUP_DYNAMIC_INDEX_BUFFER,
			COUNTER_GROUP_TEXTURE_GLOBAL, IndexCount() * IndexSize() );
	}
#endif
}
void CIndexBuffer::Create( IDirect3DDevice9 *pD3D )
{
	D3DINDEXBUFFER_DESC desc;
	memset( &desc, 0x00, sizeof( desc ) );

	desc.Format = D3DFMT_INDEX16;
	desc.Size = sizeof(unsigned short) * m_IndexCount;
	desc.Type = D3DRTYPE_INDEXBUFFER;
	desc.Pool = D3DPOOL_DEFAULT;

	desc.Usage = D3DUSAGE_WRITEONLY;
	if ( m_bDynamic )
	{
		desc.Usage |= D3DUSAGE_DYNAMIC;
	}
	if ( m_bSoftwareVertexProcessing )
	{
		desc.Usage |= D3DUSAGE_SOFTWAREPROCESSING;
	}

	RECORD_COMMAND( DX8_CREATE_INDEX_BUFFER, 6 );
	RECORD_INT( m_UID );
	RECORD_INT( m_IndexCount * IndexSize() );
	RECORD_INT( desc.Usage );
	RECORD_INT( desc.Format );
	RECORD_INT( desc.Pool );
	RECORD_INT( m_bDynamic );

#if !defined( _X360 )
	HRESULT hr = pD3D->CreateIndexBuffer(
		m_IndexCount * IndexSize(),
		desc.Usage,
		desc.Format,
		desc.Pool,
		&m_pIB,
		NULL );

	if ( hr != D3D_OK )
	{
		Warning( "CreateIndexBuffer failed!\n" );
	}

	if ( ( hr == D3DERR_OUTOFVIDEOMEMORY ) || ( hr == E_OUTOFMEMORY ) )
	{
		// Don't have the memory for this. Try flushing all managed resources
		// out of vid mem and try again.
		// FIXME: need to record this
		pD3D->EvictManagedResources();
		hr = pD3D->CreateIndexBuffer( m_IndexCount * IndexSize(),
			desc.Usage, desc.Format, desc.Pool, &m_pIB, NULL );
	}

	Assert( m_pIB );
	Assert( hr == D3D_OK );

#ifdef MEASURE_DRIVER_ALLOCATIONS
	int nMemUsed = 1024;
	VPROF_INCREMENT_GROUP_COUNTER( "ib count", COUNTER_GROUP_NO_RESET, 1 );
	VPROF_INCREMENT_GROUP_COUNTER( "ib driver mem", COUNTER_GROUP_NO_RESET, nMemUsed );
	VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, nMemUsed );
#endif

#if defined( _DEBUG )
	if ( IsPC() && m_pIB && !m_pSysmemBuffer )
	{
		D3DINDEXBUFFER_DESC aDesc;
		m_pIB->GetDesc( &aDesc );
		Assert( memcmp( &aDesc, &desc, sizeof( desc ) ) == 0 );
	}
#endif
#endif // !_X360
}
#ifdef _X360
void *AllocateTempBuffer( size_t nSizeInBytes );

inline CIndexBuffer::CIndexBuffer() :
	m_pIB(0),
	m_Position(0),
	m_bFlush(false),
	m_bLocked(false),
	m_bExternalMemory( true ),
	m_bDynamic( false )
#ifdef VPROF_ENABLED
	,m_Frame( -1 )
#endif
{
	m_IndexCount = 0;
#ifdef CHECK_INDICES
	m_pShadowIndices = NULL;
#endif
	m_iAllocationCount = 0;
	m_pAllocatedMemory = NULL;
	m_iNextBlockingPosition = 0;
}

#include "tier0/memdbgoff.h"

inline void CIndexBuffer::Init( IDirect3DDevice9 *pD3D, uint16 *pIndexMemory, int count )
{
	m_IndexCount = count;
	m_Position = count;
	m_iAllocationCount = count;
	m_pAllocatedMemory = (uint8*)pIndexMemory;
	m_iNextBlockingPosition = m_iAllocationCount;

	int nBufferSize = count * sizeof(uint16);
	m_pIB = new( AllocateTempBuffer( sizeof( IDirect3DIndexBuffer9 ) ) ) IDirect3DIndexBuffer9;
	XGSetIndexBufferHeader( nBufferSize, 0, D3DFMT_INDEX16, 0, 0, m_pIB );
	XGOffsetResourceAddress( m_pIB, pIndexMemory );
}

#include "tier0/memdbgon.h"
#endif // _X360
inline CIndexBuffer::~CIndexBuffer()
{
#ifdef _DEBUG
	if ( !m_bExternalMemory )
	{
		--s_BufferCount;
	}
#endif

	Unlock(0);

#ifdef CHECK_INDICES
	if ( m_pShadowIndices )
	{
		delete [] m_pShadowIndices;
		m_pShadowIndices = NULL;
	}
#endif

	if ( m_pSysmemBuffer )
	{
		free( m_pSysmemBuffer );
		m_pSysmemBuffer = NULL;
	}

#ifdef MEASURE_DRIVER_ALLOCATIONS
	if ( !m_bExternalMemory )
	{
		int nMemUsed = 1024;
		VPROF_INCREMENT_GROUP_COUNTER( "ib count", COUNTER_GROUP_NO_RESET, -1 );
		VPROF_INCREMENT_GROUP_COUNTER( "ib driver mem", COUNTER_GROUP_NO_RESET, -nMemUsed );
		VPROF_INCREMENT_GROUP_COUNTER( "total driver mem", COUNTER_GROUP_NO_RESET, -nMemUsed );
	}
#endif

#if !defined( _X360 )
	if ( m_pIB )
	{
		RECORD_COMMAND( DX8_DESTROY_INDEX_BUFFER, 1 );
		RECORD_INT( m_UID );
		m_pIB->Release();
	}
#else
	if ( m_pIB && m_pIB->IsSet( Dx9Device() ) )
	{
		Unbind( m_pIB );
	}

	if ( m_pAllocatedMemory && !m_bExternalMemory )
	{
		if ( IsPooled() )
		{
			MeshMgr()->DeallocatePooledIB( this );
		}
		else
		{
			MemAlloc_RegisterExternalDeallocation( XMem_CIndexBuffer, m_pAllocatedMemory, XPhysicalSize( m_pAllocatedMemory ) );
			if ( !m_bDynamic )
			{
				// Track non-pooled physallocs, to help tune CGPUBufferAllocator usage
				g_SizeIndividualIBPhysAllocs -= XPhysicalSize( m_pAllocatedMemory );
				g_NumIndividualIBPhysAllocs--;
			}
			XPhysicalFree( m_pAllocatedMemory );
		}
	}

	m_pAllocatedMemory = NULL;
	m_pIB = NULL;
#endif

#ifdef VPROF_ENABLED
	if ( !m_bExternalMemory )
	{
		if ( !m_bDynamic )
		{
			VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_global_" TEXTURE_GROUP_STATIC_INDEX_BUFFER,
				COUNTER_GROUP_TEXTURE_GLOBAL, - IndexCount() * IndexSize() );
		}
		else if ( IsX360() )
		{
			VPROF_INCREMENT_GROUP_COUNTER( "TexGroup_global_" TEXTURE_GROUP_DYNAMIC_INDEX_BUFFER,
				COUNTER_GROUP_TEXTURE_GLOBAL, - IndexCount() * IndexSize() );
		}
	}
#endif
}
#ifdef _X360
//-----------------------------------------------------------------------------
// Get memory allocation data
//-----------------------------------------------------------------------------
inline const GPUBufferHandle_t *CIndexBuffer::GetBufferAllocationHandle( void )
{
	Assert( IsPooled() );
	return ( IsPooled() ? &m_GPUBufferHandle : NULL );
}

//-----------------------------------------------------------------------------
// Update memory allocation data
//-----------------------------------------------------------------------------
inline void CIndexBuffer::SetBufferAllocationHandle( const GPUBufferHandle_t &bufferAllocationHandle )
{
	// This IB's memory has been reallocated or freed, update our cached pointer and the D3D header
	// NOTE: this should never be called while any rendering is in flight!
	Assert( ( m_pAllocatedMemory == NULL ) || IsPooled() );
	if ( ( m_pAllocatedMemory == NULL ) || IsPooled() )
	{
		m_GPUBufferHandle = bufferAllocationHandle;
		m_pAllocatedMemory = m_GPUBufferHandle.pMemory;
		if ( m_pIB )
		{
			int nBufferSize = m_IndexCount * IndexSize();
			XGSetIndexBufferHeader( nBufferSize, 0, D3DFMT_INDEX16, 0, 0, m_pIB );
			XGOffsetResourceAddress( m_pIB, m_pAllocatedMemory );
		}
	}
}

//-----------------------------------------------------------------------------
// Expose the data pointer for read-only CPU access to the data
//-----------------------------------------------------------------------------
inline const byte **CIndexBuffer::GetBufferDataPointerAddress( void )
{
	if ( m_bDynamic /* FIXME: || m_bExternalMemory */ )
		return NULL;
	return (const byte **)&m_pAllocatedMemory;
}
#endif // _X360

//-----------------------------------------------------------------------------
// Do we have enough room without discarding?
//-----------------------------------------------------------------------------
inline bool CIndexBuffer::HasEnoughRoom( int numIndices ) const
{
#if !defined( _X360 )
	return ( numIndices + m_Position ) <= m_IndexCount;
#else
	return numIndices <= m_IndexCount;	// the ring buffer will free room as needed
#endif
}
//-----------------------------------------------------------------------------
// Block until this part of the index buffer is free
//-----------------------------------------------------------------------------
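// Added note (not in the original header): the allocation ring holds one entry
// per dynamic lock, each tagged with a GPU fence (set in Unlock() and refreshed
// by MarkUsedInRendering()). This walks the ring from the head, retiring
// entries until it reaches the one whose end offset yields enough room, wraps
// m_Position to 0 if the request doesn't fit before the end of the buffer, and
// blocks on that entry's fence if the GPU hasn't passed it yet.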
inline void CIndexBuffer::BlockUntilUnused( int nAllocationSize )
{
	Assert( nAllocationSize <= m_IndexCount );

#ifdef _X360
	Assert( (m_AllocationRing.Count() != 0) || ((m_Position == 0) && (m_iNextBlockingPosition == m_iAllocationCount)) );

	if ( (m_iNextBlockingPosition - m_Position) >= nAllocationSize )
		return;

	Assert( (m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset == 0) || ((m_iNextBlockingPosition == m_AllocationRing[m_AllocationRing.Head()].m_iStartOffset) && (m_Position <= m_iNextBlockingPosition)) );

	int iMinBlockPosition = m_Position + nAllocationSize;
	if( iMinBlockPosition > m_iAllocationCount )
	{
		// Allocation requires us to wrap
		iMinBlockPosition = nAllocationSize;
		m_Position = 0;

		// modify the last allocation so that it uses up the whole tail end of the buffer. Makes other code simpler
		Assert( m_AllocationRing.Count() != 0 );
		m_AllocationRing[m_AllocationRing.Tail()].m_iEndOffset = m_iAllocationCount;

		// treat all allocations between the current position and the tail end of the ring as freed since they will be before we unblock
		while( m_AllocationRing.Count() )
		{
			unsigned int head = m_AllocationRing.Head();
			if( m_AllocationRing[head].m_iStartOffset == 0 )
				break;
			m_AllocationRing.Remove( head );
		}
	}

	// now we go through the allocations until we find the last fence we care about. Treat everything up until that fence as freed.
	DWORD FinalFence = 0;
	unsigned int iFinalAllocationZPassIdx = 0;
	while( m_AllocationRing.Count() )
	{
		unsigned int head = m_AllocationRing.Head();

		if( m_AllocationRing[head].m_iEndOffset >= iMinBlockPosition )
		{
			// When this frees, we'll finally have enough space for the allocation
			FinalFence = m_AllocationRing[head].m_Fence;
			iFinalAllocationZPassIdx = m_AllocationRing[head].m_iZPassIdx;
			m_iNextBlockingPosition = m_AllocationRing[head].m_iEndOffset;
			m_AllocationRing.Remove( head );
			break;
		}
		m_AllocationRing.Remove( head );
	}
	Assert( FinalFence != 0 );

	if( Dx9Device()->IsFencePending( FinalFence ) )
	{
#ifdef SPEW_INDEX_BUFFER_STALLS
		float st = Plat_FloatTime();
#endif
		if ( ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) &&
			( iFinalAllocationZPassIdx == ShaderAPI()->Get360ZPassCounter() ) )
		{
			// We're about to overrun our IB ringbuffer in a single Z prepass. To avoid rendering corruption, close out the
			// Z prepass and continue. This will reduce early-Z rejection efficiency and could cause a momentary framerate drop,
			// but it's better than rendering corruption.
			Warning( "Dynamic IB ring buffer overrun in Z Prepass. Tell Thorsten.\n" );
			ShaderAPI()->End360ZPass();
		}

		Dx9Device()->BlockOnFence( FinalFence );
#ifdef SPEW_INDEX_BUFFER_STALLS
		float dt = Plat_FloatTime() - st;
		Warning( "Blocked locking dynamic index buffer for %f ms!\n", 1000.0 * dt );
#endif
	}
#endif
}
//-----------------------------------------------------------------------------
// lock, unlock
//-----------------------------------------------------------------------------
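// Added note (not in the original header): for dynamic buffers, Lock() appends
// with LOCKFLAGS_APPEND and falls back to LOCKFLAGS_FLUSH (discard) when the
// buffer is full, at position 0, or after FlushAtFrameStart(). When called from
// a thread that doesn't own the render lock, it returns a pointer into the
// m_pSysmemBuffer staging area instead of locking the D3D buffer.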
inline unsigned short* CIndexBuffer::Lock( bool bReadOnly, int numIndices, int& startIndex, int startPosition )
{
	Assert( !m_bLocked );

#if defined( _X360 )
	if ( m_pIB && m_pIB->IsSet( Dx9Device() ) )
	{
		Unbind( m_pIB );
	}
#endif

	unsigned short* pLockedData = NULL;

	// For write-combining, ensure we always have locked memory aligned to 4-byte boundaries
	if( m_bDynamic )
		numIndices = ALIGN_VALUE( numIndices, 2 );

	// Ensure there is enough space in the IB for this data
	if ( numIndices > m_IndexCount )
	{
		Error( "too many indices for index buffer. . tell a programmer (%d>%d)\n", ( int )numIndices, ( int )m_IndexCount );
		Assert( false );
		return 0;
	}

	if ( !IsX360() && !m_pIB && !m_pSysmemBuffer )
		return 0;

	DWORD dwFlags;
	if ( m_bDynamic )
	{
		// startPosition now can be != -1, when calling in here with a static (staging) buffer.
#if !defined( _X360 )
		dwFlags = LOCKFLAGS_APPEND;

		// If either user forced us to flush,
		// or there is not enough space for the vertex data,
		// then flush the buffer contents
		// xbox must not append at position 0 because nooverwrite cannot be guaranteed
		if ( !m_Position || m_bFlush || !HasEnoughRoom(numIndices) )
		{
			if ( m_pSysmemBuffer || !g_pShaderUtil->IsRenderThreadSafe() )
				m_bLateCreateShouldDiscard = true;

			m_bFlush = false;
			m_Position = 0;

			dwFlags = LOCKFLAGS_FLUSH;
		}
#else
		if ( m_bFlush )
		{
#	if ( defined( X360_BLOCK_ON_IB_FLUSH ) )
			{
				if( m_AllocationRing.Count() )
				{
					DWORD FinalFence = m_AllocationRing[m_AllocationRing.Tail()].m_Fence;
					m_AllocationRing.RemoveAll();
					m_Position = 0;
					m_iNextBlockingPosition = m_iAllocationCount;
#		if ( defined( SPEW_VERTEX_BUFFER_STALLS ) )
					if( Dx9Device()->IsFencePending( FinalFence ) )
					{
						float st = Plat_FloatTime();
#		endif
						Dx9Device()->BlockOnFence( FinalFence );
#		if ( defined ( SPEW_VERTEX_BUFFER_STALLS ) )
						float dt = Plat_FloatTime() - st;
						Warning( "Blocked FLUSHING dynamic index buffer for %f ms!\n", 1000.0 * dt );
					}
#		endif
				}
			}
#	endif
			m_bFlush = false;
		}
#endif
	}
	else
	{
		dwFlags = D3DLOCK_NOSYSLOCK;
	}

	if ( bReadOnly )
	{
		dwFlags |= D3DLOCK_READONLY;
	}

	int position = m_Position;
	if( startPosition >= 0 )
	{
		position = startPosition;
	}

	RECORD_COMMAND( DX8_LOCK_INDEX_BUFFER, 4 );
	RECORD_INT( m_UID );
	RECORD_INT( position * IndexSize() );
	RECORD_INT( numIndices * IndexSize() );
	RECORD_INT( dwFlags );

	m_LockedStartIndex = position;
	m_LockedNumIndices = numIndices;

	HRESULT hr = D3D_OK;

#if !defined( _X360 )
	// If the caller isn't in the thread that owns the render lock, need to return a system memory pointer--cannot talk to GL from
	// the non-current thread.
	if ( !m_pSysmemBuffer && !g_pShaderUtil->IsRenderThreadSafe() )
	{
		m_pSysmemBuffer = ( byte * )malloc( m_IndexCount * IndexSize() );
		m_nSysmemBufferStartBytes = position * IndexSize();
	}

	if ( m_pSysmemBuffer != NULL )
	{
		// Ensure that we're never moving backwards in a buffer--this code would need to be rewritten if so.
		// We theorize this can happen if you hit the end of a buffer and then wrap before drawing--but
		// this would probably break in other places as well.
		Assert( position * IndexSize() >= m_nSysmemBufferStartBytes );
		pLockedData = ( unsigned short * )( m_pSysmemBuffer + ( position * IndexSize() ) );
	}
	else
	{
		hr = m_pIB->Lock( position * IndexSize(), numIndices * IndexSize(),
			reinterpret_cast< void** >( &pLockedData ), dwFlags );
	}
#else
	if ( m_bDynamic )
	{
		// Block until earlier parts of the buffer are free
		BlockUntilUnused( numIndices );
		position = m_Position;
		m_pIB = NULL;
		Assert( (m_Position + numIndices) <= m_iAllocationCount );
	}
	else
	{
		// static, block until last lock finished?
		m_Position = position;
	}
	pLockedData = (unsigned short *)(m_pAllocatedMemory + (position * IndexSize()));
#endif

	switch ( hr )
	{
	case D3DERR_INVALIDCALL:
		Msg( "D3DERR_INVALIDCALL - Index Buffer Lock Failed in %s on line %d (offset %d, size %d, flags 0x%x)\n", V_UnqualifiedFileName(__FILE__), __LINE__, position * IndexSize(), numIndices * IndexSize(), dwFlags );
		break;
	case D3DERR_DRIVERINTERNALERROR:
		Msg( "D3DERR_DRIVERINTERNALERROR - Index Buffer Lock Failed in %s on line %d (offset %d, size %d, flags 0x%x)\n", V_UnqualifiedFileName(__FILE__), __LINE__, position * IndexSize(), numIndices * IndexSize(), dwFlags );
		break;
	case D3DERR_OUTOFVIDEOMEMORY:
		Msg( "D3DERR_OUTOFVIDEOMEMORY - Index Buffer Lock Failed in %s on line %d (offset %d, size %d, flags 0x%x)\n", V_UnqualifiedFileName(__FILE__), __LINE__, position * IndexSize(), numIndices * IndexSize(), dwFlags );
		break;
	}

	Assert( pLockedData != NULL );

	if ( !IsX360() )
	{
		startIndex = position;
	}
	else
	{
		startIndex = 0;
	}

	Assert( m_bLocked == false );
	m_bLocked = true;
	return pLockedData;
}
inline void CIndexBuffer::Unlock( int numIndices )
{
#if defined( _X360 )
	Assert( (m_Position + numIndices) <= m_iAllocationCount );
#else
	Assert( (m_Position + numIndices) <= m_IndexCount );
#endif

	if ( !m_bLocked )
		return;

	// For write-combining, ensure we always have locked memory aligned to 4-byte boundaries
	//	if( m_bDynamic )
	//		numIndices = ALIGN_VALUE( numIndices, 2 );

	if ( !IsX360() && !m_pIB && !m_pSysmemBuffer )
		return;

	RECORD_COMMAND( DX8_UNLOCK_INDEX_BUFFER, 1 );
	RECORD_INT( m_UID );

#if !defined( _X360 )
	if ( m_pSysmemBuffer )
	{
	}
	else
	{
#if DX_TO_GL_ABSTRACTION
		// Knowing how much data was actually written is critical for performance under OpenGL.
		// Important notes: numIndices indicates how much we could move the current position. For dynamic buffers, it should indicate the # of actually written indices; for static buffers it's typically 0.
		// If it's a dynamic buffer (where we actually care about perf), assume the caller isn't lying about numIndices, otherwise just assume they wrote the entire thing.
		// If you modify this code, be sure to test on both AMD and NVidia drivers!
		Assert( numIndices <= (int)m_LockedNumIndices );
		int unlockBytes = ( m_bDynamic ? numIndices : m_LockedNumIndices ) * IndexSize();
#else
		int unlockBytes = 0;
#endif
		ReallyUnlock( unlockBytes );
	}
#else
	if ( m_bDynamic )
	{
		Assert( (m_Position == 0) || (m_AllocationRing[m_AllocationRing.Tail()].m_iEndOffset == m_Position) );

		DynamicBufferAllocation_t LockData;
		LockData.m_Fence = Dx9Device()->GetCurrentFence();	// This isn't the correct fence, but it's all we have access to for now and it'll provide marginal safety if something goes really wrong.
		LockData.m_iStartOffset = m_Position;
		LockData.m_iEndOffset = LockData.m_iStartOffset + numIndices;
		LockData.m_iZPassIdx = ( Dx9Device()->GetDeviceState() & D3DDEVICESTATE_ZPASS_BRACKET ) ? ShaderAPI()->Get360ZPassCounter() : 0;
		Assert( (LockData.m_iStartOffset == 0) || (LockData.m_iStartOffset == m_AllocationRing[m_AllocationRing.Tail()].m_iEndOffset) );

		m_AllocationRing.AddToTail( LockData );

		void* pLockedData = m_pAllocatedMemory + (LockData.m_iStartOffset * IndexSize());

		// Always re-use the same index buffer header based on the assumption that D3D copies it off in the draw calls.
		m_pIB = &m_D3DIndexBuffer;
		XGSetIndexBufferHeader( numIndices * IndexSize(), 0, D3DFMT_INDEX16, 0, 0, m_pIB );
		XGOffsetResourceAddress( m_pIB, pLockedData );

		// Invalidate the GPU caches for this memory.
		// FIXME: Should dynamic allocations be 4k aligned?
		Dx9Device()->InvalidateGpuCache( pLockedData, numIndices * IndexSize(), 0 );
	}
	else
	{
		if ( !m_pIB )
		{
			int nBufferSize = m_IndexCount * IndexSize();
			XGSetIndexBufferHeader( nBufferSize, 0, D3DFMT_INDEX16, 0, 0, &m_D3DIndexBuffer );
			XGOffsetResourceAddress( &m_D3DIndexBuffer, m_pAllocatedMemory );
			m_pIB = &m_D3DIndexBuffer;
		}

		// Invalidate the GPU caches for this memory.
		Dx9Device()->InvalidateGpuCache( m_pAllocatedMemory, m_IndexCount * IndexSize(), 0 );
	}
#endif

	m_Position += numIndices;
	m_bLocked = false;

	m_LockedStartIndex = 0;
	m_LockedNumIndices = 0;
}
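
// Added note (not in the original header): HandleLateCreation() is the
// render-thread half of the staging path above. If the buffer was constructed
// or locked off the render thread, its contents live in m_pSysmemBuffer; this
// call creates the real D3D buffer if needed, copies the staged bytes into it,
// and frees the staging memory.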
inline void CIndexBuffer::HandleLateCreation( )
{
	if ( !m_pSysmemBuffer )
	{
		return;
	}

	if( !m_pIB )
	{
		bool bPrior = g_VBAllocTracker->TrackMeshAllocations( "HandleLateCreation" );
		Create( Dx9Device() );
		if ( !bPrior )
		{
			g_VBAllocTracker->TrackMeshAllocations( NULL );
		}
	}

	void* pWritePtr = NULL;
	const int dataToWriteBytes = ( m_Position * IndexSize() ) - m_nSysmemBufferStartBytes;
	DWORD dwFlags = D3DLOCK_NOSYSLOCK;
	if ( m_bDynamic )
		dwFlags |= ( m_bLateCreateShouldDiscard ? D3DLOCK_DISCARD : D3DLOCK_NOOVERWRITE );

	// Always clear this.
	m_bLateCreateShouldDiscard = false;

	// Don't use the Lock function, it does a bunch of stuff we don't want.
	HRESULT hr = m_pIB->Lock( m_nSysmemBufferStartBytes,
		dataToWriteBytes,
		&pWritePtr,
		dwFlags );

	// If this fails we're about to crash. Consider skipping the update and leaving
	// m_pSysmemBuffer around to try again later. (For example in case of device loss)
	Assert( SUCCEEDED( hr ) ); hr;

	memcpy( pWritePtr, m_pSysmemBuffer + m_nSysmemBufferStartBytes, dataToWriteBytes );
	ReallyUnlock( dataToWriteBytes );

	free( m_pSysmemBuffer );
	m_pSysmemBuffer = NULL;
}
// Returns the allocated size
inline int CIndexBuffer::AllocationSize() const
{
#ifdef _X360
	return m_iAllocationCount * IndexSize();
#else
	return m_IndexCount * IndexSize();
#endif
}

inline int CIndexBuffer::AllocationCount() const
{
#ifdef _X360
	return m_iAllocationCount;
#else
	return m_IndexCount;
#endif
}

#ifdef _WIN32
#pragma warning (default:4189)
#endif

#include "tier0/memdbgoff.h"

#endif // DYNAMICIB_H