Team Fortress 2 Source Code as on 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2191 lines
80 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. //=============================================================================
  6. #include "pch_materialsystem.h"
  7. #define MATSYS_INTERNAL
  8. #include "cmatlightmaps.h"
  9. #include "colorspace.h"
  10. #include "IHardwareConfigInternal.h"
  11. #include "cmaterialsystem.h"
  12. // NOTE: This must be the last file included!!!
  13. #include "tier0/memdbgon.h"
  14. #include "bitmap/float_bm.h"
  15. static ConVar mat_lightmap_pfms( "mat_lightmap_pfms", "0", FCVAR_MATERIAL_SYSTEM_THREAD, "Outputs .pfm files containing lightmap data for each lightmap page when a level exits." ); // Write PFM files for each lightmap page in the game directory when exiting a level
  16. #define USE_32BIT_LIGHTMAPS_ON_360 //uncomment to use 32bit lightmaps, be sure to keep this in sync with the same #define in stdshaders/lightmappedgeneric_ps2_3_x.h
  17. #ifdef _X360
  18. #define X360_USE_SIMD_LIGHTMAP
  19. #endif
  20. //-----------------------------------------------------------------------------
  21. inline IMaterialInternal* CMatLightmaps::GetCurrentMaterialInternal() const
  22. {
  23. return GetMaterialSystem()->GetRenderContextInternal()->GetCurrentMaterialInternal();
  24. }
  25. inline void CMatLightmaps::SetCurrentMaterialInternal(IMaterialInternal* pCurrentMaterial)
  26. {
  27. return GetMaterialSystem()->GetRenderContextInternal()->SetCurrentMaterialInternal( pCurrentMaterial );
  28. }
  29. inline IMaterialInternal *CMatLightmaps::GetMaterialInternal( MaterialHandle_t idx ) const
  30. {
  31. return GetMaterialSystem()->GetMaterialInternal( idx );
  32. }
  33. inline const IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal() const
  34. {
  35. return GetMaterialSystem()->GetRenderContextInternal();
  36. }
  37. inline IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal()
  38. {
  39. return GetMaterialSystem()->GetRenderContextInternal();
  40. }
  41. inline const CMaterialDict *CMatLightmaps::GetMaterialDict() const
  42. {
  43. return GetMaterialSystem()->GetMaterialDict();
  44. }
  45. inline CMaterialDict *CMatLightmaps::GetMaterialDict()
  46. {
  47. return GetMaterialSystem()->GetMaterialDict();
  48. }
  49. //-----------------------------------------------------------------------------
  50. //
  51. //-----------------------------------------------------------------------------
  52. CMatLightmaps::CMatLightmaps()
  53. {
  54. m_currentWhiteLightmapMaterial = NULL;
  55. m_pLightmapPages = NULL;
  56. m_NumLightmapPages = 0;
  57. m_numSortIDs = 0;
  58. m_nUpdatingLightmapsStackDepth = 0;
  59. m_nLockedLightmap = -1;
  60. m_pLightmapDataPtrArray = NULL;
  61. m_eLightmapsState = STATE_DEFAULT;
  62. }
  63. //-----------------------------------------------------------------------------
  64. //
  65. //-----------------------------------------------------------------------------
  66. void CMatLightmaps::Shutdown( )
  67. {
  68. // Clean up all lightmaps
  69. CleanupLightmaps();
  70. }
  71. //-----------------------------------------------------------------------------
  72. // Assign enumeration IDs to all materials
  73. //-----------------------------------------------------------------------------
  74. void CMatLightmaps::EnumerateMaterials( void )
  75. {
  76. // iterate in sorted order
  77. int id = 0;
  78. for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) )
  79. {
  80. GetMaterialInternal(i)->SetEnumerationID( id );
  81. ++id;
  82. }
  83. }
  84. //-----------------------------------------------------------------------------
  85. // Gets the maximum lightmap page size...
  86. //-----------------------------------------------------------------------------
  87. int CMatLightmaps::GetMaxLightmapPageWidth() const
  88. {
  89. // FIXME: It's unclear which we want here.
  90. // It doesn't drastically increase primitives per DrawIndexedPrimitive
  91. // call at the moment to increase it, so let's not for now.
  92. // If we're using dynamic textures though, we want bigger that's for sure.
  93. // The tradeoff here is how much memory we waste if we don't fill the lightmap
  94. // We need to go to 512x256 textures because that's the only way bumped
  95. // lighting on displacements can work given the 128x128 allowance..
  96. int nWidth = 512;
  97. if ( nWidth > HardwareConfig()->MaxTextureWidth() )
  98. nWidth = HardwareConfig()->MaxTextureWidth();
  99. return nWidth;
  100. }
  101. //-----------------------------------------------------------------------------
  102. //
  103. //-----------------------------------------------------------------------------
  104. int CMatLightmaps::GetMaxLightmapPageHeight() const
  105. {
  106. int nHeight = 256;
  107. if ( nHeight > HardwareConfig()->MaxTextureHeight() )
  108. nHeight = HardwareConfig()->MaxTextureHeight();
  109. return nHeight;
  110. }
  111. //-----------------------------------------------------------------------------
  112. // Returns the lightmap page size
  113. //-----------------------------------------------------------------------------
  114. void CMatLightmaps::GetLightmapPageSize( int lightmapPageID, int *pWidth, int *pHeight ) const
  115. {
  116. switch( lightmapPageID )
  117. {
  118. default:
  119. Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
  120. *pWidth = m_pLightmapPages[lightmapPageID].m_Width;
  121. *pHeight = m_pLightmapPages[lightmapPageID].m_Height;
  122. break;
  123. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED:
  124. *pWidth = *pHeight = 1;
  125. AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
  126. break;
  127. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE:
  128. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP:
  129. *pWidth = *pHeight = 1;
  130. break;
  131. }
  132. }
  133. //-----------------------------------------------------------------------------
  134. //
  135. //-----------------------------------------------------------------------------
  136. int CMatLightmaps::GetLightmapWidth( int lightmapPageID ) const
  137. {
  138. switch( lightmapPageID )
  139. {
  140. default:
  141. Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
  142. return m_pLightmapPages[lightmapPageID].m_Width;
  143. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED:
  144. AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
  145. return 1;
  146. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE:
  147. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP:
  148. return 1;
  149. }
  150. }
  151. //-----------------------------------------------------------------------------
  152. //
  153. //-----------------------------------------------------------------------------
  154. int CMatLightmaps::GetLightmapHeight( int lightmapPageID ) const
  155. {
  156. switch( lightmapPageID )
  157. {
  158. default:
  159. Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
  160. return m_pLightmapPages[lightmapPageID].m_Height;
  161. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED:
  162. AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
  163. return 1;
  164. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE:
  165. case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP:
  166. return 1;
  167. }
  168. }
  169. //-----------------------------------------------------------------------------
  170. // Clean up lightmap pages.
  171. //-----------------------------------------------------------------------------
  172. void CMatLightmaps::CleanupLightmaps()
  173. {
  174. if ( mat_lightmap_pfms.GetBool())
  175. {
  176. // Write PFM files containing lightmap data for this page
  177. for (int lightmap = 0; lightmap < GetNumLightmapPages(); lightmap++)
  178. {
  179. if ((NULL != m_pLightmapDataPtrArray) && (NULL != m_pLightmapDataPtrArray[lightmap]))
  180. {
  181. char szPFMFileName[MAX_PATH];
  182. sprintf(szPFMFileName, "Lightmap-Page-%d.pfm", lightmap);
  183. m_pLightmapDataPtrArray[lightmap]->WritePFM(szPFMFileName);
  184. }
  185. }
  186. }
  187. // Remove the lightmap data bitmap representations
  188. if (m_pLightmapDataPtrArray)
  189. {
  190. int i;
  191. for( i = 0; i < GetNumLightmapPages(); i++ )
  192. {
  193. delete m_pLightmapDataPtrArray[i];
  194. }
  195. delete [] m_pLightmapDataPtrArray;
  196. m_pLightmapDataPtrArray = NULL;
  197. }
  198. // delete old lightmap pages
  199. if( m_pLightmapPages )
  200. {
  201. int i;
  202. for( i = 0; i < GetNumLightmapPages(); i++ )
  203. {
  204. g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[i] );
  205. }
  206. delete [] m_pLightmapPages;
  207. m_pLightmapPages = 0;
  208. }
  209. m_NumLightmapPages = 0;
  210. }
  211. //-----------------------------------------------------------------------------
  212. // Resets the lightmap page info for each material
  213. //-----------------------------------------------------------------------------
  214. void CMatLightmaps::ResetMaterialLightmapPageInfo( void )
  215. {
  216. for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) )
  217. {
  218. IMaterialInternal *pMaterial = GetMaterialInternal(i);
  219. pMaterial->SetMinLightmapPageID( 9999 );
  220. pMaterial->SetMaxLightmapPageID( -9999 );
  221. pMaterial->SetNeedsWhiteLightmap( false );
  222. }
  223. }
  224. //-----------------------------------------------------------------------------
  225. // This is called before any lightmap allocations take place
  226. //-----------------------------------------------------------------------------
  227. void CMatLightmaps::BeginLightmapAllocation()
  228. {
  229. // delete old lightmap pages
  230. CleanupLightmaps();
  231. m_ImagePackers.RemoveAll();
  232. int i = m_ImagePackers.AddToTail();
  233. m_ImagePackers[i].Reset( 0, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );
  234. SetCurrentMaterialInternal(0);
  235. m_currentWhiteLightmapMaterial = 0;
  236. m_numSortIDs = 0;
  237. // need to set the min and max sorting id number for each material to
  238. // a default value that basically means that it hasn't been used yet.
  239. ResetMaterialLightmapPageInfo();
  240. EnumerateMaterials();
  241. }
  242. //-----------------------------------------------------------------------------
  243. // Allocates space in the lightmaps; must be called after BeginLightmapAllocation
  244. //-----------------------------------------------------------------------------
  245. int CMatLightmaps::AllocateLightmap( int width, int height,
  246. int offsetIntoLightmapPage[2],
  247. IMaterial *iMaterial )
  248. {
  249. IMaterialInternal *pMaterial = static_cast<IMaterialInternal *>( iMaterial );
  250. if ( !pMaterial )
  251. {
  252. Warning( "Programming error: CMatRenderContext::AllocateLightmap: NULL material\n" );
  253. return m_numSortIDs;
  254. }
  255. pMaterial = pMaterial->GetRealTimeVersion(); //always work with the real time versions of materials internally
  256. // material change
  257. int i;
  258. int nPackCount = m_ImagePackers.Count();
  259. if ( GetCurrentMaterialInternal() != pMaterial )
  260. {
  261. // If this happens, then we need to close out all image packers other than
  262. // the last one so as to produce as few sort IDs as possible
  263. for ( i = nPackCount - 1; --i >= 0; )
  264. {
  265. // NOTE: We *must* use the order preserving one here so the remaining one
  266. // is the last lightmap
  267. m_ImagePackers.Remove( i );
  268. --nPackCount;
  269. }
  270. // If it's not the first material, increment the sort id
  271. if (GetCurrentMaterialInternal())
  272. {
  273. m_ImagePackers[0].IncrementSortId( );
  274. ++m_numSortIDs;
  275. }
  276. SetCurrentMaterialInternal(pMaterial);
  277. // This assertion guarantees we don't see the same material twice in this loop.
  278. Assert( pMaterial->GetMinLightmapPageID( ) > pMaterial->GetMaxLightmapPageID() );
  279. // NOTE: We may not use this lightmap page, but we might
  280. // we won't know for sure until the next material is passed in.
  281. // So, for now, we're going to forcibly add the current lightmap
  282. // page to this material so the sort IDs work out correctly.
  283. GetCurrentMaterialInternal()->SetMinLightmapPageID( GetNumLightmapPages() );
  284. GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() );
  285. }
  286. // Try to add it to any of the current images...
  287. bool bAdded = false;
  288. for ( i = 0; i < nPackCount; ++i )
  289. {
  290. bAdded = m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] );
  291. if ( bAdded )
  292. break;
  293. }
  294. if ( !bAdded )
  295. {
  296. ++m_numSortIDs;
  297. i = m_ImagePackers.AddToTail();
  298. m_ImagePackers[i].Reset( m_numSortIDs, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );
  299. ++m_NumLightmapPages;
  300. if ( !m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] ) )
  301. {
  302. Error( "MaterialSystem_Interface_t::AllocateLightmap: lightmap (%dx%d) too big to fit in page (%dx%d)\n",
  303. width, height, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );
  304. }
  305. // Add this lightmap to the material...
  306. GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() );
  307. }
  308. return m_ImagePackers[i].GetSortId();
  309. }
// UNDONE: This needs testing, but it appears as though creating these textures managed
// results in huge stalls whenever they are locked for modify.
// That makes sense given the d3d docs, but these have been flagged as managed for quite some time.
// NOTE(review): nothing in the visible part of this file tests this macro -- confirm
// where (or whether) it is still consumed.
#define DYNAMIC_TEXTURES_NO_BACKING 1
  314. void CMatLightmaps::EndLightmapAllocation()
  315. {
  316. // count the last page that we were on.if it wasn't
  317. // and count the last sortID that we were on
  318. m_NumLightmapPages++;
  319. m_numSortIDs++;
  320. m_firstDynamicLightmap = m_NumLightmapPages;
  321. // UNDONE: Until we start using the separate dynamic lighting textures don't allocate them
  322. // NOTE: Enable this if we want to stop locking the base lightmaps and instead only lock update
  323. // these completely dynamic pages
  324. // m_NumLightmapPages += COUNT_DYNAMIC_LIGHTMAP_PAGES;
  325. m_dynamic.Init();
  326. // Compute the dimensions of the last lightmap
  327. int lastLightmapPageWidth, lastLightmapPageHeight;
  328. int nLastIdx = m_ImagePackers.Count();
  329. m_ImagePackers[nLastIdx - 1].GetMinimumDimensions( &lastLightmapPageWidth, &lastLightmapPageHeight );
  330. m_ImagePackers.Purge();
  331. m_pLightmapPages = new LightmapPageInfo_t[GetNumLightmapPages()];
  332. Assert( m_pLightmapPages );
  333. if ( mat_lightmap_pfms.GetBool())
  334. {
  335. // This array will be used to write PFM files full of lightmap data
  336. m_pLightmapDataPtrArray = new FloatBitMap_t*[GetNumLightmapPages()];
  337. }
  338. int i;
  339. m_LightmapPageTextureHandles.EnsureCapacity( GetNumLightmapPages() );
  340. for ( i = 0; i < GetNumLightmapPages(); i++ )
  341. {
  342. // Compute lightmap dimensions
  343. bool lastStaticLightmap = ( i == (m_firstDynamicLightmap-1));
  344. m_pLightmapPages[i].m_Width = (unsigned short)(lastStaticLightmap ? lastLightmapPageWidth : GetMaxLightmapPageWidth());
  345. m_pLightmapPages[i].m_Height = (unsigned short)(lastStaticLightmap ? lastLightmapPageHeight : GetMaxLightmapPageHeight());
  346. m_pLightmapPages[i].m_Flags = 0;
  347. AllocateLightmapTexture( i );
  348. if ( mat_lightmap_pfms.GetBool())
  349. {
  350. // Initialize the pointers to lightmap data
  351. m_pLightmapDataPtrArray[i] = NULL;
  352. }
  353. }
  354. }
  355. //-----------------------------------------------------------------------------
  356. // Allocate lightmap textures
  357. //-----------------------------------------------------------------------------
  358. void CMatLightmaps::AllocateLightmapTexture( int lightmap )
  359. {
  360. bool bUseDynamicTextures = HardwareConfig()->PreferDynamicTextures();
  361. int flags = bUseDynamicTextures ? TEXTURE_CREATE_DYNAMIC : TEXTURE_CREATE_MANAGED;
  362. m_LightmapPageTextureHandles.EnsureCount( lightmap + 1 );
  363. char debugName[256];
  364. Q_snprintf( debugName, sizeof( debugName ), "[lightmap %d]", lightmap );
  365. ImageFormat imageFormat;
  366. switch ( HardwareConfig()->GetHDRType() )
  367. {
  368. default:
  369. Assert( 0 );
  370. // fall through.
  371. case HDR_TYPE_NONE:
  372. #if !defined( _X360 )
  373. imageFormat = IMAGE_FORMAT_RGBA8888;
  374. flags |= TEXTURE_CREATE_SRGB;
  375. #else
  376. imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888;
  377. #endif
  378. break;
  379. case HDR_TYPE_INTEGER:
  380. #if !defined( _X360 )
  381. imageFormat = IMAGE_FORMAT_RGBA16161616;
  382. #else
  383. # if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
  384. imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888;
  385. # else
  386. imageFormat = IMAGE_FORMAT_LINEAR_RGBA16161616;
  387. # endif
  388. #endif
  389. break;
  390. case HDR_TYPE_FLOAT:
  391. imageFormat = IMAGE_FORMAT_RGBA16161616F;
  392. break;
  393. }
  394. switch ( m_eLightmapsState )
  395. {
  396. case STATE_DEFAULT:
  397. // Allow allocations in default state
  398. {
  399. m_LightmapPageTextureHandles[lightmap] = g_pShaderAPI->CreateTexture(
  400. GetLightmapWidth(lightmap), GetLightmapHeight(lightmap), 1,
  401. imageFormat,
  402. 1, 1, flags, debugName, TEXTURE_GROUP_LIGHTMAP ); // don't mipmap lightmaps
  403. // Load up the texture data
  404. g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );
  405. g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR );
  406. g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR );
  407. if ( !bUseDynamicTextures )
  408. {
  409. g_pShaderAPI->TexSetPriority( 1 );
  410. }
  411. // Blat out the lightmap bits
  412. InitLightmapBits( lightmap );
  413. }
  414. break;
  415. case STATE_RELEASED:
  416. // Not assigned m_LightmapPageTextureHandles[lightmap];
  417. DevMsg( "AllocateLightmapTexture(%d) in released lightmap state (STATE_RELEASED), delayed till \"Restore\".\n", lightmap );
  418. return;
  419. default:
  420. // Not assigned m_LightmapPageTextureHandles[lightmap];
  421. Warning( "AllocateLightmapTexture(%d) in unknown lightmap state (%d), skipped.\n", lightmap, m_eLightmapsState );
  422. Assert( !"AllocateLightmapTexture(?) in unknown lightmap state (?)" );
  423. return;
  424. }
  425. }
  426. int CMatLightmaps::AllocateWhiteLightmap( IMaterial *iMaterial )
  427. {
  428. IMaterialInternal *pMaterial = static_cast<IMaterialInternal *>( iMaterial );
  429. if( !pMaterial )
  430. {
  431. Warning( "Programming error: CMatRenderContext::AllocateWhiteLightmap: NULL material\n" );
  432. return m_numSortIDs;
  433. }
  434. pMaterial = pMaterial->GetRealTimeVersion(); //always work with the real time versions of materials internally
  435. if ( !m_currentWhiteLightmapMaterial || ( m_currentWhiteLightmapMaterial != pMaterial ) )
  436. {
  437. if ( !GetCurrentMaterialInternal() && !m_currentWhiteLightmapMaterial )
  438. {
  439. // don't increment if this is the very first material (ie. no lightmaps
  440. // allocated with AllocateLightmap
  441. // Assert( 0 );
  442. }
  443. else
  444. {
  445. // material change
  446. m_numSortIDs++;
  447. #if 0
  448. char buf[128];
  449. Q_snprintf( buf, sizeof( buf ), "AllocateWhiteLightmap: m_numSortIDs = %d %s\n", m_numSortIDs, pMaterial->GetName() );
  450. OutputDebugString( buf );
  451. #endif
  452. }
  453. // Warning( "%d material: \"%s\" lightmapPageID: -1\n", m_numSortIDs, pMaterial->GetName() );
  454. m_currentWhiteLightmapMaterial = pMaterial;
  455. pMaterial->SetNeedsWhiteLightmap( true );
  456. }
  457. return m_numSortIDs;
  458. }
  459. //-----------------------------------------------------------------------------
  460. // Releases/restores lightmap pages
  461. //-----------------------------------------------------------------------------
  462. void CMatLightmaps::ReleaseLightmapPages()
  463. {
  464. switch ( m_eLightmapsState )
  465. {
  466. case STATE_DEFAULT:
  467. // Allow release in default state only
  468. break;
  469. default:
  470. Warning( "ReleaseLightmapPages is expected in STATE_DEFAULT, current state = %d, discarded.\n", m_eLightmapsState );
  471. Assert( !"ReleaseLightmapPages is expected in STATE_DEFAULT" );
  472. return;
  473. }
  474. for( int i = 0; i < GetNumLightmapPages(); i++ )
  475. {
  476. g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[i] );
  477. }
  478. // We are now in released state
  479. m_eLightmapsState = STATE_RELEASED;
  480. }
  481. void CMatLightmaps::RestoreLightmapPages()
  482. {
  483. switch ( m_eLightmapsState )
  484. {
  485. case STATE_RELEASED:
  486. // Allow restore in released state only
  487. break;
  488. default:
  489. Warning( "RestoreLightmapPages is expected in STATE_RELEASED, current state = %d, discarded.\n", m_eLightmapsState );
  490. Assert( !"RestoreLightmapPages is expected in STATE_RELEASED" );
  491. return;
  492. }
  493. // Switch to default state to allow allocations
  494. m_eLightmapsState = STATE_DEFAULT;
  495. for( int i = 0; i < GetNumLightmapPages(); i++ )
  496. {
  497. AllocateLightmapTexture( i );
  498. }
  499. }
  500. //-----------------------------------------------------------------------------
  501. // This initializes the lightmap bits
  502. //-----------------------------------------------------------------------------
  503. void CMatLightmaps::InitLightmapBits( int lightmap )
  504. {
  505. VPROF_( "CMatLightmaps::InitLightmapBits", 1, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );
  506. int width = GetLightmapWidth(lightmap);
  507. int height = GetLightmapHeight(lightmap);
  508. CPixelWriter writer;
  509. g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );
  510. if ( !g_pShaderAPI->TexLock( 0, 0, 0, 0, width, height, writer ) )
  511. return;
  512. // Debug mode, make em green checkerboard
  513. if ( writer.IsUsingFloatFormat() )
  514. {
  515. for ( int j = 0; j < height; ++j )
  516. {
  517. writer.Seek( 0, j );
  518. for ( int k = 0; k < width; ++k )
  519. {
  520. #ifndef _DEBUG
  521. writer.WritePixel( 1.0f, 1.0f, 1.0f );
  522. #else // _DEBUG
  523. if( ( j + k ) & 1 )
  524. {
  525. writer.WritePixelF( 0.0f, 1.0f, 0.0f );
  526. }
  527. else
  528. {
  529. writer.WritePixelF( 0.0f, 0.0f, 0.0f );
  530. }
  531. #endif // _DEBUG
  532. }
  533. }
  534. }
  535. else
  536. {
  537. for ( int j = 0; j < height; ++j )
  538. {
  539. writer.Seek( 0, j );
  540. for ( int k = 0; k < width; ++k )
  541. {
  542. #ifndef _DEBUG
  543. // note: make this white to find multisample centroid sampling problems.
  544. // writer.WritePixel( 255, 255, 255 );
  545. writer.WritePixel( 0, 0, 0 );
  546. #else // _DEBUG
  547. if ( ( j + k ) & 1 )
  548. {
  549. writer.WritePixel( 0, 255, 0 );
  550. }
  551. else
  552. {
  553. writer.WritePixel( 0, 0, 0 );
  554. }
  555. #endif // _DEBUG
  556. }
  557. }
  558. }
  559. g_pShaderAPI->TexUnlock();
  560. }
  561. bool CMatLightmaps::LockLightmap( int lightmap )
  562. {
  563. // Warning( "locking lightmap page: %d\n", lightmap );
  564. VPROF_INCREMENT_COUNTER( "lightmap fullpage texlock", 1 );
  565. if( m_nLockedLightmap != -1 )
  566. {
  567. g_pShaderAPI->TexUnlock();
  568. }
  569. g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );
  570. int pageWidth = m_pLightmapPages[lightmap].m_Width;
  571. int pageHeight = m_pLightmapPages[lightmap].m_Height;
  572. if (!g_pShaderAPI->TexLock( 0, 0, 0, 0, pageWidth, pageHeight, m_LightmapPixelWriter ))
  573. {
  574. Assert( 0 );
  575. return false;
  576. }
  577. m_nLockedLightmap = lightmap;
  578. return true;
  579. }
  580. Vector4D ConvertLightmapColorToRGBScale( const float *lightmapColor )
  581. {
  582. Vector4D result;
  583. float fScale = lightmapColor[0];
  584. for( int i = 1; i != 3; ++i )
  585. {
  586. if( lightmapColor[i] > fScale )
  587. fScale = lightmapColor[i];
  588. }
  589. fScale = ceil( fScale * (255.0f/16.0f) ) * (16.0f/255.0f);
  590. fScale = min( fScale, 16.0f );
  591. float fInvScale = 1.0f / fScale;
  592. for( int i = 0; i != 3; ++i )
  593. {
  594. result[i] = lightmapColor[i] * fInvScale;
  595. result[i] = ceil( result[i] * 255.0f ) * (1.0f/255.0f);
  596. result[i] = min( result[i], 1.0f );
  597. }
  598. fScale /= 16.0f;
  599. result.w = fScale;
  600. return result;
  601. }
#ifdef _X360
// SIMD version of above
// input numbers from pSrc are on the domain [0..16]
// output is RGBA
// ignores contents of w channel of input
// the shader does this: rOut = Rin * Ain * 16.0f
// where Rin is [0..1], a float computed from a byte value [0..255]
// Ain is therefore the brightest channel (say R) divided by 16 and quantized
// Rin is computed from pSrc->r by dividing by Ain
// this outputs RGBa where RGB are [0..255] and a is the shader's scaling factor (also 0..255)
//
// WARNING - this code appears to be vulnerable to a compiler bug. Be very careful modifying and be
// sure to test
//
// Two candidate results are built and one is chosen per the fl4OutofRange mask:
//   - RGB * 255 * (1/quantized scale), with the quantized scale in w
//   - RGB * 255, with the constant 255/16 in w
// NOTE(review): which candidate MaskedAssign picks when the mask is set depends on
// that helper's argument order, which isn't visible here -- confirm before relying on it.
fltx4 ConvertLightmapColorToRGBScale( FLTX4 lightmapColor )
{
    static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};
    static const fltx4 FourPoint1s = { 0.1, 0.1, 0.1, 0.1 };
    static const fltx4 vTwoFiftyFiveOverSixteen = {255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f};
    // static const fltx4 vSixteenOverTwoFiftyFive = { 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f };
    // find the highest color value in lightmapColor and replicate it
    fltx4 scale = FindHighestSIMD3( lightmapColor );
    fltx4 minscale = FindLowestSIMD3( lightmapColor );
    // Mask is set when the brightest channel is >= 1 or <= 0.1 ...
    fltx4 fl4OutofRange = OrSIMD( CmpGeSIMD( scale, Four_Ones ), CmpLeSIMD( scale, FourPoint1s ) );
    // ... or when the dimmest channel is more than half the brightest
    fl4OutofRange = OrSIMD( fl4OutofRange, CmpGtSIMD( minscale, MulSIMD( Four_PointFives, scale ) ) );
    // scale needs to be divided by 16 (because the shader multiplies it by 16)
    // then mapped to 0..255 and quantized.
    scale = __vrfip(MulSIMD(scale, vTwoFiftyFiveOverSixteen)); // scale = ceil(scale * 255/16)
    fltx4 result = MulSIMD(vTwoFiftyFive, lightmapColor); // start the scale cooking on the final result
    fltx4 invScale = ReciprocalEstSIMD(scale); // invScale = (16/255)(1/scale). may be +inf
    invScale = MulSIMD(invScale, vTwoFiftyFiveOverSixteen); // take the quantizing factor back out
    // of the inverse scale (one less
    // dependent op if you do it this way)
    // scale the input channels
    // compute so the numbers are all 0..255 ints. (if one happens to
    // be 256 due to numerical error in the reciprocation, the unsigned-saturate
    // store we'll use later on will bake it back down to 255)
    result = MulSIMD(result, invScale);
    // now, output --
    // if the input color was nonzero, slip the scale into return value's w
    // component and return. If the input was zero, return zero.
    // NOTE(review): the comment above talks about zero / nonzero input, but the
    // selection below is driven by fl4OutofRange and neither candidate is a zero
    // vector -- the comment looks stale; confirm the intended behavior.
    result = MaskedAssign(
        fl4OutofRange,
        SetWSIMD( result, scale ),
        SetWSIMD( MulSIMD( lightmapColor, vTwoFiftyFive ), vTwoFiftyFiveOverSixteen ) );
    return result;
}
#endif
  649. // write bumped lightmap update to LDR 8-bit lightmap
  650. void CMatLightmaps::BumpedLightmapBitsToPixelWriter_LDR( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2,
  651. float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
  652. {
  653. const int nLightmapSize0 = pLightmapSize[0];
  654. const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
  655. const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );
  656. for( int t = 0; t < pLightmapSize[1]; t++ )
  657. {
  658. int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
  659. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  660. for( int s = 0; s < nLightmapSize0;
  661. s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
  662. {
  663. unsigned char color[4][3];
  664. ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
  665. &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
  666. &pFloatImageBump3[srcTexelOffset],
  667. color[0], color[1], color[2], color[3] );
  668. unsigned char alpha = RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f );
  669. m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], alpha );
  670. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  671. m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], alpha );
  672. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  673. m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], alpha );
  674. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  675. m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], alpha );
  676. }
  677. }
  678. if ( pfmOut )
  679. {
  680. for( int t = 0; t < pLightmapSize[1]; t++ )
  681. {
  682. int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
  683. for( int s = 0; s < nLightmapSize0; s++,srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
  684. {
  685. unsigned char color[4][3];
  686. ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
  687. &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
  688. &pFloatImageBump3[srcTexelOffset],
  689. color[0], color[1], color[2], color[3] );
  690. unsigned char alpha = RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f );
  691. // Write data to the bitmapped represenations so that PFM files can be written
  692. PixRGBAF pixelData;
  693. pixelData.Red = color[0][0];
  694. pixelData.Green = color[0][1];
  695. pixelData.Blue = color[0][2];
  696. pixelData.Alpha = alpha;
  697. pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData);
  698. }
  699. }
  700. }
  701. }
  702. // Write a bumped lightmap update to an HDR float lightmap.
  //
  // For every texel, the base image and the three bump images are handed to
  // m_LightmapPixelWriter.WritePixelNoAdvanceF() as raw float values. The four
  // writes for one texel are placed pLightmapSize[0] * GetPixelSize() bytes apart,
  // starting at the position selected by
  // Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ) for row t.
  //
  // pFloatImage             - base image; four floats are read per texel (indices +0..+3)
  // pFloatImageBump1..3     - bump images, indexed with the same texel offset; only their
  //                           first three floats per texel are read
  // pLightmapSize           - [0] = texels per row, [1] = number of rows
  // pOffsetIntoLightmapPage - x / y passed to Seek() at the start of each row
  // pfmOut                  - must be NULL; PFM output is asserted as unsupported in this mode
  //
  // On Xbox 360 this asserts and returns without writing anything.
  703. void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRF( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2,
  704. float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
  705. {
  706. if ( IsX360() )
  707. {
  708. // 360 does not support HDR float mode
  709. Assert( 0 );
  710. return;
  711. }
  712. Assert( !pfmOut ); // unsupported in this mode
  713. const int nLightmapSize0 = pLightmapSize[0];
  // Size in bytes of one row of this lightmap inside the writer. It is also the
  // distance skipped between the base / bump1 / bump2 / bump3 writes below.
  714. const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
  // Negative skip that undoes the three forward skips performed per texel and
  // then moves one pixel forward, i.e. onto the next texel of the base image.
  715. const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );
  716. for( int t = 0; t < pLightmapSize[1]; t++ )
  717. {
  // Float index of the first texel of row t; each texel occupies
  // sizeof( Vector4D ) / sizeof( float ) floats in the source images.
  718. int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
  719. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  // The loop increment advances the texel counter, rewinds the writer onto the
  // next base-image pixel and advances the source offset by one texel.
  720. for( int s = 0;
  721. s < nLightmapSize0;
  722. s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
  723. {
  // base image: RGB and alpha
  724. m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImage[srcTexelOffset], pFloatImage[srcTexelOffset+1],
  725. pFloatImage[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] );
  726. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  // bump 1: RGB from the bump image, alpha taken from the base image
  727. m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImageBump1[srcTexelOffset], pFloatImageBump1[srcTexelOffset+1],
  728. pFloatImageBump1[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] );
  729. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  // bump 2: RGB from the bump image, alpha taken from the base image
  730. m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImageBump2[srcTexelOffset], pFloatImageBump2[srcTexelOffset+1],
  731. pFloatImageBump2[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] );
  732. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  // bump 3: RGB from the bump image, alpha taken from the base image
  733. m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImageBump3[srcTexelOffset], pFloatImageBump3[srcTexelOffset+1],
  734. pFloatImageBump3[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] );
  735. }
  736. }
  737. }
  738. #ifdef _X360
  739. #pragma optimize("u", on)
  740. #endif
  741. #ifdef _X360
  742. namespace {
  743. // Pack a pixel into BGRA8888 and return it with the data packed into the w component.
  //
  // rgba - the pixel to pack, one channel per SIMD lane.
  //        NOTE(review): presumably already scaled to the 0..255 range, since the
  //        clamp below is against 255 -- confirm against callers.
  //
  // Returns the result of the __vpkd3d pack; callers read the packed 32-bit
  // pixel from the w word.
  744. FORCEINLINE fltx4 PackPixel_BGRA8888( FLTX4 rgba )
  745. {
  746. // this happens to be in an order such that we can use the handy builtin packing op
  747. // clamp to 0..255 (because it might have leaked over)
  // NOTE: only the upper bound is enforced here (MinSIMD against 255);
  // no lower clamp is applied in this function.
  748. static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};
  749. // the magic number such that when mul-accumulated against rgba,
  750. // gets us a representation 3.0 + (r)*2^-22 -- puts the bits at
  751. // the bottom of the float
  // NOTE(review): the trailing remark on the next line mentions 255.0f / 2^22,
  // but the constant actually used is 1.0f / 2^22 in every lane.
  752. static const XMVECTOR PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 255.0f / (FLOAT)(1 << 22)
  753. static const XMVECTOR Three = {3.0f, 3.0f, 3.0f, 3.0f};
  754. fltx4 N = MinSIMD(vTwoFiftyFive, rgba);
  // multiply-add: N * PackScale + Three, giving the 3.0 + x * 2^-22 form described above
  755. N = __vmaddfp(N, PackScale, Three);
  756. N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 0); // pack into w word
  757. return N;
  758. }
  759. // A small store-gather buffer used in
  760. // BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360().
  761. // The store-gather buffers. Hopefully these will live in the L1
  762. // cache, which will make writing to them, then to memory, faster
  763. // than just using __stvewx to write directly into WC memory
  764. // one noncontiguous float at a time. (If there weren't a huge
  765. // compiler bug with __stvewx in the Apr07 XDK, that might not
  766. // be the case.)
  //
  // Usage, as seen in this file: call write() and/or writeJustW() to append
  // packed pixels for the four output maps, then call commit() to flush whatever
  // has been gathered to the pixel writer. Both write functions flush
  // automatically (via commitWhenFull) when the buffer is already full on entry.
  767. struct ALIGN128 CPixelWriterStoreGather
  768. {
  769. enum {
  // one buffer row per output map (base, bump1, bump2, bump3)
  770. kRows = 4,
  // capacity of each row, in 32-bit words (one word per packed pixel)
  771. kWordsPerRow = 32,
  772. };
  773. ALIGN128 uint32 m_data[kRows][kWordsPerRow]; // four rows of bgra data, aligned to 4 cache lines. dwords so memcpy works better.
  // number of words currently held in each row (all rows fill in lockstep)
  774. int m_wordsGathered;
  775. int m_bytesBetweenWriterRows; // the number of bytes spacing the maps inside the writer from each other
  776. // if we weren't gathering, we'd SkipBytes this many between the base map, bump1, etc.
  777. // Write four rows, as SIMD registers, into the buffers.
  // Each register supplies four 32-bit words (four packed pixels) for its row.
  // Flushes first if the buffer is already full. Must not be called after
  // writeJustW() without an intervening commit, because the stores below
  // expect m_wordsGathered to be a multiple of four (asserted).
  778. inline void write( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0, FLTX4 row1, FLTX4 row2, FLTX4 row3 ) RESTRICT
  779. {
  780. // if full, commit
  781. Assert(m_wordsGathered <= kWordsPerRow);
  // NOTE: the message below says "writeJustX"; the single-word writer in this
  // struct is named writeJustW.
  782. AssertMsg((m_wordsGathered & 3) == 0, "Don't call CPixelWriterStoreGather::write after ::writeJustX"); // single-word writes have misaligned me
  783. if (m_wordsGathered >= kWordsPerRow)
  784. {
  785. commitWhenFull(pLightmapPixelWriter);
  786. }
  787. XMStoreVector4A( &m_data[0][m_wordsGathered], row0 );
  788. XMStoreVector4A( &m_data[1][m_wordsGathered], row1 );
  789. XMStoreVector4A( &m_data[2][m_wordsGathered], row2 );
  790. XMStoreVector4A( &m_data[3][m_wordsGathered], row3 );
  791. m_wordsGathered += 4 ; // four words per simd vec
  792. }
  793. // Pluck the w component out of each of the rows, and store it into the gather buffer. Don't
  794. // call the other write function after calling this.
  // Appends exactly one word (one packed pixel) to each of the four rows.
  795. inline void writeJustW( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0, FLTX4 row1, FLTX4 row2, FLTX4 row3 ) RESTRICT
  796. {
  797. // if full, commit
  798. Assert(m_wordsGathered <= kWordsPerRow);
  799. if (m_wordsGathered >= kWordsPerRow)
  800. {
  801. commitWhenFull(pLightmapPixelWriter);
  802. }
  803. // for each fltx4, splat out w (lane 3) and then use the __stvewx to store
  804. // whichever word happens to align with the float pointer through
  805. // that pointer.
  806. __stvewx(__vspltw(row0, 3), &m_data[0][m_wordsGathered], 0 );
  807. __stvewx(__vspltw(row1, 3), &m_data[1][m_wordsGathered], 0 );
  808. __stvewx(__vspltw(row2, 3), &m_data[2][m_wordsGathered], 0 );
  809. __stvewx(__vspltw(row3, 3), &m_data[3][m_wordsGathered], 0 );
  810. m_wordsGathered += 1 ; // only stored one word
  811. }
  812. // Commit my buffers to the pixelwriter's memory, and advance its
  813. // pointer.
  // Copies m_wordsGathered words from each buffer row to the writer: row 0 goes to
  // the writer's current pixel, and each following row goes m_bytesBetweenWriterRows
  // bytes after the previous one. The writer is then advanced by the number of bytes
  // copied per row and the buffer is marked empty. Does nothing when the buffer is empty.
  814. void commit(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT
  815. {
  816. if (m_wordsGathered > 0)
  817. {
  818. unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel();
  819. // we have to use memcpy because we're writing to non-cacheable memory,
  820. // but we can't even assume that the addresses we're writing to are
  821. // vector-aligned.
  822. #ifdef memcpy // if someone's overriden the intrinsic, complain
  823. #pragma error("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
  824. #endif
  825. memcpy(pWriteInto, m_data[0], m_wordsGathered * sizeof(uint32));
  826. pWriteInto += m_bytesBetweenWriterRows;
  827. memcpy(pWriteInto, m_data[1], m_wordsGathered * sizeof(uint32));
  828. pWriteInto += m_bytesBetweenWriterRows;
  829. memcpy(pWriteInto, m_data[2], m_wordsGathered * sizeof(uint32));
  830. pWriteInto += m_bytesBetweenWriterRows;
  831. memcpy(pWriteInto, m_data[3], m_wordsGathered * sizeof(uint32));
  // advance the writer past the pixels just written in the first row
  832. pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32));
  833. m_wordsGathered = 0;
  834. }
  835. }
  836. // Like commit, but the version we use when we know we're full.
  837. // Takes advantage of better compile-time generation for
  838. // memcpy.
  // Precondition (asserted): m_wordsGathered == kWordsPerRow.
  839. void commitWhenFull(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT
  840. {
  841. unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel();
  842. // we have to use memcpy because we're writing to non-cacheable memory,
  843. // but we can't even assume that the addresses we're writing to are
  844. // vector-aligned.
  845. #ifdef memcpy // if someone's overriden the intrinsic, complain
  846. #pragma error("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
  847. #endif
  848. // if we're full, use compile-time known version of
  849. // memcpy to take advantage of its ability to generate
  850. // inline code. In fact, use the dword-aligned
  851. // version so that we use the 64-bit writing funcs.
  852. Assert( m_wordsGathered == kWordsPerRow );
  853. COMPILE_TIME_ASSERT((kWordsPerRow & 3) == 0); // the number of words per row has to be a multiple of four
  854. memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[0]), kWordsPerRow * sizeof(uint32));
  855. pWriteInto += m_bytesBetweenWriterRows;
  856. memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[1]), kWordsPerRow * sizeof(uint32));
  857. pWriteInto += m_bytesBetweenWriterRows;
  858. memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[2]), kWordsPerRow * sizeof(uint32));
  859. pWriteInto += m_bytesBetweenWriterRows;
  860. memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[3]), kWordsPerRow * sizeof(uint32));
  // advance the writer past the pixels just written in the first row
  // (m_wordsGathered equals kWordsPerRow here, per the assert above)
  861. pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32));
  862. m_wordsGathered = 0;
  863. }
  864. // parameter: space between bump pages in the pixelwriter
  // Starts with an empty buffer.
  865. CPixelWriterStoreGather(int writerSizeBytes) : m_wordsGathered(0), m_bytesBetweenWriterRows(writerSizeBytes) {};
  866. };
  867. }
  868. // this is a function for specifically writing bumped BGRA lightmaps -- in order for it
  869. // to be properly scheduled, I needed to break out the inline functions. Also,
  870. // to make the write-combined memory more efficient (and work around a bug in the
  871. // April 2007 XDK), we need to store-gather our writes on the cache before blasting
  872. // them out to write-combined memory. We can't simply write from the SIMD registers
  873. // into the pixelwriter's data, because the difference between the output rows,
  874. // eg nLightmap0WriterSizeBytes, might not be a multiple of 16. Unaligned stores
  875. // to non-cacheable memory cause an alignment exception.
  //
  // pFloatImage             - base image, four floats per luxel
  // pFloatImageBump1..3     - bump images, indexed with the same luxel offset
  // pLightmapSize           - [0] = luxels per row, [1] = number of rows
  // pOffsetIntoLightmapPage - x / y passed to Seek() at the start of each row
  // pfmOut                  - must be NULL (asserted); file output is not supported here
  // m_LightmapPixelWriter   - destination writer. Despite the m_ prefix this is a
  //                           parameter of this static function; its pixel size is
  //                           asserted to be 4 bytes.
  //
  // Per row: luxels are processed in groups of four through storeGather.write(),
  // the gathered data is committed, then the remaining 0..3 luxels are processed
  // one at a time through storeGather.writeJustW() and committed again.
  876. static void BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2,
  877. float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut,
  878. CPixelWriter * RESTRICT m_LightmapPixelWriter)
  879. {
  880. AssertMsg(m_LightmapPixelWriter->GetPixelSize() == 4, "BGRA format is no longer four bytes long? This is unsupported on 360, and probably immoral as well.");
  // bytes occupied by one row of this lightmap in the writer (4 bytes per pixel);
  // handed to the store-gather buffer as the spacing between the four output maps
  881. const int nLightmap0WriterSizeBytes = pLightmapSize[0] * 4 /*m_LightmapPixelWriter->GetPixelSize()*/;
  882. // const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - 4 );
  883. // assert that 1 * 4 = 4
  884. COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);
  885. AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");
  886. // The store-gather buffers. Hopefully these will live in the L1
  887. // cache, which will make writing to them, then to memory, faster
  888. // than just using __stvewx to write directly into WC memory
  889. // one noncontiguous float at a time. (If there weren't a huge
  890. // compiler bug with __stvewx in the Apr07 XDK, that might not
  891. // be the case.)
  892. CPixelWriterStoreGather storeGather(nLightmap0WriterSizeBytes);
  893. for( int t = 0; t < pLightmapSize[1]; t++ )
  894. {
  895. #define FOUR (sizeof( Vector4D ) / sizeof( float )) // make explicit when we're incrementing by length of a 4dvec
  // float index of the first luxel of row t in each source image
  896. int srcTexelOffset = ( FOUR ) * ( 0 + t * pLightmapSize[0] );
  897. m_LightmapPixelWriter->Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  898. // Our code works best when we can process luxels in groups of four. So,
  899. // figure out how many four-luxel groups we can process,
  900. // then do them in groups, then process the remainder.
  // (row width rounded down to a multiple of four)
  901. unsigned int groupsOfFourLimit = (((unsigned int)pLightmapSize[0]) & ~3);
  902. // we want to hang on to this index when we're done with groups so we can do the remainder.
  903. unsigned int s; // counts the number of luxels processed
  904. for( s = 0;
  905. s < groupsOfFourLimit;
  906. s += 4, srcTexelOffset += 4 * ( FOUR ))
  907. {
  908. static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
  909. // the store-gather simds
  910. fltx4 outBaseMap = Four_Zeros, outBump1 = Four_Zeros, outBump2 = Four_Zeros, outBump3 = Four_Zeros;
  911. // we'll read four at a time
  912. fltx4 vFloatImage[4], vFloatImageBump1[4], vFloatImageBump2[4], vFloatImageBump3[4];
  913. // stripe these loads to cause less ERAT thrashing
  // index [k] holds luxel s + k of the respective image
  914. vFloatImage[0] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset );
  915. vFloatImage[1] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 4 );
  916. vFloatImage[2] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 8 );
  917. vFloatImage[3] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 12 );
  918. vFloatImageBump1[0] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset );
  919. vFloatImageBump1[1] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 4 );
  920. vFloatImageBump1[2] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 8 );
  921. vFloatImageBump1[3] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 12 );
  922. vFloatImageBump2[0] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset );
  923. vFloatImageBump2[1] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 4 );
  924. vFloatImageBump2[2] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 8 );
  925. vFloatImageBump2[3] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 12 );
  926. vFloatImageBump3[0] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset );
  927. vFloatImageBump3[1] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 4 );
  928. vFloatImageBump3[2] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 8 );
  929. vFloatImageBump3[3] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 12 );
  930. // perform an arcane averaging operation upon the bump map values
  931. // (todo: make this not an inline so it will schedule better -- inlining is
  932. // done by the linker, which is too late for operation scheduling)
  933. ColorSpace::LinearToBumpedLightmap( vFloatImage[0], vFloatImageBump1[0],
  934. vFloatImageBump2[0], vFloatImageBump3[0],
  935. // transform "in place":
  936. vFloatImage[0], vFloatImageBump1[0],
  937. vFloatImageBump2[0], vFloatImageBump3[0] );
  938. ColorSpace::LinearToBumpedLightmap( vFloatImage[1], vFloatImageBump1[1],
  939. vFloatImageBump2[1], vFloatImageBump3[1],
  940. // transform "in place":
  941. vFloatImage[1], vFloatImageBump1[1],
  942. vFloatImageBump2[1], vFloatImageBump3[1] );
  943. ColorSpace::LinearToBumpedLightmap( vFloatImage[2], vFloatImageBump1[2],
  944. vFloatImageBump2[2], vFloatImageBump3[2],
  945. // transform "in place":
  946. vFloatImage[2], vFloatImageBump1[2],
  947. vFloatImageBump2[2], vFloatImageBump3[2] );
  948. ColorSpace::LinearToBumpedLightmap( vFloatImage[3], vFloatImageBump1[3],
  949. vFloatImageBump2[3], vFloatImageBump3[3],
  950. // transform "in place":
  951. vFloatImage[3], vFloatImageBump1[3],
  952. vFloatImageBump2[3], vFloatImageBump3[3] );
  953. // convert each color to RGB scaled.
  954. // DO NOT! make this into a for loop. The (April07 XDK) compiler
  955. // in fact DOES NOT unroll them, and will perform very naive
  956. // scheduling if you try.
  957. // clamp to 0..16 float
  // (only the upper bound of 16 is applied here, via MinSIMD)
  958. vFloatImage[0] = MinSIMD(vFloatImage[0], vSixteen);
  959. vFloatImageBump1[0] = MinSIMD(vFloatImageBump1[0], vSixteen);
  960. vFloatImageBump2[0] = MinSIMD(vFloatImageBump2[0], vSixteen);
  961. vFloatImageBump3[0] = MinSIMD(vFloatImageBump3[0], vSixteen);
  962. vFloatImage[1] = MinSIMD(vFloatImage[1], vSixteen);
  963. vFloatImageBump1[1] = MinSIMD(vFloatImageBump1[1], vSixteen);
  964. vFloatImageBump2[1] = MinSIMD(vFloatImageBump2[1], vSixteen);
  965. vFloatImageBump3[1] = MinSIMD(vFloatImageBump3[1], vSixteen);
  966. vFloatImage[2] = MinSIMD(vFloatImage[2], vSixteen);
  967. vFloatImageBump1[2] = MinSIMD(vFloatImageBump1[2], vSixteen);
  968. vFloatImageBump2[2] = MinSIMD(vFloatImageBump2[2], vSixteen);
  969. vFloatImageBump3[2] = MinSIMD(vFloatImageBump3[2], vSixteen);
  970. vFloatImage[3] = MinSIMD(vFloatImage[3], vSixteen);
  971. vFloatImageBump1[3] = MinSIMD(vFloatImageBump1[3], vSixteen);
  972. vFloatImageBump2[3] = MinSIMD(vFloatImageBump2[3], vSixteen);
  973. vFloatImageBump3[3] = MinSIMD(vFloatImageBump3[3], vSixteen);
  974. // compute the scaling factor, place it in w, and
  975. // scale the rest by it. Obliterates whatever was
  976. // already in alpha.
  977. // This code is why it is important to not use a for
  978. // loop: you need to let the compiler keep the value
  979. // on registers (which it can't do if you use a
  980. // variable indexed array) and interleave the
  981. // inlined instructions.
  982. vFloatImage[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[0]) );
  983. vFloatImageBump1[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[0]) );
  984. vFloatImageBump2[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[0]) );
  985. vFloatImageBump3[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[0]) );
  986. vFloatImage[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[1]) );
  987. vFloatImageBump1[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[1]) );
  988. vFloatImageBump2[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[1]) );
  989. vFloatImageBump3[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[1]) );
  990. vFloatImage[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[2]) );
  991. vFloatImageBump1[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[2]) );
  992. vFloatImageBump2[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[2]) );
  993. vFloatImageBump3[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[2]) );
  994. vFloatImage[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[3]) );
  995. vFloatImageBump1[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[3]) );
  996. vFloatImageBump2[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[3]) );
  997. vFloatImageBump3[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[3]) );
  998. // Each of the registers above contains one RGBA 32-bit struct
  999. // in their w word. So, combine them such that each of the assignees
  1000. // below contains four RGBAs, in xyzw order (big-endian).
  1001. outBaseMap = __vrlimi(outBaseMap, vFloatImage[0], 8, 3 ); // insert into x
  1002. outBump1 = __vrlimi(outBump1, vFloatImageBump1[0], 8, 3 ); // insert into x
  1003. outBump2 = __vrlimi(outBump2, vFloatImageBump2[0], 8, 3 ); // insert into x
  1004. outBump3 = __vrlimi(outBump3, vFloatImageBump3[0], 8, 3 ); // insert into x
  1005. outBaseMap = __vrlimi(outBaseMap, vFloatImage[1], 4, 2 ); // insert into y
  1006. outBump1 = __vrlimi(outBump1, vFloatImageBump1[1], 4, 2 ); // insert into y
  1007. outBump2 = __vrlimi(outBump2, vFloatImageBump2[1], 4, 2 ); // insert into y
  1008. outBump3 = __vrlimi(outBump3, vFloatImageBump3[1], 4, 2 ); // insert into y
  1009. outBaseMap = __vrlimi(outBaseMap, vFloatImage[2], 2, 1 ); // insert into z
  1010. outBump1 = __vrlimi(outBump1, vFloatImageBump1[2], 2, 1 ); // insert into z
  1011. outBump2 = __vrlimi(outBump2, vFloatImageBump2[2], 2, 1 ); // insert into z
  1012. outBump3 = __vrlimi(outBump3, vFloatImageBump3[2], 2, 1 ); // insert into z
  1013. outBaseMap = __vrlimi(outBaseMap, vFloatImage[3], 1, 0 ); // insert into w
  1014. outBump1 = __vrlimi(outBump1, vFloatImageBump1[3], 1, 0 ); // insert into w
  1015. outBump2 = __vrlimi(outBump2, vFloatImageBump2[3], 1, 0 ); // insert into w
  1016. outBump3 = __vrlimi(outBump3, vFloatImageBump3[3], 1, 0 ); // insert into w
  1017. // push the data through the store-gather buffer.
  1018. storeGather.write(m_LightmapPixelWriter, outBaseMap, outBump1, outBump2, outBump3);
  1019. }
  1020. // Once here, make sure we've committed any leftover changes, then process
  1021. // the remainders singly.
  1022. storeGather.commit(m_LightmapPixelWriter);
  // Remainder loop: at most three luxels per row reach this point, because the
  // grouped loop above stops at the largest multiple of four not exceeding the row width.
  1023. for( ; // s is where it should be from the loop above
  1024. s < (unsigned int) pLightmapSize[0];
  1025. s++,
  1026. // m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel), // now handled by store-gather
  1027. srcTexelOffset += ( FOUR ))
  1028. {
  1029. static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
  // vColor[0] = base map, vColor[1..3] = bump maps 1..3 for this luxel
  1030. fltx4 vColor[4];
  1031. fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]);
  1032. fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]);
  1033. fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]);
  1034. fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);
  1035. // perform an arcane averaging operation upon the bump map values
  1036. ColorSpace::LinearToBumpedLightmap( vFloatImage,
  1037. vFloatImageBump1, vFloatImageBump2,
  1038. vFloatImageBump3,
  1039. vColor[0], vColor[1], vColor[2], vColor[3] );
  1040. // convert each color to RGB scaled.
  1041. // DO NOT! make this into a for loop. The (April07 XDK) compiler
  1042. // in fact DOES NOT unroll them, and will perform very naive
  1043. // scheduling if you try.
  1044. // clamp to 0..16 float
  1045. vColor[0] = MinSIMD(vColor[0], vSixteen);
  1046. vColor[1] = MinSIMD(vColor[1], vSixteen);
  1047. vColor[2] = MinSIMD(vColor[2], vSixteen);
  1048. vColor[3] = MinSIMD(vColor[3], vSixteen);
  1049. // compute the scaling factor, place it in w, and
  1050. // scale the rest by it. Obliterates whatever was
  1051. // already in alpha.
  1052. // This code is why it is important to not use a for
  1053. // loop: you need to let the compiler interleave the
  1054. // inlined instructions.
  1055. vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] );
  1056. vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] );
  1057. vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] );
  1058. vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] );
  // Optional debug path: recomputes the same luxel with the scalar code into
  // `color`. NOTE: the comparison against the SIMD result further down is
  // commented out, so as written this block only computes the reference values.
  1059. #ifdef X360_DOUBLECHECK_LIGHTMAPS
  1060. unsigned short color[4][4];
  1061. ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
  1062. &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
  1063. &pFloatImageBump3[srcTexelOffset],
  1064. color[0], color[1], color[2], color[3] );
  1065. unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 );
  1066. color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;
  1067. if( IsX360() )
  1068. {
  1069. for( int i = 0; i != 4; ++i )
  1070. {
  1071. Vector4D vRGBScale;
  1072. vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
  1073. vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
  1074. vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
  1075. vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
  1076. color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
  1077. color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
  1078. color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
  1079. color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
  1080. }
  1081. }
  1082. /*
  1083. for (int ii = 0; ii < 4; ++ii)
  1084. {
  1085. uint32 pack = (PackPixel_BGRA8888( vColor[ii] ).u[3]);
  1086. if (color[ii][3] != 0)
  1087. Assert( color[ii][0] == (pack & 0xFF0000) >> 16 &&
  1088. color[ii][1] == (pack & 0xFF00) >> 8 &&
  1089. color[ii][2] == (pack & 0xFF) &&
  1090. color[ii][3] == (pack & 0xFF000000) >> 24 );
  1091. }
  1092. */
  1093. #endif
  // pack each map's pixel into the w word, then append just that word to the gather buffer
  1094. vColor[0] = PackPixel_BGRA8888( vColor[0] );
  1095. vColor[1] = PackPixel_BGRA8888( vColor[1] );
  1096. vColor[2] = PackPixel_BGRA8888( vColor[2] );
  1097. vColor[3] = PackPixel_BGRA8888( vColor[3] );
  1098. storeGather.writeJustW(m_LightmapPixelWriter, vColor[0], vColor[1], vColor[2], vColor[3] );
  1099. /* // here is the old way of writing pixels:
  1100. // now we store-gather this
  1101. m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[0] );
  1102. Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[0] ).u[3] );
  1103. void * RESTRICT pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes );
  1104. m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[1], pBits );
  1105. Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[1] ).u[3] );
  1106. pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes );
  1107. m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[2], pBits );
  1108. Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[2] ).u[3] );
  1109. pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes );
  1110. m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[3], pBits );
  1111. Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[3] ).u[3] );
  1112. m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel);
  1113. */
  1114. }
  // flush the remainder luxels so the buffer is empty before the next row's Seek()
  1115. storeGather.commit(m_LightmapPixelWriter);
  1116. }
  1117. }
  1118. #endif // _X360
  1119. // write bumped lightmap update to HDR integer lightmap
  1120. void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2,
  1121. float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) RESTRICT
  1122. {
  1123. const int nLightmapSize0 = pLightmapSize[0];
  1124. const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
  1125. const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );
  1126. if( m_LightmapPixelWriter.IsUsingFloatFormat() )
  1127. {
  1128. AssertMsg(!IsX360(), "Tried to use a floating-point pixel format for lightmaps on 360, which is not supported.");
  1129. if (!IsX360())
  1130. {
  1131. for( int t = 0; t < pLightmapSize[1]; t++ )
  1132. {
  1133. int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
  1134. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1135. for( int s = 0;
  1136. s < nLightmapSize0;
  1137. s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
  1138. {
  1139. unsigned short color[4][4];
  1140. ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
  1141. &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
  1142. &pFloatImageBump3[srcTexelOffset],
  1143. color[0], color[1], color[2], color[3] );
  1144. float alpha = pFloatImage[srcTexelOffset+3];
  1145. Assert( alpha >= 0.0f && alpha <= 1.0f );
  1146. color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;
  1147. float toFloat = ( 1.0f / ( float )( 1 << 16 ) );
  1148. /* // This code is now a can't-happen, because we do not allow float formats on 360.
  1149. #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
  1150. if( IsX360() )
  1151. {
  1152. for( int i = 0; i != 4; ++i )
  1153. {
  1154. Vector4D vRGBScale;
  1155. vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
  1156. vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
  1157. vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
  1158. vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
  1159. color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
  1160. color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
  1161. color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
  1162. color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
  1163. }
  1164. toFloat = ( 1.0f / ( float )( 1 << 8 ) );
  1165. }
  1166. #endif
  1167. */
  1168. m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[0][0], toFloat * color[0][1], toFloat * color[0][2], toFloat * color[0][3] );
  1169. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1170. m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[1][0], toFloat * color[1][1], toFloat * color[1][2], toFloat * color[1][3] );
  1171. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1172. m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[2][0], toFloat * color[2][1], toFloat * color[2][2], toFloat * color[2][3] );
  1173. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1174. m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[3][0], toFloat * color[3][1], toFloat * color[3][2], toFloat * color[3][3] );
  1175. }
  1176. }
  1177. }
  1178. }
  1179. else
  1180. {
  1181. #ifndef X360_USE_SIMD_LIGHTMAP
  1182. for( int t = 0; t < pLightmapSize[1]; t++ )
  1183. {
  1184. int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
  1185. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1186. for( int s = 0;
  1187. s < nLightmapSize0;
  1188. s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
  1189. {
  1190. unsigned short color[4][4];
  1191. ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
  1192. &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
  1193. &pFloatImageBump3[srcTexelOffset],
  1194. color[0], color[1], color[2], color[3] );
  1195. unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 );
  1196. color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;
  1197. #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
  1198. if( IsX360() )
  1199. {
  1200. for( int i = 0; i != 4; ++i )
  1201. {
  1202. Vector4D vRGBScale;
  1203. vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
  1204. vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
  1205. vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
  1206. vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
  1207. color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
  1208. color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
  1209. color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
  1210. color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
  1211. }
  1212. }
  1213. #endif
  1214. m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], color[0][3] );
  1215. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1216. m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], color[1][3] );
  1217. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1218. m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], color[2][3] );
  1219. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1220. m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], color[3][3] );
  1221. // Write data to the bitmapped represenations so that PFM files can be written
  1222. if ( pfmOut )
  1223. {
  1224. PixRGBAF pixelData;
  1225. pixelData.Red = color[0][0];
  1226. pixelData.Green = color[0][1];
  1227. pixelData.Blue = color[0][2];
  1228. pixelData.Alpha = alpha;
  1229. pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData);
  1230. }
  1231. }
  1232. }
  1233. #else
  1234. // this is an optimized XBOX implementation. For a clearer
  1235. // presentation of the algorithm, see the PC implementation
  1236. // above.
  1237. // First check for the most common case, using an efficient
  1238. // branch rather than a switch:
  1239. if (m_LightmapPixelWriter.GetFormat() == IMAGE_FORMAT_LINEAR_BGRA8888)
  1240. {
  1241. // broken out into a static to make things more readable
  1242. // and be nicer to the instruction cache
  1243. BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( pFloatImage, pFloatImageBump1, pFloatImageBump2,
  1244. pFloatImageBump3, pLightmapSize, pOffsetIntoLightmapPage, pfmOut, &m_LightmapPixelWriter );
  1245. }
  1246. else
  1247. { // This case should actually never be hit -- we do not use RGBA.
  1248. for( int t = 0; t < pLightmapSize[1]; t++ )
  1249. {
  1250. // assert that 1 * 4 = 4
  1251. COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);
  1252. #define FOUR (sizeof( Vector4D ) / sizeof( float )) // in case this ever changes
  1253. int srcTexelOffset = ( FOUR ) * ( 0 + t * nLightmapSize0 );
  1254. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1255. for( int s = 0;
  1256. s < nLightmapSize0;
  1257. s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += ( FOUR ))
  1258. {
  1259. static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
  1260. fltx4 vColor[4];
  1261. fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]);
  1262. fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]);
  1263. fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]);
  1264. fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);
  1265. // perform an arcane averaging operation upon the bump map values
  1266. ColorSpace::LinearToBumpedLightmap( vFloatImage,
  1267. vFloatImageBump1, vFloatImageBump2,
  1268. vFloatImageBump3,
  1269. vColor[0], vColor[1], vColor[2], vColor[3] );
  1270. // convert each color to RGB scaled.
  1271. // DO NOT! make this into a for loop. The (April07 XDK) compiler
  1272. // in fact DOES NOT unroll them, and will perform very naive
  1273. // scheduling if you try.
  1274. // clamp to 0..16 float
  1275. vColor[0] = MinSIMD(vColor[0], vSixteen);
  1276. vColor[1] = MinSIMD(vColor[1], vSixteen);
  1277. vColor[2] = MinSIMD(vColor[2], vSixteen);
  1278. vColor[3] = MinSIMD(vColor[3], vSixteen);
  1279. // compute the scaling factor, transform the RGB,
  1280. // and place the scale in w. Obliterates whatever was
  1281. // already in alpha.
  1282. // This code is why it is important to not use a for
  1283. // loop: you need to let the compiler interleave the
  1284. // inlined instructions.
  1285. vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] );
  1286. vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] );
  1287. vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] );
  1288. vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] );
  1289. m_LightmapPixelWriter.WritePixelNoAdvance( vColor[0] );
  1290. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1291. m_LightmapPixelWriter.WritePixelNoAdvance( vColor[1] );
  1292. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1293. m_LightmapPixelWriter.WritePixelNoAdvance( vColor[2] );
  1294. m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
  1295. m_LightmapPixelWriter.WritePixelNoAdvance( vColor[3] );
  1296. AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");
  1297. // Write data to the bitmapped represenations so that PFM files can be written
  1298. if ( pfmOut )
  1299. {
  1300. Warning("**************************************************\n"
  1301. "Lightmap output to files on 360 HAS BEEN DISABLED.\n"
  1302. "A grave error has just occurred.\n"
  1303. "**************************************************\n");
  1304. DebuggerBreakIfDebugging();
  1305. /*
  1306. PixRGBAF pixelData;
  1307. pixelData.Red = color[0][0];
  1308. pixelData.Green = color[0][1];
  1309. pixelData.Blue = color[0][2];
  1310. pixelData.Alpha = alpha;
  1311. pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData);
  1312. */
  1313. }
  1314. }
  1315. }
  1316. }
  1317. #endif
  1318. }
  1319. }
  1320. void CMatLightmaps::LightmapBitsToPixelWriter_LDR( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
  1321. {
  1322. // non-HDR lightmap processing
  1323. float *pSrc = pFloatImage;
  1324. for( int t = 0; t < pLightmapSize[1]; ++t )
  1325. {
  1326. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1327. for( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
  1328. {
  1329. unsigned char color[4];
  1330. ColorSpace::LinearToLightmap( color, pSrc );
  1331. color[3] = RoundFloatToByte( pSrc[3] * 255.0f );
  1332. m_LightmapPixelWriter.WritePixel( color[0], color[1], color[2], color[3] );
  1333. if ( pfmOut )
  1334. {
  1335. // Write data to the bitmapped represenations so that PFM files can be written
  1336. PixRGBAF pixelData;
  1337. pixelData.Red = color[0];
  1338. pixelData.Green = color[1];
  1339. pixelData.Blue = color[2];
  1340. pixelData.Alpha = color[3];
  1341. pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData );
  1342. }
  1343. }
  1344. }
  1345. }
  1346. void CMatLightmaps::LightmapBitsToPixelWriter_HDRF( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
  1347. {
  1348. if ( IsX360() )
  1349. {
  1350. // 360 does not support HDR float
  1351. Assert( 0 );
  1352. return;
  1353. }
  1354. // float HDR lightmap processing
  1355. float *pSrc = pFloatImage;
  1356. for ( int t = 0; t < pLightmapSize[1]; ++t )
  1357. {
  1358. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1359. for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
  1360. {
  1361. m_LightmapPixelWriter.WritePixelF( pSrc[0], pSrc[1], pSrc[2], pSrc[3] );
  1362. }
  1363. }
  1364. }
  1365. // numbers come in on the domain [0..16]
  1366. void CMatLightmaps::LightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t * RESTRICT pfmOut )
  1367. {
  1368. #ifndef X360_USE_SIMD_LIGHTMAP
  1369. // PC code (and old, pre-SIMD xbox version -- unshippably slow)
  1370. if ( m_LightmapPixelWriter.IsUsingFloatFormat() )
  1371. {
  1372. // integer HDR lightmap processing
  1373. float *pSrc = pFloatImage;
  1374. for ( int t = 0; t < pLightmapSize[1]; ++t )
  1375. {
  1376. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1377. for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
  1378. {
  1379. int r, g, b, a;
  1380. r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
  1381. g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
  1382. b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
  1383. a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
  1384. float toFloat = ( 1.0f / ( float )( 1 << 16 ) );
  1385. #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
  1386. if( IsX360() )
  1387. {
  1388. Vector4D vRGBScale;
  1389. vRGBScale.x = r * (16.0f / 65535.0f);
  1390. vRGBScale.y = g * (16.0f / 65535.0f);
  1391. vRGBScale.z = b * (16.0f / 65535.0f);
  1392. vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
  1393. r = RoundFloatToByte( vRGBScale.x * 255.0f );
  1394. g = RoundFloatToByte( vRGBScale.y * 255.0f );
  1395. b = RoundFloatToByte( vRGBScale.z * 255.0f );
  1396. a = RoundFloatToByte( vRGBScale.w * 255.0f );
  1397. toFloat = ( 1.0f / ( float )( 1 << 8 ) );
  1398. }
  1399. #endif
  1400. Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f );
  1401. m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] );
  1402. }
  1403. }
  1404. }
  1405. else
  1406. {
  1407. // integer HDR lightmap processing
  1408. float *pSrc = pFloatImage;
  1409. for ( int t = 0; t < pLightmapSize[1]; ++t )
  1410. {
  1411. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1412. for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
  1413. {
  1414. int r, g, b, a;
  1415. r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
  1416. g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
  1417. b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
  1418. a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
  1419. #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
  1420. if( IsX360() )
  1421. {
  1422. Vector4D vRGBScale;
  1423. vRGBScale.x = r * (16.0f / 65535.0f);
  1424. vRGBScale.y = g * (16.0f / 65535.0f);
  1425. vRGBScale.z = b * (16.0f / 65535.0f);
  1426. vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
  1427. r = RoundFloatToByte( vRGBScale.x * 255.0f );
  1428. g = RoundFloatToByte( vRGBScale.y * 255.0f );
  1429. b = RoundFloatToByte( vRGBScale.z * 255.0f );
  1430. a = RoundFloatToByte( vRGBScale.w * 255.0f );
  1431. }
  1432. #endif
  1433. m_LightmapPixelWriter.WritePixel( r, g, b, a );
  1434. if ( pfmOut )
  1435. {
  1436. // Write data to the bitmapped represenations so that PFM files can be written
  1437. PixRGBAF pixelData;
  1438. pixelData.Red = pSrc[0];
  1439. pixelData.Green = pSrc[1];
  1440. pixelData.Blue = pSrc[2];
  1441. pixelData.Alpha = pSrc[3];
  1442. pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData );
  1443. }
  1444. }
  1445. }
  1446. }
  1447. #else
  1448. // XBOX360 code
  1449. if ( m_LightmapPixelWriter.IsUsingFloatFormat() )
  1450. {
  1451. if( IsX360() )
  1452. {
  1453. AssertMsg( false, "Float-format pixel writers do not exist on x360." );
  1454. }
  1455. else
  1456. { // This code is here as an example only, in case floating point
  1457. // format is restored to 360.
  1458. // integer HDR lightmap processing
  1459. float * RESTRICT pSrc = pFloatImage;
  1460. for ( int t = 0; t < pLightmapSize[1]; ++t )
  1461. {
  1462. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1463. for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
  1464. {
  1465. int r, g, b, a;
  1466. r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
  1467. g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
  1468. b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
  1469. a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
  1470. float toFloat = ( 1.0f / ( float )( 1 << 16 ) );
  1471. #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
  1472. if( IsX360() )
  1473. {
  1474. Vector4D vRGBScale;
  1475. vRGBScale.x = r * (16.0f / 65535.0f);
  1476. vRGBScale.y = g * (16.0f / 65535.0f);
  1477. vRGBScale.z = b * (16.0f / 65535.0f);
  1478. vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
  1479. r = RoundFloatToByte( vRGBScale.x * 255.0f );
  1480. g = RoundFloatToByte( vRGBScale.y * 255.0f );
  1481. b = RoundFloatToByte( vRGBScale.z * 255.0f );
  1482. a = RoundFloatToByte( vRGBScale.w * 255.0f );
  1483. toFloat = ( 1.0f / ( float )( 1 << 8 ) );
  1484. }
  1485. #endif
  1486. Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f );
  1487. m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] );
  1488. }
  1489. }
  1490. }
  1491. }
  1492. else
  1493. {
  1494. // This is the fast X360 pathway.
  1495. // integer HDR lightmap processing
  1496. float * RESTRICT pSrc = pFloatImage;
  1497. // Assert((reinterpret_cast<unsigned int>(pSrc) & 15) == 0); // 16-byte aligned?
  1498. COMPILE_TIME_ASSERT(sizeof(Vector4D)/sizeof(*pSrc) == 4); // assert that 1 * 4 = 4
  1499. #ifndef USE_32BIT_LIGHTMAPS_ON_360
  1500. #pragma error("This function only supports 32 bit lightmaps.")
  1501. #endif
  1502. // input numbers from pSrc are on the domain [0..+inf]
  1503. // we clamp them to the range [0..16]
  1504. // output is RGBA
  1505. // the shader does this: rOut = Rin * Ain * 16.0f
  1506. // where Rin is [0..1], a float computed from a byte value [0..255]
  1507. // Ain is therefore the brightest channel (say R) divided by 16 and quantized
  1508. // Rin is computed from pSrc->r by dividing by Ain
  1509. // rather than switching inside WritePixel for each different format,
  1510. // thus causing a 23-cycle pipeline clear for every pixel, we'll
  1511. // branch on the format here. That will allow us to unroll the inline
  1512. // pixel write functions differently depending on their different
  1513. // latencies.
  1514. Assert(!pfmOut); // should never happen on 360.
  1515. #ifndef ALLOW_PFM_OUTPUT_ON_360
  1516. if ( pfmOut )
  1517. {
  1518. Warning("*****************************************\n"
  1519. "Lightmap output on 360 HAS BEEN DISABLED.\n"
  1520. "A grave error has just occurred.\n"
  1521. "*****************************************\n");
  1522. }
  1523. #endif
  1524. // switch once, here, outside the loop, rather than
  1525. // switching inside each pixel. Switches are not fast
  1526. // on x360: they are usually implemented as jumps
  1527. // through function tables, which have a 24-cycle
  1528. // stall.
  1529. switch (m_LightmapPixelWriter.GetFormat())
  1530. {
  1531. // note: format names are low-order-byte first.
  1532. case IMAGE_FORMAT_RGBA8888:
  1533. case IMAGE_FORMAT_LINEAR_RGBA8888:
  1534. {
  1535. for ( int t = 0; t < pLightmapSize[1]; ++t )
  1536. {
  1537. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1538. for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
  1539. {
  1540. static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
  1541. fltx4 rgba = LoadUnalignedSIMD(pSrc);
  1542. // clamp to 0..16 float
  1543. rgba = MinSIMD(rgba, vSixteen);
  1544. // compute the scaling factor, place it in w, and
  1545. // scale the rest by it.
  1546. rgba = ConvertLightmapColorToRGBScale( rgba );
  1547. // rgba is now float 0..255 in each component
  1548. m_LightmapPixelWriter.WritePixelNoAdvance_RGBA8888(rgba);
  1549. /* // not supported on X360
  1550. if ( pfmOut )
  1551. {
  1552. // Write data to the bitmapped represenations so that PFM files can be written
  1553. PixRGBAF pixelData;
  1554. XMStoreVector4(&pixelData,rgba);
  1555. pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData );
  1556. }
  1557. */
  1558. }
  1559. }
  1560. break;
  1561. }
  1562. case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
  1563. case IMAGE_FORMAT_LINEAR_BGRA8888:
  1564. {
  1565. for ( int t = 0; t < pLightmapSize[1]; ++t )
  1566. {
  1567. m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
  1568. for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
  1569. {
  1570. static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
  1571. fltx4 rgba = LoadUnalignedSIMD(pSrc);
  1572. // clamp to 0..16 float
  1573. rgba = MinSIMD(rgba, vSixteen);
  1574. // compute the scaling factor, place it in w, and
  1575. // scale the rest by it.
  1576. rgba = ConvertLightmapColorToRGBScale( rgba );
  1577. // rgba is now float 0..255 in each component
  1578. m_LightmapPixelWriter.WritePixelNoAdvance_BGRA8888(rgba);
  1579. // forcibly advance
  1580. m_LightmapPixelWriter.SkipBytes(4);
  1581. /* // not supported on X360
  1582. if ( pfmOut )
  1583. {
  1584. // Write data to the bitmapped represenations so that PFM files can be written
  1585. PixRGBAF pixelData;
  1586. XMStoreVector4(&pixelData,rgba);
  1587. pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData );
  1588. }
  1589. */
  1590. }
  1591. }
  1592. break;
  1593. }
  1594. default:
  1595. AssertMsg1(false,"Unsupported pixel format %d while writing lightmaps!", m_LightmapPixelWriter.GetFormat() );
  1596. Warning("Unsupported pixel format used in lightmap. Lightmaps could not be downloaded.\n");
  1597. break;
  1598. }
  1599. }
  1600. #endif
  1601. }
  1602. void CMatLightmaps::BeginUpdateLightmaps( void )
  1603. {
  1604. CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal();
  1605. if ( pCallQueue )
  1606. {
  1607. pCallQueue->QueueCall( this, &CMatLightmaps::BeginUpdateLightmaps );
  1608. return;
  1609. }
  1610. m_nUpdatingLightmapsStackDepth++;
  1611. }
  1612. void CMatLightmaps::EndUpdateLightmaps( void )
  1613. {
  1614. CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal();
  1615. if ( pCallQueue )
  1616. {
  1617. pCallQueue->QueueCall( this, &CMatLightmaps::EndUpdateLightmaps );
  1618. return;
  1619. }
  1620. m_nUpdatingLightmapsStackDepth--;
  1621. Assert( m_nUpdatingLightmapsStackDepth >= 0 );
  1622. if( m_nUpdatingLightmapsStackDepth <= 0 && m_nLockedLightmap != -1 )
  1623. {
  1624. g_pShaderAPI->TexUnlock();
  1625. m_nLockedLightmap = -1;
  1626. }
  1627. }
  1628. int CMatLightmaps::AllocateDynamicLightmap( int lightmapSize[2], int *pOutOffsetIntoPage, int frameID )
  1629. {
  1630. // check frameID, fail if current
  1631. for ( int i = 0; i < COUNT_DYNAMIC_LIGHTMAP_PAGES; i++ )
  1632. {
  1633. int dynamicIndex = (m_dynamic.currentDynamicIndex + i) % COUNT_DYNAMIC_LIGHTMAP_PAGES;
  1634. int lightmapPageIndex = m_firstDynamicLightmap + dynamicIndex;
  1635. if ( m_dynamic.lightmapLockFrame[dynamicIndex] != frameID )
  1636. {
  1637. m_dynamic.lightmapLockFrame[dynamicIndex] = frameID;
  1638. m_dynamic.imagePackers[dynamicIndex].Reset( 0, m_pLightmapPages[lightmapPageIndex].m_Width, m_pLightmapPages[lightmapPageIndex].m_Height );
  1639. }
  1640. if ( m_dynamic.imagePackers[dynamicIndex].AddBlock( lightmapSize[0], lightmapSize[1], &pOutOffsetIntoPage[0], &pOutOffsetIntoPage[1] ) )
  1641. {
  1642. return lightmapPageIndex;
  1643. }
  1644. }
  1645. return -1;
  1646. }
  1647. //-----------------------------------------------------------------------------
  1648. // Updates the lightmap
  1649. //-----------------------------------------------------------------------------
  1650. void CMatLightmaps::UpdateLightmap( int lightmapPageID, int lightmapSize[2],
  1651. int offsetIntoLightmapPage[2],
  1652. float *pFloatImage, float *pFloatImageBump1,
  1653. float *pFloatImageBump2, float *pFloatImageBump3 )
  1654. {
  1655. VPROF( "CMatRenderContext::UpdateLightmap" );
  1656. bool hasBump = false;
  1657. int uSize = 1;
  1658. FloatBitMap_t *pfmOut = NULL;
  1659. if ( pFloatImageBump1 && pFloatImageBump2 && pFloatImageBump3 )
  1660. {
  1661. hasBump = true;
  1662. uSize = 4;
  1663. }
  1664. if ( lightmapPageID >= GetNumLightmapPages() || lightmapPageID < 0 )
  1665. {
  1666. Error( "MaterialSystem_Interface_t::UpdateLightmap lightmapPageID=%d out of range\n", lightmapPageID );
  1667. return;
  1668. }
  1669. bool bDynamic = IsDynamicLightmap(lightmapPageID);
  1670. if ( bDynamic )
  1671. {
  1672. int dynamicIndex = lightmapPageID-m_firstDynamicLightmap;
  1673. Assert(dynamicIndex < COUNT_DYNAMIC_LIGHTMAP_PAGES);
  1674. m_dynamic.currentDynamicIndex = (dynamicIndex + 1) % COUNT_DYNAMIC_LIGHTMAP_PAGES;
  1675. }
  1676. if ( mat_lightmap_pfms.GetBool())
  1677. {
  1678. // Allocate and initialize lightmap data that will be written to a PFM file
  1679. if (NULL == m_pLightmapDataPtrArray[lightmapPageID])
  1680. {
  1681. m_pLightmapDataPtrArray[lightmapPageID] = new FloatBitMap_t(m_pLightmapPages[lightmapPageID].m_Width, m_pLightmapPages[lightmapPageID].m_Height);
  1682. m_pLightmapDataPtrArray[lightmapPageID]->Clear(0, 0, 0, 1);
  1683. }
  1684. pfmOut = m_pLightmapDataPtrArray[lightmapPageID];
  1685. }
  1686. // NOTE: Change how the lock is taking place if you ever change how bumped
  1687. // lightmaps are put into the page. Right now, we assume that they're all
  1688. // added to the right of the original lightmap.
  1689. bool bLockSubRect;
  1690. {
  1691. VPROF_( "Locking lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); // vprof scope
  1692. bLockSubRect = m_nUpdatingLightmapsStackDepth <= 0 && !bDynamic;
  1693. if( bLockSubRect )
  1694. {
  1695. VPROF_INCREMENT_COUNTER( "lightmap subrect texlock", 1 );
  1696. g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmapPageID] );
  1697. if (!g_pShaderAPI->TexLock( 0, 0, offsetIntoLightmapPage[0], offsetIntoLightmapPage[1],
  1698. lightmapSize[0] * uSize, lightmapSize[1], m_LightmapPixelWriter ))
  1699. {
  1700. return;
  1701. }
  1702. }
  1703. else if( lightmapPageID != m_nLockedLightmap )
  1704. {
  1705. if ( !LockLightmap( lightmapPageID ) )
  1706. {
  1707. ExecuteNTimes( 10, Warning( "Failed to lock lightmap\n" ) );
  1708. return;
  1709. }
  1710. }
  1711. }
  1712. int subRectOffset[2] = {0,0};
  1713. {
  1714. // account for the part spent in math:
  1715. VPROF_( "LightmapBitsToPixelWriter", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );
  1716. if ( hasBump )
  1717. {
  1718. switch( HardwareConfig()->GetHDRType() )
  1719. {
  1720. case HDR_TYPE_NONE:
  1721. BumpedLightmapBitsToPixelWriter_LDR( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
  1722. lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut );
  1723. break;
  1724. case HDR_TYPE_INTEGER:
  1725. BumpedLightmapBitsToPixelWriter_HDRI( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
  1726. lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut );
  1727. break;
  1728. case HDR_TYPE_FLOAT:
  1729. BumpedLightmapBitsToPixelWriter_HDRF( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
  1730. lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut );
  1731. break;
  1732. }
  1733. }
  1734. else
  1735. {
  1736. switch ( HardwareConfig()->GetHDRType() )
  1737. {
  1738. case HDR_TYPE_NONE:
  1739. LightmapBitsToPixelWriter_LDR( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut );
  1740. break;
  1741. case HDR_TYPE_INTEGER:
  1742. LightmapBitsToPixelWriter_HDRI( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut );
  1743. break;
  1744. case HDR_TYPE_FLOAT:
  1745. LightmapBitsToPixelWriter_HDRF( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut );
  1746. break;
  1747. default:
  1748. Assert( 0 );
  1749. break;
  1750. }
  1751. }
  1752. }
  1753. if( bLockSubRect )
  1754. {
  1755. VPROF_( "Unlocking Lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );
  1756. g_pShaderAPI->TexUnlock();
  1757. }
  1758. }
  1759. //-----------------------------------------------------------------------------
  1760. //
  1761. //-----------------------------------------------------------------------------
  1762. int CMatLightmaps::GetNumSortIDs( void )
  1763. {
  1764. return m_numSortIDs;
  1765. }
  1766. //-----------------------------------------------------------------------------
  1767. //
  1768. //-----------------------------------------------------------------------------
  1769. void CMatLightmaps::ComputeSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha )
  1770. {
  1771. int lightmapPageID;
  1772. for ( MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) )
  1773. {
  1774. IMaterialInternal* pMaterial = GetMaterialInternal(i);
  1775. if ( pMaterial->GetMinLightmapPageID() > pMaterial->GetMaxLightmapPageID() )
  1776. {
  1777. continue;
  1778. }
  1779. // const IMaterialVar *pTransVar = pMaterial->GetMaterialProperty( MATERIAL_PROPERTY_OPACITY );
  1780. // if( ( !alpha && ( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) ||
  1781. // ( alpha && !( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) )
  1782. // {
  1783. // return true;
  1784. // }
  1785. // Warning( "sort stuff: %s %s\n", material->GetName(), bAlpha ? "alpha" : "not alpha" );
  1786. // fill in the lightmapped materials
  1787. for ( lightmapPageID = pMaterial->GetMinLightmapPageID();
  1788. lightmapPageID <= pMaterial->GetMaxLightmapPageID(); ++lightmapPageID )
  1789. {
  1790. pInfo[sortId].material = pMaterial->GetQueueFriendlyVersion();
  1791. pInfo[sortId].lightmapPageID = lightmapPageID;
  1792. #if 0
  1793. char buf[128];
  1794. Q_snprintf( buf, sizeof( buf ), "ComputeSortInfo: %s lightmapPageID: %d sortID: %d\n", pMaterial->GetName(), lightmapPageID, sortId );
  1795. OutputDebugString( buf );
  1796. #endif
  1797. ++sortId;
  1798. }
  1799. }
  1800. }
  1801. //-----------------------------------------------------------------------------
  1802. //
  1803. //-----------------------------------------------------------------------------
  1804. void CMatLightmaps::ComputeWhiteLightmappedSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha )
  1805. {
  1806. for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) )
  1807. {
  1808. IMaterialInternal* pMaterial = GetMaterialInternal(i);
  1809. // fill in the lightmapped materials that are actually used by this level
  1810. if( pMaterial->GetNeedsWhiteLightmap() &&
  1811. ( pMaterial->GetReferenceCount() > 0 ) )
  1812. {
  1813. // const IMaterialVar *pTransVar = pMaterial->GetMaterialProperty( MATERIAL_PROPERTY_OPACITY );
  1814. // if( ( !alpha && ( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) ||
  1815. // ( alpha && !( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) )
  1816. // {
  1817. // return true;
  1818. // }
  1819. pInfo[sortId].material = pMaterial->GetQueueFriendlyVersion();
  1820. if( pMaterial->GetPropertyFlag( MATERIAL_PROPERTY_NEEDS_BUMPED_LIGHTMAPS ) )
  1821. {
  1822. pInfo[sortId].lightmapPageID = MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP;
  1823. }
  1824. else
  1825. {
  1826. pInfo[sortId].lightmapPageID = MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE;
  1827. }
  1828. sortId++;
  1829. }
  1830. }
  1831. }
  1832. //-----------------------------------------------------------------------------
  1833. //
  1834. //-----------------------------------------------------------------------------
  1835. void CMatLightmaps::GetSortInfo( MaterialSystem_SortInfo_t *pSortInfoArray )
  1836. {
  1837. // sort non-alpha blended materials first
  1838. int sortId = 0;
  1839. ComputeSortInfo( pSortInfoArray, sortId, false );
  1840. ComputeWhiteLightmappedSortInfo( pSortInfoArray, sortId, false );
  1841. Assert( m_numSortIDs == sortId );
  1842. }
  1843. //-----------------------------------------------------------------------------
  1844. //
  1845. //-----------------------------------------------------------------------------
  1846. void CMatLightmaps::EnableLightmapFiltering( bool enabled )
  1847. {
  1848. int i;
  1849. for( i = 0; i < GetNumLightmapPages(); i++ )
  1850. {
  1851. g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[i] );
  1852. if( enabled )
  1853. {
  1854. g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR );
  1855. g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR );
  1856. }
  1857. else
  1858. {
  1859. g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_NEAREST );
  1860. g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_NEAREST );
  1861. }
  1862. }
  1863. }