|
|
//========== Copyright (c) Valve Corporation, All rights reserved. ========
//
// Purpose:
//
//=============================================================================
#include "pch_materialsystem.h"
#ifndef _PS3
#define MATSYS_INTERNAL
#endif
#include "cmatlightmaps.h"
#include "colorspace.h"
#include "IHardwareConfigInternal.h"
#include "cmaterialsystem.h"
// NOTE: This must be the last file included!!!
#include "tier0/memdbgon.h"
#include "bitmap/floatbitmap.h"
// Write PFM files for each lightmap page in the game directory when exiting a level.
// Read in this file by CleanupLightmaps() (to decide whether to write the files) and by
// EndLightmapAllocation() (to decide whether to allocate the per-page FloatBitMap_t pointer array).
static ConVar mat_lightmap_pfms( "mat_lightmap_pfms", "0", FCVAR_MATERIAL_SYSTEM_THREAD, "Outputs .pfm files containing lightmap data for each lightmap page when a level exits." );
// Turning off 32 bit lightmaps for Portal 2, to save shader perf --Thorsten
//#define USE_32BIT_LIGHTMAPS_ON_360 //uncomment to use 32bit lightmaps, be sure to keep this in sync with the same #define in stdshaders/lightmappedgeneric_ps2_3_x.h
#ifdef _X360
// 7LS - fixup support for lightmap alpha channel data for csm's, definitely do this when/if turning dynamic lightmaps back on
// #define X360_USE_SIMD_LIGHTMAP
#endif
//-----------------------------------------------------------------------------
// Forwards to GetCurrentMaterialInternal() on the material system's internal render context.
inline IMaterialInternal *CMatLightmaps::GetCurrentMaterialInternal() const
{
	return GetMaterialSystem()->GetRenderContextInternal()->GetCurrentMaterialInternal();
}
// Forwards to SetCurrentMaterialInternal() on the material system's internal render context.
inline void CMatLightmaps::SetCurrentMaterialInternal( IMaterialInternal *pCurrentMaterial )
{
	GetMaterialSystem()->GetRenderContextInternal()->SetCurrentMaterialInternal( pCurrentMaterial );
}
// Looks up the internal material interface for a material handle via the material system.
inline IMaterialInternal *CMatLightmaps::GetMaterialInternal( MaterialHandle_t idx ) const
{
	return GetMaterialSystem()->GetMaterialInternal( idx );
}
// Read-only access to the material system's internal render context.
inline const IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal() const
{
	return GetMaterialSystem()->GetRenderContextInternal();
}
// Mutable access to the material system's internal render context.
inline IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal()
{
	return GetMaterialSystem()->GetRenderContextInternal();
}
// Read-only access to the material system's material dictionary.
inline const CMaterialDict *CMatLightmaps::GetMaterialDict() const
{
	return GetMaterialSystem()->GetMaterialDict();
}
// Mutable access to the material system's material dictionary.
inline CMaterialDict *CMatLightmaps::GetMaterialDict()
{
	return GetMaterialSystem()->GetMaterialDict();
}
//-----------------------------------------------------------------------------
// Constructor: starts with no lightmap pages, no lock held, default state
//-----------------------------------------------------------------------------
CMatLightmaps::CMatLightmaps()
{
	// No pages or per-page PFM bitmaps exist yet
	m_pLightmapPages = NULL;
	m_pLightmapDataPtrArray = NULL;
	m_NumLightmapPages = 0;

	// Allocation bookkeeping
	m_currentWhiteLightmapMaterial = NULL;
	m_numSortIDs = 0;

	// Update / lock tracking; -1 means no lightmap page is locked
	m_nUpdatingLightmapsStackDepth = 0;
	m_nLockedLightmap = -1;

	m_eLightmapsState = STATE_DEFAULT;
}
//-----------------------------------------------------------------------------
// Shutdown: releases every lightmap page and its associated data
//-----------------------------------------------------------------------------
void CMatLightmaps::Shutdown()
{
	CleanupLightmaps();
}
//-----------------------------------------------------------------------------
// Assign enumeration IDs to all materials
//-----------------------------------------------------------------------------
void CMatLightmaps::EnumerateMaterials( void ) { // iterate in sorted order
int id = 0; for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) ) { GetMaterialInternal(i)->SetEnumerationID( id ); ++id; } }
//-----------------------------------------------------------------------------
// Gets the maximum lightmap page width: 512, clamped to the hardware's
// maximum texture width
//-----------------------------------------------------------------------------
int CMatLightmaps::GetMaxLightmapPageWidth() const
{
	// FIXME: It's unclear which we want here.
	// It doesn't drastically increase primitives per DrawIndexedPrimitive
	// call at the moment to increase it, so let's not for now.
	// If we're using dynamic textures though, we want bigger that's for sure.
	// The tradeoff here is how much memory we waste if we don't fill the lightmap

	// We need to go to 512x256 textures because that's the only way bumped
	// lighting on displacements can work given the 128x128 allowance..
	const int nPreferredWidth = 512;
	const int nHardwareLimit = HardwareConfig()->MaxTextureWidth();
	return ( nPreferredWidth > nHardwareLimit ) ? nHardwareLimit : nPreferredWidth;
}
//-----------------------------------------------------------------------------
// Gets the maximum lightmap page height: 256, clamped to the hardware's
// maximum texture height
//-----------------------------------------------------------------------------
int CMatLightmaps::GetMaxLightmapPageHeight() const
{
	const int nPreferredHeight = 256;
	const int nHardwareLimit = HardwareConfig()->MaxTextureHeight();
	return ( nPreferredHeight > nHardwareLimit ) ? nHardwareLimit : nPreferredHeight;
}
//-----------------------------------------------------------------------------
// Returns the lightmap page size through pWidth / pHeight.
// The white and white-bump pages report 1x1. The user-defined page is not
// tracked here: it asserts once and also reports 1x1.
//-----------------------------------------------------------------------------
void CMatLightmaps::GetLightmapPageSize( int lightmapPageID, int *pWidth, int *pHeight ) const
{
	if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ||
		 lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP )
	{
		*pHeight = 1;
		*pWidth = 1;
		return;
	}

	if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED )
	{
		*pHeight = 1;
		*pWidth = 1;
		AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
		return;
	}

	// Anything else must be a real page index
	Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
	*pWidth = m_pLightmapPages[lightmapPageID].m_Width;
	*pHeight = m_pLightmapPages[lightmapPageID].m_Height;
}
//-----------------------------------------------------------------------------
// Returns the width of a lightmap page.
// The white and white-bump pages are 1 wide. The user-defined page is not
// tracked here: it asserts once and reports 1.
//-----------------------------------------------------------------------------
int CMatLightmaps::GetLightmapWidth( int lightmapPageID ) const
{
	if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ||
		 lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP )
	{
		return 1;
	}

	if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED )
	{
		AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
		return 1;
	}

	// Anything else must be a real page index
	Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
	return m_pLightmapPages[lightmapPageID].m_Width;
}
//-----------------------------------------------------------------------------
// Returns the height of a lightmap page.
// The white and white-bump pages are 1 high. The user-defined page is not
// tracked here: it asserts once and reports 1.
//-----------------------------------------------------------------------------
int CMatLightmaps::GetLightmapHeight( int lightmapPageID ) const
{
	if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ||
		 lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP )
	{
		return 1;
	}

	if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED )
	{
		AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
		return 1;
	}

	// Anything else must be a real page index
	Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
	return m_pLightmapPages[lightmapPageID].m_Height;
}
//-----------------------------------------------------------------------------
// Clean up lightmap pages.
//-----------------------------------------------------------------------------
void CMatLightmaps::CleanupLightmaps() { GetMaterialSystem()->GetPaintmaps()->CleanupPaintmaps(); if ( mat_lightmap_pfms.GetBool()) { // Write PFM files containing lightmap data for this page
for (int lightmap = 0; lightmap < GetNumLightmapPages(); lightmap++) { if ((NULL != m_pLightmapDataPtrArray) && (NULL != m_pLightmapDataPtrArray[lightmap])) { char szPFMFileName[MAX_PATH];
sprintf(szPFMFileName, "Lightmap-Page-%d.pfm", lightmap); m_pLightmapDataPtrArray[lightmap]->WritePFM(szPFMFileName); } } }
// Remove the lightmap data bitmap representations
if (m_pLightmapDataPtrArray) { int i; for( i = 0; i < GetNumLightmapPages(); i++ ) { delete m_pLightmapDataPtrArray[i]; }
delete [] m_pLightmapDataPtrArray; m_pLightmapDataPtrArray = NULL; }
// delete old lightmap pages
if( m_pLightmapPages ) { int i; for( i = 0; i < GetNumLightmapPages(); i++ ) { g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[i] ); } delete [] m_pLightmapPages; m_pLightmapPages = 0; }
m_NumLightmapPages = 0; }
//-----------------------------------------------------------------------------
// Resets the lightmap page info for each material
//-----------------------------------------------------------------------------
void CMatLightmaps::ResetMaterialLightmapPageInfo( void ) { for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) ) { IMaterialInternal *pMaterial = GetMaterialInternal(i); pMaterial->SetMinLightmapPageID( 9999 ); pMaterial->SetMaxLightmapPageID( -9999 ); pMaterial->SetNeedsWhiteLightmap( false ); } }
//-----------------------------------------------------------------------------
// This is called before any lightmap allocations take place
//-----------------------------------------------------------------------------
void CMatLightmaps::BeginLightmapAllocation() { // we clean up lightmaps on console right before we load the next map
if ( IsPC() ) { CleanupLightmaps(); }
m_ImagePackers.RemoveAll(); int i = m_ImagePackers.AddToTail(); m_ImagePackers[i].Reset( 0, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );
SetCurrentMaterialInternal(0); m_currentWhiteLightmapMaterial = 0; m_numSortIDs = 0;
// need to set the min and max sorting id number for each material to
// a default value that basically means that it hasn't been used yet.
ResetMaterialLightmapPageInfo();
EnumerateMaterials(); }
//-----------------------------------------------------------------------------
// Allocates space in the lightmaps; must be called after BeginLightmapAllocation
//-----------------------------------------------------------------------------
int CMatLightmaps::AllocateLightmap( int width, int height, int offsetIntoLightmapPage[2], IMaterial *iMaterial ) { IMaterialInternal *pMaterial = static_cast<IMaterialInternal *>( iMaterial ); if ( !pMaterial ) { Warning( "Programming error: CMatRenderContext::AllocateLightmap: NULL material\n" ); return m_numSortIDs; } pMaterial = pMaterial->GetRealTimeVersion(); //always work with the real time versions of materials internally
// material change
int i; int nPackCount = m_ImagePackers.Count(); if ( GetCurrentMaterialInternal() != pMaterial ) { // If this happens, then we need to close out all image packers other than
// the last one so as to produce as few sort IDs as possible
for ( i = nPackCount - 1; --i >= 0; ) { // NOTE: We *must* use the order preserving one here so the remaining one
// is the last lightmap
m_ImagePackers.Remove( i ); --nPackCount; }
// If it's not the first material, increment the sort id
if (GetCurrentMaterialInternal()) { m_ImagePackers[0].IncrementSortId( ); ++m_numSortIDs; }
SetCurrentMaterialInternal(pMaterial);
// This assertion guarantees we don't see the same material twice in this loop.
Assert( pMaterial->GetMinLightmapPageID( ) > pMaterial->GetMaxLightmapPageID() );
// NOTE: We may not use this lightmap page, but we might
// we won't know for sure until the next material is passed in.
// So, for now, we're going to forcibly add the current lightmap
// page to this material so the sort IDs work out correctly.
GetCurrentMaterialInternal()->SetMinLightmapPageID( GetNumLightmapPages() ); GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() ); }
// Try to add it to any of the current images...
bool bAdded = false; for ( i = 0; i < nPackCount; ++i ) { bAdded = m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] ); if ( bAdded ) break; }
if ( !bAdded ) { ++m_numSortIDs; i = m_ImagePackers.AddToTail(); m_ImagePackers[i].Reset( m_numSortIDs, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() ); ++m_NumLightmapPages; if ( !m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] ) ) { Error( "MaterialSystem_Interface_t::AllocateLightmap: lightmap (%dx%d) too big to fit in page (%dx%d)\n", width, height, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() ); }
// Add this lightmap to the material...
GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() ); }
return m_ImagePackers[i].GetSortId(); }
// UNDONE: This needs testing, but it appears as though creating these textures managed
// results in huge stalls whenever they are locked for modify.
// That makes sense given the d3d docs, but these have been flagged as managed for quite some time.
#define DYNAMIC_TEXTURES_NO_BACKING 1
void CMatLightmaps::EndLightmapAllocation() { // count the last page that we were on.if it wasn't
// and count the last sortID that we were on
m_NumLightmapPages++; m_numSortIDs++;
m_firstDynamicLightmap = m_NumLightmapPages; // UNDONE: Until we start using the separate dynamic lighting textures don't allocate them
// NOTE: Enable this if we want to stop locking the base lightmaps and instead only lock update
// these completely dynamic pages
// m_NumLightmapPages += COUNT_DYNAMIC_LIGHTMAP_PAGES;
m_dynamic.Init();
// Compute the dimensions of the last lightmap
int lastLightmapPageWidth, lastLightmapPageHeight; int nLastIdx = m_ImagePackers.Count(); m_ImagePackers[nLastIdx - 1].GetMinimumDimensions( &lastLightmapPageWidth, &lastLightmapPageHeight ); m_ImagePackers.Purge();
m_pLightmapPages = new LightmapPageInfo_t[GetNumLightmapPages()]; Assert( m_pLightmapPages );
if ( mat_lightmap_pfms.GetBool()) { // This array will be used to write PFM files full of lightmap data
m_pLightmapDataPtrArray = new FloatBitMap_t*[GetNumLightmapPages()]; }
if( GetMaterialSystem()->GetPaintmaps()->IsEnabled() ) { GetMaterialSystem()->GetPaintmaps()->BeginPaintTextureAllocation( GetNumLightmapPages() ); }
int i; m_LightmapPageTextureHandles.EnsureCapacity( GetNumLightmapPages() ); for ( i = 0; i < GetNumLightmapPages(); i++ ) { // Compute lightmap dimensions
bool lastStaticLightmap = ( i == (m_firstDynamicLightmap-1)); m_pLightmapPages[i].m_Width = (unsigned short)(lastStaticLightmap ? lastLightmapPageWidth : GetMaxLightmapPageWidth()); m_pLightmapPages[i].m_Height = (unsigned short)(lastStaticLightmap ? lastLightmapPageHeight : GetMaxLightmapPageHeight()); m_pLightmapPages[i].m_Flags = 0;
AllocateLightmapTexture( i ); if ( GetMaterialSystem()->GetPaintmaps()->IsEnabled() ) { GetMaterialSystem()->GetPaintmaps()->AllocatePaintmap( i, GetLightmapWidth(i), GetLightmapHeight(i) ); }
if ( mat_lightmap_pfms.GetBool()) { // Initialize the pointers to lightmap data
m_pLightmapDataPtrArray[i] = NULL; } }
if( GetMaterialSystem()->GetPaintmaps()->IsEnabled() ) { GetMaterialSystem()->GetPaintmaps()->EndPaintTextureAllocation(); } }
// Cheat-flagged convar, off by default. AllocateLightmapTexture() treats lightmaps as
// dynamic textures only when this is set AND the hardware config prefers dynamic textures.
ConVar mat_dynamiclightmaps( "mat_dynamiclightmaps", "0", FCVAR_CHEAT );
//-----------------------------------------------------------------------------
// Allocate lightmap textures
//-----------------------------------------------------------------------------
void CMatLightmaps::AllocateLightmapTexture( int lightmap ) { bool bUseDynamicTextures = HardwareConfig()->PreferDynamicTextures() && mat_dynamiclightmaps.GetBool();
int flags = 0; if ( bUseDynamicTextures || IsPS3() ) // On PS3, we need the dynamic flag as a hint that we're going to update this texture incrementally in the future
{ flags |= TEXTURE_CREATE_DYNAMIC; } else { flags |= TEXTURE_CREATE_MANAGED; }
int nPreviousTextureHandles = m_LightmapPageTextureHandles.Count(); m_LightmapPageTextureHandles.EnsureCount( lightmap + 1 ); for ( int nLightmap = nPreviousTextureHandles; nLightmap <= lightmap; ++nLightmap ) { m_LightmapPageTextureHandles[ nLightmap ] = INVALID_SHADERAPI_TEXTURE_HANDLE; }
char debugName[256]; Q_snprintf( debugName, sizeof( debugName ), "[lightmap %d]", lightmap ); ImageFormat imageFormat; switch ( HardwareConfig()->GetHDRType() ) { default: Assert( 0 ); // fall through.
case HDR_TYPE_NONE: #if !defined( _X360 )
imageFormat = IMAGE_FORMAT_RGBA8888; flags |= TEXTURE_CREATE_SRGB; #else
imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888; #endif
break;
case HDR_TYPE_INTEGER: #if !defined( _X360 )
imageFormat = IMAGE_FORMAT_RGBA16161616; #else
# if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888; # else
imageFormat = IMAGE_FORMAT_LINEAR_RGBA16161616; # endif
#endif
break;
case HDR_TYPE_FLOAT: imageFormat = IMAGE_FORMAT_RGBA16161616F; break; }
#ifdef _PS3
// PS3 needs 16F textures...but the HDR_TYPE_FLOAT codepath has a lot of other baggage with it. Just lie here.
imageFormat = IMAGE_FORMAT_RGBA16161616F;
#endif // _PS3
switch ( m_eLightmapsState ) { case STATE_DEFAULT: // Allow allocations in default state
{ int iWidth = GetLightmapWidth(lightmap); int iHeight = GetLightmapHeight(lightmap);
m_LightmapPageTextureHandles[lightmap] = g_pShaderAPI->CreateTexture( iWidth, iHeight, 1, imageFormat, 1, 1, flags, debugName, TEXTURE_GROUP_LIGHTMAP ); // don't mipmap lightmaps
// Load up the texture data
g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] ); g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR ); g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR );
if ( !bUseDynamicTextures ) { g_pShaderAPI->TexSetPriority( 1 ); }
// Blat out the lightmap bits
InitLightmapBits( lightmap ); } break;
case STATE_RELEASED: // Not assigned m_LightmapPageTextureHandles[lightmap];
DevMsg( "AllocateLightmapTexture(%d) in released lightmap state (STATE_RELEASED), delayed till \"Restore\".\n", lightmap ); return;
default: // Not assigned m_LightmapPageTextureHandles[lightmap];
Warning( "AllocateLightmapTexture(%d) in unknown lightmap state (%d), skipped.\n", lightmap, m_eLightmapsState ); Assert( !"AllocateLightmapTexture(?) in unknown lightmap state (?)" ); return; } }
// Registers a material as needing the white lightmap and returns the current
// sort ID count. The count is bumped when the material differs from the last
// white-lightmap material, except for the very first material seen (no
// current material and no previous white-lightmap material).
// Returns the current sort ID count unchanged if iMaterial is NULL.
int CMatLightmaps::AllocateWhiteLightmap( IMaterial *iMaterial )
{
	IMaterialInternal *pMaterial = static_cast<IMaterialInternal *>( iMaterial );
	if ( !pMaterial )
	{
		Warning( "Programming error: CMatRenderContext::AllocateWhiteLightmap: NULL material\n" );
		return m_numSortIDs;
	}

	// Always work with the real time versions of materials internally
	pMaterial = pMaterial->GetRealTimeVersion();

	if ( !m_currentWhiteLightmapMaterial || ( m_currentWhiteLightmapMaterial != pMaterial ) )
	{
		if ( !GetCurrentMaterialInternal() && !m_currentWhiteLightmapMaterial )
		{
			// don't increment if this is the very first material (ie. no lightmaps
			// allocated with AllocateLightmap
			// Assert( 0 );
		}
		else
		{
			// material change
			m_numSortIDs++;
#if 0
			char buf[128];
			Q_snprintf( buf, sizeof( buf ), "AllocateWhiteLightmap: m_numSortIDs = %d %s\n", m_numSortIDs, pMaterial->GetName() );
			OutputDebugString( buf );
#endif
		}
		// Warning( "%d material: \"%s\" lightmapPageID: -1\n", m_numSortIDs, pMaterial->GetName() );
		m_currentWhiteLightmapMaterial = pMaterial;
		pMaterial->SetNeedsWhiteLightmap( true );
	}

	return m_numSortIDs;
}
//-----------------------------------------------------------------------------
// Releases lightmap pages: deletes every page texture and releases the
// paintmaps, then enters STATE_RELEASED. Only valid from STATE_DEFAULT;
// in any other state the call warns, asserts and does nothing.
//-----------------------------------------------------------------------------
void CMatLightmaps::ReleaseLightmapPages()
{
	// Allow release in default state only
	if ( m_eLightmapsState != STATE_DEFAULT )
	{
		Warning( "ReleaseLightmapPages is expected in STATE_DEFAULT, current state = %d, discarded.\n", m_eLightmapsState );
		Assert( !"ReleaseLightmapPages is expected in STATE_DEFAULT" );
		return;
	}

	for ( int nPage = 0; nPage < GetNumLightmapPages(); ++nPage )
	{
		g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[nPage] );
	}

	GetMaterialSystem()->GetPaintmaps()->ReleasePaintmaps();

	// We are now in released state
	m_eLightmapsState = STATE_RELEASED;
}
// Restores lightmap pages after ReleaseLightmapPages(): returns to STATE_DEFAULT,
// restores paintmaps when they are enabled, and re-creates every page texture.
// Only valid from STATE_RELEASED; in any other state the call warns, asserts
// and does nothing.
void CMatLightmaps::RestoreLightmapPages()
{
	// Allow restore in released state only
	if ( m_eLightmapsState != STATE_RELEASED )
	{
		Warning( "RestoreLightmapPages is expected in STATE_RELEASED, current state = %d, discarded.\n", m_eLightmapsState );
		Assert( !"RestoreLightmapPages is expected in STATE_RELEASED" );
		return;
	}

	// Switch to default state to allow allocations
	m_eLightmapsState = STATE_DEFAULT;

	if ( GetMaterialSystem()->GetPaintmaps()->IsEnabled() )
	{
		GetMaterialSystem()->GetPaintmaps()->RestorePaintmaps( GetNumLightmapPages() );
	}

	for ( int nPage = 0; nPage < GetNumLightmapPages(); ++nPage )
	{
		AllocateLightmapTexture( nPage );
	}
}
//-----------------------------------------------------------------------------
// This initializes the lightmap bits.
// Locks the whole page and fills every texel: a green/black checkerboard in
// debug builds, a constant value otherwise. Does nothing if the lock fails.
//-----------------------------------------------------------------------------
void CMatLightmaps::InitLightmapBits( int lightmap )
{
	VPROF_( "CMatLightmaps::InitLightmapBits", 1, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );
	int width = GetLightmapWidth(lightmap);
	int height = GetLightmapHeight(lightmap);

	CPixelWriter writer;

	g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );
	if ( !g_pShaderAPI->TexLock( 0, 0, 0, 0, width, height, writer ) )
		return;

	// Debug mode, make em green checkerboard
	if ( writer.IsUsingFloatFormat() )
	{
		for ( int j = 0; j < height; ++j )
		{
			writer.Seek( 0, j );
			for ( int k = 0; k < width; ++k )
			{
#ifndef _DEBUG
				// NOTE(review): the debug branch below uses WritePixelF for this float-format
				// writer, while this call passes floats to WritePixel. Confirm which overload
				// is intended here before changing it.
				writer.WritePixel( 1.0f, 1.0f, 1.0f );
#else // _DEBUG
				if ( ( j + k ) & 1 )
				{
					writer.WritePixelF( 0.0f, 1.0f, 0.0f );
				}
				else
				{
					writer.WritePixelF( 0.0f, 0.0f, 0.0f );
				}
#endif // _DEBUG
			}
		}
	}
	else
	{
#if defined( _X360 ) && defined( _DEBUG )
		float vGreenData[4] = { 0.0f, 2.0f, 0.0f, 0.0f };
		fltx4 vGreen = LoadUnalignedSIMD( vGreenData );
#endif
		for ( int j = 0; j < height; ++j )
		{
			writer.Seek( 0, j );
			for ( int k = 0; k < width; ++k )
			{
#ifndef _DEBUG
				// note: make this white to find multisample centroid sampling problems.
				// writer.WritePixel( 255, 255, 255 );
#ifdef _X360
				{
					writer.WritePixel( Four_Zeros );
				}
#else
				{
					writer.WritePixel( 0, 0, 0 );
				}
#endif
#else // _DEBUG
#ifdef _X360
				{
					if ( ( j + k ) & 1 )
					{
						writer.WritePixel( vGreen );
					}
					else
					{
						writer.WritePixel( Four_Zeros );
					}
				}
#else
				{
					if ( ( j + k ) & 1 )
					{
						writer.WritePixel( 0, 255, 0 );
					}
					else
					{
						writer.WritePixel( 0, 0, 0 );
					}
				}
#endif // _X360
#endif // _DEBUG
			}
		}
	}

	g_pShaderAPI->TexUnlock();
}
// Locks an entire lightmap page for writing through m_LightmapPixelWriter.
// Any page that is currently locked is unlocked first.
// Returns true and records the page in m_nLockedLightmap on success; returns
// false (after asserting) if the texture lock fails.
bool CMatLightmaps::LockLightmap( int lightmap )
{
	// Warning( "locking lightmap page: %d\n", lightmap );
	VPROF_INCREMENT_COUNTER( "lightmap fullpage texlock", 1 );

	if ( m_nLockedLightmap != -1 )
	{
		g_pShaderAPI->TexUnlock();
		// The previous page is no longer locked. Record that now so the
		// bookkeeping stays correct even if the lock below fails.
		m_nLockedLightmap = -1;
	}

	g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );
	int pageWidth = m_pLightmapPages[lightmap].m_Width;
	int pageHeight = m_pLightmapPages[lightmap].m_Height;
	if ( !g_pShaderAPI->TexLock( 0, 0, 0, 0, pageWidth, pageHeight, m_LightmapPixelWriter ) )
	{
		Assert( 0 );
		return false;
	}

	m_nLockedLightmap = lightmap;
	return true;
}
// Converts a linear RGB lightmap color into an RGB + scale encoding.
//   lightmapColor - three floats (r, g, b); only the first three are read
// Returns xyz = each channel divided by the quantized scale, rounded up to a
// multiple of 1/255 and capped at 1; w = the quantized scale divided by 16.
// The scale is the brightest channel rounded up to a multiple of 16/255 and
// capped at 16.
Vector4D ConvertLightmapColorToRGBScale( const float *lightmapColor )
{
	// The brightest of the three channels drives the scale
	float flScale = lightmapColor[0];
	if ( lightmapColor[1] > flScale )
		flScale = lightmapColor[1];
	if ( lightmapColor[2] > flScale )
		flScale = lightmapColor[2];

	// Quantize the scale upwards in steps of 16/255
	flScale = ceil( flScale * (255.0f/16.0f) ) * (16.0f/255.0f);
	flScale = MIN( flScale, 16.0f );

	const float flInvScale = 1.0f / flScale;

	Vector4D result;
	for ( int nChannel = 0; nChannel < 3; ++nChannel )
	{
		float flValue = lightmapColor[nChannel] * flInvScale;
		flValue = ceil( flValue * 255.0f ) * (1.0f/255.0f);
		result[nChannel] = MIN( flValue, 1.0f );
	}

	result.w = flScale / 16.0f;

	return result;
}
#ifdef _X360
// SIMD version of above
// input numbers from pSrc are on the domain [0..16]
// output is RGBA
// ignores contents of w channel of input
// the shader does this: rOut = Rin * Ain * 16.0f
// where Rin is [0..1], a float computed from a byte value [0..255]
// Ain is therefore the brightest channel (say R) divided by 16 and quantized
// Rin is computed from pSrc->r by dividing by Ain
// this outputs RGBa where RGB are [0..255] and a is the shader's scaling factor (also 0..255)
//
// WARNING - this code appears to be vulnerable to a compiler bug. Be very careful modifying and be
// sure to test
// (The statement order below is deliberately left exactly as it was.)
fltx4 ConvertLightmapColorToRGBScale( FLTX4 lightmapColor )
{
	static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};
	static const fltx4 FourPoint1s = { 0.1, 0.1, 0.1, 0.1 };
	static const fltx4 vTwoFiftyFiveOverSixteen = {255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f};
	// static const fltx4 vSixteenOverTwoFiftyFive = { 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f };

	// find the highest color value in lightmapColor and replicate it
	fltx4 scale = FindHighestSIMD3( lightmapColor );
	fltx4 minscale = FindLowestSIMD3( lightmapColor );

	// Mask is set when the brightest channel is >= 1 or <= 0.1, or when the
	// dimmest channel is more than half the brightest.
	fltx4 fl4OutofRange = OrSIMD( CmpGeSIMD( scale, Four_Ones ), CmpLeSIMD( scale, FourPoint1s ) );
	fl4OutofRange = OrSIMD( fl4OutofRange, CmpGtSIMD( minscale, MulSIMD( Four_PointFives, scale ) ) );

	// scale needs to be divided by 16 (because the shader multiplies it by 16)
	// then mapped to 0..255 and quantized.
	scale = __vrfip(MulSIMD(scale, vTwoFiftyFiveOverSixteen)); // scale = ceil(scale * 255/16)
	fltx4 result = MulSIMD(vTwoFiftyFive, lightmapColor); // start the scale cooking on the final result
	fltx4 invScale = ReciprocalEstSIMD(scale); // invScale = (16/255)(1/scale). may be +inf
	invScale = MulSIMD(invScale, vTwoFiftyFiveOverSixteen); // take the quantizing factor back out
															// of the inverse scale (one less
															// dependent op if you do it this way)

	// scale the input channels
	// compute so the numbers are all 0..255 ints. (if one happens to
	// be 256 due to numerical error in the reciprocation, the unsigned-saturate
	// store we'll use later on will bake it back down to 255)
	result = MulSIMD(result, invScale);

	// now, output --
	// Two candidates are built: the scaled color with the quantized scale in w,
	// and the unscaled color * 255 with a fixed 255/16 in w.
	// NOTE(review): presumably MaskedAssign picks the first candidate where
	// fl4OutofRange is set and the second elsewhere - confirm against its definition.
	result = MaskedAssign(
		fl4OutofRange,
		SetWSIMD( result, scale ),
		SetWSIMD( MulSIMD( lightmapColor, vTwoFiftyFive ), vTwoFiftyFiveOverSixteen ) );
	return result;
}
#endif
// write bumped lightmap update to LDR 8-bit lightmap
//   pFloatImage           - source texels for the base (non-bumped) lightmap
//   pFloatImageBump1..3   - source texels for the three bump lightmaps
//   pLightmapSize         - [0] = width, [1] = height of one lightmap, in texels
//   pOffsetIntoLightmapPage - x/y of the base lightmap within the page
//   pfmOut                - optional bitmap that also receives the base lightmap; may be NULL
// Source arrays are indexed in steps of sizeof(Vector4D)/sizeof(float) floats per texel.
// Writes go through m_LightmapPixelWriter; NOTE(review): presumably the caller has already
// locked the page (see LockLightmap) - confirm against callers.
void CMatLightmaps::BumpedLightmapBitsToPixelWriter_LDR( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2, float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { const int nLightmapSize0 = pLightmapSize[0]; const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize(); const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );
// nLightmap0WriterSizeBytes is the byte width of one lightmap row; the four images are written
// that many bytes apart from each other. nRewindToNextPixel undoes the three skips made per
// texel and steps forward one pixel, landing on the next texel of the base image.
// One pass per row: seek to the start of this row of the base image.
for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
// The loop increment rewinds the writer to the next base texel and advances the source offset by one texel.
for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { unsigned char color[4][4];
// color[0] is the base image, color[1..3] the three bump images; this fills the RGB bytes.
ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] );
// Alpha: either computed per image from the four source alphas, or the base image's alpha
// (scaled to a byte) copied to all four.
if ( HardwareConfig()->GetCSMAccurateBlending() ) { ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3], &pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3], &pFloatImageBump3[srcTexelOffset + 3], &color[0][3], &color[1][3], &color[2][3], &color[3][3] ); } else { unsigned char alpha = RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f ); color[0][3] = alpha; color[1][3] = alpha; color[2][3] = alpha; color[3][3] = alpha; }
// Write the base texel, then each bump texel one lightmap-width further along.
m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], color[0][3] );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], color[1][3] );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], color[2][3] );
// Second pass (only when pfmOut is given): recompute the colors and store the base image's
// RGB bytes and the plain base alpha byte into pfmOut at the same page coordinates.
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], color[3][3] ); } } if ( pfmOut ) { for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); for( int s = 0; s < nLightmapSize0; s++,srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { unsigned char color[4][4];
ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] );
unsigned char alpha = RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f ); // Write data to the bitmapped representations so that PFM files can be written
PixRGBAF pixelData; pixelData.Red = color[0][0]; pixelData.Green = color[0][1]; pixelData.Blue = color[0][2]; pixelData.Alpha = alpha; pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData); } }
} }
// write bumped lightmap update to HDR float lightmap
//   pFloatImage           - source texels for the base (non-bumped) lightmap
//   pFloatImageBump1..3   - source texels for the three bump lightmaps
//   pLightmapSize         - [0] = width, [1] = height of one lightmap, in texels
//   pOffsetIntoLightmapPage - x/y of the base lightmap within the page
//   pfmOut                - must be NULL; PFM capture is unsupported in this mode (asserted)
// On Xbox 360 this asserts and returns without writing anything.
// Source arrays are indexed in steps of sizeof(Vector4D)/sizeof(float) floats per texel.
void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRF( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2, float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { if ( IsX360() ) { // 360 does not support HDR float mode
Assert( 0 ); return; }
Assert( !pfmOut ); // unsupported in this mode
// nLightmap0WriterSizeBytes is the byte width of one lightmap row; the four images are written
// that many bytes apart from each other. nRewindToNextPixel undoes the three skips made per
// texel and steps forward one pixel, landing on the next texel of the base image.
const int nLightmapSize0 = pLightmapSize[0]; const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize(); const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );
// One pass per row: seek to the start of this row of the base image.
for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
// if it's anything but 4 x float16 on a PPC...
// (The condition below is commented out, so the block that follows it always runs.)
/*
// The 'else' path uses ConvertFourFloatsTo16BitsAtOnce which is entirely broken
// so we need to always use the main path.
if ( !IsGameConsole() || !(m_LightmapPixelWriter.GetPixelSize() == 4*sizeof(unsigned short)) || !(m_LightmapPixelWriter.IsUsing16BitFloatFormat()) )*/ { for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { float color[4][4];
// [mariod] - LinearToBumpedLightmap() was entirely missing in the float path as of September '11
// looks like this only affected PS3 (PC/X360 use linear 16bit tex formats)
// color[0] is the base image, color[1..3] the three bump images; this fills the RGB floats.
ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] );
// Alpha: either computed per image from the four source alphas, or the base image's alpha
// copied unchanged to all four.
if ( HardwareConfig()->GetCSMAccurateBlending() ) { ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3], &pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3], &pFloatImageBump3[srcTexelOffset + 3], &color[0][3], &color[1][3], &color[2][3], &color[3][3] ); } else { float alpha = pFloatImage[srcTexelOffset+3]; color[0][3] = alpha; color[1][3] = alpha; color[2][3] = alpha; color[3][3] = alpha; }
// Write the base texel, then each bump texel one lightmap-width further along.
m_LightmapPixelWriter.WritePixelNoAdvanceF( color[0][0], color[0][1], color[0][2], color[0][3] );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( color[1][0], color[1][1], color[1][2], color[1][3] );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( color[2][0], color[2][1], color[2][2], color[2][3] );
// Everything from the comment opener at the end of the next line to the closing
// marker on the last line is a disabled alternate path, kept for reference only.
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( color[3][0], color[3][1], color[3][2], color[3][3] ); } } /*
else // use a faster technique on PPC cores for float16 lightmaps, that's not so branchy and load-hit-store-y
{ for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) {
float color[4][4];
// [mariod] - LinearToBumpedLightmap() was entirely missing in the float path as of September '11
// looks like this only affected PS3 (PC/X360 use linear 16bit tex formats)
ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] );
float alpha = pFloatImage[srcTexelOffset+3];
float16::ConvertFourFloatsTo16BitsAtOnce( (float16*) m_LightmapPixelWriter.GetCurrentPixel(), &color[0][0], &color[0][1], &color[0][2], &alpha );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); float16::ConvertFourFloatsTo16BitsAtOnce( (float16*) m_LightmapPixelWriter.GetCurrentPixel(), &color[1][0], &color[1][1], &color[1][2], &alpha );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); float16::ConvertFourFloatsTo16BitsAtOnce( (float16*) m_LightmapPixelWriter.GetCurrentPixel(), &color[2][0], &color[2][1], &color[2][2], &alpha );
m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); float16::ConvertFourFloatsTo16BitsAtOnce( (float16*) m_LightmapPixelWriter.GetCurrentPixel(), &color[3][0], &color[3][1], &color[3][2], &alpha ); } } */ } }
#ifdef _X360
#pragma optimize("u", on)
#endif
#ifdef _X360
namespace { // pack a pixel into BGRA8888 and return it with the data packed into the w component
FORCEINLINE fltx4 PackPixel_BGRA8888( FLTX4 rgba ) { // this happens to be in an order such that we can use the handy builtin packing op
// clamp to 0..255 (coz it might have leaked over)
static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};
// the magic number such that when mul-accummulated against rbga,
// gets us a representation 3.0 + (r)*2^-22 -- puts the bits at
// the bottom of the float
static const XMVECTOR PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 255.0f / (FLOAT)(1 << 22)
static const XMVECTOR Three = {3.0f, 3.0f, 3.0f, 3.0f};
fltx4 N = MinSIMD(vTwoFiftyFive, rgba);
N = __vmaddfp(N, PackScale, Three); N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 0); // pack into w word
return N; }
// A small store-gather buffer used in the
// BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360().
// The store-gather buffers. Hopefully these will live in the L1
// cache, which will make writing to them, then to memory, faster
// than just using __stvewx to write directly into WC memory
// one noncontiguous float at a time. (If there weren't a huge
// compiler bug with __stvewx in the Apr07 XDK, that might not
// be the case.)
struct ALIGN128 CPixelWriterStoreGather { enum { kRows = 4, kWordsPerRow = 32, };
ALIGN128 uint32 m_data[kRows][kWordsPerRow]; // four rows of bgra data, aligned to 4 cache lines. dwords so memcpy works better.
int m_wordsGathered; int m_bytesBetweenWriterRows; // the number of bytes spacing the maps inside the writer from each other
// if we weren't gathering, we'd SkipBytes this many between the base map, bump1, etc.
// write four rows, as SIMD registers, into the buffers
inline void write( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0, FLTX4 row1, FLTX4 row2, FLTX4 row3 ) RESTRICT { // if full, commit
Assert(m_wordsGathered <= kWordsPerRow); AssertMsg((m_wordsGathered & 3) == 0, "Don't call CPixelWriterStoreGather::write after ::writeJustX"); // single-word writes have misaligned me
if (m_wordsGathered >= kWordsPerRow) { commitWhenFull(pLightmapPixelWriter); }
XMStoreVector4A( &m_data[0][m_wordsGathered], row0 ); XMStoreVector4A( &m_data[1][m_wordsGathered], row1 ); XMStoreVector4A( &m_data[2][m_wordsGathered], row2 ); XMStoreVector4A( &m_data[3][m_wordsGathered], row3 );
m_wordsGathered += 4 ; // four words per simd vec
}
// pluck the w component out of each of the rows, and store it into the gather buffer. Don't
// call the other write function after calling this.
inline void writeJustW( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0, FLTX4 row1, FLTX4 row2, FLTX4 row3 ) RESTRICT { // if full, commit
Assert(m_wordsGathered <= kWordsPerRow); if (m_wordsGathered >= kWordsPerRow) { commitWhenFull(pLightmapPixelWriter); }
// for each fltx4, splat out x and then use the __stvewx to store
// whichever word happens to align with the float pointer through
// that pointer.
__stvewx(__vspltw(row0, 3), &m_data[0][m_wordsGathered], 0 ); __stvewx(__vspltw(row1, 3), &m_data[1][m_wordsGathered], 0 ); __stvewx(__vspltw(row2, 3), &m_data[2][m_wordsGathered], 0 ); __stvewx(__vspltw(row3, 3), &m_data[3][m_wordsGathered], 0 );
m_wordsGathered += 1 ; // only stored one word
}
// Commit my buffers to the pixelwriter's memory, and advance its
// pointer.
void commit(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT { if (m_wordsGathered > 0) { unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel(); // we have to use memcpy because we're writing to non-cacheable memory,
// but we can't even assume that the addresses we're writing to are
// vector-aligned.
#ifdef memcpy // if someone's overriden the intrinsic, complain
#pragma error("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
#endif
memcpy(pWriteInto, m_data[0], m_wordsGathered * sizeof(uint32)); pWriteInto += m_bytesBetweenWriterRows; memcpy(pWriteInto, m_data[1], m_wordsGathered * sizeof(uint32)); pWriteInto += m_bytesBetweenWriterRows; memcpy(pWriteInto, m_data[2], m_wordsGathered * sizeof(uint32)); pWriteInto += m_bytesBetweenWriterRows; memcpy(pWriteInto, m_data[3], m_wordsGathered * sizeof(uint32));
pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32)); m_wordsGathered = 0; } }
// like commit, but the version we use when we know we're full.
// Takes advantage of better compile-time generation for
// memcpy.
void commitWhenFull(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT { unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel(); // we have to use memcpy because we're writing to non-cacheable memory,
// but we can't even assume that the addresses we're writing to are
// vector-aligned.
#ifdef memcpy // if someone's overriden the intrinsic, complain
#pragma error("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
#endif
// if we're full, use compile-time known version of
// mempcy to take advantage of its ability to generate
// inline code. In fact, use the dword-aligned
// version so that we use the 64-bit writing funcs.
Assert( m_wordsGathered == kWordsPerRow ); COMPILE_TIME_ASSERT((kWordsPerRow & 3) == 0); // the number of words per row has to be a multiple of four
memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[0]), kWordsPerRow * sizeof(uint32)); pWriteInto += m_bytesBetweenWriterRows; memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[1]), kWordsPerRow * sizeof(uint32)); pWriteInto += m_bytesBetweenWriterRows; memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[2]), kWordsPerRow * sizeof(uint32)); pWriteInto += m_bytesBetweenWriterRows; memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[3]), kWordsPerRow * sizeof(uint32)); pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32)); m_wordsGathered = 0; }
// parameter: space between bump pages in the pixelwriter
CPixelWriterStoreGather(int writerSizeBytes) : m_wordsGathered(0), m_bytesBetweenWriterRows(writerSizeBytes) {};
}; }
// This is a function specifically for writing bumped BGRA lightmaps -- in order for it
// to be properly scheduled, I needed to break out the inline functions. Also,
// to make the write-combined memory more efficient (and work around a bug in the
// April 2007 XDK), we need to store-gather our writes on the cache before blasting
// them out to write-combined memory. We can't simply write from the SIMD registers
// into the pixelwriter's data, because the distance between the output rows,
// e.g. nLightmap0WriterSizeBytes, might not be a multiple of 16. Unaligned stores
// to non-cacheable memory cause an alignment exception.
// Writes a bumped lightmap (base map plus three bump maps) into a pixel writer
// whose pixels are four bytes wide, packing each luxel with
// ConvertLightmapColorToRGBScale() + PackPixel_BGRA8888().
//   pFloatImage, pFloatImageBump1..3 - source images, four floats per luxel
//   pLightmapSize                    - [0] = luxels per row, [1] = number of rows
//   pOffsetIntoLightmapPage          - x/y passed to Seek() for the first luxel of row 0
//   pfmOut                           - must be NULL; file output is asserted off here
//   m_LightmapPixelWriter            - destination writer (a parameter, despite the m_ prefix)
// Each row is processed four luxels at a time through a CPixelWriterStoreGather,
// then any remaining luxels (width not a multiple of four) are processed singly.
static void BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2, float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut, CPixelWriter * RESTRICT m_LightmapPixelWriter) { AssertMsg(m_LightmapPixelWriter->GetPixelSize() == 4, "BGRA format is no longer four bytes long? This is unsupported on 360, and probably immoral as well."); const int nLightmap0WriterSizeBytes = pLightmapSize[0] * 4 /*m_LightmapPixelWriter->GetPixelSize()*/; // const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - 4 );
// sanity check: a Vector4D is exactly four floats
COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);
AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");
// The store-gather buffers. Hopefully these will live in the L1
// cache, which will make writing to them, then to memory, faster
// than just using __stvewx to write directly into WC memory
// one noncontiguous float at a time. (If there weren't a huge
// compiler bug with __stvewx in the Apr07 XDK, that might not
// be the case.)
// The buffer is told the byte distance between the base map and each successive
// bump map inside the writer (one lightmap row's worth of pixels).
CPixelWriterStoreGather storeGather(nLightmap0WriterSizeBytes);
for( int t = 0; t < pLightmapSize[1]; t++ ) { #define FOUR (sizeof( Vector4D ) / sizeof( float )) // make explicit when we're incrementing by length of a 4dvec
// index (in floats) of the first luxel of row t in each source image
int srcTexelOffset = ( FOUR ) * ( 0 + t * pLightmapSize[0] ); m_LightmapPixelWriter->Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
// Our code works best when we can process luxels in groups of four. So,
// figure out how many four-luxel groups we can process,
// then do them in groups, then process the remainder.
unsigned int groupsOfFourLimit = (((unsigned int)pLightmapSize[0]) & ~3); // we want to hang on to this index when we're done with groups so we can do the remainder.
unsigned int s; // counts the number of luxels processed
for( s = 0; s < groupsOfFourLimit; s += 4, srcTexelOffset += 4 * ( FOUR )) { static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f}; // the store-gather simds
fltx4 outBaseMap = Four_Zeros, outBump1 = Four_Zeros, outBump2 = Four_Zeros, outBump3 = Four_Zeros; // we'll read four at a time
fltx4 vFloatImage[4], vFloatImageBump1[4], vFloatImageBump2[4], vFloatImageBump3[4];
// stripe these loads to cause less ERAT thrashing
vFloatImage[0] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset ); vFloatImage[1] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 4 ); vFloatImage[2] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 8 ); vFloatImage[3] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 12 );
vFloatImageBump1[0] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset ); vFloatImageBump1[1] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 4 ); vFloatImageBump1[2] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 8 ); vFloatImageBump1[3] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 12 );
vFloatImageBump2[0] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset ); vFloatImageBump2[1] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 4 ); vFloatImageBump2[2] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 8 ); vFloatImageBump2[3] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 12 );
vFloatImageBump3[0] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset ); vFloatImageBump3[1] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 4 ); vFloatImageBump3[2] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 8 ); vFloatImageBump3[3] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 12 );
// perform an arcane averaging operation upon the bump map values
// (todo: make this not an inline so it will schedule better -- inlining is
// done by the linker, which is too late for operation scheduling)
ColorSpace::LinearToBumpedLightmap( vFloatImage[0], vFloatImageBump1[0], vFloatImageBump2[0], vFloatImageBump3[0], // transform "in place":
vFloatImage[0], vFloatImageBump1[0], vFloatImageBump2[0], vFloatImageBump3[0] ); ColorSpace::LinearToBumpedLightmap( vFloatImage[1], vFloatImageBump1[1], vFloatImageBump2[1], vFloatImageBump3[1], // transform "in place":
vFloatImage[1], vFloatImageBump1[1], vFloatImageBump2[1], vFloatImageBump3[1] ); ColorSpace::LinearToBumpedLightmap( vFloatImage[2], vFloatImageBump1[2], vFloatImageBump2[2], vFloatImageBump3[2], // transform "in place":
vFloatImage[2], vFloatImageBump1[2], vFloatImageBump2[2], vFloatImageBump3[2] ); ColorSpace::LinearToBumpedLightmap( vFloatImage[3], vFloatImageBump1[3], vFloatImageBump2[3], vFloatImageBump3[3], // transform "in place":
vFloatImage[3], vFloatImageBump1[3], vFloatImageBump2[3], vFloatImageBump3[3] );
// convert each color to RGB scaled.
// DO NOT! make this into a for loop. The (April07 XDK) compiler
// in fact DOES NOT unroll them, and will perform very naive
// scheduling if you try.
// clamp to 0..16 float
vFloatImage[0] = MinSIMD(vFloatImage[0], vSixteen); vFloatImageBump1[0] = MinSIMD(vFloatImageBump1[0], vSixteen); vFloatImageBump2[0] = MinSIMD(vFloatImageBump2[0], vSixteen); vFloatImageBump3[0] = MinSIMD(vFloatImageBump3[0], vSixteen);
vFloatImage[1] = MinSIMD(vFloatImage[1], vSixteen); vFloatImageBump1[1] = MinSIMD(vFloatImageBump1[1], vSixteen); vFloatImageBump2[1] = MinSIMD(vFloatImageBump2[1], vSixteen); vFloatImageBump3[1] = MinSIMD(vFloatImageBump3[1], vSixteen);
vFloatImage[2] = MinSIMD(vFloatImage[2], vSixteen); vFloatImageBump1[2] = MinSIMD(vFloatImageBump1[2], vSixteen); vFloatImageBump2[2] = MinSIMD(vFloatImageBump2[2], vSixteen); vFloatImageBump3[2] = MinSIMD(vFloatImageBump3[2], vSixteen);
vFloatImage[3] = MinSIMD(vFloatImage[3], vSixteen); vFloatImageBump1[3] = MinSIMD(vFloatImageBump1[3], vSixteen); vFloatImageBump2[3] = MinSIMD(vFloatImageBump2[3], vSixteen); vFloatImageBump3[3] = MinSIMD(vFloatImageBump3[3], vSixteen);
// compute the scaling factor, place it in w, and
// scale the rest by it. Obliterates whatever was
// already in alpha.
// This code is why it is important to not use a for
// loop: you need to let the compiler keep the value
// on registers (which it can't do if you use a
// variable indexed array) and interleave the
// inlined instructions.
vFloatImage[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[0]) ); vFloatImageBump1[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[0]) ); vFloatImageBump2[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[0]) ); vFloatImageBump3[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[0]) );
vFloatImage[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[1]) ); vFloatImageBump1[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[1]) ); vFloatImageBump2[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[1]) ); vFloatImageBump3[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[1]) );
vFloatImage[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[2]) ); vFloatImageBump1[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[2]) ); vFloatImageBump2[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[2]) ); vFloatImageBump3[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[2]) );
vFloatImage[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[3]) ); vFloatImageBump1[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[3]) ); vFloatImageBump2[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[3]) ); vFloatImageBump3[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[3]) );
// Each of the registers above contains one RGBA 32-bit struct
// in their w word. So, combine them such that each of the assignees
// below contains four RGBAs, in xyzw order (big-endian).
outBaseMap = __vrlimi(outBaseMap, vFloatImage[0], 8, 3 ); // insert into x
outBump1 = __vrlimi(outBump1, vFloatImageBump1[0], 8, 3 ); // insert into x
outBump2 = __vrlimi(outBump2, vFloatImageBump2[0], 8, 3 ); // insert into x
outBump3 = __vrlimi(outBump3, vFloatImageBump3[0], 8, 3 ); // insert into x
outBaseMap = __vrlimi(outBaseMap, vFloatImage[1], 4, 2 ); // insert into y
outBump1 = __vrlimi(outBump1, vFloatImageBump1[1], 4, 2 ); // insert into y
outBump2 = __vrlimi(outBump2, vFloatImageBump2[1], 4, 2 ); // insert into y
outBump3 = __vrlimi(outBump3, vFloatImageBump3[1], 4, 2 ); // insert into y
outBaseMap = __vrlimi(outBaseMap, vFloatImage[2], 2, 1 ); // insert into z
outBump1 = __vrlimi(outBump1, vFloatImageBump1[2], 2, 1 ); // insert into z
outBump2 = __vrlimi(outBump2, vFloatImageBump2[2], 2, 1 ); // insert into z
outBump3 = __vrlimi(outBump3, vFloatImageBump3[2], 2, 1 ); // insert into z
outBaseMap = __vrlimi(outBaseMap, vFloatImage[3], 1, 0 ); // insert into w
outBump1 = __vrlimi(outBump1, vFloatImageBump1[3], 1, 0 ); // insert into w
outBump2 = __vrlimi(outBump2, vFloatImageBump2[3], 1, 0 ); // insert into w
outBump3 = __vrlimi(outBump3, vFloatImageBump3[3], 1, 0 ); // insert into w
// push the data through the store-gather buffer.
storeGather.write(m_LightmapPixelWriter, outBaseMap, outBump1, outBump2, outBump3);
}
// Once here, make sure we've committed any leftover changes, then process
// the remainders singly.
// (Committing here also empties the buffer, so the single-word writes below
// start from a fill count of zero.)
storeGather.commit(m_LightmapPixelWriter);
for( ; // s is where it should be from the loop above
s < (unsigned int) pLightmapSize[0]; s++, // m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel), // now handled by store-gather
srcTexelOffset += ( FOUR )) {
static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f}; fltx4 vColor[4]; fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]); fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]); fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]); fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);
// perform an arcane averaging operation upon the bump map values
ColorSpace::LinearToBumpedLightmap( vFloatImage, vFloatImageBump1, vFloatImageBump2, vFloatImageBump3, vColor[0], vColor[1], vColor[2], vColor[3] );
// convert each color to RGB scaled.
// DO NOT! make this into a for loop. The (April07 XDK) compiler
// in fact DOES NOT unroll them, and will perform very naive
// scheduling if you try.
// clamp to 0..16 float
vColor[0] = MinSIMD(vColor[0], vSixteen); vColor[1] = MinSIMD(vColor[1], vSixteen); vColor[2] = MinSIMD(vColor[2], vSixteen); vColor[3] = MinSIMD(vColor[3], vSixteen);
// compute the scaling factor, place it in w, and
// scale the rest by it. Obliterates whatever was
// already in alpha.
// This code is why it is important to not use a for
// loop: you need to let the compiler interleave the
// inlined instructions.
vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] ); vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] ); vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] ); vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] );
// Optional debug-only scalar recomputation of the same luxel; the comparison
// against the SIMD result further down is currently commented out.
#ifdef X360_DOUBLECHECK_LIGHTMAPS
unsigned short color[4][4];
ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] ); unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 ); color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;
if( IsX360() ) { for( int i = 0; i != 4; ++i ) { Vector4D vRGBScale;
vRGBScale.x = color[i][0] * (16.0f / 65535.0f); vRGBScale.y = color[i][1] * (16.0f / 65535.0f); vRGBScale.z = color[i][2] * (16.0f / 65535.0f); vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x ); color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f ); color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f ); color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f ); color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f ); } }
/*
for (int ii = 0; ii < 4; ++ii) { uint32 pack = (PackPixel_BGRA8888( vColor[ii] ).u[3]); if (color[ii][3] != 0) Assert( color[ii][0] == (pack & 0xFF0000) >> 16 && color[ii][1] == (pack & 0xFF00) >> 8 && color[ii][2] == (pack & 0xFF) && color[ii][3] == (pack & 0xFF000000) >> 24 ); } */
#endif
vColor[0] = PackPixel_BGRA8888( vColor[0] ); vColor[1] = PackPixel_BGRA8888( vColor[1] ); vColor[2] = PackPixel_BGRA8888( vColor[2] ); vColor[3] = PackPixel_BGRA8888( vColor[3] );
// only the w word of each packed vector is meaningful, so gather just that word per map
storeGather.writeJustW(m_LightmapPixelWriter, vColor[0], vColor[1], vColor[2], vColor[3] );
/* // here is the old way of writing pixels:
// now we store-gather this
m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[0] ); Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[0] ).u[3] ); void * RESTRICT pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[1], pBits ); Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[1] ).u[3] ); pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[2], pBits ); Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[2] ).u[3] ); pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[3], pBits ); Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[3] ).u[3] );
m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel); */ }
// flush the remainder luxels of this row before seeking to the next row
storeGather.commit(m_LightmapPixelWriter);
} }
#endif //_X360
// write bumped lightmap update to HDR integer lightmap
// Writes a bumped lightmap (base map plus three bump maps) into
// m_LightmapPixelWriter for the HDR integer lightmap mode.
//   pFloatImage, pFloatImageBump1..3 - source images, four floats per luxel (rgb + alpha)
//   pLightmapSize                    - [0] = luxels per row, [1] = number of rows
//   pOffsetIntoLightmapPage          - x/y passed to Seek() for the first luxel of row 0
//   pfmOut                           - optional bitmap that receives the base-map pixels
//                                      (integer-format path only)
// The four maps are spaced one lightmap row width (nLightmap0WriterSizeBytes)
// apart in the writer, so each luxel is written as four pixels separated
// by SkipBytes(), followed by a rewind to the next base-map pixel.
void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2, float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) RESTRICT
{
	const int nLightmapSize0 = pLightmapSize[0];
	const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
	// after writing the third bump pixel we are three maps ahead; step back to the pixel after the base one
	const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );

	if( m_LightmapPixelWriter.IsUsingFloatFormat() )
	{
		AssertMsg(!IsX360(), "Tried to use a floating-point pixel format for lightmaps on 360, which is not supported.");
		if (!IsX360())
		{
			// scales the 16-bit fixed point values back to floats for the float writer
			const float toFloat = ( 1.0f / ( float )( 1 << 16 ) );

			for( int t = 0; t < pLightmapSize[1]; t++ )
			{
				int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
				m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

				for( int s = 0;
					 s < nLightmapSize0;
					 s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel), srcTexelOffset += (sizeof(Vector4D)/sizeof(float)) )
				{
					unsigned short color[4][4];

					ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
						&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
						&pFloatImageBump3[srcTexelOffset],
						color[0], color[1], color[2], color[3] );

					float alpha = pFloatImage[srcTexelOffset+3];
					Assert( alpha >= 0.0f && alpha <= 1.0f );

					if ( HardwareConfig()->GetCSMAccurateBlending() )
					{
						float alphaF[4];

						ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3],
							&pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3],
							&pFloatImageBump3[srcTexelOffset + 3],
							&alphaF[0], &alphaF[1], &alphaF[2], &alphaF[3] );

						color[0][3] = ColorSpace::LinearToUnsignedShort( alphaF[0], 16 );
						color[1][3] = ColorSpace::LinearToUnsignedShort( alphaF[1], 16 );
						color[2][3] = ColorSpace::LinearToUnsignedShort( alphaF[2], 16 );
						color[3][3] = ColorSpace::LinearToUnsignedShort( alphaF[3], 16 );
					}
					else
					{
						// Quantise alpha the same way the integer path does. Assigning the
						// [0,1] float straight into an unsigned short would truncate it to
						// 0 or 1, which toFloat then scales down to (almost) nothing.
						const unsigned short alphaUS = ColorSpace::LinearToUnsignedShort( alpha, 16 );
						color[0][3] = color[1][3] = color[2][3] = color[3][3] = alphaUS;
					}

					// NOTE: a 360-only RGBScale conversion used to live here under
					// USE_32BIT_LIGHTMAPS_ON_360; it is a can't-happen now, because
					// float formats are not allowed on 360.

					m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[0][0], toFloat * color[0][1], toFloat * color[0][2], toFloat * color[0][3] );

					m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
					m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[1][0], toFloat * color[1][1], toFloat * color[1][2], toFloat * color[1][3] );

					m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
					m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[2][0], toFloat * color[2][1], toFloat * color[2][2], toFloat * color[2][3] );

					m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
					m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[3][0], toFloat * color[3][1], toFloat * color[3][2], toFloat * color[3][3] );
				}
			}
		}
	}
	else
	{
#ifndef X360_USE_SIMD_LIGHTMAP
		for( int t = 0; t < pLightmapSize[1]; t++ )
		{
			int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
			m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

			for( int s = 0;
				 s < nLightmapSize0;
				 s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel), srcTexelOffset += (sizeof(Vector4D)/sizeof(float)) )
			{
				unsigned short color[4][4];

				ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
					&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
					&pFloatImageBump3[srcTexelOffset],
					color[0], color[1], color[2], color[3] );

				if ( HardwareConfig()->GetCSMAccurateBlending() )
				{
					float alpha[4];
					ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3],
						&pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3],
						&pFloatImageBump3[srcTexelOffset + 3],
						&alpha[0], &alpha[1], &alpha[2], &alpha[3] );

					color[0][3] = ColorSpace::LinearToUnsignedShort( alpha[0], 16 );
					color[1][3] = ColorSpace::LinearToUnsignedShort( alpha[1], 16 );
					color[2][3] = ColorSpace::LinearToUnsignedShort( alpha[2], 16 );
					color[3][3] = ColorSpace::LinearToUnsignedShort( alpha[3], 16 );
				}
				else
				{
					unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 );
					color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;
				}

#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
				if( IsX360() )
				{
					// re-encode each map's color as 8-bit RGBScale
					for( int i = 0; i != 4; ++i )
					{
						Vector4D vRGBScale;

						vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
						vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
						vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
						vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
						color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
						color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
						color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
						color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
					}
				}
#endif

				m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], color[0][3] );

				m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
				m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], color[1][3] );

				m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
				m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], color[2][3] );

				m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
				m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], color[3][3] );

				// Write data to the bitmapped representations so that PFM files can be written
				// (only the base map, color[0], is recorded)
				if ( pfmOut )
				{
					PixRGBAF pixelData;
					pixelData.Red = color[0][0];
					pixelData.Green = color[0][1];
					pixelData.Blue = color[0][2];
					pixelData.Alpha = color[0][3];
					pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData);
				}
			}
		}
#else
		// this is an optimized XBOX implementation. For a clearer
		// presentation of the algorithm, see the PC implementation
		// above.

		// First check for the most common case, using an efficient
		// branch rather than a switch:
		if (m_LightmapPixelWriter.GetFormat() == IMAGE_FORMAT_LINEAR_BGRA8888)
		{
			// broken out into a static to make things more readable
			// and be nicer to the instruction cache
			BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
				pLightmapSize, pOffsetIntoLightmapPage, pfmOut, &m_LightmapPixelWriter );
		}
		else
		{
			// This case is used in Portal 2 to fill RGBA16161616 lightmaps
			Assert( m_LightmapPixelWriter.GetPixelSize() == 8 );

			for( int t = 0; t < pLightmapSize[1]; t++ )
			{
				// a Vector4D must be exactly four floats
				COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);
#define FOUR (sizeof( Vector4D ) / sizeof( float )) // in case this ever changes

				int srcTexelOffset = ( FOUR ) * ( 0 + t * nLightmapSize0 );
				m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

				for( int s = 0;
					 s < nLightmapSize0;
					 s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel), srcTexelOffset += ( FOUR ) )
				{
					static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
					fltx4 vColor[4];
					fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]);
					fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]);
					fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]);
					fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);

					// perform an arcane averaging operation upon the bump map values
					ColorSpace::LinearToBumpedLightmap( vFloatImage, vFloatImageBump1,
						vFloatImageBump2, vFloatImageBump3,
						vColor[0], vColor[1], vColor[2], vColor[3] );

					// DO NOT! make this into a for loop. The (April07 XDK) compiler
					// in fact DOES NOT unroll them, and will perform very naive
					// scheduling if you try.

					// clamp to 0..16 float
					vColor[0] = MinSIMD(vColor[0], vSixteen);
					vColor[1] = MinSIMD(vColor[1], vSixteen);
					vColor[2] = MinSIMD(vColor[2], vSixteen);
					vColor[3] = MinSIMD(vColor[3], vSixteen);

					// NOTE: the ConvertLightmapColorToRGBScale() step is intentionally not
					// applied any more. This path is for writing 16161616 int lightmaps.

					m_LightmapPixelWriter.WritePixelNoAdvance( vColor[0] );

					m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
					m_LightmapPixelWriter.WritePixelNoAdvance( vColor[1] );

					m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
					m_LightmapPixelWriter.WritePixelNoAdvance( vColor[2] );

					m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
					m_LightmapPixelWriter.WritePixelNoAdvance( vColor[3] );

					AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");

					// PFM output is not implemented on this path; complain loudly if requested.
					if ( pfmOut )
					{
						Warning("**************************************************\n"
								"Lightmap output to files on 360 HAS BEEN DISABLED.\n"
								"A grave error has just occurred.\n"
								"**************************************************\n");
						DebuggerBreakIfDebugging();
					}
				}
			}
		}
#endif
	}
}
void CMatLightmaps::LightmapBitsToPixelWriter_LDR( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { // non-HDR lightmap processing
float *pSrc = pFloatImage; for( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) ) { unsigned char color[4]; ColorSpace::LinearToLightmap( color, pSrc );
if ( HardwareConfig()->GetCSMAccurateBlending() ) { ColorSpace::LinearToLightmapAlpha( &color[3], pSrc[3] ); } else { color[3] = RoundFloatToByte( pSrc[3] * 255.0f ); }
m_LightmapPixelWriter.WritePixel( color[0], color[1], color[2], color[3] );
if ( pfmOut ) { // Write data to the bitmapped represenations so that PFM files can be written
PixRGBAF pixelData; pixelData.Red = color[0]; pixelData.Green = color[1]; pixelData.Blue = color[2]; pixelData.Alpha = color[3]; pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData ); } } } }
//-----------------------------------------------------------------------------
// Purpose: Float HDR lightmap upload. Writes the float RGBA image straight
//			through m_LightmapPixelWriter.WritePixelF, one row at a time.
// Input  : pFloatImage - source texels, sizeof(Vector4D)/sizeof(float) floats each
//			pLightmapSize - [0] = texels per row, [1] = number of rows
//			pOffsetIntoLightmapPage - position the pixel writer is seeked to
//				for the first texel of the first row
//			pfmOut - not used by this path
// Notes  : Asserts and returns without writing anything when IsX360() is true.
//-----------------------------------------------------------------------------
void CMatLightmaps::LightmapBitsToPixelWriter_HDRF( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
{
	if ( IsX360() )
	{
		// 360 does not support HDR float
		Assert( 0 );
		return;
	}

	// float HDR lightmap processing
	float *pSrc = pFloatImage;
	for ( int t = 0; t < pLightmapSize[1]; ++t )
	{
		m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

		for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
		{
			// The alpha conversion has to happen for every texel. It used to
			// sit outside this loop, which converted only the first texel of
			// each row (in place, in the caller's buffer) and touched pSrc[3]
			// even when the row was empty. Converting a local copy keeps the
			// source image read-only, like the LDR and HDRI writers.
			// NOTE(review): assumes LinearToLightmapAlpha( float * ) converts
			// the single float it points at in place -- confirm against its
			// declaration.
			float flAlpha = pSrc[3];
			if ( HardwareConfig()->GetCSMAccurateBlending() )
			{
				ColorSpace::LinearToLightmapAlpha( &flAlpha );
			}

			m_LightmapPixelWriter.WritePixelF( pSrc[0], pSrc[1], pSrc[2], flAlpha );
		}
	}
}
//-----------------------------------------------------------------------------
// Purpose: Integer HDR lightmap upload. Converts a linear float RGBA image
//			and pushes it through m_LightmapPixelWriter.
// Input  : pFloatImage - source texels, sizeof(Vector4D)/sizeof(float) floats each
//			pLightmapSize - [0] = texels per row, [1] = number of rows
//			pOffsetIntoLightmapPage - position the pixel writer is seeked to
//				for the first texel of the first row
//			pfmOut - optional bitmap receiving the raw source texels; only
//				written by the non-float branch of the non-SIMD build
// Notes  : numbers come in on the domain [0..16]
//			Two compile-time variants exist, selected by
//			X360_USE_SIMD_LIGHTMAP. Each one branches on whether the pixel
//			writer is using a float format.
//-----------------------------------------------------------------------------
void CMatLightmaps::LightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t * RESTRICT pfmOut )
{
#ifndef X360_USE_SIMD_LIGHTMAP
	// PC code (and old, pre-SIMD xbox version -- unshippably slow)
	if ( m_LightmapPixelWriter.IsUsingFloatFormat() )
	{
		// integer HDR lightmap processing
		// The RGB channels are converted to corrected shorts and then scaled
		// back to floats with toFloat before being written with WritePixelF.
		float *pSrc = pFloatImage;
		for ( int t = 0; t < pLightmapSize[1]; ++t )
		{
			m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
			for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
			{
				int r, g, b, a;
				r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
				g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
				b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
				// NOTE(review): 'a' is computed here but the WritePixelF call
				// below passes pSrc[3] as alpha, so the converted value is
				// not what gets written in this branch -- confirm intent.
				if ( HardwareConfig()->GetCSMAccurateBlending() )
				{
					ColorSpace::LinearToLightmapAlpha( &a, pSrc[3] );
				}
				else
				{
					a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
				}
				// Scale factor taking the 16-bit integer values back to floats.
				float toFloat = ( 1.0f / ( float )( 1 << 16 ) );
#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
				if( IsX360() )
				{
					// Re-encode as 8-bit RGB plus a scale in w, and switch the
					// float scale factor to the 8-bit range.
					Vector4D vRGBScale;
					vRGBScale.x = r * (16.0f / 65535.0f);
					vRGBScale.y = g * (16.0f / 65535.0f);
					vRGBScale.z = b * (16.0f / 65535.0f);
					vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
					r = RoundFloatToByte( vRGBScale.x * 255.0f );
					g = RoundFloatToByte( vRGBScale.y * 255.0f );
					b = RoundFloatToByte( vRGBScale.z * 255.0f );
					a = RoundFloatToByte( vRGBScale.w * 255.0f );
					toFloat = ( 1.0f / ( float )( 1 << 8 ) );
				}
#endif
				Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f );
				m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] );
			}
		}
	}
	else
	{
		// integer HDR lightmap processing
		// Same conversion as above, but the integer values (including the
		// converted alpha) are written directly with WritePixel.
		float *pSrc = pFloatImage;
		for ( int t = 0; t < pLightmapSize[1]; ++t )
		{
			m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
			for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
			{
				int r, g, b, a;
				r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
				g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
				b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
				if ( HardwareConfig()->GetCSMAccurateBlending() )
				{
					ColorSpace::LinearToLightmapAlpha( &a, pSrc[3] );
				}
				else
				{
					a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
				}
#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
				if( IsX360() )
				{
					// Re-encode as 8-bit RGB plus a scale in w.
					Vector4D vRGBScale;
					vRGBScale.x = r * (16.0f / 65535.0f);
					vRGBScale.y = g * (16.0f / 65535.0f);
					vRGBScale.z = b * (16.0f / 65535.0f);
					vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
					r = RoundFloatToByte( vRGBScale.x * 255.0f );
					g = RoundFloatToByte( vRGBScale.y * 255.0f );
					b = RoundFloatToByte( vRGBScale.z * 255.0f );
					a = RoundFloatToByte( vRGBScale.w * 255.0f );
				}
#endif
				m_LightmapPixelWriter.WritePixel( r, g, b, a );
				if ( pfmOut )
				{
					// Write data to the bitmapped representations so that PFM files can be written
					// (the unconverted source floats are recorded, not r/g/b/a)
					PixRGBAF pixelData;
					pixelData.Red = pSrc[0];
					pixelData.Green = pSrc[1];
					pixelData.Blue = pSrc[2];
					pixelData.Alpha = pSrc[3];
					pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData );
				}
			}
		}
	}
#else
	// XBOX360 code
	if ( m_LightmapPixelWriter.IsUsingFloatFormat() )
	{
		if( IsX360() )
		{
			AssertMsg( false, "Float-format pixel writers do not exist on x360." );
		}
		else
		{
			// This code is here as an example only, in case floating point
			// format is restored to 360.
			// integer HDR lightmap processing
			// Unlike the non-SIMD build, alpha here always goes through
			// LinearToUnsignedShort; there is no CSM accurate-blending branch.
			float * RESTRICT pSrc = pFloatImage;
			for ( int t = 0; t < pLightmapSize[1]; ++t )
			{
				m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
				for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
				{
					int r, g, b, a;
					r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
					g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
					b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
					a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
					float toFloat = ( 1.0f / ( float )( 1 << 16 ) );
#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
					if( IsX360() )
					{
						Vector4D vRGBScale;
						vRGBScale.x = r * (16.0f / 65535.0f);
						vRGBScale.y = g * (16.0f / 65535.0f);
						vRGBScale.z = b * (16.0f / 65535.0f);
						vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
						r = RoundFloatToByte( vRGBScale.x * 255.0f );
						g = RoundFloatToByte( vRGBScale.y * 255.0f );
						b = RoundFloatToByte( vRGBScale.z * 255.0f );
						a = RoundFloatToByte( vRGBScale.w * 255.0f );
						toFloat = ( 1.0f / ( float )( 1 << 8 ) );
					}
#endif
					Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f );
					m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] );
				}
			}
		}
	}
	else
	{
		// This is the fast X360 pathway.
		// integer HDR lightmap processing
		float * RESTRICT pSrc = pFloatImage;
		// Assert((reinterpret_cast<unsigned int>(pSrc) & 15) == 0); // 16-byte aligned?
		// The inner loops below advance pSrc by a literal 4 floats per texel.
		COMPILE_TIME_ASSERT(sizeof(Vector4D)/sizeof(*pSrc) == 4); // assert that 1 * 4 = 4
		// input numbers from pSrc are on the domain [0..+inf]
		// we clamp them to the range [0..16]
		// output is RGBA
		// the shader does this: rOut = Rin * Ain * 16.0f
		// where Rin is [0..1], a float computed from a byte value [0..255]
		// Ain is therefore the brightest channel (say R) divided by 16 and quantized
		// Rin is computed from pSrc->r by dividing by Ain
		// rather than switching inside WritePixel for each different format,
		// thus causing a 23-cycle pipeline clear for every pixel, we'll
		// branch on the format here. That will allow us to unroll the inline
		// pixel write functions differently depending on their different
		// latencies.
		Assert(!pfmOut); // should never happen on 360.
#ifndef ALLOW_PFM_OUTPUT_ON_360
		if ( pfmOut )
		{
			Warning("*****************************************\n" "Lightmap output on 360 HAS BEEN DISABLED.\n" "A grave error has just occurred.\n" "*****************************************\n");
		}
#endif
		// switch once, here, outside the loop, rather than
		// switching inside each pixel. Switches are not fast
		// on x360: they are usually implemented as jumps
		// through function tables, which have a 24-cycle
		// stall.
		switch (m_LightmapPixelWriter.GetFormat())
		{
			// note: format names are low-order-byte first.
		case IMAGE_FORMAT_RGBA8888:
		case IMAGE_FORMAT_LINEAR_RGBA8888:
			{
				for ( int t = 0; t < pLightmapSize[1]; ++t )
				{
					m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
					for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
					{
						static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
						fltx4 rgba = LoadUnalignedSIMD(pSrc);
						// clamp to 0..16 float
						rgba = MinSIMD(rgba, vSixteen);
						// compute the scaling factor, place it in w, and
						// scale the rest by it.
						rgba = ConvertLightmapColorToRGBScale( rgba );
						// rgba is now float 0..255 in each component
						// NOTE(review): the BGRA8888 and RGBA16161616 cases
						// call SkipBytes after their NoAdvance write; this
						// case does not. Confirm whether this writer needs an
						// explicit advance.
						m_LightmapPixelWriter.WritePixelNoAdvance_RGBA8888(rgba);
						/* // not supported on X360
						if ( pfmOut ) { // Write data to the bitmapped representations so that PFM files can be written
						PixRGBAF pixelData; XMStoreVector4(&pixelData,rgba); pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData ); } */
					}
				}
				break;
			}
		case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
		case IMAGE_FORMAT_LINEAR_BGRA8888:
			{
				for ( int t = 0; t < pLightmapSize[1]; ++t )
				{
					m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
					for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
					{
						static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
						fltx4 rgba = LoadUnalignedSIMD(pSrc);
						// clamp to 0..16 float
						rgba = MinSIMD(rgba, vSixteen);
						// compute the scaling factor, place it in w, and
						// scale the rest by it.
						rgba = ConvertLightmapColorToRGBScale( rgba );
						// rgba is now float 0..255 in each component
						m_LightmapPixelWriter.WritePixelNoAdvance_BGRA8888(rgba);
						// forcibly advance
						m_LightmapPixelWriter.SkipBytes(4);
						/* // not supported on X360
						if ( pfmOut ) { // Write data to the bitmapped representations so that PFM files can be written
						PixRGBAF pixelData; XMStoreVector4(&pixelData,rgba); pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData ); } */
					}
				}
				break;
			}
		case IMAGE_FORMAT_RGBA16161616:
		case IMAGE_FORMAT_LINEAR_RGBA16161616:
			{
				// This format is only clamped; ConvertLightmapColorToRGBScale
				// is not applied here.
				for ( int t = 0; t < pLightmapSize[1]; ++t )
				{
					m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
					for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
					{
						static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
						fltx4 rgba = LoadUnalignedSIMD(pSrc);
						rgba = MinSIMD(rgba, vSixteen); // clamp to 0..16 float
						m_LightmapPixelWriter.WritePixelNoAdvance_RGBA16161616(rgba);
						m_LightmapPixelWriter.SkipBytes(8);
					}
				}
				break;
			}
		default:
			// Any other format: report it and write nothing.
			AssertMsg1(false,"Unsupported pixel format %d while writing lightmaps!", m_LightmapPixelWriter.GetFormat() );
			Warning("Unsupported pixel format used in lightmap. Lightmaps could not be downloaded.\n");
			break;
		}
	}
#endif
}
void CMatLightmaps::BeginUpdateLightmaps( void ) { CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal(); if ( pCallQueue ) { pCallQueue->QueueCall( this, &CMatLightmaps::BeginUpdateLightmaps ); return; }
m_nUpdatingLightmapsStackDepth++; }
void CMatLightmaps::EndUpdateLightmaps( void ) { CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal(); if ( pCallQueue ) { pCallQueue->QueueCall( this, &CMatLightmaps::EndUpdateLightmaps ); return; }
m_nUpdatingLightmapsStackDepth--; Assert( m_nUpdatingLightmapsStackDepth >= 0 ); if( m_nUpdatingLightmapsStackDepth <= 0 && m_nLockedLightmap != -1 ) { g_pShaderAPI->TexUnlock(); m_nLockedLightmap = -1; } }
//-----------------------------------------------------------------------------
// Purpose: Finds room for a block of the given size in one of the dynamic
//			lightmap pages, starting at m_dynamic.currentDynamicIndex and
//			wrapping around.
// Input  : lightmapSize - [0] = width, [1] = height of the block
//			pOutOffsetIntoPage - receives the block position as two ints
//			frameID - a page whose stored lock frame differs from this value
//				has its image packer reset before the block is added
// Output : lightmap page index the block was placed in, or -1 if no page
//			accepted it
//-----------------------------------------------------------------------------
int CMatLightmaps::AllocateDynamicLightmap( int lightmapSize[2], int *pOutOffsetIntoPage, int frameID )
{
	int nPagesTried = 0;
	while ( nPagesTried < COUNT_DYNAMIC_LIGHTMAP_PAGES )
	{
		const int nSlot = ( m_dynamic.currentDynamicIndex + nPagesTried ) % COUNT_DYNAMIC_LIGHTMAP_PAGES;
		const int nPage = m_firstDynamicLightmap + nSlot;

		// First use of this page for this frame ID: stamp it and start over
		// with an empty packer sized to the page.
		if ( m_dynamic.lightmapLockFrame[nSlot] != frameID )
		{
			m_dynamic.lightmapLockFrame[nSlot] = frameID;
			m_dynamic.imagePackers[nSlot].Reset( 0, m_pLightmapPages[nPage].m_Width, m_pLightmapPages[nPage].m_Height );
		}

		const bool bPlaced = m_dynamic.imagePackers[nSlot].AddBlock( lightmapSize[0], lightmapSize[1], pOutOffsetIntoPage, pOutOffsetIntoPage + 1 );
		if ( bPlaced )
		{
			return nPage;
		}

		++nPagesTried;
	}

	// Every dynamic page refused the block.
	return -1;
}
//-----------------------------------------------------------------------------
// Updates the lightmap
//-----------------------------------------------------------------------------
void CMatLightmaps::UpdateLightmap( int lightmapPageID, int lightmapSize[2], int offsetIntoLightmapPage[2], float *pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2, float *pFloatImageBump3 ) { VPROF( "CMatRenderContext::UpdateLightmap" );
bool hasBump = false; int uSize = 1; FloatBitMap_t *pfmOut = NULL; if ( pFloatImageBump1 && pFloatImageBump2 && pFloatImageBump3 ) { hasBump = true; uSize = 4; }
if ( lightmapPageID >= GetNumLightmapPages() || lightmapPageID < 0 ) { Error( "MaterialSystem_Interface_t::UpdateLightmap lightmapPageID=%d out of range\n", lightmapPageID ); return; } bool bDynamic = IsDynamicLightmap(lightmapPageID);
if ( bDynamic ) { int dynamicIndex = lightmapPageID-m_firstDynamicLightmap; Assert(dynamicIndex < COUNT_DYNAMIC_LIGHTMAP_PAGES); m_dynamic.currentDynamicIndex = (dynamicIndex + 1) % COUNT_DYNAMIC_LIGHTMAP_PAGES; }
if ( mat_lightmap_pfms.GetBool()) { // Allocate and initialize lightmap data that will be written to a PFM file
if (NULL == m_pLightmapDataPtrArray[lightmapPageID]) { m_pLightmapDataPtrArray[lightmapPageID] = new FloatBitMap_t(m_pLightmapPages[lightmapPageID].m_Width, m_pLightmapPages[lightmapPageID].m_Height); m_pLightmapDataPtrArray[lightmapPageID]->Clear(0, 0, 0, 1); } pfmOut = m_pLightmapDataPtrArray[lightmapPageID]; }
// NOTE: Change how the lock is taking place if you ever change how bumped
// lightmaps are put into the page. Right now, we assume that they're all
// added to the right of the original lightmap.
bool bLockSubRect; { VPROF_( "Locking lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); // vprof scope
bLockSubRect = m_nUpdatingLightmapsStackDepth <= 0 && !bDynamic; if( bLockSubRect ) { VPROF_INCREMENT_COUNTER( "lightmap subrect texlock", 1 ); g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmapPageID] ); if (!g_pShaderAPI->TexLock( 0, 0, offsetIntoLightmapPage[0], offsetIntoLightmapPage[1], lightmapSize[0] * uSize, lightmapSize[1], m_LightmapPixelWriter )) { return; } } else if( lightmapPageID != m_nLockedLightmap ) { if ( !LockLightmap( lightmapPageID ) ) { ExecuteNTimes( 10, Warning( "Failed to lock lightmap\n" ) ); return; } } }
int subRectOffset[2] = {0,0};
{ // account for the part spent in math:
VPROF_( "LightmapBitsToPixelWriter", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); #ifdef _PS3
// PS3 uses 16-bit half floats per channel...but the HDR_TYPE_FLOAT codepath has a lot of other assumptions, so just
// lie about the format right here on PS3 only
if ( hasBump ) { BumpedLightmapBitsToPixelWriter_HDRF( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); } else { LightmapBitsToPixelWriter_HDRF( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); } #else // _PS3
if ( hasBump ) { switch( HardwareConfig()->GetHDRType() ) { case HDR_TYPE_NONE: BumpedLightmapBitsToPixelWriter_LDR( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; case HDR_TYPE_INTEGER: BumpedLightmapBitsToPixelWriter_HDRI( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; case HDR_TYPE_FLOAT: BumpedLightmapBitsToPixelWriter_HDRF( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; } } else { switch ( HardwareConfig()->GetHDRType() ) { case HDR_TYPE_NONE: LightmapBitsToPixelWriter_LDR( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break;
case HDR_TYPE_INTEGER: LightmapBitsToPixelWriter_HDRI( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break;
case HDR_TYPE_FLOAT: LightmapBitsToPixelWriter_HDRF( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break;
default: Assert( 0 ); break; } } #endif // !_PS3
}
if( bLockSubRect ) { VPROF_( "Unlocking Lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); g_pShaderAPI->TexUnlock(); } }
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
int CMatLightmaps::GetNumSortIDs( void ) { return m_numSortIDs; }
//-----------------------------------------------------------------------------
// Purpose: Appends one sort entry per (material, lightmap page) pair for
//			every material in the dictionary whose lightmap page range is
//			non-empty.
// Input  : pInfo - array the entries are written into, indexed by sortId
//			sortId - next free index on entry; advanced once per entry written
//			alpha - not read by this function
//-----------------------------------------------------------------------------
void CMatLightmaps::ComputeSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha )
{
	MaterialHandle_t hMaterial = GetMaterialDict()->FirstMaterial();
	while ( hMaterial != GetMaterialDict()->InvalidMaterial() )
	{
		IMaterialInternal *pMat = GetMaterialInternal( hMaterial );

		// A material whose minimum page is above its maximum has no pages.
		if ( pMat->GetMinLightmapPageID() <= pMat->GetMaxLightmapPageID() )
		{
			// fill in the lightmapped materials
			int nPage = pMat->GetMinLightmapPageID();
			while ( nPage <= pMat->GetMaxLightmapPageID() )
			{
				MaterialSystem_SortInfo_t &entry = pInfo[sortId];
				entry.material = pMat->GetQueueFriendlyVersion();
				entry.lightmapPageID = nPage;
#if 0
				char buf[128];
				Q_snprintf( buf, sizeof( buf ), "ComputeSortInfo: %s lightmapPageID: %d sortID: %d\n", pMat->GetName(), nPage, sortId );
				OutputDebugString( buf );
#endif
				++sortId;
				++nPage;
			}
		}

		hMaterial = GetMaterialDict()->NextMaterial( hMaterial );
	}
}
//-----------------------------------------------------------------------------
// Purpose: Appends one sort entry for every material that needs the white
//			lightmap and has a reference count above zero. The entry's page
//			is the bumped white page when the material has the
//			MATERIAL_PROPERTY_NEEDS_BUMPED_LIGHTMAPS flag, otherwise the
//			plain white page.
// Input  : pInfo - array the entries are written into, indexed by sortId
//			sortId - next free index on entry; advanced once per entry written
//			alpha - not read by this function
//-----------------------------------------------------------------------------
void CMatLightmaps::ComputeWhiteLightmappedSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha )
{
	for ( MaterialHandle_t hMaterial = GetMaterialDict()->FirstMaterial();
		  hMaterial != GetMaterialDict()->InvalidMaterial();
		  hMaterial = GetMaterialDict()->NextMaterial( hMaterial ) )
	{
		IMaterialInternal *pMat = GetMaterialInternal( hMaterial );

		// fill in the lightmapped materials that are actually used by this level
		if ( !pMat->GetNeedsWhiteLightmap() )
		{
			continue;
		}
		if ( pMat->GetReferenceCount() <= 0 )
		{
			continue;
		}

		MaterialSystem_SortInfo_t &entry = pInfo[sortId];
		entry.material = pMat->GetQueueFriendlyVersion();
		entry.lightmapPageID = pMat->GetPropertyFlag( MATERIAL_PROPERTY_NEEDS_BUMPED_LIGHTMAPS )
			? MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP
			: MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE;

		++sortId;
	}
}
//-----------------------------------------------------------------------------
// Purpose: Fills the caller's array with every sort entry: the lightmapped
//			materials first, followed by the white-lightmapped ones.
// Input  : pSortInfoArray - destination array; the number of entries written
//			is asserted to equal m_numSortIDs
//-----------------------------------------------------------------------------
void CMatLightmaps::GetSortInfo( MaterialSystem_SortInfo_t *pSortInfoArray )
{
	// Both passes are run with the alpha argument off.
	const bool bAlpha = false;

	// Running index shared by the two passes; each one appends at it.
	int sortId = 0;

	ComputeSortInfo( pSortInfoArray, sortId, bAlpha );
	ComputeWhiteLightmappedSortInfo( pSortInfoArray, sortId, bAlpha );

	Assert( m_numSortIDs == sortId );
}
//-----------------------------------------------------------------------------
// Purpose: Sets the min and mag texture filter of every lightmap page.
// Input  : enabled - true selects SHADER_TEXFILTERMODE_LINEAR, false selects
//			SHADER_TEXFILTERMODE_NEAREST
//-----------------------------------------------------------------------------
void CMatLightmaps::EnableLightmapFiltering( bool enabled )
{
	for ( int nPage = 0; nPage < GetNumLightmapPages(); ++nPage )
	{
		// Select the page's texture before the filter calls.
		g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[nPage] );

		g_pShaderAPI->TexMinFilter( enabled ? SHADER_TEXFILTERMODE_LINEAR : SHADER_TEXFILTERMODE_NEAREST );
		g_pShaderAPI->TexMagFilter( enabled ? SHADER_TEXFILTERMODE_LINEAR : SHADER_TEXFILTERMODE_NEAREST );
	}
}
|