csgo/cstrike15_src/materialsystem/cmatlightmaps.cpp


								//========== Copyright (c) Valve Corporation, All rights reserved. ========

								//

								// Purpose:

								//

								//=============================================================================


								#include "pch_materialsystem.h"


								#ifndef _PS3

								#define MATSYS_INTERNAL

								#endif


								#include "cmatlightmaps.h"


								#include "colorspace.h"

								#include "IHardwareConfigInternal.h"


								#include "cmaterialsystem.h"


								// NOTE: This must be the last file included!!!

								#include "tier0/memdbgon.h"

								#include "bitmap/floatbitmap.h"


								// Debug convar. In this file it is read by EndLightmapAllocation (to allocate the
								// per-page FloatBitMap_t pointer array) and by CleanupLightmaps (to write one
								// "Lightmap-Page-<n>.pfm" file per page that has bitmap data).
								static ConVar mat_lightmap_pfms( "mat_lightmap_pfms", "0", FCVAR_MATERIAL_SYSTEM_THREAD, "Outputs .pfm files containing lightmap data for each lightmap page when a level exits." ); // Write PFM files for each lightmap page in the game directory when exiting a level


								// Turning off 32 bit lightmaps for Portal 2, to save shader perf --Thorsten

								//#define USE_32BIT_LIGHTMAPS_ON_360 //uncomment to use 32bit lightmaps, be sure to keep this in sync with the same #define in stdshaders/lightmappedgeneric_ps2_3_x.h


								#ifdef _X360

								// 7LS - fixup support for lightmap alpha channel data for csm's, definitely do this when/if turning dynamic lightmaps back on

								// #define X360_USE_SIMD_LIGHTMAP

								#endif


								//-----------------------------------------------------------------------------


								// Returns the current material as reported by the material system's render context.
								// Pure forwarding accessor; no state is kept in CMatLightmaps for this.
								inline IMaterialInternal* CMatLightmaps::GetCurrentMaterialInternal() const

								{

									return GetMaterialSystem()->GetRenderContextInternal()->GetCurrentMaterialInternal();

								}


								// Stores pCurrentMaterial as the current material on the material system's
								// render context (may be NULL/0 to mean "no current material").
								inline void CMatLightmaps::SetCurrentMaterialInternal(IMaterialInternal* pCurrentMaterial)
								{
									GetMaterialSystem()->GetRenderContextInternal()->SetCurrentMaterialInternal( pCurrentMaterial );
								}


								// Resolves a material handle to its IMaterialInternal by forwarding to the material system.
								inline IMaterialInternal *CMatLightmaps::GetMaterialInternal( MaterialHandle_t idx ) const

								{

									return GetMaterialSystem()->GetMaterialInternal( idx );

								}


								// Read-only access to the material system's render context (const overload).
								inline const IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal() const

								{

									return GetMaterialSystem()->GetRenderContextInternal();

								}


								// Mutable access to the material system's render context (non-const overload).
								inline IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal()

								{

									return GetMaterialSystem()->GetRenderContextInternal();

								}


								// Read-only access to the material system's material dictionary (const overload).
								inline const CMaterialDict *CMatLightmaps::GetMaterialDict() const

								{

									return GetMaterialSystem()->GetMaterialDict();

								}


								// Mutable access to the material system's material dictionary (non-const overload).
								inline CMaterialDict *CMatLightmaps::GetMaterialDict()

								{

									return GetMaterialSystem()->GetMaterialDict();

								}


								//-----------------------------------------------------------------------------

								//

								//-----------------------------------------------------------------------------

								// Constructs an empty lightmap manager: no pages, no PFM capture data,
								// nothing locked, and the default (allocations allowed) state.
								CMatLightmaps::CMatLightmaps()
								{
									// Page storage starts out empty.
									m_NumLightmapPages = 0;
									m_pLightmapPages = NULL;
									m_pLightmapDataPtrArray = NULL;

									// -1 means "no lightmap page is currently locked".
									m_nLockedLightmap = -1;
									m_nUpdatingLightmapsStackDepth = 0;

									// Sort ID / white lightmap bookkeeping used during allocation.
									m_numSortIDs = 0;
									m_currentWhiteLightmapMaterial = NULL;

									m_eLightmapsState = STATE_DEFAULT;
								}


								//-----------------------------------------------------------------------------

								//

								//-----------------------------------------------------------------------------

								// Shuts the lightmap manager down. All of the work is done by CleanupLightmaps().
								void CMatLightmaps::Shutdown( )

								{

									// Clean up all lightmaps

									CleanupLightmaps();

								}


								//-----------------------------------------------------------------------------

								// Assign enumeration IDs to all materials

								//-----------------------------------------------------------------------------

								void CMatLightmaps::EnumerateMaterials( void )

								{

									// iterate in sorted order

									int id = 0;

									for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) )

									{

										GetMaterialInternal(i)->SetEnumerationID( id );

										++id;

									}

								}


								//-----------------------------------------------------------------------------

								// Gets the maximum lightmap page size...

								//-----------------------------------------------------------------------------

								// Returns the widest lightmap page we will create: 512 texels, or the hardware's
								// maximum texture width if that is smaller.
								//
								// FIXME (carried over): it's unclear which size we really want. Going bigger does not
								// drastically increase primitives per DrawIndexedPrimitive call at the moment, so we
								// don't. With dynamic textures we'd want bigger pages; the tradeoff is the memory
								// wasted when a page isn't filled. 512x256 is needed because that's the only way
								// bumped lighting on displacements can work given the 128x128 allowance.
								int CMatLightmaps::GetMaxLightmapPageWidth() const
								{
									const int nPreferredWidth = 512;
									const int nHardwareLimit = HardwareConfig()->MaxTextureWidth();

									return ( nPreferredWidth > nHardwareLimit ) ? nHardwareLimit : nPreferredWidth;
								}


								//-----------------------------------------------------------------------------

								//

								//-----------------------------------------------------------------------------

								// Returns the tallest lightmap page we will create: 256 texels, or the hardware's
								// maximum texture height if that is smaller.
								int CMatLightmaps::GetMaxLightmapPageHeight() const
								{
									const int nPreferredHeight = 256;
									const int nHardwareLimit = HardwareConfig()->MaxTextureHeight();

									return ( nPreferredHeight > nHardwareLimit ) ? nHardwareLimit : nPreferredHeight;
								}


								//-----------------------------------------------------------------------------

								// Returns the lightmap page size

								//-----------------------------------------------------------------------------

								void CMatLightmaps::GetLightmapPageSize( int lightmapPageID, int *pWidth, int *pHeight ) const

								{

									switch( lightmapPageID )

									{

									default:

								 		Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );

										*pWidth = m_pLightmapPages[lightmapPageID].m_Width;

										*pHeight = m_pLightmapPages[lightmapPageID].m_Height;

										break;


									case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED:

										*pWidth = *pHeight = 1;

										AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );

										break;


									case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE:

									case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP:

										*pWidth = *pHeight = 1;

										break;

									}

								}


								//-----------------------------------------------------------------------------

								//

								//-----------------------------------------------------------------------------

								// Returns the width of a lightmap page. The white / white-bump pseudo pages and the
								// user-defined page (which also asserts once) report 1; any other ID is treated as
								// an index into m_pLightmapPages.
								int CMatLightmaps::GetLightmapWidth( int lightmapPageID ) const
								{
									if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ||
										 lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP )
									{
										return 1;
									}

									if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED )
									{
										AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
										return 1;
									}

									// A real page: look it up in the page table.
									Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
									return m_pLightmapPages[lightmapPageID].m_Width;
								}


								//-----------------------------------------------------------------------------

								//

								//-----------------------------------------------------------------------------

								// Returns the height of a lightmap page. The white / white-bump pseudo pages and the
								// user-defined page (which also asserts once) report 1; any other ID is treated as
								// an index into m_pLightmapPages.
								int CMatLightmaps::GetLightmapHeight( int lightmapPageID ) const
								{
									if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ||
										 lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP )
									{
										return 1;
									}

									if ( lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED )
									{
										AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" );
										return 1;
									}

									// A real page: look it up in the page table.
									Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() );
									return m_pLightmapPages[lightmapPageID].m_Height;
								}


								//-----------------------------------------------------------------------------

								// Clean up lightmap pages.

								//-----------------------------------------------------------------------------

								void CMatLightmaps::CleanupLightmaps()

								{

									GetMaterialSystem()->GetPaintmaps()->CleanupPaintmaps();

									if ( mat_lightmap_pfms.GetBool())

									{

									  // Write PFM files containing lightmap data for this page

									  for (int lightmap = 0; lightmap < GetNumLightmapPages(); lightmap++)

									  {

										 if ((NULL != m_pLightmapDataPtrArray) && (NULL != m_pLightmapDataPtrArray[lightmap]))

										 {

											char szPFMFileName[MAX_PATH];


											sprintf(szPFMFileName, "Lightmap-Page-%d.pfm", lightmap);

											m_pLightmapDataPtrArray[lightmap]->WritePFM(szPFMFileName);

										 }

									  }

									}


									// Remove the lightmap data bitmap representations

									if (m_pLightmapDataPtrArray)

									{

									  int i;

									  for( i = 0; i < GetNumLightmapPages(); i++ )

									  {

										 delete m_pLightmapDataPtrArray[i];

									  }


									  delete [] m_pLightmapDataPtrArray;

									  m_pLightmapDataPtrArray = NULL;

									}


									// delete old lightmap pages

									if( m_pLightmapPages )

									{

										int i;

										for( i = 0; i < GetNumLightmapPages(); i++ )

										{

											g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[i] );

										}

										delete [] m_pLightmapPages;

										m_pLightmapPages = 0;

									}


									m_NumLightmapPages = 0;

								}


								//-----------------------------------------------------------------------------

								// Resets the lightmap page info for each material

								//-----------------------------------------------------------------------------

								void CMatLightmaps::ResetMaterialLightmapPageInfo( void )

								{

									for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) )

									{

										IMaterialInternal *pMaterial = GetMaterialInternal(i);

										pMaterial->SetMinLightmapPageID( 9999 );

										pMaterial->SetMaxLightmapPageID( -9999 );

										pMaterial->SetNeedsWhiteLightmap( false );

									}

								}


								//-----------------------------------------------------------------------------

								// This is called before any lightmap allocations take place

								//-----------------------------------------------------------------------------

								// Prepares for a fresh round of AllocateLightmap / AllocateWhiteLightmap calls:
								// starts over with a single image packer (sort ID 0, maximum page size), clears the
								// current-material and sort-ID bookkeeping, and resets per-material page info and
								// enumeration IDs.
								void CMatLightmaps::BeginLightmapAllocation()
								{
									// Only PC cleans up here; on console the lightmaps are cleaned up right
									// before the next map is loaded.
									if ( IsPC() )
									{
										CleanupLightmaps();
									}

									// Start over with exactly one packer for the first page.
									m_ImagePackers.RemoveAll();
									const int nFirstPacker = m_ImagePackers.AddToTail();
									m_ImagePackers[nFirstPacker].Reset( 0, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );

									// No material has been seen yet.
									SetCurrentMaterialInternal(0);
									m_currentWhiteLightmapMaterial = 0;
									m_numSortIDs = 0;

									// Give each material's min/max lightmap page IDs a default value that
									// basically means "hasn't been used yet".
									ResetMaterialLightmapPageInfo();

									EnumerateMaterials();
								}


								//-----------------------------------------------------------------------------

								// Allocates space in the lightmaps; must be called after BeginLightmapAllocation

								//-----------------------------------------------------------------------------

								// Reserves a width x height rectangle in a lightmap page for a surface using iMaterial.
								//
								//   width, height           - size of the block to reserve, in texels
								//   offsetIntoLightmapPage  - [out] x ([0]) and y ([1]) of the block inside its page
								//   iMaterial               - material of the surface; its real-time version is used
								//
								// Returns the sort ID of the image packer the block ended up in. If iMaterial is
								// NULL a warning is printed and the current m_numSortIDs is returned without
								// allocating anything. If the block does not fit even in an empty page, Error() is called.
								int CMatLightmaps::AllocateLightmap( int width, int height,

										                               int offsetIntoLightmapPage[2],

																	   IMaterial *iMaterial )

								{

									IMaterialInternal *pMaterial = static_cast<IMaterialInternal *>( iMaterial );

									if ( !pMaterial )

									{

										Warning( "Programming error: CMatRenderContext::AllocateLightmap: NULL material\n" );

										return m_numSortIDs;

									}

									pMaterial = pMaterial->GetRealTimeVersion(); //always work with the real time versions of materials internally


									// material change

									int i;

									int nPackCount = m_ImagePackers.Count();

									if ( GetCurrentMaterialInternal() != pMaterial )

									{

										// If this happens, then we need to close out all image packers other than

										// the last one so as to produce as few sort IDs as possible

										// (i starts at nPackCount - 1 and is pre-decremented before the test, so the
										// first index removed is nPackCount - 2 and the last packer is kept.)

										for ( i = nPackCount - 1; --i >= 0; )

										{

											// NOTE: We *must* use the order preserving one here so the remaining one

											// is the last lightmap

											m_ImagePackers.Remove( i );

											--nPackCount;

										}


										// If it's not the first material, increment the sort id

										if (GetCurrentMaterialInternal())

										{

											m_ImagePackers[0].IncrementSortId( );

											++m_numSortIDs;

										}


										SetCurrentMaterialInternal(pMaterial);


										// This assertion guarantees we don't see the same material twice in this loop.

										// (A material that has not been allocated for yet still has min > max page ID.)

										Assert( pMaterial->GetMinLightmapPageID( ) > pMaterial->GetMaxLightmapPageID() );


										// NOTE: We may not use this lightmap page, but we might

										// we won't know for sure until the next material is passed in.

										// So, for now, we're going to forcibly add the current lightmap

										// page to this material so the sort IDs work out correctly.

										GetCurrentMaterialInternal()->SetMinLightmapPageID( GetNumLightmapPages() );

										GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() );

									}


									// Try to add it to any of the current images...

									// On success i is left at the index of the packer that accepted the block.

									bool bAdded = false;

									for ( i = 0; i < nPackCount; ++i )

									{

										bAdded = m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] );

										if ( bAdded )

											break;

									}


									if ( !bAdded )

									{

										// No open page has room: start a new page with a new sort ID.

										++m_numSortIDs;

										i = m_ImagePackers.AddToTail();

										m_ImagePackers[i].Reset( m_numSortIDs, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );

										++m_NumLightmapPages;

										if ( !m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] ) )

										{

											// Even an empty page cannot hold the block.

											Error( "MaterialSystem_Interface_t::AllocateLightmap: lightmap (%dx%d) too big to fit in page (%dx%d)\n",

												width, height, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() );

										}


										// Add this lightmap to the material...

										GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() );

									}


									// i indexes either the packer that accepted the block or the one just added.

									return m_ImagePackers[i].GetSortId();

								}


								// UNDONE: This needs testing, but it appears as though creating these textures managed

								// results in huge stalls whenever they are locked for modify.

								// That makes sense given the d3d docs, but these have been flagged as managed for quite some time.

								#define DYNAMIC_TEXTURES_NO_BACKING 1


								// Finishes a round of lightmap allocation: finalizes the page and sort-ID counts,
								// allocates the page info array (and, if mat_lightmap_pfms is set, the PFM capture
								// pointer array), then creates a texture (and, if paintmaps are enabled, a paintmap)
								// for every page. The last page is sized to the minimum dimensions its packer
								// reports; all other pages use the maximum page size.
								void CMatLightmaps::EndLightmapAllocation()

								{

									// Count the page that was still being filled when allocation ended,

									// and likewise count the last sort ID that we were on.

									m_NumLightmapPages++;

									m_numSortIDs++;


									m_firstDynamicLightmap = m_NumLightmapPages;

									// UNDONE: Until we start using the separate dynamic lighting textures don't allocate them

									// NOTE: Enable this if we want to stop locking the base lightmaps and instead only lock update

									// these completely dynamic pages

								//	m_NumLightmapPages += COUNT_DYNAMIC_LIGHTMAP_PAGES;

									m_dynamic.Init();


									// Compute the dimensions of the last lightmap

									// NOTE(review): this indexes m_ImagePackers[Count() - 1], so it assumes at least one
									// packer exists (BeginLightmapAllocation adds one) - confirm callers always pair the two.

									int lastLightmapPageWidth, lastLightmapPageHeight;

									int nLastIdx = m_ImagePackers.Count();

									m_ImagePackers[nLastIdx - 1].GetMinimumDimensions( &lastLightmapPageWidth, &lastLightmapPageHeight );

									m_ImagePackers.Purge();


									m_pLightmapPages = new LightmapPageInfo_t[GetNumLightmapPages()];

									Assert( m_pLightmapPages );


								   if ( mat_lightmap_pfms.GetBool())

								   {

								      // This array will be used to write PFM files full of lightmap data

								      m_pLightmapDataPtrArray = new FloatBitMap_t*[GetNumLightmapPages()];

								   }


								   if( GetMaterialSystem()->GetPaintmaps()->IsEnabled() )

								   {

										GetMaterialSystem()->GetPaintmaps()->BeginPaintTextureAllocation( GetNumLightmapPages() );

								   }


									int i;

									m_LightmapPageTextureHandles.EnsureCapacity( GetNumLightmapPages() );

									for ( i = 0; i < GetNumLightmapPages(); i++ )

									{

										// Compute lightmap dimensions

										// Only the last static page (the one just before m_firstDynamicLightmap) is shrunk
										// to the minimum size computed above.

										bool lastStaticLightmap = ( i == (m_firstDynamicLightmap-1));

										m_pLightmapPages[i].m_Width = (unsigned short)(lastStaticLightmap ? lastLightmapPageWidth : GetMaxLightmapPageWidth());

										m_pLightmapPages[i].m_Height = (unsigned short)(lastStaticLightmap ? lastLightmapPageHeight : GetMaxLightmapPageHeight());

										m_pLightmapPages[i].m_Flags = 0;


										// Reads the width/height just stored above, so this must come after them.

										AllocateLightmapTexture( i );


										if ( GetMaterialSystem()->GetPaintmaps()->IsEnabled() )

										{

											GetMaterialSystem()->GetPaintmaps()->AllocatePaintmap( i, GetLightmapWidth(i), GetLightmapHeight(i) );

										}


								        if ( mat_lightmap_pfms.GetBool())

								        {

								           // Initialize the pointers to lightmap data

								           m_pLightmapDataPtrArray[i] = NULL;

								        }

									}


									if( GetMaterialSystem()->GetPaintmaps()->IsEnabled() )

									{

										GetMaterialSystem()->GetPaintmaps()->EndPaintTextureAllocation();

									}

								}


								// Cheat-flagged convar, off by default. AllocateLightmapTexture treats lightmap textures
								// as dynamic only when this is set AND HardwareConfig()->PreferDynamicTextures() is true.
								ConVar mat_dynamiclightmaps( "mat_dynamiclightmaps", "0", FCVAR_CHEAT );


								//-----------------------------------------------------------------------------

								// Allocate lightmap textures

								//-----------------------------------------------------------------------------

								// Creates the shader API texture for lightmap page 'lightmap' and fills it with its
								// initial contents (InitLightmapBits). The page's width/height must already be stored
								// in m_pLightmapPages. Texture creation only happens in STATE_DEFAULT; in
								// STATE_RELEASED (or any unknown state) the function logs and returns without
								// assigning m_LightmapPageTextureHandles[lightmap].
								void CMatLightmaps::AllocateLightmapTexture( int lightmap )

								{

									bool bUseDynamicTextures = HardwareConfig()->PreferDynamicTextures() && mat_dynamiclightmaps.GetBool();


									int flags = 0;

									if ( bUseDynamicTextures || IsPS3() ) // On PS3, we need the dynamic flag as a hint that we're going to update this texture incrementally in the future

									{

										flags |= TEXTURE_CREATE_DYNAMIC;

									}

									else

									{

										flags |= TEXTURE_CREATE_MANAGED;

									}


									// Grow the handle array to cover this page; only the newly added slots are
									// initialized to the invalid handle, existing entries are left untouched.

									int nPreviousTextureHandles = m_LightmapPageTextureHandles.Count();

									m_LightmapPageTextureHandles.EnsureCount( lightmap + 1 );

									for ( int nLightmap = nPreviousTextureHandles; nLightmap <= lightmap; ++nLightmap )

									{

										m_LightmapPageTextureHandles[ nLightmap ] = INVALID_SHADERAPI_TEXTURE_HANDLE;

									}


									char debugName[256];

									Q_snprintf( debugName, sizeof( debugName ), "[lightmap %d]", lightmap );


									// Choose the texel format from the HDR mode (and platform).

									ImageFormat imageFormat;

									switch ( HardwareConfig()->GetHDRType() )

									{

									default:

										Assert( 0 );

										// fall through.

										// (an unknown HDR type is treated like HDR_TYPE_NONE after asserting)


									case HDR_TYPE_NONE:

								#if !defined( _X360 )

										imageFormat = IMAGE_FORMAT_RGBA8888;

										flags |= TEXTURE_CREATE_SRGB;

								#else

										imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888;

								#endif

										break;


									case HDR_TYPE_INTEGER:

								#if !defined( _X360 )

										imageFormat = IMAGE_FORMAT_RGBA16161616;

								#else

								#		if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )

											imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888;

								#		else

											imageFormat = IMAGE_FORMAT_LINEAR_RGBA16161616;

								#		endif

								#endif

										break;


									case HDR_TYPE_FLOAT:

										imageFormat = IMAGE_FORMAT_RGBA16161616F;

										break;

									}


								#ifdef _PS3

									// PS3 needs 16F textures...but the HDR_TYPE_FLOAT codepath has a lot of other baggage with it.  Just lie here.

									// (This overrides whatever the switch above selected; flags set there are kept.)

									imageFormat = IMAGE_FORMAT_RGBA16161616F;


								#endif // _PS3


									switch ( m_eLightmapsState )

									{

									case STATE_DEFAULT:

										// Allow allocations in default state

										{

											int iWidth = GetLightmapWidth(lightmap);

											int iHeight = GetLightmapHeight(lightmap);


											m_LightmapPageTextureHandles[lightmap] = g_pShaderAPI->CreateTexture(

												iWidth, iHeight, 1,

												imageFormat,

												1, 1, flags, debugName, TEXTURE_GROUP_LIGHTMAP );	// don't mipmap lightmaps


											// Load up the texture data

											g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );

											g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR );

											g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR );


											// Priority is only set for the non-dynamic case.

											if ( !bUseDynamicTextures )

											{

												g_pShaderAPI->TexSetPriority( 1 );

											}


											// Blat out the lightmap bits

											InitLightmapBits( lightmap );

										}

										break;


									case STATE_RELEASED:

										// Not assigned m_LightmapPageTextureHandles[lightmap];

										DevMsg( "AllocateLightmapTexture(%d) in released lightmap state (STATE_RELEASED), delayed till \"Restore\".\n", lightmap );

										return;


									default:

										// Not assigned m_LightmapPageTextureHandles[lightmap];

										Warning( "AllocateLightmapTexture(%d) in unknown lightmap state (%d), skipped.\n", lightmap, m_eLightmapsState );

										Assert( !"AllocateLightmapTexture(?) in unknown lightmap state (?)" );

										return;

									}

								}


								// Registers iMaterial as needing the white lightmap and returns the sort ID to use
								// for it. A new sort ID is started whenever the white-lightmap material changes,
								// except for the very first material seen (nothing allocated with AllocateLightmap
								// and no previous white-lightmap material). A NULL material prints a warning and
								// returns the current sort ID count.
								int	CMatLightmaps::AllocateWhiteLightmap( IMaterial *iMaterial )
								{
									IMaterialInternal *pWhiteMaterial = static_cast<IMaterialInternal *>( iMaterial );
									if( !pWhiteMaterial )
									{
										Warning( "Programming error: CMatRenderContext::AllocateWhiteLightmap: NULL material\n" );
										return m_numSortIDs;
									}

									// Always work with the real time versions of materials internally.
									pWhiteMaterial = pWhiteMaterial->GetRealTimeVersion();

									// Same material as last time: nothing changes.
									const bool bSameAsLast = m_currentWhiteLightmapMaterial && ( m_currentWhiteLightmapMaterial == pWhiteMaterial );
									if ( bSameAsLast )
									{
										return m_numSortIDs;
									}

									// Material change. Don't bump the sort ID for the very first material
									// (ie. no lightmaps allocated with AllocateLightmap and no white material yet).
									if ( GetCurrentMaterialInternal() || m_currentWhiteLightmapMaterial )
									{
										m_numSortIDs++;
								#if 0
										char buf[128];
										Q_snprintf( buf, sizeof( buf ), "AllocateWhiteLightmap: m_numSortIDs = %d %s\n", m_numSortIDs, pWhiteMaterial->GetName() );
										OutputDebugString( buf );
								#endif
									}

									m_currentWhiteLightmapMaterial = pWhiteMaterial;
									pWhiteMaterial->SetNeedsWhiteLightmap( true );

									return m_numSortIDs;
								}


								//-----------------------------------------------------------------------------

								// Releases/restores lightmap pages

								//-----------------------------------------------------------------------------

								// Deletes the texture of every lightmap page, releases the paintmaps and switches to
								// STATE_RELEASED. Only valid in STATE_DEFAULT; in any other state the call is
								// reported (warning + assert) and discarded.
								void CMatLightmaps::ReleaseLightmapPages()
								{
									if ( m_eLightmapsState != STATE_DEFAULT )
									{
										Warning( "ReleaseLightmapPages is expected in STATE_DEFAULT, current state = %d, discarded.\n", m_eLightmapsState );
										Assert( !"ReleaseLightmapPages is expected in STATE_DEFAULT" );
										return;
									}

									for ( int nPage = 0; nPage < GetNumLightmapPages(); ++nPage )
									{
										g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[nPage] );
									}

									GetMaterialSystem()->GetPaintmaps()->ReleasePaintmaps();

									// From here on allocations are deferred until RestoreLightmapPages.
									m_eLightmapsState = STATE_RELEASED;
								}


								// Counterpart of ReleaseLightmapPages: switches back to STATE_DEFAULT, restores the
								// paintmaps (if enabled) and re-creates the texture of every lightmap page. Only valid
								// in STATE_RELEASED; in any other state the call is reported (warning + assert) and
								// discarded.
								void CMatLightmaps::RestoreLightmapPages()
								{
									if ( m_eLightmapsState != STATE_RELEASED )
									{
										Warning( "RestoreLightmapPages is expected in STATE_RELEASED, current state = %d, discarded.\n", m_eLightmapsState );
										Assert( !"RestoreLightmapPages is expected in STATE_RELEASED" );
										return;
									}

									// AllocateLightmapTexture refuses to create textures unless we are back in
									// the default state, so flip the state first.
									m_eLightmapsState = STATE_DEFAULT;

									if ( GetMaterialSystem()->GetPaintmaps()->IsEnabled() )
									{
										GetMaterialSystem()->GetPaintmaps()->RestorePaintmaps( GetNumLightmapPages() );
									}

									for ( int nPage = 0; nPage < GetNumLightmapPages(); ++nPage )
									{
										AllocateLightmapTexture( nPage );
									}
								}


								//-----------------------------------------------------------------------------

								// This initializes the lightmap bits

								//-----------------------------------------------------------------------------

								// Fills the whole texture of lightmap page 'lightmap' with its initial contents.
								// Locks the full page, writes every texel row by row, then unlocks. If the lock
								// fails the function returns without writing anything.
								//   - Release builds: a constant color (see the per-format branches below).
								//   - _DEBUG builds: a green/black checkerboard, so unwritten texels are easy to spot.
								void CMatLightmaps::InitLightmapBits( int lightmap )

								{

									VPROF_( "CMatLightmaps::InitLightmapBits", 1, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );

									int width = GetLightmapWidth(lightmap);

									int height = GetLightmapHeight(lightmap);


									CPixelWriter writer;


									g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );

									if ( !g_pShaderAPI->TexLock( 0, 0, 0, 0, width, height, writer ) )

										return;


									// Debug mode, make em green checkerboard

									// (the checkerboard is only produced in the _DEBUG branches below)

									if ( writer.IsUsingFloatFormat() )

									{

										for ( int j = 0; j < height; ++j )

										{

											writer.Seek( 0, j );

											for ( int k = 0; k < width; ++k )

											{

								#ifndef _DEBUG

												// NOTE(review): this calls WritePixel with float literals while the _DEBUG
												// branch uses WritePixelF; presumably WritePixelF is the float-format
												// writer - confirm against CPixelWriter whether this is intended.

												writer.WritePixel( 1.0f, 1.0f, 1.0f );

								#else // _DEBUG

												// Texels where (row + column) is odd are green, the rest black.

												if( ( j + k ) & 1 )

												{

													writer.WritePixelF( 0.0f, 1.0f, 0.0f );

												}

												else

												{

													writer.WritePixelF( 0.0f, 0.0f, 0.0f );

												}

								#endif // _DEBUG

											}

										}

									}

									else

									{

								#if defined( _X360 ) && defined( _DEBUG )

										// Green value used for the debug checkerboard on 360.

										float vGreenData[4] =  { 0.0f, 2.0f, 0.0f, 0.0f };

										fltx4 vGreen = LoadUnalignedSIMD( vGreenData );

								#endif

										for ( int j = 0; j < height; ++j )

										{

											writer.Seek( 0, j );

											for ( int k = 0; k < width; ++k )

											{

								#ifndef _DEBUG

												// note: make this white to find multisample centroid sampling problems.

												//				writer.WritePixel( 255, 255, 255 );

												// Release builds clear integer-format pages to zero (black).

												#ifdef _X360

												{

													writer.WritePixel( Four_Zeros );

												}

												#else

												{

													writer.WritePixel( 0, 0, 0 );

												}

												#endif

								#else // _DEBUG

												// Debug builds: green where (row + column) is odd, black elsewhere.

												#ifdef _X360

												{

													if ( ( j + k ) & 1 )

													{

														writer.WritePixel( vGreen );

													}

													else

													{

														writer.WritePixel( Four_Zeros );

													}

												}

												#else

												{

													if ( ( j + k ) & 1 )

													{

														writer.WritePixel( 0, 255, 0 );

													}

													else

													{

														writer.WritePixel( 0, 0, 0 );

													}

												}

												#endif // _X360

								#endif // _DEBUG

											}

										}

									}


									g_pShaderAPI->TexUnlock();

								}


								// Locks the whole of lightmap page 'lightmap' for writing through
								// m_LightmapPixelWriter. If another page is currently locked it is unlocked first.
								//
								// Returns true and records the page in m_nLockedLightmap on success. On failure
								// asserts, returns false and leaves m_nLockedLightmap at -1 (nothing locked).
								bool CMatLightmaps::LockLightmap( int lightmap )
								{
								//	Warning( "locking lightmap page: %d\n", lightmap );
									VPROF_INCREMENT_COUNTER( "lightmap fullpage texlock", 1 );

									if ( m_nLockedLightmap != -1 )
									{
										g_pShaderAPI->TexUnlock();

										// Nothing is locked any more. Record that right away so a failed
										// TexLock below cannot leave a stale page index behind, which would
										// make later code believe a lock is still held and unlock it again.
										m_nLockedLightmap = -1;
									}

									g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] );

									const int pageWidth  = m_pLightmapPages[lightmap].m_Width;
									const int pageHeight = m_pLightmapPages[lightmap].m_Height;
									if ( !g_pShaderAPI->TexLock( 0, 0, 0, 0, pageWidth, pageHeight, m_LightmapPixelWriter ) )
									{
										Assert( 0 );
										return false;
									}

									m_nLockedLightmap = lightmap;
									return true;
								}


								// Encodes a linear lightmap color (three floats, read from lightmapColor[0..2]) as
								// RGB plus a shared scale:
								//   - the scale is the brightest channel rounded up to a multiple of 16/255 and
								//     clamped to 16,
								//   - x/y/z are the channels divided by that scale, rounded up to a multiple of
								//     1/255 and clamped to 1,
								//   - w is the scale divided by 16.
								// NOTE(review): if the brightest channel is 0 the reciprocal below is a division by
								// zero; the result then depends on float NaN/inf behavior - confirm this is intended.
								Vector4D ConvertLightmapColorToRGBScale( const float *lightmapColor )
								{
									// Brightest of the three color channels.
									float fBrightest = lightmapColor[0];
									if( lightmapColor[1] > fBrightest )
										fBrightest = lightmapColor[1];
									if( lightmapColor[2] > fBrightest )
										fBrightest = lightmapColor[2];

									// Quantize upwards to steps of 16/255, then clamp.
									fBrightest = ceil( fBrightest * (255.0f/16.0f) ) * (16.0f/255.0f);
									fBrightest = MIN( fBrightest, 16.0f );

									float fRcpScale = 1.0f / fBrightest;

									Vector4D encoded;
									for( int nChannel = 0; nChannel < 3; nChannel++ )
									{
										encoded[nChannel] = lightmapColor[nChannel] * fRcpScale;
										encoded[nChannel] = ceil( encoded[nChannel] * 255.0f ) * (1.0f/255.0f);
										encoded[nChannel] = MIN( encoded[nChannel], 1.0f );
									}

									// The scale is stored in w, normalized by its maximum of 16.
									fBrightest /= 16.0f;
									encoded.w = fBrightest;

									return encoded;
								}


								#ifdef _X360

								// SIMD version of above

								// input numbers from pSrc are on the domain [0..16]

								// output is RGBA

								// ignores contents of w channel of input

								// the shader does this: rOut = Rin * Ain * 16.0f

								// where Rin is [0..1], a float computed from a byte value [0..255]

								// Ain is therefore the brightest channel (say R) divided by 16 and quantized

								// Rin is computed from pSrc->r by dividing by Ain

								// this outputs RGBa where RGB are [0..255] and a is the shader's scaling factor (also 0..255)

								//

								// WARNING - this code appears to be vulnerable to a compiler bug. Be very careful modifying and be

								// sure to test

								// X360 SIMD encoder: takes a lightmap color in x/y/z (w ignored) and returns the
								// color scaled to byte range in x/y/z with a scale factor in w. Two candidate results
								// are built and one is selected per the fl4OutofRange mask computed below.
								fltx4 ConvertLightmapColorToRGBScale( FLTX4 lightmapColor )

								{


									static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};

									// Despite the name, this holds 0.1 in every lane.

									static const fltx4 FourPoint1s = { 0.1, 0.1, 0.1, 0.1 };

									static const fltx4 vTwoFiftyFiveOverSixteen = {255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f};

									// static const fltx4 vSixteenOverTwoFiftyFive = { 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f };


									// find the highest color value in lightmapColor and replicate it

									fltx4 scale = FindHighestSIMD3( lightmapColor );

									fltx4 minscale = FindLowestSIMD3( lightmapColor );

									// Mask is set when the brightest channel is >= 1 or <= 0.1, or when the
									// darkest channel is more than half the brightest.

									fltx4 fl4OutofRange = OrSIMD( CmpGeSIMD( scale, Four_Ones ), CmpLeSIMD( scale, FourPoint1s ) );

									fl4OutofRange = OrSIMD( fl4OutofRange, CmpGtSIMD( minscale, MulSIMD( Four_PointFives, scale ) ) );


									// scale needs to be divided by 16 (because the shader multiplies it by 16)

									// then mapped to 0..255 and quantized.

									scale = __vrfip(MulSIMD(scale, vTwoFiftyFiveOverSixteen)); // scale = ceil(scale * 255/16)


									fltx4 result = MulSIMD(vTwoFiftyFive, lightmapColor); // start the scale cooking on the final result


									fltx4 invScale = ReciprocalEstSIMD(scale); // invScale = (16/255)(1/scale). may be +inf

									invScale = MulSIMD(invScale, vTwoFiftyFiveOverSixteen); // take the quantizing factor back out

																							// of the inverse scale (one less

																							// dependent op if you do it this way)


									// scale the input channels

									// compute so the numbers are all 0..255 ints. (if one happens to

									// be 256 due to numerical error in the reciprocation, the unsigned-saturate

									// store we'll use later on will bake it back down to 255)

									result = MulSIMD(result, invScale);


									// now, output --

									// Two candidates: the rescaled color with the quantized scale in w, and the

									// unscaled color * 255 with a fixed 255/16 in w.

									// NOTE(review): the choice is keyed on fl4OutofRange (not on a zero test);
									// presumably MaskedAssign returns its second argument where the mask is set and
									// its third elsewhere - confirm against the SIMD math header.


									result = MaskedAssign(

										fl4OutofRange,

										SetWSIMD( result, scale ),

										SetWSIMD( MulSIMD( lightmapColor, vTwoFiftyFive ), vTwoFiftyFiveOverSixteen ) );

									return result;

								}

								#endif


								// Write a bumped lightmap update to the LDR 8-bit lightmap.
								//
								// For every texel, the base image and the three bump images are run through
								// ColorSpace::LinearToBumpedLightmap to produce four 8-bit colors, which are
								// written through m_LightmapPixelWriter. The four outputs for one texel are
								// placed nLightmapSize0 pixels apart in the writer (base, bump1, bump2, bump3).
								//
								// pFloatImage, pFloatImageBump1..3 - source texels, four floats per texel,
								//                                    pLightmapSize[0] texels per row
								// pLightmapSize                    - [0] = texels per row, [1] = number of rows
								// pOffsetIntoLightmapPage          - x/y position passed to the writer's Seek()
								// pfmOut                           - optional; when non-NULL the base color and
								//                                    alpha of each texel are also written to it
								void CMatLightmaps::BumpedLightmapBitsToPixelWriter_LDR( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2,
									float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
								{
									const int nLightmapSize0 = pLightmapSize[0];
									const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
									// After the fourth write the writer sits three bump pages past the base
									// texel; this (negative) skip moves it to the next base texel.
									const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );

									// Loop-invariant: the blending mode is treated as fixed for the duration of
									// one update, so query it once instead of once per texel.
									const bool bCSMAccurateBlending = HardwareConfig()->GetCSMAccurateBlending();

									for( int t = 0; t < pLightmapSize[1]; t++ )
									{
										int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
										m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

										for( int s = 0; s < nLightmapSize0;
											s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
										{
											// color[0] = base map, color[1..3] = bump maps 1..3; each is RGBA bytes
											unsigned char color[4][4];

											ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
												&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
												&pFloatImageBump3[srcTexelOffset],
												color[0], color[1], color[2], color[3] );

											if ( bCSMAccurateBlending )
											{
												// each of the four outputs gets its own alpha, derived from all four source alphas
												ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3],
																						 &pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3], &pFloatImageBump3[srcTexelOffset + 3],
																						 &color[0][3], &color[1][3], &color[2][3], &color[3][3] );
											}
											else
											{
												// all four outputs share the base image's alpha
												unsigned char alpha =  RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f );
												color[0][3] = alpha;
												color[1][3] = alpha;
												color[2][3] = alpha;
												color[3][3] = alpha;
											}

											m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], color[0][3] );

											m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
											m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], color[1][3] );

											m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
											m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], color[2][3] );

											m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
											m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], color[3][3] );
										}
									}

									// Optional second pass: mirror the base-map color into the float bitmap.
									// Alpha here is always the base image's alpha, whatever the blending mode.
									if ( pfmOut )
									{
										for( int t = 0; t < pLightmapSize[1]; t++ )
										{
											int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
											for( int s = 0;  s < nLightmapSize0; s++,srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
											{
												unsigned char color[4][4];

												ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
													&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
													&pFloatImageBump3[srcTexelOffset],
													color[0], color[1], color[2], color[3] );

												unsigned char alpha =  RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f );
												// Write data to the bitmapped representations so that PFM files can be written
												PixRGBAF pixelData;
												pixelData.Red = color[0][0];
												pixelData.Green = color[0][1];
												pixelData.Blue = color[0][2];
												pixelData.Alpha = alpha;
												pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData);
											}
										}
									}
								}


								// Write a bumped lightmap update to the HDR float lightmap.
								//
								// For every texel, the base image and the three bump images are run through
								// ColorSpace::LinearToBumpedLightmap to produce four float colors, which are
								// written through m_LightmapPixelWriter with WritePixelNoAdvanceF. The four
								// outputs for one texel are placed nLightmapSize0 pixels apart in the writer
								// (base, bump1, bump2, bump3).
								//
								// pFloatImage, pFloatImageBump1..3 - source texels, four floats per texel,
								//                                    pLightmapSize[0] texels per row
								// pLightmapSize                    - [0] = texels per row, [1] = number of rows
								// pOffsetIntoLightmapPage          - x/y position passed to the writer's Seek()
								// pfmOut                           - must be NULL; PFM output is unsupported here
								void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRF( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2,
																				 float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
								{
									if ( IsX360() )
									{
										// 360 does not support HDR float mode
										Assert( 0 );
										return;
									}

									Assert( !pfmOut );		// unsupported in this mode

									const int nLightmapSize0 = pLightmapSize[0];
									const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
									// After the fourth write the writer sits three bump pages past the base
									// texel; this (negative) skip moves it to the next base texel.
									const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );

									// Loop-invariant: the blending mode is treated as fixed for the duration of
									// one update, so query it once instead of once per texel.
									const bool bCSMAccurateBlending = HardwareConfig()->GetCSMAccurateBlending();

									for( int t = 0; t < pLightmapSize[1]; t++ )
									{
										int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
										m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

										// There used to be a second, console-only path here for 4 x float16
										// writers that went through float16::ConvertFourFloatsTo16BitsAtOnce.
										// That helper is entirely broken, so the path was disabled and every
										// configuration now takes the generic path below.
										for( int s = 0;
											s < nLightmapSize0;
											s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
										{
											// color[0] = base map, color[1..3] = bump maps 1..3; each is RGBA floats
											float color[4][4];

											// [mariod] - LinearToBumpedLightmap() was entirely missing in the float path as of September '11
											// looks like this only affected PS3 (PC/X360 use linear 16bit tex formats)
											ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
												&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
												&pFloatImageBump3[srcTexelOffset],
												color[0], color[1], color[2], color[3] );

											if ( bCSMAccurateBlending )
											{
												// each of the four outputs gets its own alpha, derived from all four source alphas
												ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3],
																						 &pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3], &pFloatImageBump3[srcTexelOffset + 3],
																						 &color[0][3], &color[1][3], &color[2][3], &color[3][3] );
											}
											else
											{
												// all four outputs share the base image's alpha, unconverted
												float alpha = pFloatImage[srcTexelOffset+3];
												color[0][3] = alpha;
												color[1][3] = alpha;
												color[2][3] = alpha;
												color[3][3] = alpha;
											}

											m_LightmapPixelWriter.WritePixelNoAdvanceF( color[0][0], color[0][1], color[0][2], color[0][3] );

											m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
											m_LightmapPixelWriter.WritePixelNoAdvanceF( color[1][0], color[1][1], color[1][2], color[1][3] );

											m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
											m_LightmapPixelWriter.WritePixelNoAdvanceF( color[2][0], color[2][1], color[2][2], color[2][3] );

											m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
											m_LightmapPixelWriter.WritePixelNoAdvanceF( color[3][0], color[3][1], color[3][2], color[3][3] );
										}
									}
								}


								#ifdef _X360

								#pragma optimize("u", on)

								#endif


								#ifdef _X360


								namespace {

									// Pack one pixel into BGRA8888 and return a vector that carries the packed

									// 32-bit value in its w word.

									// rgba is expected to hold channel values already scaled to 0..255; values

									// above 255 are clamped down. No lower clamp is applied here.

								FORCEINLINE fltx4 PackPixel_BGRA8888( FLTX4 rgba )

								{

									// this happens to be in an order such that we can use the handy builtin packing op

									// upper clamp value: limits each channel to 255 (coz it might have leaked over)

									static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};


									// the magic number such that when mul-accumulated against rgba,

									// gets us a representation 3.0 + (r)*2^-22 -- puts the bits at

									// the bottom of the float

									static const XMVECTOR   PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 1 / 2^22 per lane (the old "255.0f / (1 << 22)" note no longer matches: no 255 factor is applied here)

									static const XMVECTOR   Three = {3.0f, 3.0f, 3.0f, 3.0f};


									// clamp each channel to at most 255

									fltx4 N = MinSIMD(vTwoFiftyFive, rgba);


									// N = N * 2^-22 + 3.0

									N = __vmaddfp(N, PackScale, Three);

									N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 0);  // pack into w word

									return N;

								}


								// A small store-gather buffer used in
								// BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360().
								//
								// Four rows of packed 32-bit pixels (one row per output map: base, bump1,
								// bump2, bump3) are accumulated in m_data and then copied to the pixel
								// writer's destination with memcpy.
								//
								// Hopefully the buffers will live in the L1 cache, which will make writing
								// to them, then to memory, faster than just using __stvewx to write directly
								// into WC memory one noncontiguous float at a time. (If there weren't a huge
								// compiler bug with __stvewx in the Apr07 XDK, that might not be the case.)
								struct ALIGN128 CPixelWriterStoreGather
								{
									enum {
										kRows = 4,			// one row per output map
										kWordsPerRow = 32,	// capacity of each row, in 32-bit words
									};

									ALIGN128 uint32 m_data[kRows][kWordsPerRow]; // four rows of bgra data, aligned to 4 cache lines. dwords so memcpy works better.
									int m_wordsGathered;			// words currently buffered in each row
									int m_bytesBetweenWriterRows;	// the number of bytes spacing the maps inside the writer from each other
																	// if we weren't gathering, we'd SkipBytes this many between the base map, bump1, etc.

									// Write four rows, as SIMD registers, into the buffers: four words from
									// each register are appended to the matching row. If the buffer is
									// already full it is committed to pLightmapPixelWriter first.
									// Must not be called after writeJustW(): the vector store needs
									// m_wordsGathered to be a multiple of four.
									inline void write( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0,  FLTX4 row1,  FLTX4 row2,  FLTX4 row3 ) RESTRICT
									{
										// if full, commit
										Assert(m_wordsGathered <= kWordsPerRow);
										AssertMsg((m_wordsGathered & 3) == 0, "Don't call CPixelWriterStoreGather::write after ::writeJustW"); // single-word writes leave the buffer misaligned for the aligned vector stores below
										if (m_wordsGathered >= kWordsPerRow)
										{
											commitWhenFull(pLightmapPixelWriter);
										}

										XMStoreVector4A( &m_data[0][m_wordsGathered], row0 );
										XMStoreVector4A( &m_data[1][m_wordsGathered], row1 );
										XMStoreVector4A( &m_data[2][m_wordsGathered], row2 );
										XMStoreVector4A( &m_data[3][m_wordsGathered], row3 );

										m_wordsGathered += 4 ; // four words per simd vec
									}

									// Pluck the w component out of each of the rows, and store it into the
									// gather buffer (one word per row). If the buffer is already full it is
									// committed first. Don't call write() after calling this.
									inline void writeJustW( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0,  FLTX4 row1,  FLTX4 row2,  FLTX4 row3 ) RESTRICT
									{
										// if full, commit
										Assert(m_wordsGathered <= kWordsPerRow);
										if (m_wordsGathered >= kWordsPerRow)
										{
											commitWhenFull(pLightmapPixelWriter);
										}

										// for each fltx4, splat out w and then use the __stvewx to store
										// whichever word happens to align with the float pointer through
										// that pointer.
										__stvewx(__vspltw(row0, 3), &m_data[0][m_wordsGathered], 0 );
										__stvewx(__vspltw(row1, 3), &m_data[1][m_wordsGathered], 0 );
										__stvewx(__vspltw(row2, 3), &m_data[2][m_wordsGathered], 0 );
										__stvewx(__vspltw(row3, 3), &m_data[3][m_wordsGathered], 0 );

										m_wordsGathered += 1 ; // only stored one word
									}

									// Commit my buffers to the pixelwriter's memory, and advance its
									// pointer. Row i is copied to the writer's current pixel plus
									// i * m_bytesBetweenWriterRows; the writer is then advanced by the size
									// of one row's worth of gathered data. Does nothing when empty.
									void commit(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT
									{
										if (m_wordsGathered > 0)
										{
											unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel();
											// we have to use memcpy because we're writing to non-cacheable memory,
											// but we can't even assume that the addresses we're writing to are
											// vector-aligned.
								#ifdef memcpy // if someone's overridden the intrinsic, complain
								#pragma message("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
								#endif

											memcpy(pWriteInto, m_data[0], m_wordsGathered * sizeof(uint32));
											pWriteInto += m_bytesBetweenWriterRows;
											memcpy(pWriteInto, m_data[1], m_wordsGathered * sizeof(uint32));
											pWriteInto += m_bytesBetweenWriterRows;
											memcpy(pWriteInto, m_data[2], m_wordsGathered * sizeof(uint32));
											pWriteInto += m_bytesBetweenWriterRows;
											memcpy(pWriteInto, m_data[3], m_wordsGathered * sizeof(uint32));

											pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32));
											m_wordsGathered = 0;
										}
									}

									// like commit, but the version we use when we know we're full.
									// Takes advantage of better compile-time generation for
									// memcpy.
									void commitWhenFull(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT
									{
										unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel();
										// we have to use memcpy because we're writing to non-cacheable memory,
										// but we can't even assume that the addresses we're writing to are
										// vector-aligned.
								#ifdef memcpy // if someone's overridden the intrinsic, complain
								#pragma message("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
								#endif

										// if we're full, use compile-time known version of
										// memcpy to take advantage of its ability to generate
										// inline code. In fact, use the dword-aligned
										// version so that we use the 64-bit writing funcs.
										Assert( m_wordsGathered == kWordsPerRow );
										COMPILE_TIME_ASSERT((kWordsPerRow & 3) == 0); // the number of words per row has to be a multiple of four

										memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[0]), kWordsPerRow * sizeof(uint32));
										pWriteInto += m_bytesBetweenWriterRows;
										memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[1]), kWordsPerRow * sizeof(uint32));
										pWriteInto += m_bytesBetweenWriterRows;
										memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[2]), kWordsPerRow * sizeof(uint32));
										pWriteInto += m_bytesBetweenWriterRows;
										memcpy(pWriteInto, reinterpret_cast<uint64* RESTRICT>(m_data[3]), kWordsPerRow * sizeof(uint32));

										pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32));
										m_wordsGathered = 0;
									}

									// parameter: space between bump pages in the pixelwriter
									// (m_data is left uninitialized; only the first m_wordsGathered words of
									// each row are ever copied out)
									CPixelWriterStoreGather(int writerSizeBytes) : m_wordsGathered(0), m_bytesBetweenWriterRows(writerSizeBytes) {}

								};

								}


								// this is a function for specifically writing bumped BGRA lightmaps -- in order for it

								// to be properly scheduled, I needed to break out the inline functions. Also,

								// to make the write-combined memory more efficient (and work around a bug in the

								// April 2007 XDK), we need to store-gather our writes on the cache before blasting

								// them out to write-combined memory. We can't simply write from the SIMD registers

								// into the pixelwriter's data, because the difference between the output rows,

								// e.g. nLightmap0WriterSizeBytes, might not be a multiple of 16. Unaligned stores

								// to non-cacheable memory cause an alignment exception.

								static void BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2,

																					  float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut,

																					  CPixelWriter * RESTRICT m_LightmapPixelWriter)

								{

									AssertMsg(m_LightmapPixelWriter->GetPixelSize() == 4, "BGRA format is no longer four bytes long? This is unsupported on 360, and probably immoral as well.");

									const int nLightmap0WriterSizeBytes = pLightmapSize[0] * 4 /*m_LightmapPixelWriter->GetPixelSize()*/;

									// const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - 4 );


									// assert that 1 * 4 = 4

									COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);


									AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");


									// The store-gather buffers. Hopefully these will live in the L1

									// cache, which will make writing to them, then to memory, faster

									// than just using __stvewx to write directly into WC memory

									// one noncontiguous float at a time. (If there weren't a huge

									// compiler bug with __stvewx in the Apr07 XDK, that might not

									// be the case.)

									CPixelWriterStoreGather storeGather(nLightmap0WriterSizeBytes);


									for( int t = 0; t < pLightmapSize[1]; t++ )

									{

								#define	FOUR (sizeof( Vector4D ) / sizeof( float ))  //  make explicit when we're incrementing by length of a 4dvec

										int srcTexelOffset = ( FOUR ) * ( 0 + t * pLightmapSize[0] );

										m_LightmapPixelWriter->Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );


										// Our code works best when we can process luxels in groups of four. So,

										// figure out how many four-luxel groups we can process,

										// then do them in groups, then process the remainder.

										unsigned int groupsOfFourLimit = (((unsigned int)pLightmapSize[0]) & ~3);


										// we want to hang on to this index when we're done with groups so we can do the remainder.

										unsigned int s; // counts the number of luxels processed

										for( s = 0;

											s < groupsOfFourLimit;

											s += 4, srcTexelOffset += 4 * ( FOUR ))

										{

											static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};

											// the store-gather simds

											fltx4 outBaseMap = Four_Zeros, outBump1 = Four_Zeros, outBump2 = Four_Zeros, outBump3 = Four_Zeros;

											// we'll read four at a time

											fltx4 vFloatImage[4], vFloatImageBump1[4], vFloatImageBump2[4], vFloatImageBump3[4];


											// stripe these loads to cause less ERAT thrashing

											vFloatImage[0]	  = LoadUnalignedSIMD(pFloatImage	   + srcTexelOffset );

											vFloatImage[1]	  = LoadUnalignedSIMD(pFloatImage	   + srcTexelOffset + 4 );

											vFloatImage[2]	  = LoadUnalignedSIMD(pFloatImage	   + srcTexelOffset + 8 );

											vFloatImage[3]	  = LoadUnalignedSIMD(pFloatImage	   + srcTexelOffset + 12 );


											vFloatImageBump1[0] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset );

											vFloatImageBump1[1] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 4 );

											vFloatImageBump1[2] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 8 );

											vFloatImageBump1[3] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 12 );


											vFloatImageBump2[0] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset );

											vFloatImageBump2[1] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 4 );

											vFloatImageBump2[2] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 8 );

											vFloatImageBump2[3] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 12 );


											vFloatImageBump3[0] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset );

											vFloatImageBump3[1] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 4 );

											vFloatImageBump3[2] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 8 );

											vFloatImageBump3[3] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 12 );


											// perform an arcane averaging operation upon the bump map values

											// (todo: make this not an inline so it will schedule better -- inlining is

											//  done by the linker, which is too late for operation scheduling)

											ColorSpace::LinearToBumpedLightmap( vFloatImage[0],	vFloatImageBump1[0],

																				vFloatImageBump2[0], vFloatImageBump3[0],

																				// transform "in place":

																				vFloatImage[0], vFloatImageBump1[0],

																				vFloatImageBump2[0], vFloatImageBump3[0] );

											ColorSpace::LinearToBumpedLightmap( vFloatImage[1],	vFloatImageBump1[1],

																				vFloatImageBump2[1], vFloatImageBump3[1],

																				// transform "in place":

																				vFloatImage[1], vFloatImageBump1[1],

																				vFloatImageBump2[1], vFloatImageBump3[1] );

											ColorSpace::LinearToBumpedLightmap( vFloatImage[2],	vFloatImageBump1[2],

																				vFloatImageBump2[2], vFloatImageBump3[2],

																				// transform "in place":

																				vFloatImage[2], vFloatImageBump1[2],

																				vFloatImageBump2[2], vFloatImageBump3[2] );

											ColorSpace::LinearToBumpedLightmap( vFloatImage[3],	vFloatImageBump1[3],

																				vFloatImageBump2[3], vFloatImageBump3[3],

																				// transform "in place":

																				vFloatImage[3], vFloatImageBump1[3],

																				vFloatImageBump2[3], vFloatImageBump3[3] );


											// convert each color to RGB scaled.

											// DO NOT! make this into a for loop. The (April07 XDK) compiler

											// in fact DOES NOT unroll them, and will perform very naive

											// scheduling if you try.


											// clamp to 0..16 float

											vFloatImage[0]		= MinSIMD(vFloatImage[0], vSixteen);

											vFloatImageBump1[0] = MinSIMD(vFloatImageBump1[0], vSixteen);

											vFloatImageBump2[0] = MinSIMD(vFloatImageBump2[0], vSixteen);

											vFloatImageBump3[0] = MinSIMD(vFloatImageBump3[0], vSixteen);


											vFloatImage[1]		= MinSIMD(vFloatImage[1], vSixteen);

											vFloatImageBump1[1] = MinSIMD(vFloatImageBump1[1], vSixteen);

											vFloatImageBump2[1] = MinSIMD(vFloatImageBump2[1], vSixteen);

											vFloatImageBump3[1] = MinSIMD(vFloatImageBump3[1], vSixteen);


											vFloatImage[2]		= MinSIMD(vFloatImage[2], vSixteen);

											vFloatImageBump1[2] = MinSIMD(vFloatImageBump1[2], vSixteen);

											vFloatImageBump2[2] = MinSIMD(vFloatImageBump2[2], vSixteen);

											vFloatImageBump3[2] = MinSIMD(vFloatImageBump3[2], vSixteen);


											vFloatImage[3]		= MinSIMD(vFloatImage[3], vSixteen);

											vFloatImageBump1[3] = MinSIMD(vFloatImageBump1[3], vSixteen);

											vFloatImageBump2[3] = MinSIMD(vFloatImageBump2[3], vSixteen);

											vFloatImageBump3[3] = MinSIMD(vFloatImageBump3[3], vSixteen);


											// compute the scaling factor, place it in w, and

											// scale the rest by it. Obliterates whatever was

											// already in alpha.

											// This code is why it is important to not use a for

											// loop: you need to let the compiler keep the value

											// on registers (which it can't do if you use a

											// variable indexed array) and interleave the

											// inlined instructions.


											vFloatImage[0]		= PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[0]) );

											vFloatImageBump1[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[0]) );

											vFloatImageBump2[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[0]) );

											vFloatImageBump3[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[0]) );


											vFloatImage[1]		= PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[1]) );

											vFloatImageBump1[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[1]) );

											vFloatImageBump2[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[1]) );

											vFloatImageBump3[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[1]) );


											vFloatImage[2]		= PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[2]) );

											vFloatImageBump1[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[2]) );

											vFloatImageBump2[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[2]) );

											vFloatImageBump3[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[2]) );


											vFloatImage[3]		= PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[3]) );

											vFloatImageBump1[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[3]) );

											vFloatImageBump2[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[3]) );

											vFloatImageBump3[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[3]) );


											// Each of the registers above contains one RGBA 32-bit struct

											// in their w word. So, combine them such that each of the assignees

											// below contains four RGBAs, in xyzw order (big-endian).


											outBaseMap = __vrlimi(outBaseMap, vFloatImage[0], 8, 3 ); // insert into x

											outBump1 =	 __vrlimi(outBump1, vFloatImageBump1[0], 8, 3 ); // insert into x

											outBump2 =	 __vrlimi(outBump2, vFloatImageBump2[0], 8, 3 ); // insert into x

											outBump3 =	 __vrlimi(outBump3, vFloatImageBump3[0], 8, 3 ); // insert into x


											outBaseMap = __vrlimi(outBaseMap, vFloatImage[1], 4, 2 ); // insert into y

											outBump1 =	 __vrlimi(outBump1, vFloatImageBump1[1], 4, 2 ); // insert into y

											outBump2 =	 __vrlimi(outBump2, vFloatImageBump2[1], 4, 2 ); // insert into y

											outBump3 =	 __vrlimi(outBump3, vFloatImageBump3[1], 4, 2 ); // insert into y


											outBaseMap = __vrlimi(outBaseMap, vFloatImage[2], 2, 1 ); // insert into z

											outBump1 =	 __vrlimi(outBump1, vFloatImageBump1[2], 2, 1 ); // insert into z

											outBump2 =	 __vrlimi(outBump2, vFloatImageBump2[2], 2, 1 ); // insert into z

											outBump3 =	 __vrlimi(outBump3, vFloatImageBump3[2], 2, 1 ); // insert into z


											outBaseMap = __vrlimi(outBaseMap, vFloatImage[3], 1, 0 ); // insert into w

											outBump1 =	 __vrlimi(outBump1, vFloatImageBump1[3], 1, 0 ); // insert into w

											outBump2 =	 __vrlimi(outBump2, vFloatImageBump2[3], 1, 0 ); // insert into w

											outBump3 =	 __vrlimi(outBump3, vFloatImageBump3[3], 1, 0 ); // insert into w


											// push the data through the store-gather buffer.

											storeGather.write(m_LightmapPixelWriter, outBaseMap, outBump1, outBump2, outBump3);


										}


										// Once here, make sure we've committed any leftover changes, then process

										// the remainders singly.

										storeGather.commit(m_LightmapPixelWriter);


										for( ;  // s is where it should be from the loop above

											s < (unsigned int) pLightmapSize[0];

											s++,

												// m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel), // now handled by store-gather

												srcTexelOffset += ( FOUR ))

										{


											static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};

											fltx4 vColor[4];

											fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]);

											fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]);

											fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]);

											fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);


											// perform an arcane averaging operation upon the bump map values

											ColorSpace::LinearToBumpedLightmap( vFloatImage,

												vFloatImageBump1, vFloatImageBump2,

												vFloatImageBump3,

												vColor[0], vColor[1], vColor[2], vColor[3] );


											// convert each color to RGB scaled.

											// DO NOT! make this into a for loop. The (April07 XDK) compiler

											// in fact DOES NOT unroll them, and will perform very naive

											// scheduling if you try.


											// clamp to 0..16 float

											vColor[0] = MinSIMD(vColor[0], vSixteen);

											vColor[1] = MinSIMD(vColor[1], vSixteen);

											vColor[2] = MinSIMD(vColor[2], vSixteen);

											vColor[3] = MinSIMD(vColor[3], vSixteen);


											// compute the scaling factor, place it in w, and

											// scale the rest by it. Obliterates whatever was

											// already in alpha.

											// This code is why it is important to not use a for

											// loop: you need to let the compiler interleave the

											// inlined instructions.

											vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] );

											vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] );

											vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] );

											vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] );


								#ifdef X360_DOUBLECHECK_LIGHTMAPS

											unsigned short color[4][4];


											ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],

												&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],

												&pFloatImageBump3[srcTexelOffset],

												color[0], color[1], color[2], color[3] );

											unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 );

											color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;


											if( IsX360() )

											{

												for( int i = 0; i != 4; ++i )

												{

													Vector4D vRGBScale;


													vRGBScale.x = color[i][0] * (16.0f / 65535.0f);

													vRGBScale.y = color[i][1] * (16.0f / 65535.0f);

													vRGBScale.z = color[i][2] * (16.0f / 65535.0f);

													vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );

													color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );

													color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );

													color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );

													color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );

												}

											}


											/*

											for (int ii = 0; ii < 4; ++ii)

											{

												uint32 pack = (PackPixel_BGRA8888( vColor[ii] ).u[3]);

												if (color[ii][3] != 0)

												Assert(	color[ii][0] == (pack & 0xFF0000) >> 16	&&

														color[ii][1] == (pack & 0xFF00) >> 8		&&

														color[ii][2] == (pack & 0xFF)				&&

														color[ii][3] == (pack & 0xFF000000) >> 24 );

											}

											*/


								#endif


												vColor[0] = PackPixel_BGRA8888( vColor[0] );

												vColor[1] = PackPixel_BGRA8888( vColor[1] );

												vColor[2] = PackPixel_BGRA8888( vColor[2] );

												vColor[3] = PackPixel_BGRA8888( vColor[3] );


												storeGather.writeJustW(m_LightmapPixelWriter, vColor[0], vColor[1], vColor[2], vColor[3] );


												/* // here is the old way of writing pixels:

												// now we store-gather this

												m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[0] );

												Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[0] ).u[3] );

												void * RESTRICT pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes );

												m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[1], pBits );

												Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[1] ).u[3] );

												pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes );

												m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[2], pBits );

												Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[2] ).u[3] );

												pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes );

												m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[3], pBits );

												Assert(*reinterpret_cast<unsigned int *>(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[3] ).u[3] );


												m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel);

												*/

										}


										storeGather.commit(m_LightmapPixelWriter);


									}

								}


								#endif //_X360


								// Write a bumped lightmap update to an HDR integer lightmap.
								//
								// For every source texel four output pixels are produced by
								// ColorSpace::LinearToBumpedLightmap (color[0]..color[3]) from the base image and
								// the three bump images. The four outputs are written
								// nLightmapSize0 * pixelSize bytes apart in the pixel writer; the writer is then
								// rewound to the pixel following the first output before the next texel.
								//
								//   pFloatImage             - base texels, four floats per texel; component 3 is read as alpha
								//   pFloatImageBump1..3     - bump texels, same layout and indexing as pFloatImage
								//   pLightmapSize           - [0] = width, [1] = height of the update, in texels
								//   pOffsetIntoLightmapPage - x/y passed to the pixel writer's Seek for the first output
								//   pfmOut                  - optional; when non-NULL (non-SIMD integer path only) the
								//                             first output pixel is also written to it via WritePixelRGBAF
								void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2,
																				 float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) RESTRICT
								{
									const int nLightmapSize0 = pLightmapSize[0];
									// byte distance between two consecutive outputs of the same texel
									const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize();
									// after the fourth output: step back three outputs and forward one pixel
									const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() );

									if( m_LightmapPixelWriter.IsUsingFloatFormat() )
									{
										AssertMsg(!IsX360(), "Tried to use a floating-point pixel format for lightmaps on 360, which is not supported.");
										if (!IsX360())
										{
											for( int t = 0; t < pLightmapSize[1]; t++ )
											{
												int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
												m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

												for( int s = 0;
													s < nLightmapSize0;
													s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
												{
													unsigned short color[4][4];

													ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
														&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
														&pFloatImageBump3[srcTexelOffset],
														color[0], color[1], color[2], color[3] );
													float alpha = pFloatImage[srcTexelOffset+3];
													Assert( alpha >= 0.0f && alpha <= 1.0f );

													if ( HardwareConfig()->GetCSMAccurateBlending() )
													{
														float alphaF[4];

														ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3],
																								 &pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3], &pFloatImageBump3[srcTexelOffset + 3],
																								 &alphaF[0], &alphaF[1], &alphaF[2], &alphaF[3] );

														unsigned short alphaUS[4];
														alphaUS[0] = ColorSpace::LinearToUnsignedShort( alphaF[0], 16 );
														alphaUS[1] = ColorSpace::LinearToUnsignedShort( alphaF[1], 16 );
														alphaUS[2] = ColorSpace::LinearToUnsignedShort( alphaF[2], 16 );
														alphaUS[3] = ColorSpace::LinearToUnsignedShort( alphaF[3], 16 );

														color[0][3] = alphaUS[0];
														color[1][3] = alphaUS[1];
														color[2][3] = alphaUS[2];
														color[3][3] = alphaUS[3];
													}
													else
													{
														// The color array holds unsigned shorts that are scaled by toFloat
														// (1/65536) when written below. Storing the [0..1] float alpha
														// directly would truncate it to 0 or 1, so convert it the same way
														// the branch above and the integer path do.
														const unsigned short alphaUS = ColorSpace::LinearToUnsignedShort( alpha, 16 );
														color[0][3] = color[1][3] = color[2][3] = color[3][3] = alphaUS;
													}

													float toFloat = ( 1.0f / ( float )( 1 << 16 ) );

													/* // This code is now a can't-happen, because we do not allow float formats on 360.
								#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
													if( IsX360() )
													{
														for( int i = 0; i != 4; ++i )
														{
															Vector4D vRGBScale;

															vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
															vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
															vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
															vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
															color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
															color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
															color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
															color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
														}

														toFloat = ( 1.0f / ( float )( 1 << 8 ) );
													}
								#endif
													*/

													// four outputs for this texel, each nLightmap0WriterSizeBytes apart
													m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[0][0], toFloat * color[0][1], toFloat * color[0][2], toFloat * color[0][3] );

													m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
													m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[1][0], toFloat * color[1][1], toFloat * color[1][2], toFloat * color[1][3] );

													m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
													m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[2][0], toFloat * color[2][1], toFloat * color[2][2], toFloat * color[2][3] );

													m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
													m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[3][0], toFloat * color[3][1], toFloat * color[3][2], toFloat * color[3][3] );
												}
											}
										}
									}
									else
									{
								#ifndef X360_USE_SIMD_LIGHTMAP
										for( int t = 0; t < pLightmapSize[1]; t++ )
										{
											int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 );
											m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

											for( int s = 0;
												s < nLightmapSize0;
												s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float)))
											{
												unsigned short color[4][4];

												ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset],
													&pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset],
													&pFloatImageBump3[srcTexelOffset],
													color[0], color[1], color[2], color[3] );

												if ( HardwareConfig()->GetCSMAccurateBlending() )
												{
													// one alpha per output, derived from all four source alphas
													float alpha[4];
													ColorSpace::LinearToBumpedLightmapAlpha( &pFloatImage[srcTexelOffset + 3],
																							 &pFloatImageBump1[srcTexelOffset + 3], &pFloatImageBump2[srcTexelOffset + 3], &pFloatImageBump3[srcTexelOffset + 3],
																							 &alpha[0], &alpha[1], &alpha[2], &alpha[3] );

													unsigned short alphaUS[4];
													alphaUS[0] = ColorSpace::LinearToUnsignedShort( alpha[0], 16 );
													alphaUS[1] = ColorSpace::LinearToUnsignedShort( alpha[1], 16 );
													alphaUS[2] = ColorSpace::LinearToUnsignedShort( alpha[2], 16 );
													alphaUS[3] = ColorSpace::LinearToUnsignedShort( alpha[3], 16 );

													color[0][3] = alphaUS[0];
													color[1][3] = alphaUS[1];
													color[2][3] = alphaUS[2];
													color[3][3] = alphaUS[3];
												}
												else
												{
													// the base image's alpha is shared by all four outputs
													unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 );
													color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;
												}

								#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
												if( IsX360() )
												{
													for( int i = 0; i != 4; ++i )
													{
														Vector4D vRGBScale;

														vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
														vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
														vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
														vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
														color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
														color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
														color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
														color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
													}
												}
								#endif
												// four outputs for this texel, each nLightmap0WriterSizeBytes apart
												m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], color[0][3] );

												m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
												m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], color[1][3] );

												m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
												m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], color[2][3] );

												m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
												m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], color[3][3] );

												// Write data to the bitmapped representations so that PFM files can be written
												// (only the first output, color[0], is recorded)
												if ( pfmOut )
												{
													PixRGBAF pixelData;
													pixelData.Red = color[0][0];
													pixelData.Green = color[0][1];
													pixelData.Blue = color[0][2];
													pixelData.Alpha = color[0][3];
													pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData);
												}
											}
										}
								#else
										// this is an optimized XBOX implementation. For a clearer
										// presentation of the algorithm, see the PC implementation
										// above.
										// First check for the most common case, using an efficient
										// branch rather than a switch:
										if (m_LightmapPixelWriter.GetFormat() == IMAGE_FORMAT_LINEAR_BGRA8888)
										{
											// broken out into a static to make things more readable
											// and be nicer to the instruction cache
											BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( pFloatImage, pFloatImageBump1, pFloatImageBump2,
												pFloatImageBump3, pLightmapSize, pOffsetIntoLightmapPage, pfmOut, &m_LightmapPixelWriter );
										}
										else
										{
											// This case is used in Portal 2 to fill RGBA16161616 lightmaps
											Assert( m_LightmapPixelWriter.GetPixelSize() == 8 );

											for( int t = 0; t < pLightmapSize[1]; t++ )
											{
												// assert that 1 * 4 = 4
												COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);
								#define	FOUR (sizeof( Vector4D ) / sizeof( float ))  // in case this ever changes
												int srcTexelOffset = ( FOUR ) * ( 0 + t * nLightmapSize0 );
												m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

												for( int s = 0;
													s < nLightmapSize0;
													s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += ( FOUR ))
												{

													static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
													fltx4 vColor[4];
													fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]);
													fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]);
													fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]);
													fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);

													// perform an arcane averaging operation upon the bump map values
													ColorSpace::LinearToBumpedLightmap( vFloatImage,
														vFloatImageBump1, vFloatImageBump2,
														vFloatImageBump3,
														vColor[0], vColor[1], vColor[2], vColor[3] );

													// convert each color to RGB scaled.
													// DO NOT! make this into a for loop. The (April07 XDK) compiler
													// in fact DOES NOT unroll them, and will perform very naive
													// scheduling if you try.

													// clamp to 0..16 float
													vColor[0] = MinSIMD(vColor[0], vSixteen);
													vColor[1] = MinSIMD(vColor[1], vSixteen);
													vColor[2] = MinSIMD(vColor[2], vSixteen);
													vColor[3] = MinSIMD(vColor[3], vSixteen);

													// Not doing the following anymore. This path is for writing 16161616 int lightmaps.
													/*
													// compute the scaling factor, transform the RGB,
													// and place the scale in w. Obliterates whatever was
													// already in alpha.
													// This code is why it is important to not use a for
													// loop: you need to let the compiler interleave the
													// inlined instructions.
													vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] );
													vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] );
													vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] );
													vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] );
													*/

													m_LightmapPixelWriter.WritePixelNoAdvance( vColor[0] );
													m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
													m_LightmapPixelWriter.WritePixelNoAdvance( vColor[1] );
													m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
													m_LightmapPixelWriter.WritePixelNoAdvance( vColor[2] );
													m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes );
													m_LightmapPixelWriter.WritePixelNoAdvance( vColor[3] );

													AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");

													// Write data to the bitmapped representations so that PFM files can be written
													if ( pfmOut )
													{
														Warning("**************************************************\n"
																"Lightmap output to files on 360 HAS BEEN DISABLED.\n"
																"A grave error has just occurred.\n"
																"**************************************************\n");
														DebuggerBreakIfDebugging();
														/*
														PixRGBAF pixelData;
														pixelData.Red = color[0][0];
														pixelData.Green = color[0][1];
														pixelData.Blue = color[0][2];
														pixelData.Alpha = alpha;
														pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData);
														*/
													}
												}
											}
										}
								#endif
									}
								}


								void CMatLightmaps::LightmapBitsToPixelWriter_LDR( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )

								{

									// non-HDR lightmap processing

									float *pSrc = pFloatImage;

									for( int t = 0; t < pLightmapSize[1]; ++t )

									{

										m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

										for( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )

										{

											unsigned char color[4];

											ColorSpace::LinearToLightmap( color, pSrc );


											if ( HardwareConfig()->GetCSMAccurateBlending() )

											{

												ColorSpace::LinearToLightmapAlpha( &color[3], pSrc[3] );

											}

											else

											{

												color[3] = RoundFloatToByte( pSrc[3] * 255.0f );

											}


											m_LightmapPixelWriter.WritePixel( color[0], color[1], color[2], color[3] );


											if ( pfmOut )

											{

												// Write data to the bitmapped represenations so that PFM files can be written

												PixRGBAF pixelData;

												pixelData.Red = color[0];

												pixelData.Green = color[1];

												pixelData.Blue = color[2];

												pixelData.Alpha = color[3];

												pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData );

											}

										}

									}

								}


								// Float HDR lightmap upload: copies each source texel (four floats) to the
								// lightmap page through m_LightmapPixelWriter without integer conversion.
								// Asserts and does nothing on X360, which does not support HDR float.
								//
								//   pFloatImage             - source texels, four floats per texel, rows packed back to back
								//   pLightmapSize           - [0] = width, [1] = height of the update, in texels
								//   pOffsetIntoLightmapPage - x/y of the destination's top-left corner in the page
								//   pfmOut                  - not used by this function
								void CMatLightmaps::LightmapBitsToPixelWriter_HDRF( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut )
								{
									if ( IsX360() )
									{
										// 360 does not support HDR float
										Assert( 0 );
										return;
									}

									// float HDR lightmap processing
									float *pSrc = pFloatImage;
									for ( int t = 0; t < pLightmapSize[1]; ++t )
									{
										m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

										for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
										{
											// The alpha conversion must run for every texel. It previously sat
											// outside this loop, so only the first texel of each row was converted.
											// NOTE(review): the single-pointer overload presumably converts the
											// value in place, which modifies the caller's source image - confirm.
											if ( HardwareConfig()->GetCSMAccurateBlending() )
											{
												ColorSpace::LinearToLightmapAlpha( &pSrc[3] );
											}

											m_LightmapPixelWriter.WritePixelF( pSrc[0], pSrc[1], pSrc[2], pSrc[3] );
										}
									}
								}


								// Unbumped HDR integer lightmap upload: converts each source texel (four
								// floats) and writes it to the lightmap page through m_LightmapPixelWriter.
								// numbers come in on the domain [0..16]
								//
								//   pFloatImage             - source texels, four floats per texel, rows packed back to back
								//   pLightmapSize           - [0] = width, [1] = height of the update, in texels
								//   pOffsetIntoLightmapPage - x/y of the destination's top-left corner in the page
								//   pfmOut                  - optional; in the non-SIMD integer path each source texel
								//                             is also written to it so PFM files can be produced.
								//                             Not supported in the X360 SIMD path.
								void CMatLightmaps::LightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t * RESTRICT pfmOut )
								{
								#ifndef X360_USE_SIMD_LIGHTMAP
									// PC code (and old, pre-SIMD xbox version -- unshippably slow)
									if ( m_LightmapPixelWriter.IsUsingFloatFormat() )
									{
										// integer HDR lightmap processing
										float *pSrc = pFloatImage;
										for ( int t = 0; t < pLightmapSize[1]; ++t )
										{
											m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
											for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
											{
												int r, g, b, a;

												r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
												g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
												b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );

												// NOTE(review): 'a' is computed here, but the pixel below is written
												// with the raw pSrc[3]; the accurate-blending alpha therefore has no
												// effect in this float-format branch. Confirm whether that is intended.
												if ( HardwareConfig()->GetCSMAccurateBlending() )
												{
													ColorSpace::LinearToLightmapAlpha( &a, pSrc[3] );
												}
												else
												{
													a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
												}

												// scales the 16-bit integer channels back into floats for WritePixelF
												float toFloat = ( 1.0f / ( float )( 1 << 16 ) );

								#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
												if( IsX360() )
												{
													Vector4D vRGBScale;

													vRGBScale.x = r * (16.0f / 65535.0f);
													vRGBScale.y = g * (16.0f / 65535.0f);
													vRGBScale.z = b * (16.0f / 65535.0f);
													vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );

													r = RoundFloatToByte( vRGBScale.x * 255.0f );
													g = RoundFloatToByte( vRGBScale.y * 255.0f );
													b = RoundFloatToByte( vRGBScale.z * 255.0f );
													a = RoundFloatToByte( vRGBScale.w * 255.0f );

													toFloat = ( 1.0f / ( float )( 1 << 8 ) );
												}

								#endif
												Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f );
												m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] );
											}
										}
									}
									else
									{
										// integer HDR lightmap processing
										float *pSrc = pFloatImage;
										for ( int t = 0; t < pLightmapSize[1]; ++t )
										{
											m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
											for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
											{
												int r, g, b, a;

												r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
												g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
												b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );

												if ( HardwareConfig()->GetCSMAccurateBlending() )
												{
													ColorSpace::LinearToLightmapAlpha( &a, pSrc[3] );
												}
												else
												{
													a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );
												}

								#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
												if( IsX360() )
												{
													Vector4D vRGBScale;

													vRGBScale.x = r * (16.0f / 65535.0f);
													vRGBScale.y = g * (16.0f / 65535.0f);
													vRGBScale.z = b * (16.0f / 65535.0f);
													vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );

													r = RoundFloatToByte( vRGBScale.x * 255.0f );
													g = RoundFloatToByte( vRGBScale.y * 255.0f );
													b = RoundFloatToByte( vRGBScale.z * 255.0f );
													a = RoundFloatToByte( vRGBScale.w * 255.0f );
												}
								#endif
												m_LightmapPixelWriter.WritePixel( r, g, b, a );

												if ( pfmOut )
												{
													// Write data to the bitmapped representations so that PFM files can be written
													// (the unconverted source floats are recorded, not r/g/b/a)
													PixRGBAF pixelData;
													pixelData.Red = pSrc[0];
													pixelData.Green = pSrc[1];
													pixelData.Blue = pSrc[2];
													pixelData.Alpha = pSrc[3];
													pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, 0, pixelData );
												}
											}
										}
									}
								#else
									// XBOX360 code
									if ( m_LightmapPixelWriter.IsUsingFloatFormat() )
									{
										if( IsX360() )
										{
											AssertMsg( false, "Float-format pixel writers do not exist on x360." );
										}
										else
										{	// This code is here as an example only, in case floating point
											// format is restored to 360.

											// integer HDR lightmap processing
											float * RESTRICT pSrc = pFloatImage;
											for ( int t = 0; t < pLightmapSize[1]; ++t )
											{
												m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
												for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) )
												{
													int r, g, b, a;

													r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] );
													g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] );
													b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] );
													a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 );

													float toFloat = ( 1.0f / ( float )( 1 << 16 ) );

								#if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) )
													if( IsX360() )
													{
														Vector4D vRGBScale;

														vRGBScale.x = r * (16.0f / 65535.0f);
														vRGBScale.y = g * (16.0f / 65535.0f);
														vRGBScale.z = b * (16.0f / 65535.0f);
														vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );

														r = RoundFloatToByte( vRGBScale.x * 255.0f );
														g = RoundFloatToByte( vRGBScale.y * 255.0f );
														b = RoundFloatToByte( vRGBScale.z * 255.0f );
														a = RoundFloatToByte( vRGBScale.w * 255.0f );

														toFloat = ( 1.0f / ( float )( 1 << 8 ) );
													}

								#endif
													Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f );
													m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] );
												}
											}
										}
									}
									else
									{
										// This is the fast X360 pathway.

										// integer HDR lightmap processing
										float * RESTRICT pSrc = pFloatImage;
										// Assert((reinterpret_cast<unsigned int>(pSrc) & 15) == 0); // 16-byte aligned?
										COMPILE_TIME_ASSERT(sizeof(Vector4D)/sizeof(*pSrc) == 4); // assert that 1 * 4 = 4

										// input numbers from pSrc are on the domain [0..+inf]
										// we clamp them to the range [0..16]
										// output is RGBA
										// the shader does this: rOut = Rin * Ain * 16.0f
										// where Rin is [0..1], a float computed from a byte value [0..255]
										// Ain is therefore the brightest channel (say R) divided by 16 and quantized
										// Rin is computed from pSrc->r by dividing by Ain

										// rather than switching inside WritePixel for each different format,
										// thus causing a 23-cycle pipeline clear for every pixel, we'll
										// branch on the format here. That will allow us to unroll the inline
										// pixel write functions differently depending on their different
										// latencies.

										Assert(!pfmOut); // should never happen on 360.
								#ifndef ALLOW_PFM_OUTPUT_ON_360
										if ( pfmOut )
										{
											Warning("*****************************************\n"
													"Lightmap output on 360 HAS BEEN DISABLED.\n"
													"A grave error has just occurred.\n"
													"*****************************************\n");
										}
								#endif

										// switch once, here, outside the loop, rather than
										// switching inside each pixel. Switches are not fast
										// on x360: they are usually implemented as jumps
										// through function tables, which have a 24-cycle
										// stall.
										switch (m_LightmapPixelWriter.GetFormat())
										{
											// note: format names are low-order-byte first.
										case IMAGE_FORMAT_RGBA8888:
										case IMAGE_FORMAT_LINEAR_RGBA8888:
										{
											for ( int t = 0; t < pLightmapSize[1]; ++t )
											{
												m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
												for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
												{
													static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
													fltx4 rgba = LoadUnalignedSIMD(pSrc);

													// clamp to 0..16 float
													rgba = MinSIMD(rgba, vSixteen);
													// compute the scaling factor, place it in w, and
													// scale the rest by it.
													rgba = ConvertLightmapColorToRGBScale( rgba );
													// rgba is now  float 0..255 in each component
													m_LightmapPixelWriter.WritePixelNoAdvance_RGBA8888(rgba);
													// forcibly advance, as the BGRA8888 and RGBA16161616 cases
													// below do after their NoAdvance writes; without this every
													// texel of the row would be written to the same pixel.
													m_LightmapPixelWriter.SkipBytes(4);

													/*  // not supported on X360
													if ( pfmOut )
													{
														// Write data to the bitmapped representations so that PFM files can be written
														PixRGBAF pixelData;
														XMStoreVector4(&pixelData,rgba);
														pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData );
													}
													*/
												}
											}
											break;
										}

										case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
										case IMAGE_FORMAT_LINEAR_BGRA8888:
										{
											for ( int t = 0; t < pLightmapSize[1]; ++t )
											{
												m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
												for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
												{
													static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
													fltx4 rgba = LoadUnalignedSIMD(pSrc);

													// clamp to 0..16 float
													rgba = MinSIMD(rgba, vSixteen);
													// compute the scaling factor, place it in w, and
													// scale the rest by it.
													rgba = ConvertLightmapColorToRGBScale( rgba );
													// rgba is now  float 0..255 in each component
													m_LightmapPixelWriter.WritePixelNoAdvance_BGRA8888(rgba);
													// forcibly advance
													m_LightmapPixelWriter.SkipBytes(4);

													/* // not supported on X360
													if ( pfmOut )
													{
														// Write data to the bitmapped representations so that PFM files can be written
														PixRGBAF pixelData;
														XMStoreVector4(&pixelData,rgba);
														pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData );
													}
													*/
												}
											}
											break;
										}

										case IMAGE_FORMAT_RGBA16161616:
										case IMAGE_FORMAT_LINEAR_RGBA16161616:
											{
												for ( int t = 0; t < pLightmapSize[1]; ++t )
												{
													m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );
													for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 )
													{
														static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
														fltx4 rgba = LoadUnalignedSIMD(pSrc);
														rgba = MinSIMD(rgba, vSixteen);	// clamp to 0..16 float
														m_LightmapPixelWriter.WritePixelNoAdvance_RGBA16161616(rgba);
														// forcibly advance by one 8-byte pixel
														m_LightmapPixelWriter.SkipBytes(8);
													}
												}
												break;
											}

										default:
											AssertMsg1(false,"Unsupported pixel format %d while writing lightmaps!", m_LightmapPixelWriter.GetFormat() );
											Warning("Unsupported pixel format used in lightmap. Lightmaps could not be downloaded.\n");
											break;
										}
									}
								#endif
								}


								void CMatLightmaps::BeginUpdateLightmaps( void )

								{

									CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal();

									if ( pCallQueue )

									{

										pCallQueue->QueueCall( this, &CMatLightmaps::BeginUpdateLightmaps );

										return;

									}


									m_nUpdatingLightmapsStackDepth++;

								}


								void CMatLightmaps::EndUpdateLightmaps( void )

								{

									CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal();

									if ( pCallQueue )

									{

										pCallQueue->QueueCall( this, &CMatLightmaps::EndUpdateLightmaps );

										return;

									}


									m_nUpdatingLightmapsStackDepth--;

									Assert( m_nUpdatingLightmapsStackDepth >= 0 );

									if( m_nUpdatingLightmapsStackDepth <= 0 && m_nLockedLightmap != -1 )

									{

										g_pShaderAPI->TexUnlock();

										m_nLockedLightmap = -1;

									}

								}


								//-----------------------------------------------------------------------------
								// Finds room for a lightmapSize[0] x lightmapSize[1] block in one of the
								// dynamic lightmap pages, starting at m_dynamic.currentDynamicIndex and
								// wrapping around. A page whose recorded lock frame differs from frameID has
								// its packer reset before the block is added.
								// Returns the lightmap page index and fills pOutOffsetIntoPage[0..1] on
								// success; returns -1 when no dynamic page can hold the block.
								//-----------------------------------------------------------------------------
								int CMatLightmaps::AllocateDynamicLightmap( int lightmapSize[2], int *pOutOffsetIntoPage, int frameID )
								{
									for ( int nAttempt = 0; nAttempt < COUNT_DYNAMIC_LIGHTMAP_PAGES; ++nAttempt )
									{
										const int nSlot = ( m_dynamic.currentDynamicIndex + nAttempt ) % COUNT_DYNAMIC_LIGHTMAP_PAGES;
										const int nPage = m_firstDynamicLightmap + nSlot;

										// First use of this page for the given frame: start from an empty packer.
										if ( m_dynamic.lightmapLockFrame[nSlot] != frameID )
										{
											m_dynamic.lightmapLockFrame[nSlot] = frameID;
											m_dynamic.imagePackers[nSlot].Reset( 0, m_pLightmapPages[nPage].m_Width, m_pLightmapPages[nPage].m_Height );
										}

										if ( !m_dynamic.imagePackers[nSlot].AddBlock( lightmapSize[0], lightmapSize[1], &pOutOffsetIntoPage[0], &pOutOffsetIntoPage[1] ) )
										{
											// Does not fit here, try the next dynamic page.
											continue;
										}

										return nPage;
									}

									return -1;
								}


								//-----------------------------------------------------------------------------
								// Updates the lightmap
								//
								// Converts the float image(s) of one surface into the lightmap page's pixel
								// format and writes them into the page.
								//   lightmapPageID         - page to write; Error() is raised if out of range
								//   lightmapSize           - width/height of a single lightmap image
								//   offsetIntoLightmapPage - x/y of the surface's block inside the page
								//   pFloatImage            - unbumped float image
								//   pFloatImageBump1..3    - bump images; the bumped path is taken only when
								//                            all three are non-NULL
								//
								// Locking: outside a Begin/EndUpdateLightmaps scope, non-dynamic pages are
								// locked as a sub-rect for just this write and unlocked before returning.
								// Otherwise the page is locked through LockLightmap when it is not already
								// the locked page, and it is not unlocked here.
								//-----------------------------------------------------------------------------
								void CMatLightmaps::UpdateLightmap( int lightmapPageID, int lightmapSize[2],
																	  int offsetIntoLightmapPage[2],
																	  float *pFloatImage, float *pFloatImageBump1,
																	  float *pFloatImageBump2, float *pFloatImageBump3 )
								{
									VPROF( "CMatRenderContext::UpdateLightmap" );

									// Bumped lightmaps occupy four images side by side (see NOTE below).
									const bool hasBump = ( pFloatImageBump1 && pFloatImageBump2 && pFloatImageBump3 );
									const int uSize = hasBump ? 4 : 1;
									FloatBitMap_t *pfmOut = NULL;

									if ( lightmapPageID >= GetNumLightmapPages() || lightmapPageID < 0 )
									{
										Error( "MaterialSystem_Interface_t::UpdateLightmap lightmapPageID=%d out of range\n", lightmapPageID );
										return;
									}

									const bool bDynamic = IsDynamicLightmap( lightmapPageID );
									if ( bDynamic )
									{
										// Next dynamic allocation starts searching at the page after this one.
										int dynamicIndex = lightmapPageID - m_firstDynamicLightmap;
										Assert( dynamicIndex < COUNT_DYNAMIC_LIGHTMAP_PAGES );
										m_dynamic.currentDynamicIndex = ( dynamicIndex + 1 ) % COUNT_DYNAMIC_LIGHTMAP_PAGES;
									}

									if ( mat_lightmap_pfms.GetBool() )
									{
										// Allocate and initialize lightmap data that will be written to a PFM file
										if ( NULL == m_pLightmapDataPtrArray[lightmapPageID] )
										{
											m_pLightmapDataPtrArray[lightmapPageID] = new FloatBitMap_t( m_pLightmapPages[lightmapPageID].m_Width, m_pLightmapPages[lightmapPageID].m_Height );
											m_pLightmapDataPtrArray[lightmapPageID]->Clear( 0, 0, 0, 1 );
										}
										pfmOut = m_pLightmapDataPtrArray[lightmapPageID];
									}

									// NOTE: Change how the lock is taking place if you ever change how bumped
									// lightmaps are put into the page. Right now, we assume that they're all
									// added to the right of the original lightmap.
									bool bLockSubRect;
									{
										VPROF_( "Locking lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); // vprof scope

										bLockSubRect = m_nUpdatingLightmapsStackDepth <= 0 && !bDynamic;
										if ( bLockSubRect )
										{
											VPROF_INCREMENT_COUNTER( "lightmap subrect texlock", 1 );
											g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmapPageID] );
											if ( !g_pShaderAPI->TexLock( 0, 0, offsetIntoLightmapPage[0], offsetIntoLightmapPage[1],
												lightmapSize[0] * uSize, lightmapSize[1], m_LightmapPixelWriter ) )
											{
												return;
											}
										}
										else if ( lightmapPageID != m_nLockedLightmap )
										{
											if ( !LockLightmap( lightmapPageID ) )
											{
												ExecuteNTimes( 10, Warning( "Failed to lock lightmap\n" ) );
												return;
											}
										}
									}

									// A sub-rect lock starts at the block itself, so writes go to (0,0);
									// with a locked page the writer needs the block's offset in the page.
									int subRectOffset[2] = { 0, 0 };
									int *pWriteOffset = bLockSubRect ? subRectOffset : offsetIntoLightmapPage;

									{
										// account for the part spent in math:
										VPROF_( "LightmapBitsToPixelWriter", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );
								#ifdef _PS3
										// PS3 uses 16-bit half floats per channel...but the HDR_TYPE_FLOAT codepath has a lot of other assumptions, so just
										// lie about the format right here on PS3 only
										if ( hasBump )
										{
											BumpedLightmapBitsToPixelWriter_HDRF( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
												lightmapSize, pWriteOffset, pfmOut );
										}
										else
										{
											LightmapBitsToPixelWriter_HDRF( pFloatImage, lightmapSize, pWriteOffset, pfmOut );
										}
								#else // _PS3
										if ( hasBump )
										{
											switch ( HardwareConfig()->GetHDRType() )
											{
											case HDR_TYPE_NONE:
												BumpedLightmapBitsToPixelWriter_LDR( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
													lightmapSize, pWriteOffset, pfmOut );
												break;

											case HDR_TYPE_INTEGER:
												BumpedLightmapBitsToPixelWriter_HDRI( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
													lightmapSize, pWriteOffset, pfmOut );
												break;

											case HDR_TYPE_FLOAT:
												BumpedLightmapBitsToPixelWriter_HDRF( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3,
													lightmapSize, pWriteOffset, pfmOut );
												break;

											default:
												// Mirror the unbumped path: an unexpected HDR type must not
												// silently skip the write.
												Assert( 0 );
												break;
											}
										}
										else
										{
											switch ( HardwareConfig()->GetHDRType() )
											{
											case HDR_TYPE_NONE:
												LightmapBitsToPixelWriter_LDR( pFloatImage, lightmapSize, pWriteOffset, pfmOut );
												break;

											case HDR_TYPE_INTEGER:
												LightmapBitsToPixelWriter_HDRI( pFloatImage, lightmapSize, pWriteOffset, pfmOut );
												break;

											case HDR_TYPE_FLOAT:
												LightmapBitsToPixelWriter_HDRF( pFloatImage, lightmapSize, pWriteOffset, pfmOut );
												break;

											default:
												Assert( 0 );
												break;
											}
										}
								#endif // !_PS3
									}

									if ( bLockSubRect )
									{
										// Only the per-write sub-rect lock is released here.
										VPROF_( "Unlocking Lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 );
										g_pShaderAPI->TexUnlock();
									}
								}


								//-----------------------------------------------------------------------------
								// Returns the stored sort-ID count (m_numSortIDs). GetSortInfo asserts that
								// it fills exactly this many entries.
								//-----------------------------------------------------------------------------
								int	CMatLightmaps::GetNumSortIDs( void )
								{
									return m_numSortIDs;
								}


								//-----------------------------------------------------------------------------
								// Appends one sort entry per (material, lightmap page) pair for every material
								// in the dictionary whose lightmap page range is non-empty.
								//   pInfo  - output array, written starting at index sortId
								//   sortId - in/out: next free index, advanced once per entry written
								//   alpha  - not read by this function
								//-----------------------------------------------------------------------------
								void CMatLightmaps::ComputeSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha )
								{
									for ( MaterialHandle_t hMat = GetMaterialDict()->FirstMaterial(); hMat != GetMaterialDict()->InvalidMaterial(); hMat = GetMaterialDict()->NextMaterial( hMat ) )
									{
										IMaterialInternal *pMat = GetMaterialInternal( hMat );

										// Only materials with a valid [min, max] page range contribute entries.
										if ( pMat->GetMinLightmapPageID() <= pMat->GetMaxLightmapPageID() )
										{
											// fill in the lightmapped materials
											for ( int nPage = pMat->GetMinLightmapPageID(); nPage <= pMat->GetMaxLightmapPageID(); ++nPage )
											{
												MaterialSystem_SortInfo_t &entry = pInfo[sortId];
												entry.material = pMat->GetQueueFriendlyVersion();
												entry.lightmapPageID = nPage;
								#if 0
												char buf[128];
												Q_snprintf( buf, sizeof( buf ), "ComputeSortInfo: %s lightmapPageID: %d sortID: %d\n", pMat->GetName(), nPage, sortId );
												OutputDebugString( buf );
								#endif
												++sortId;
											}
										}
									}
								}


								//-----------------------------------------------------------------------------
								// Appends one sort entry for every referenced material that needs a white
								// lightmap. The entry uses the white-bump page when the material requires
								// bumped lightmaps, and the plain white page otherwise.
								//   pInfo  - output array, written starting at index sortId
								//   sortId - in/out: next free index, advanced once per entry written
								//   alpha  - not read by this function
								//-----------------------------------------------------------------------------
								void CMatLightmaps::ComputeWhiteLightmappedSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha )
								{
									for ( MaterialHandle_t hMat = GetMaterialDict()->FirstMaterial(); hMat != GetMaterialDict()->InvalidMaterial(); hMat = GetMaterialDict()->NextMaterial( hMat ) )
									{
										IMaterialInternal *pMat = GetMaterialInternal( hMat );

										// fill in the lightmapped materials that are actually used by this level
										if ( !pMat->GetNeedsWhiteLightmap() || pMat->GetReferenceCount() <= 0 )
										{
											continue;
										}

										MaterialSystem_SortInfo_t &entry = pInfo[sortId];
										entry.material = pMat->GetQueueFriendlyVersion();
										entry.lightmapPageID = pMat->GetPropertyFlag( MATERIAL_PROPERTY_NEEDS_BUMPED_LIGHTMAPS )
											? MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP
											: MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE;

										++sortId;
									}
								}


								//-----------------------------------------------------------------------------
								// Fills pSortInfoArray with the sort entries of all lightmapped materials,
								// followed by the entries of the white-lightmapped materials. The total
								// number written is asserted to equal m_numSortIDs.
								//-----------------------------------------------------------------------------
								void CMatLightmaps::GetSortInfo( MaterialSystem_SortInfo_t *pSortInfoArray )
								{
									int nNextSortID = 0;

									// Regular lightmap pages first, then the white / white-bump pages.
									ComputeSortInfo( pSortInfoArray, nNextSortID, false );
									ComputeWhiteLightmappedSortInfo( pSortInfoArray, nNextSortID, false );

									Assert( m_numSortIDs == nNextSortID );
								}


								//-----------------------------------------------------------------------------
								// Sets the min/mag texture filter of every lightmap page: linear filtering
								// when 'enabled' is true, nearest filtering otherwise.
								//-----------------------------------------------------------------------------
								void CMatLightmaps::EnableLightmapFiltering( bool enabled )
								{
									for ( int nPage = 0; nPage < GetNumLightmapPages(); ++nPage )
									{
										// Select the page's texture; the filter calls below take no handle.
										g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[nPage] );

										if ( !enabled )
										{
											g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_NEAREST );
											g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_NEAREST );
											continue;
										}

										g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR );
										g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR );
									}
								}