Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2205 lines
80 KiB

  1. //========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //=============================================================================//
  8. #include "render_pch.h"
  9. #include "gl_lightmap.h"
  10. #include "view.h"
  11. #include "gl_cvars.h"
  12. #include "zone.h"
  13. #include "gl_water.h"
  14. #include "r_local.h"
  15. #include "gl_model_private.h"
  16. #include "mathlib/bumpvects.h"
  17. #include "gl_matsysiface.h"
  18. #include <float.h>
  19. #include "materialsystem/imaterialsystemhardwareconfig.h"
  20. #include "materialsystem/imesh.h"
  21. #include "tier0/dbg.h"
  22. #include "tier0/vprof.h"
  23. #include "tier1/callqueue.h"
  24. #include "lightcache.h"
  25. #include "cl_main.h"
  26. #include "materialsystem/imaterial.h"
  27. #include "utlsortvector.h"
  28. #include "cache_hints.h"
  29. // memdbgon must be the last include file in a .cpp file!!!
  30. #include "tier0/memdbgon.h"
  31. //-----------------------------------------------------------------------------
  32. // globals
  33. //-----------------------------------------------------------------------------
  34. // Only enable this if you are testing lightstyle performance.
  35. //#define UPDATE_LIGHTSTYLES_EVERY_FRAME
  // Accumulation buffer shared by the dynamic-light and lightstyle accumulators in this file.
  // First index: 0 is the flat lightmap, 1..NUM_BUMP_VECTS are the bumped lightmaps.
  // Second index: luxel number within the surface (row * width + column).
  36. ALIGN128 Vector4D blocklights[NUM_BUMP_VECTS+1][ MAX_LIGHTMAP_DIM_INCLUDING_BORDER * MAX_LIGHTMAP_DIM_INCLUDING_BORDER ];
  37. ConVar r_avglightmap( "r_avglightmap", "0", FCVAR_CHEAT | FCVAR_MATERIAL_SYSTEM_THREAD );
  // Cap on the number of dlights that may be marked visible; sampled by R_DLightStartView().
  38. ConVar r_maxdlights( "r_maxdlights", "32" );
  39. // Disable dlights on console by default (for the sake of memory and perf):
  40. #ifdef _GAMECONSOLE
  41. ConVar r_dlightsenable( "r_dlightsenable", "0", FCVAR_CHEAT | FCVAR_MATERIAL_SYSTEM_THREAD );
  42. #else
  43. ConVar r_dlightsenable( "r_dlightsenable", "1", FCVAR_CHEAT | FCVAR_MATERIAL_SYSTEM_THREAD );
  44. #endif
  45. extern ConVar r_unloadlightmaps;
  46. extern ConVar r_keepstyledlightmapsonly;
  47. extern bool g_bHunkAllocLightmaps;
  // Bitmask of dlights currently marked visible (bit n corresponds to dlight index n).
  48. static int r_dlightvisible;
  // Bitmask of dlights passed to R_CanUseVisibleDLight() since the last R_DLightStartView().
  49. static int r_dlightvisiblethisframe;
  // Number of bits set in r_dlightvisible; kept in sync by R_MarkDLightVisible/R_MarkDLightNotVisible.
  50. static int s_nVisibleDLightCount;
  // Value of r_maxdlights captured by R_DLightStartView(); R_CanUseVisibleDLight() refuses new lights at this count.
  51. static int s_nMaxVisibleDLightCount;
  52. //-----------------------------------------------------------------------------
  53. // Visible, not visible DLights
  54. //-----------------------------------------------------------------------------
  55. void R_MarkDLightVisible( int dlight )
  56. {
  57. if ( (r_dlightvisible & ( 1 << dlight )) == 0 )
  58. {
  59. ++s_nVisibleDLightCount;
  60. r_dlightvisible |= 1 << dlight;
  61. }
  62. }
  63. void R_MarkDLightNotVisible( int dlight )
  64. {
  65. if ( r_dlightvisible & ( 1 << dlight ))
  66. {
  67. --s_nVisibleDLightCount;
  68. r_dlightvisible &= ~( 1 << dlight );
  69. }
  70. }
  71. //-----------------------------------------------------------------------------
  72. // Must call these at the start + end of rendering each view
  73. //-----------------------------------------------------------------------------
  74. void R_DLightStartView()
  75. {
  76. r_dlightvisiblethisframe = 0;
  77. s_nMaxVisibleDLightCount = r_maxdlights.GetInt();
  78. }
  79. void R_DLightEndView()
  80. {
  81. if ( !g_bActiveDlights )
  82. return;
  83. for( int lnum=0 ; lnum<MAX_DLIGHTS; lnum++ )
  84. {
  85. if ( r_dlightvisiblethisframe & ( 1 << lnum ))
  86. continue;
  87. R_MarkDLightNotVisible( lnum );
  88. }
  89. }
  90. //-----------------------------------------------------------------------------
  91. // Can we use another dynamic light, or is it just too expensive?
  92. //-----------------------------------------------------------------------------
  93. bool R_CanUseVisibleDLight( int dlight )
  94. {
  95. r_dlightvisiblethisframe |= (1 << dlight);
  96. if ( r_dlightvisible & ( 1 << dlight ) )
  97. return true;
  98. if ( s_nVisibleDLightCount >= s_nMaxVisibleDLightCount )
  99. return false;
  100. R_MarkDLightVisible( dlight );
  101. return true;
  102. }
  103. //-----------------------------------------------------------------------------
  104. // Adds a single dynamic light
  105. //-----------------------------------------------------------------------------
  106. static bool AddSingleDynamicLight( dlight_t& dl, SurfaceHandle_t surfID, const Vector &lightOrigin, float perpDistSq, float lightRadiusSq )
  107. {
  108. // transform the light into brush local space
  109. Vector local;
  110. // Spotlight early outs...
  111. if (dl.m_OuterAngle)
  112. {
  113. if (dl.m_OuterAngle < 180.0f)
  114. {
  115. // Can't light anything from the rear...
  116. if (DotProduct(dl.m_Direction, MSurf_Plane( surfID ).normal) >= 0.0f)
  117. return false;
  118. }
  119. }
  120. // Transform the light center point into (u,v) space of the lightmap
  121. mtexinfo_t* tex = MSurf_TexInfo( surfID );
  122. local[0] = DotProduct (lightOrigin, tex->lightmapVecsLuxelsPerWorldUnits[0].AsVector3D()) +
  123. tex->lightmapVecsLuxelsPerWorldUnits[0][3];
  124. local[1] = DotProduct (lightOrigin, tex->lightmapVecsLuxelsPerWorldUnits[1].AsVector3D()) +
  125. tex->lightmapVecsLuxelsPerWorldUnits[1][3];
  126. // Now put the center points into the space of the lightmap rectangle
  127. // defined by the lightmapMins + lightmapExtents
  128. local[0] -= MSurf_LightmapMins( surfID )[0];
  129. local[1] -= MSurf_LightmapMins( surfID )[1];
  130. // Figure out the quadratic attenuation factor...
  131. Vector intensity;
  132. float lightStyleValue = LightStyleValue( dl.style );
  133. intensity[0] = TexLightToLinear( dl.color.r, dl.color.exponent ) * lightStyleValue;
  134. intensity[1] = TexLightToLinear( dl.color.g, dl.color.exponent ) * lightStyleValue;
  135. intensity[2] = TexLightToLinear( dl.color.b, dl.color.exponent ) * lightStyleValue;
  136. float minlight = fpmax( g_flMinLightingValue, dl.minlight );
  137. float ooQuadraticAttn = lightRadiusSq * minlight;
  138. float ooRadiusSq = 1.0f / lightRadiusSq;
  139. // Compute a color at each luxel
  140. // We want to know the square distance from luxel center to light
  141. // so we can compute an 1/r^2 falloff in light color
  142. int smax = MSurf_LightmapExtents( surfID )[0] + 1;
  143. int tmax = MSurf_LightmapExtents( surfID )[1] + 1;
  144. for (int t=0; t<tmax; ++t)
  145. {
  146. float td = (local[1] - t) * tex->worldUnitsPerLuxel;
  147. for (int s=0; s<smax; ++s)
  148. {
  149. float sd = (local[0] - s) * tex->worldUnitsPerLuxel;
  150. float inPlaneDistSq = sd * sd + td * td;
  151. float totalDistSq = inPlaneDistSq + perpDistSq;
  152. if (totalDistSq < lightRadiusSq)
  153. {
  154. // at least all floating point only happens when a luxel is lit.
  155. float scale = (totalDistSq != 0.0f) ? ooQuadraticAttn / totalDistSq : 1.0f;
  156. // Apply a little extra attenuation
  157. scale *= (1.0f - totalDistSq * ooRadiusSq);
  158. if (scale > 2.0f)
  159. scale = 2.0f;
  160. int idx = t*smax + s;
  161. // Compute the base lighting just as is done in the non-bump case...
  162. blocklights[0][idx][0] += scale * intensity[0];
  163. blocklights[0][idx][1] += scale * intensity[1];
  164. blocklights[0][idx][2] += scale * intensity[2];
  165. }
  166. }
  167. }
  168. return true;
  169. }
  170. //-----------------------------------------------------------------------------
  171. // Adds a dynamic light to the bumped lighting
  172. //-----------------------------------------------------------------------------
  173. static void AddSingleDynamicLightToBumpLighting( dlight_t& dl, SurfaceHandle_t surfID,
  174. const Vector &lightOrigin, float perpDistSq, float lightRadiusSq, Vector* pBumpBasis, const Vector& luxelBasePosition )
  175. {
  176. Vector local;
  177. // FIXME: For now, only elights can be spotlights
  178. // the lightmap computation could get expensive for spotlights...
  179. Assert( dl.m_OuterAngle == 0.0f );
  180. // Transform the light center point into (u,v) space of the lightmap
  181. mtexinfo_t *pTexInfo = MSurf_TexInfo( surfID );
  182. local[0] = DotProduct (lightOrigin, pTexInfo->lightmapVecsLuxelsPerWorldUnits[0].AsVector3D()) +
  183. pTexInfo->lightmapVecsLuxelsPerWorldUnits[0][3];
  184. local[1] = DotProduct (lightOrigin, pTexInfo->lightmapVecsLuxelsPerWorldUnits[1].AsVector3D()) +
  185. pTexInfo->lightmapVecsLuxelsPerWorldUnits[1][3];
  186. // Now put the center points into the space of the lightmap rectangle
  187. // defined by the lightmapMins + lightmapExtents
  188. local[0] -= MSurf_LightmapMins( surfID )[0];
  189. local[1] -= MSurf_LightmapMins( surfID )[1];
  190. // Figure out the quadratic attenuation factor...
  191. Vector intensity;
  192. float lightStyleValue = LightStyleValue( dl.style );
  193. intensity[0] = TexLightToLinear( dl.color.r, dl.color.exponent ) * lightStyleValue;
  194. intensity[1] = TexLightToLinear( dl.color.g, dl.color.exponent ) * lightStyleValue;
  195. intensity[2] = TexLightToLinear( dl.color.b, dl.color.exponent ) * lightStyleValue;
  196. float minlight = fpmax( g_flMinLightingValue, dl.minlight );
  197. float ooQuadraticAttn = lightRadiusSq * minlight;
  198. float ooRadiusSq = 1.0f / lightRadiusSq;
  199. // The algorithm here is necessary to make dynamic lights live in the
  200. // same world as the non-bumped dynamic lights. Therefore, we compute
  201. // the intensity of the flat lightmap the exact same way here as when
  202. // we've got a non-bumped surface.
  203. // Then, I compute an actual light direction vector per luxel (FIXME: !!expensive!!)
  204. // and compute what light would have to come in along that direction
  205. // in order to produce the same illumination on the flat lightmap. That's
  206. // computed by dividing the flat lightmap color by n dot l.
  207. Vector lightDirection, texelWorldPosition;
  208. #if 1
  209. bool useLightDirection = (dl.m_OuterAngle != 0.0f) &&
  210. (fabs(dl.m_Direction.LengthSqr() - 1.0f) < 1e-3);
  211. if (useLightDirection)
  212. VectorMultiply( dl.m_Direction, -1.0f, lightDirection );
  213. #endif
  214. // Since there's a scale factor used when going from world to luxel,
  215. // we gotta undo that scale factor when going from luxel to world
  216. float fixupFactor = pTexInfo->worldUnitsPerLuxel * pTexInfo->worldUnitsPerLuxel;
  217. // Compute a color at each luxel
  218. // We want to know the square distance from luxel center to light
  219. // so we can compute an 1/r^2 falloff in light color
  220. int smax = MSurf_LightmapExtents( surfID )[0] + 1;
  221. int tmax = MSurf_LightmapExtents( surfID )[1] + 1;
  222. for (int t=0; t<tmax; ++t)
  223. {
  224. float td = (local[1] - t) * pTexInfo->worldUnitsPerLuxel;
  225. // Move along the v direction
  226. VectorMA( luxelBasePosition, t * fixupFactor, pTexInfo->lightmapVecsLuxelsPerWorldUnits[1].AsVector3D(),
  227. texelWorldPosition );
  228. for (int s=0; s<smax; ++s)
  229. {
  230. float sd = (local[0] - s) * pTexInfo->worldUnitsPerLuxel;
  231. float inPlaneDistSq = sd * sd + td * td;
  232. float totalDistSq = inPlaneDistSq + perpDistSq;
  233. if (totalDistSq < lightRadiusSq)
  234. {
  235. // at least all floating point only happens when a luxel is lit.
  236. float scale = (totalDistSq != 0.0f) ? ooQuadraticAttn / totalDistSq : 1.0f;
  237. // Apply a little extra attenuation
  238. scale *= (1.0f - totalDistSq * ooRadiusSq);
  239. if (scale > 2.0f)
  240. scale = 2.0f;
  241. int idx = t*smax + s;
  242. // Compute the base lighting just as is done in the non-bump case...
  243. VectorMA( blocklights[0][idx].AsVector3D(), scale, intensity, blocklights[0][idx].AsVector3D() );
  244. #if 1
  245. if (!useLightDirection)
  246. {
  247. VectorSubtract( lightOrigin, texelWorldPosition, lightDirection );
  248. VectorNormalize( lightDirection );
  249. }
  250. float lDotN = DotProduct( lightDirection, MSurf_Plane( surfID ).normal );
  251. if (lDotN < 1e-3)
  252. lDotN = 1e-3;
  253. scale *= lDotN;
  254. int i;
  255. for( i = 1; i < NUM_BUMP_VECTS + 1; i++ )
  256. {
  257. float dot = DotProduct( lightDirection, pBumpBasis[i-1] );
  258. if( dot <= 0.0f )
  259. continue;
  260. VectorMA( blocklights[i][idx].AsVector3D(), dot * scale, intensity,
  261. blocklights[i][idx].AsVector3D() );
  262. }
  263. #else
  264. VectorMA( blocklights[1][idx].AsVector3D(), scale, intensity, blocklights[1][idx].AsVector3D() );
  265. VectorMA( blocklights[2][idx].AsVector3D(), scale, intensity, blocklights[2][idx].AsVector3D() );
  266. VectorMA( blocklights[3][idx].AsVector3D(), scale, intensity, blocklights[3][idx].AsVector3D() );
  267. #endif
  268. }
  269. }
  270. // Move along u
  271. VectorMA( texelWorldPosition, fixupFactor,
  272. pTexInfo->lightmapVecsLuxelsPerWorldUnits[0].AsVector3D(), texelWorldPosition );
  273. }
  274. }
  275. //-----------------------------------------------------------------------------
  276. // Compute the bumpmap basis for this surface
  277. //-----------------------------------------------------------------------------
  278. static void R_ComputeSurfaceBasis( SurfaceHandle_t surfID, Vector *pBumpNormals, Vector &luxelBasePosition )
  279. {
  280. // NOTE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  281. // This function gives incorrect results when the plane made by the lightmapVecs isn't parallel to the surface plane.
  282. // buildmodelforworld has similar code that is correct. Probably doesn't matter too much at this point since
  283. // we don't use dlights much anymore.
  284. // NOTE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  285. // Get the bump basis vects in the space of the surface.
  286. Vector sVect, tVect;
  287. VectorCopy( MSurf_TexInfo( surfID )->lightmapVecsLuxelsPerWorldUnits[0].AsVector3D(), sVect );
  288. VectorNormalize( sVect );
  289. VectorCopy( MSurf_TexInfo( surfID )->lightmapVecsLuxelsPerWorldUnits[1].AsVector3D(), tVect );
  290. VectorNormalize( tVect );
  291. GetBumpNormals( sVect, tVect, MSurf_Plane( surfID ).normal, MSurf_Plane( surfID ).normal, pBumpNormals );
  292. // Compute the location of the first luxel in worldspace
  293. // Since there's a scale factor used when going from world to luxel,
  294. // we gotta undo that scale factor when going from luxel to world
  295. float fixupFactor =
  296. MSurf_TexInfo( surfID )->worldUnitsPerLuxel *
  297. MSurf_TexInfo( surfID )->worldUnitsPerLuxel;
  298. // The starting u of the surface is surf->lightmapMins[0];
  299. // since N * P + D = u, N * P = u - D, therefore we gotta move (u-D) along uvec
  300. VectorMultiply( MSurf_TexInfo( surfID )->lightmapVecsLuxelsPerWorldUnits[0].AsVector3D(),
  301. (MSurf_LightmapMins( surfID )[0] - MSurf_TexInfo( surfID )->lightmapVecsLuxelsPerWorldUnits[0][3]) * fixupFactor,
  302. luxelBasePosition );
  303. // Do the same thing for the v direction.
  304. VectorMA( luxelBasePosition,
  305. (MSurf_LightmapMins( surfID )[1] -
  306. MSurf_TexInfo( surfID )->lightmapVecsLuxelsPerWorldUnits[1][3]) * fixupFactor,
  307. MSurf_TexInfo( surfID )->lightmapVecsLuxelsPerWorldUnits[1].AsVector3D(),
  308. luxelBasePosition );
  309. // Move out in the direction of the plane normal...
  310. VectorMA( luxelBasePosition, MSurf_Plane( surfID ).dist, MSurf_Plane( surfID ).normal, luxelBasePosition );
  311. }
  312. //-----------------------------------------------------------------------------
  313. // Purpose: Compute the mask of which dlights affect a surface
  314. // NOTE: Also has the side effect of updating the surface lighting dlight flags!
  315. //-----------------------------------------------------------------------------
  316. unsigned int R_ComputeDynamicLightMask( dlight_t *pLights, SurfaceHandle_t surfID, msurfacelighting_t *pLighting, const matrix3x4_t& entityToWorld )
  317. {
  318. ASSERT_SURF_VALID( surfID );
  319. Vector bumpNormals[3];
  320. Vector luxelBasePosition;
  321. // Displacements do dynamic lights different
  322. if( SurfaceHasDispInfo( surfID ) )
  323. {
  324. return MSurf_DispInfo( surfID )->ComputeDynamicLightMask(pLights);
  325. }
  326. if ( !g_bActiveDlights )
  327. return 0;
  328. int lightMask = 0;
  329. for ( int lnum = 0, testBit = 1, mask = r_dlightactive; lnum < MAX_DLIGHTS; lnum++, mask >>= 1, testBit <<= 1 )
  330. {
  331. if ( mask & 1 )
  332. {
  333. // not lit by this light
  334. if ( !(pLighting->m_fDLightBits & testBit ) )
  335. continue;
  336. // This light doesn't affect the world
  337. if ( pLights[lnum].flags & (DLIGHT_NO_WORLD_ILLUMINATION|DLIGHT_DISPLACEMENT_MASK))
  338. continue;
  339. // This is used to ensure a maximum number of dlights in a frame
  340. if ( !R_CanUseVisibleDLight( lnum ) )
  341. continue;
  342. // Cull surface to light radius
  343. Vector lightOrigin;
  344. VectorITransform( pLights[lnum].origin, entityToWorld, lightOrigin );
  345. // NOTE: Dist can be negative because muzzle flashes can actually get behind walls
  346. // since the gun isn't checked for collision tests.
  347. float perpDistSq = DotProduct (lightOrigin, MSurf_Plane( surfID ).normal) - MSurf_Plane( surfID ).dist;
  348. if (perpDistSq < DLIGHT_BEHIND_PLANE_DIST)
  349. {
  350. // update the surfacelighting and remove this light's bit
  351. pLighting->m_fDLightBits &= ~testBit;
  352. continue;
  353. }
  354. perpDistSq *= perpDistSq;
  355. // If the perp distance > radius of light, blow it off
  356. float lightRadiusSq = pLights[lnum].GetRadiusSquared();
  357. if (lightRadiusSq <= perpDistSq)
  358. {
  359. // update the surfacelighting and remove this light's bit
  360. pLighting->m_fDLightBits &= ~testBit;
  361. continue;
  362. }
  363. lightMask |= testBit;
  364. }
  365. }
  366. return lightMask;
  367. }
  368. //-----------------------------------------------------------------------------
  369. // Purpose: Modifies blocklights[][][] to include the state of the dlights
  370. // affecting this surface.
  371. // NOTE: Can be threaded, should not reference or modify any global state
  372. // other than blocklights.
  373. //-----------------------------------------------------------------------------
  374. void R_AddDynamicLights( dlight_t *pLights, SurfaceHandle_t surfID, const matrix3x4_t& entityToWorld, bool needsBumpmap, unsigned int lightMask )
  375. {
  376. ASSERT_SURF_VALID( surfID );
  377. VPROF( "R_AddDynamicLights" );
  378. // Early-out if dlights are disabled:
  379. if ( !r_dlightsenable.GetBool() )
  380. return;
  381. Vector bumpNormals[3];
  382. bool computedBumpBasis = false;
  383. Vector luxelBasePosition;
  384. // Displacements do dynamic lights different
  385. if( SurfaceHasDispInfo( surfID ) )
  386. {
  387. MSurf_DispInfo( surfID )->AddDynamicLights(pLights, lightMask);
  388. return;
  389. }
  390. // iterate all of the active dynamic lights. Uses several iterators to keep
  391. // the light mask (bit), light index, and active mask current
  392. for ( int lnum = 0, testBit = 1, mask = lightMask; lnum < MAX_DLIGHTS && mask != 0; lnum++, mask >>= 1, testBit <<= 1 )
  393. {
  394. // shift over the mask of active lights each iteration, if this one is active, apply it
  395. if ( mask & 1 )
  396. {
  397. // Cull surface to light radius
  398. Vector lightOrigin;
  399. VectorITransform( pLights[lnum].origin, entityToWorld, lightOrigin );
  400. // NOTE: Dist can be negative because muzzle flashes can actually get behind walls
  401. // since the gun isn't checked for collision tests.
  402. float perpDistSq = DotProduct (lightOrigin, MSurf_Plane( surfID ).normal) - MSurf_Plane( surfID ).dist;
  403. if (perpDistSq < DLIGHT_BEHIND_PLANE_DIST)
  404. continue;
  405. perpDistSq *= perpDistSq;
  406. // If the perp distance > radius of light, blow it off
  407. float lightRadiusSq = pLights[lnum].GetRadiusSquared();
  408. if (lightRadiusSq <= perpDistSq)
  409. continue;
  410. if (!needsBumpmap)
  411. {
  412. AddSingleDynamicLight( pLights[lnum], surfID, lightOrigin, perpDistSq, lightRadiusSq );
  413. continue;
  414. }
  415. // Here, I'm precomputing things needed by bumped lighting that
  416. // are the same for a surface...
  417. if (!computedBumpBasis)
  418. {
  419. R_ComputeSurfaceBasis( surfID, bumpNormals, luxelBasePosition );
  420. computedBumpBasis = true;
  421. }
  422. AddSingleDynamicLightToBumpLighting( pLights[lnum], surfID, lightOrigin, perpDistSq, lightRadiusSq, bumpNormals, luxelBasePosition );
  423. }
  424. }
  425. }
  426. // Fixed point (8.8) color/intensity ratios
  // Integer weights for the red, green and blue channels: 0.299 / 0.587 / 0.114
  // scaled by 255 and truncated to int.
  // NOTE(review): the "(8.8)" label above does not obviously match a scale of 255 - confirm.
  427. #define I_RED ((int)(0.299*255))
  428. #define I_GREEN ((int)(0.587*255))
  429. #define I_BLUE ((int)(0.114*255))
  // Console variable; its help text describes it as the brightness used for
  // lightmaps where the level provides none.
  430. ConVar mat_defaultlightmap( "mat_defaultlightmap", "1", FCVAR_NONE, "Default brightness for lightmaps where none have been created in the level." );
  431. //-----------------------------------------------------------------------------
  432. // Sets all elements in a lightmap to a particular opaque greyscale value
  433. //-----------------------------------------------------------------------------
  434. static void InitLMSamples( Vector4D *pSamples, int nSamples, float value )
  435. {
  436. for( int i=0; i < nSamples; i++ )
  437. {
  438. pSamples[i][0] = pSamples[i][1] = pSamples[i][2] = value;
  439. pSamples[i][3] = 0.0f; // Init the alpha to 0.0
  440. }
  441. }
  442. //-----------------------------------------------------------------------------
  443. // Computes the lightmap size
  444. //-----------------------------------------------------------------------------
  445. static int ComputeLightmapSize( SurfaceHandle_t surfID )
  446. {
  447. int smax = ( MSurf_LightmapExtents( surfID )[0] ) + 1;
  448. int tmax = ( MSurf_LightmapExtents( surfID )[1] ) + 1;
  449. int size = smax * tmax;
  450. int nMaxSize = MSurf_MaxLightmapSizeWithBorder( surfID );
  451. if (size > nMaxSize * nMaxSize)
  452. {
  453. ConMsg("Bad lightmap extents on material \"%s\"\n",
  454. materialSortInfoArray[MSurf_MaterialSortID( surfID )].material->GetName());
  455. return 0;
  456. }
  457. return size;
  458. }
  459. //#ifndef PLATFORM_PPC
  460. #if 1 // 7LS TODO - implement use of pLightmapExtraData in SIMD paths, especially if/when we get dynamic lightmaps working again
  461. //-----------------------------------------------------------------------------
  462. // Compute the portion of the lightmap generated from lightstyles
  463. //-----------------------------------------------------------------------------
  464. static void AccumulateLightstyles( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, float scalar )
  465. {
  466. Assert( pLightmap );
  467. for (int i=0; i<lightmapSize ; ++i)
  468. {
  469. float flR = scalar * TexLightToLinear( pLightmap[i].r, pLightmap[i].exponent );
  470. float flG = scalar * TexLightToLinear( pLightmap[i].g, pLightmap[i].exponent );
  471. float flB = scalar * TexLightToLinear( pLightmap[i].b, pLightmap[i].exponent );
  472. blocklights[0][i][0] += flR;
  473. blocklights[0][i][1] += flG;
  474. blocklights[0][i][2] += flB;
  475. #if defined(_PS3)
  476. blocklights[0][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) : 0.0f;
  477. #else
  478. // this won't work on platforms that have fp lightmaps
  479. // lightmapAlphaData3 implies new data in alpha for fixed CSM blending, old path for compatibility
  480. if ( g_bHasLightmapAlphaData3 )
  481. {
  482. Assert( pLightmapExtraData );
  483. blocklights[ 0 ][ i ][ 3 ] += ( (float)( pLightmapExtraData[ i * 4 ] ) ) * ( 1.0f / 255.0f );
  484. }
  485. else
  486. {
  487. blocklights[0][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) / 16.0f : 0.0f;
  488. }
  489. #endif
  490. }
  491. }
  492. static void AccumulateLightstylesNoAlpha( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, float scalar )
  493. {
  494. Assert( pLightmap );
  495. for ( int i = 0; i < lightmapSize; ++i )
  496. {
  497. float flR = scalar * TexLightToLinear( pLightmap[ i ].r, pLightmap[ i ].exponent );
  498. float flG = scalar * TexLightToLinear( pLightmap[ i ].g, pLightmap[ i ].exponent );
  499. float flB = scalar * TexLightToLinear( pLightmap[ i ].b, pLightmap[ i ].exponent );
  500. blocklights[ 0 ][ i ][ 0 ] += flR;
  501. blocklights[ 0 ][ i ][ 1 ] += flG;
  502. blocklights[ 0 ][ i ][ 2 ] += flB;
  503. }
  504. }
  505. static void AccumulateLightstylesFlat( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, float scalar )
  506. {
  507. Assert( pLightmap );
  508. for (int i=0; i<lightmapSize ; ++i)
  509. {
  510. float flR = scalar * TexLightToLinear( pLightmap->r, pLightmap->exponent );
  511. float flG = scalar * TexLightToLinear( pLightmap->g, pLightmap->exponent );
  512. float flB = scalar * TexLightToLinear( pLightmap->b, pLightmap->exponent );
  513. blocklights[0][i][0] += flR;
  514. blocklights[0][i][1] += flG;
  515. blocklights[0][i][2] += flB;
  516. #if defined(_PS3)
  517. blocklights[0][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) : 0.0f;
  518. #else
  519. // this won't work on platforms that have fp lightmaps
  520. if ( g_bHasLightmapAlphaData3 )
  521. {
  522. Assert( pLightmapExtraData );
  523. blocklights[0][i][3] += ((float)(pLightmapExtraData[i*4])) * (1.0f / 255.0f);
  524. }
  525. else
  526. {
  527. blocklights[0][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) / 16.0f : 0.0f;
  528. }
  529. #endif
  530. }
  531. }
  532. static void AccumulateLightstylesFlatNoAlpha( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, float scalar )
  533. {
  534. Assert( pLightmap );
  535. for ( int i = 0; i < lightmapSize; ++i )
  536. {
  537. float flR = scalar * TexLightToLinear( pLightmap->r, pLightmap->exponent );
  538. float flG = scalar * TexLightToLinear( pLightmap->g, pLightmap->exponent );
  539. float flB = scalar * TexLightToLinear( pLightmap->b, pLightmap->exponent );
  540. blocklights[ 0 ][ i ][ 0 ] += flR;
  541. blocklights[ 0 ][ i ][ 1 ] += flG;
  542. blocklights[ 0 ][ i ][ 2 ] += flB;
  543. }
  544. }
  545. static void AccumulateBumpedLightstyles( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, float scalar )
  546. {
  547. ColorRGBExp32 *pBumpedLightmaps[3];
  548. pBumpedLightmaps[0] = pLightmap + lightmapSize;
  549. pBumpedLightmaps[1] = pLightmap + 2 * lightmapSize;
  550. pBumpedLightmaps[2] = pLightmap + 3 * lightmapSize;
  551. float flR;
  552. float flG;
  553. float flB;
  554. if ( g_bHasLightmapAlphaData3 )
  555. {
  556. Assert( pLightmapExtraData );
  557. }
  558. // I chose to split up the loops this way because it was the best tradeoff
  559. // based on profiles between cache miss + loop overhead
  560. for (int i=0, j=0; i<lightmapSize ; ++i, j+=4 )
  561. {
  562. flR = scalar * TexLightToLinear( pLightmap[i].r, pLightmap[i].exponent );
  563. flG = scalar * TexLightToLinear( pLightmap[i].g, pLightmap[i].exponent );
  564. flB = scalar * TexLightToLinear( pLightmap[i].b, pLightmap[i].exponent );
  565. blocklights[0][i][0] += flR;
  566. blocklights[0][i][1] += flG;
  567. blocklights[0][i][2] += flB;
  568. #if defined(_PS3)
  569. blocklights[0][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) : 0.0f;
  570. #else
  571. // this won't work on platforms that have fp lightmaps
  572. if ( g_bHasLightmapAlphaData3 )
  573. {
  574. blocklights[0][i][3] += ((float)(pLightmapExtraData[j])) * (1.0f / 255.0f);
  575. }
  576. else
  577. {
  578. blocklights[0][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) / 16.0f : 0.0f;
  579. }
  580. #endif
  581. Assert( blocklights[0][i][0] >= 0.0f );
  582. Assert( blocklights[0][i][1] >= 0.0f );
  583. Assert( blocklights[0][i][2] >= 0.0f );
  584. flR = scalar * TexLightToLinear( pBumpedLightmaps[0][i].r, pBumpedLightmaps[0][i].exponent );
  585. flG = scalar * TexLightToLinear( pBumpedLightmaps[0][i].g, pBumpedLightmaps[0][i].exponent );
  586. flB = scalar * TexLightToLinear( pBumpedLightmaps[0][i].b, pBumpedLightmaps[0][i].exponent );
  587. blocklights[1][i][0] += flR;
  588. blocklights[1][i][1] += flG;
  589. blocklights[1][i][2] += flB;
  590. #if defined(_PS3)
  591. blocklights[1][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) : 0.0f;
  592. #else
  593. // this won't work on platforms that have fp lightmaps
  594. if ( g_bHasLightmapAlphaData3 )
  595. {
  596. blocklights[1][i][3] += ((float)pLightmapExtraData[j + 1]) * (1.0f / 255.0f);
  597. }
  598. else
  599. {
  600. blocklights[1][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) / 16.0f : 0.0f;
  601. }
  602. #endif
  603. Assert( blocklights[1][i][0] >= 0.0f );
  604. Assert( blocklights[1][i][1] >= 0.0f );
  605. Assert( blocklights[1][i][2] >= 0.0f );
  606. }
  607. for ( int i=0, j=0 ; i<lightmapSize ; ++i, j+=4 )
  608. {
  609. flR = scalar * TexLightToLinear( pBumpedLightmaps[1][i].r, pBumpedLightmaps[1][i].exponent );
  610. flG = scalar * TexLightToLinear( pBumpedLightmaps[1][i].g, pBumpedLightmaps[1][i].exponent );
  611. flB = scalar * TexLightToLinear( pBumpedLightmaps[1][i].b, pBumpedLightmaps[1][i].exponent );
  612. blocklights[2][i][0] += flR;
  613. blocklights[2][i][1] += flG;
  614. blocklights[2][i][2] += flB;
  615. #if defined(_PS3)
  616. blocklights[2][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) : 0.0f;
  617. #else
  618. // this won't work on platforms that have fp lightmaps
  619. if ( g_bHasLightmapAlphaData3 )
  620. {
  621. blocklights[2][i][3] += ((float)pLightmapExtraData[j + 2]) * (1.0f / 255.0f);
  622. }
  623. else
  624. {
  625. blocklights[2][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) / 16.0f : 0.0f;
  626. }
  627. #endif
  628. Assert( blocklights[2][i][0] >= 0.0f );
  629. Assert( blocklights[2][i][1] >= 0.0f );
  630. Assert( blocklights[2][i][2] >= 0.0f );
  631. flR = scalar * TexLightToLinear( pBumpedLightmaps[2][i].r, pBumpedLightmaps[2][i].exponent );
  632. flG = scalar * TexLightToLinear( pBumpedLightmaps[2][i].g, pBumpedLightmaps[2][i].exponent );
  633. flB = scalar * TexLightToLinear( pBumpedLightmaps[2][i].b, pBumpedLightmaps[2][i].exponent );
  634. blocklights[3][i][0] += flR;
  635. blocklights[3][i][1] += flG;
  636. blocklights[3][i][2] += flB;
  637. #if defined(_PS3)
  638. blocklights[3][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) : 0.0f;
  639. #else
  640. // this won't work on platforms that have fp lightmaps
  641. if ( g_bHasLightmapAlphaData3 )
  642. {
  643. blocklights[3][i][3] += ((float)pLightmapExtraData[j + 3]) * (1.0f / 255.0f);
  644. }
  645. else
  646. {
  647. blocklights[3][i][3] += pLightmapExtraData ? ( ( float )pLightmapExtraData[i] ) * ( 1.0f / 255.0f ) * ( flR * 0.2125 + flG * 0.7154 + flB * 0.0721 ) / 16.0f : 0.0f;
  648. }
  649. #endif
  650. Assert( blocklights[3][i][0] >= 0.0f );
  651. Assert( blocklights[3][i][1] >= 0.0f );
  652. Assert( blocklights[3][i][2] >= 0.0f );
  653. }
  654. }
  655. static void AccumulateBumpedLightstylesNoAlpha( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, float scalar )
  656. {
  657. ColorRGBExp32 *pBumpedLightmaps[ 3 ];
  658. pBumpedLightmaps[ 0 ] = pLightmap + lightmapSize;
  659. pBumpedLightmaps[ 1 ] = pLightmap + 2 * lightmapSize;
  660. pBumpedLightmaps[ 2 ] = pLightmap + 3 * lightmapSize;
  661. float flR;
  662. float flG;
  663. float flB;
  664. if ( g_bHasLightmapAlphaData3 )
  665. {
  666. Assert( pLightmapExtraData );
  667. }
  668. // I chose to split up the loops this way because it was the best tradeoff
  669. // based on profiles between cache miss + loop overhead
  670. for ( int i = 0, j = 0; i < lightmapSize; ++i, j += 4 )
  671. {
  672. flR = scalar * TexLightToLinear( pLightmap[ i ].r, pLightmap[ i ].exponent );
  673. flG = scalar * TexLightToLinear( pLightmap[ i ].g, pLightmap[ i ].exponent );
  674. flB = scalar * TexLightToLinear( pLightmap[ i ].b, pLightmap[ i ].exponent );
  675. blocklights[ 0 ][ i ][ 0 ] += flR;
  676. blocklights[ 0 ][ i ][ 1 ] += flG;
  677. blocklights[ 0 ][ i ][ 2 ] += flB;
  678. Assert( blocklights[ 0 ][ i ][ 0 ] >= 0.0f );
  679. Assert( blocklights[ 0 ][ i ][ 1 ] >= 0.0f );
  680. Assert( blocklights[ 0 ][ i ][ 2 ] >= 0.0f );
  681. flR = scalar * TexLightToLinear( pBumpedLightmaps[ 0 ][ i ].r, pBumpedLightmaps[ 0 ][ i ].exponent );
  682. flG = scalar * TexLightToLinear( pBumpedLightmaps[ 0 ][ i ].g, pBumpedLightmaps[ 0 ][ i ].exponent );
  683. flB = scalar * TexLightToLinear( pBumpedLightmaps[ 0 ][ i ].b, pBumpedLightmaps[ 0 ][ i ].exponent );
  684. blocklights[ 1 ][ i ][ 0 ] += flR;
  685. blocklights[ 1 ][ i ][ 1 ] += flG;
  686. blocklights[ 1 ][ i ][ 2 ] += flB;
  687. Assert( blocklights[ 1 ][ i ][ 0 ] >= 0.0f );
  688. Assert( blocklights[ 1 ][ i ][ 1 ] >= 0.0f );
  689. Assert( blocklights[ 1 ][ i ][ 2 ] >= 0.0f );
  690. }
  691. for ( int i = 0, j = 0; i < lightmapSize; ++i, j += 4 )
  692. {
  693. flR = scalar * TexLightToLinear( pBumpedLightmaps[ 1 ][ i ].r, pBumpedLightmaps[ 1 ][ i ].exponent );
  694. flG = scalar * TexLightToLinear( pBumpedLightmaps[ 1 ][ i ].g, pBumpedLightmaps[ 1 ][ i ].exponent );
  695. flB = scalar * TexLightToLinear( pBumpedLightmaps[ 1 ][ i ].b, pBumpedLightmaps[ 1 ][ i ].exponent );
  696. blocklights[ 2 ][ i ][ 0 ] += flR;
  697. blocklights[ 2 ][ i ][ 1 ] += flG;
  698. blocklights[ 2 ][ i ][ 2 ] += flB;
  699. Assert( blocklights[ 2 ][ i ][ 0 ] >= 0.0f );
  700. Assert( blocklights[ 2 ][ i ][ 1 ] >= 0.0f );
  701. Assert( blocklights[ 2 ][ i ][ 2 ] >= 0.0f );
  702. flR = scalar * TexLightToLinear( pBumpedLightmaps[ 2 ][ i ].r, pBumpedLightmaps[ 2 ][ i ].exponent );
  703. flG = scalar * TexLightToLinear( pBumpedLightmaps[ 2 ][ i ].g, pBumpedLightmaps[ 2 ][ i ].exponent );
  704. flB = scalar * TexLightToLinear( pBumpedLightmaps[ 2 ][ i ].b, pBumpedLightmaps[ 2 ][ i ].exponent );
  705. blocklights[ 3 ][ i ][ 0 ] += flR;
  706. blocklights[ 3 ][ i ][ 1 ] += flG;
  707. blocklights[ 3 ][ i ][ 2 ] += flB;
  708. Assert( blocklights[ 3 ][ i ][ 0 ] >= 0.0f );
  709. Assert( blocklights[ 3 ][ i ][ 1 ] >= 0.0f );
  710. Assert( blocklights[ 3 ][ i ][ 2 ] >= 0.0f );
  711. }
  712. }
  713. #else
  714. /*
  715. // unpack four ColorRGBExp32's loaded into a single vector register
  716. // into four. Can't do this as a function coz you can't return four
  717. // values and even the inliner falls down on pass-by-ref.
  718. #define UNPACK_COLORRGBEXP(fromVec, toVec0, toVec1, toVec2, toVec3) {\
  719. }
  720. */
  721. #ifdef _PS3
// The SIMD code below is written against the Xbox 360 VMX intrinsic names. On PS3
// each of those names is defined as a macro that forwards to the corresponding
// vec_* intrinsic, casting the operands to the vector element type that intrinsic
// takes. All but __vcfsx also cast the result back to fltx4.
  722. // map the names of some 360 intrinsics to the SN intrinsics
  723. #define __vmrghb(a,b) (fltx4) vec_vmrghb( (vector unsigned char)(a), (vector unsigned char)(b) )
  724. #define __vmrglb(a,b) (fltx4) vec_vmrglb( (vector unsigned char)(a), (vector unsigned char)(b) )
  725. #define __vupkhsb(a) (fltx4) vec_vupkhsb( (vector signed char)(a) )
  726. #define __vupklsb(a) (fltx4) vec_vupklsb( (vector signed char)(a) )
  727. #define __vupkhsh(a) (fltx4) vec_vupkhsh( (vector signed short) (a) )
  728. #define __vupklsh(a) (fltx4) vec_vupklsh( (vector signed short) (a) )
  729. #define __vcfsx(a,b) vec_vcfsx( ((vector signed int) (a)), b )
  730. #endif
  731. // because the e component of the colors is signed, we need to mask
  732. // off the corresponding channel in the intermediate halfword expansion
  733. // when we combine it with the unsigned unpack for the other channels
// The mask has the low 16 bits set in words 1 and 3 only. It is loaded by
// AccumulateLightstyles_EightAtAtime and passed to MaskedAssign to merge the
// signed byte unpack with the unsigned one.
  734. static const int32 ALIGN16 g_SIMD_HalfWordMask[4]= { 0x0000000, 0x0000FFFF, 0x0000000, 0x0000FFFF };
// 1/255 in every lane; the accumulate routines multiply unpacked byte channels by
// this to normalize them.
  735. static const fltx4 vOneOverTwoFiftyFive = { 1.0f / 255.0f , 1.0f / 255.0f , 1.0f / 255.0f , 1.0f / 255.0f };
  736. // grind through accumlating onto the blocklights,
  737. // one cache line at a time. Input pointers are assumed
  738. // to be cache aligned.
  739. // For a simpler reference implementation, see the PC version in the ifdef above.
  740. // This function makes heavy use of the special XBOX360 opcodes for
  741. // packing and unpacking integer d3d data. (Not available in SSE, sadly.)
  742. static void AccumulateLightstyles_EightAtAtime( ColorRGBExp32* RESTRICT pLightmap, // the input lightmap (not necessarily aligned)
  743. unsigned char *pLightmapExtraData,
  744. int lightmapSize,
  745. fltx4 vScalar,
  746. Vector4D * RESTRICT bLights // pointer to the blocklights row we'll be writing into -- should be cache aligned, but only hurts perf if it's not
  747. )
  748. {
  749. // We process blockLights in groups of four at a time, because we load the pLightmap four
  750. // at a time (four words fit into a vector register).
  751. // On top of that, we do two groups at once, because that's the length
  752. // of a cache line, and it helps us better hide latency.
  753. AssertMsg((lightmapSize & 7) == 0, "Input to Accumulate...EightAtATime not divisible by eight. Data corruption is the likely result." );
  754. VPROF_2("AccumulateLightstyles_EightAtAtime", VPROF_BUDGETGROUP_OTHER_UNACCOUNTED, false, BUDGETFLAG_CLIENT);
  755. const fltx4 vHalfWordMask = LoadAlignedSIMD(g_SIMD_HalfWordMask);
  756. fltx4 zero = Four_Zeros;
  757. for (int i = 0 ; i < lightmapSize ; i += 8 )
  758. {
  759. // cache prefetch two lines ahead on bLights, and one on pLightmap
  760. PREFETCH_128(bLights, 256);
  761. PREFETCH_128(pLightmap, 128);
  762. // the naming convention on these psuedoarrays (they are actually
  763. // registers) is that the number before the index is the group id,
  764. // and the index itself is which word in the group. If this seems
  765. // unclear to you, feel free to just use array indices 0..7
  766. // The compiler doesn't seem to deal properly with multidim arrays
  767. // (at least in the sense of aliasing them to registers)
  768. // However, if you always access through the arrays by using
  769. // compile-time immediate constants (eg, foo[2] rather than
  770. // int x = 2; foo[x]
  771. // it will at least treat them as register variables.
  772. // load four blockLights entries, and four colors
  773. fltx4 vLight0[4], vLight1[4];
  774. fltx4 colorLightMap0[4], colorLightMap1[4];
  775. fltx4 bytePackedLightMap0 = LoadUnalignedSIMD(pLightmap+i); // because each colorrgbexp is actually a 32-bit struct,
  776. // this loads four of them into one vector -- they are ubytes for rgb and sbyte for e
  777. fltx4 bytePackedLightMap1 = LoadUnalignedSIMD(pLightmap+i+4);
  778. // load group 0
  779. vLight0[0] = LoadAlignedSIMD( &(bLights + i + 0)->x );
  780. vLight0[1] = LoadAlignedSIMD( &(bLights + i + 1)->x );
  781. vLight0[2] = LoadAlignedSIMD( &(bLights + i + 2)->x );
  782. vLight0[3] = LoadAlignedSIMD( &(bLights + i + 3)->x );
  783. // load group 1
  784. vLight1[0] = LoadAlignedSIMD( &(bLights + i + 4)->x );
  785. vLight1[1] = LoadAlignedSIMD( &(bLights + i + 5)->x );
  786. vLight1[2] = LoadAlignedSIMD( &(bLights + i + 6)->x );
  787. vLight1[3] = LoadAlignedSIMD( &(bLights + i + 7)->x );
  788. // unpack the color light maps now that they have loaded
  789. // interleaving (four-vector) group 0 and 1
  790. // unpack rgbe 0 and 1:
  791. // like an unsigned unpack: { 0x00, colorLightMap[0].r, 0x00, colorLightMap[0].g, 0x00, colorLightMap[0].b, 0x00, colorLightMap[0].e,
  792. // 0x00, colorLightMap[1].r, 0x00, colorLightMap[1].g, 0x00, colorLightMap[1].b, 0x00, colorLightMap[1].e}
  793. fltx4 unsignedUnpackHi0 = __vmrghb(zero, bytePackedLightMap0); // GROUP 0
  794. fltx4 unsignedUnpackLo0 = __vmrglb(zero, bytePackedLightMap0); // rgbe words 2 and 3
  795. fltx4 unsignedUnpackHi1 = __vmrghb(zero, bytePackedLightMap1); // GROUP 1
  796. fltx4 unsignedUnpackLo1 = __vmrglb(zero, bytePackedLightMap1); // rgbe words 2 and 3
  797. fltx4 signedUnpackHi0 = __vupkhsb(bytePackedLightMap0); // signed unpack of words 0 and 1, like the unsigned unpack but replaces 0x00 w/ sign extension
  798. fltx4 signedUnpackLo0 = __vupklsb(bytePackedLightMap0); // GROUP 0
  799. fltx4 signedUnpackHi1 = __vupkhsb(bytePackedLightMap1); // signed unpack of words 0 and 1, like the unsigned unpack but replaces 0x00 w/ sign extension
  800. fltx4 signedUnpackLo1 = __vupklsb(bytePackedLightMap1); // GROUP 1
  801. // merge the signed and unsigned unpacks together to make the full halfwords
  802. unsignedUnpackHi0 = MaskedAssign(vHalfWordMask, signedUnpackHi0, unsignedUnpackHi0 );
  803. unsignedUnpackLo0 = MaskedAssign(vHalfWordMask, signedUnpackLo0, unsignedUnpackLo0 );
  804. unsignedUnpackHi1 = MaskedAssign(vHalfWordMask, signedUnpackHi1, unsignedUnpackHi1 );
  805. unsignedUnpackLo1 = MaskedAssign(vHalfWordMask, signedUnpackLo1, unsignedUnpackLo1 );
  806. // now complete the unpack from halfwords to words (we can just use signed because there are 0x00's above the rgb channels)
  807. colorLightMap0[0] = __vupkhsh(unsignedUnpackHi0); // vector unpack high signed halfword
  808. colorLightMap0[1] = __vupklsh(unsignedUnpackHi0); // vector unpack low signed halfword
  809. colorLightMap0[2] = __vupkhsh(unsignedUnpackLo0);
  810. colorLightMap0[3] = __vupklsh(unsignedUnpackLo0);
  811. colorLightMap0[0] = __vcfsx( colorLightMap0[0], 0); // convert to floats
  812. colorLightMap1[0] = __vupkhsh(unsignedUnpackHi1); // interleave group 1 unpacks
  813. colorLightMap0[1] = __vcfsx( colorLightMap0[1], 0); // convert to floats
  814. colorLightMap1[1] = __vupklsh(unsignedUnpackHi1); // should dual issue
  815. colorLightMap0[2] = __vcfsx( colorLightMap0[2], 0); // convert to floats
  816. colorLightMap1[2] = __vupkhsh(unsignedUnpackLo1);
  817. colorLightMap0[3] = __vcfsx( colorLightMap0[3], 0); // convert to floats
  818. colorLightMap1[3] = __vupklsh(unsignedUnpackLo1);
  819. // finish unpacking group 1 (giving group 0 time to finish converting)
  820. colorLightMap1[0] = __vcfsx( colorLightMap1[0], 0);
  821. colorLightMap1[1] = __vcfsx( colorLightMap1[1], 0);
  822. colorLightMap1[2] = __vcfsx( colorLightMap1[2], 0);
  823. colorLightMap1[3] = __vcfsx( colorLightMap1[3], 0);
  824. // manufacture exponent splats and start normalizing the rgb channels (eg *= 1/255)
  825. fltx4 expW0[4], expW1[4];
  826. expW0[0] = SplatWSIMD(colorLightMap0[0]);
  827. colorLightMap0[0] = MulSIMD(colorLightMap0[0], vOneOverTwoFiftyFive); // normalize the rgb channels
  828. expW0[1] = SplatWSIMD(colorLightMap0[1]);
  829. colorLightMap0[1] = MulSIMD(colorLightMap0[1], vOneOverTwoFiftyFive); // normalize the rgb channels
  830. expW0[2] = SplatWSIMD(colorLightMap0[2]);
  831. colorLightMap0[2] = MulSIMD(colorLightMap0[2], vOneOverTwoFiftyFive); // normalize the rgb channels
  832. expW0[3] = SplatWSIMD(colorLightMap0[3]);
  833. colorLightMap0[3] = MulSIMD(colorLightMap0[3], vOneOverTwoFiftyFive); // normalize the rgb channels
  834. // scale each of the color channels by the exponent channel
  835. // (the estimate operation is exact for integral inputs, as here)
  836. expW0[0] = Exp2EstSIMD( expW0[0] ); // x = 2^x
  837. expW1[0] = SplatWSIMD(colorLightMap1[0]); // interleave splats on exp group 1 (dual issue)
  838. colorLightMap1[0] = MulSIMD(colorLightMap1[0], vOneOverTwoFiftyFive); // normalize the rgb channels
  839. expW0[1] = Exp2EstSIMD( expW0[1] );
  840. expW1[1] = SplatWSIMD(colorLightMap1[1]);
  841. colorLightMap1[1] = MulSIMD(colorLightMap1[1], vOneOverTwoFiftyFive); // normalize the rgb channels
  842. expW0[2] = Exp2EstSIMD( expW0[2] );
  843. expW1[2] = SplatWSIMD(colorLightMap1[2]);
  844. colorLightMap1[2] = MulSIMD(colorLightMap1[2], vOneOverTwoFiftyFive); // normalize the rgb channels
  845. expW0[3] = Exp2EstSIMD( expW0[3] );
  846. expW1[3] = SplatWSIMD(colorLightMap1[3]);
  847. colorLightMap1[3] = MulSIMD(colorLightMap1[3], vOneOverTwoFiftyFive); // normalize the rgb channels
  848. // finish scale-by-exponent on group 1
  849. expW1[0] = Exp2EstSIMD( expW1[0] );
  850. expW1[1] = Exp2EstSIMD( expW1[1] );
  851. expW1[2] = Exp2EstSIMD( expW1[2] );
  852. expW1[3] = Exp2EstSIMD( expW1[3] );
  853. colorLightMap0[0] = MulSIMD(expW0[0], colorLightMap0[0]);
  854. colorLightMap0[1] = MulSIMD(expW0[1], colorLightMap0[1]);
  855. colorLightMap0[2] = MulSIMD(expW0[2], colorLightMap0[2]);
  856. colorLightMap0[3] = MulSIMD(expW0[3], colorLightMap0[3]);
  857. colorLightMap1[0] = MulSIMD(expW1[0], colorLightMap1[0]);
  858. colorLightMap1[1] = MulSIMD(expW1[1], colorLightMap1[1]);
  859. colorLightMap1[2] = MulSIMD(expW1[2], colorLightMap1[2]);
  860. colorLightMap1[3] = MulSIMD(expW1[3], colorLightMap1[3]);
  861. #ifdef X360_DOUBLECHECK_LIGHTMAPS
  862. for (int group = 0 ; group < 4 ; ++group)
  863. {
  864. Assert( colorLightMap0[group].v[0] == TexLightToLinear( pLightmap[i + group].r, pLightmap[i + group].exponent ) &&
  865. colorLightMap0[group].v[1] == TexLightToLinear( pLightmap[i + group].g, pLightmap[i + group].exponent ) &&
  866. colorLightMap0[group].v[2] == TexLightToLinear( pLightmap[i + group].b, pLightmap[i + group].exponent ) );
  867. }
  868. #endif
  869. // accumulate into blocklights
  870. vLight0[0] = MaddSIMD(vScalar, colorLightMap0[0], vLight0[0]);
  871. vLight0[1] = MaddSIMD(vScalar, colorLightMap0[1], vLight0[1]);
  872. vLight0[2] = MaddSIMD(vScalar, colorLightMap0[2], vLight0[2]);
  873. vLight0[3] = MaddSIMD(vScalar, colorLightMap0[3], vLight0[3]);
  874. vLight1[0] = MaddSIMD(vScalar, colorLightMap1[0], vLight1[0]);
  875. vLight1[1] = MaddSIMD(vScalar, colorLightMap1[1], vLight1[1]);
  876. vLight1[2] = MaddSIMD(vScalar, colorLightMap1[2], vLight1[2]);
  877. vLight1[3] = MaddSIMD(vScalar, colorLightMap1[3], vLight1[3]);
  878. // save
  879. StoreAlignedSIMD( (bLights + i + 0)->Base(), vLight0[0]);
  880. StoreAlignedSIMD( (bLights + i + 1)->Base(), vLight0[1]);
  881. StoreAlignedSIMD( (bLights + i + 2)->Base(), vLight0[2]);
  882. StoreAlignedSIMD( (bLights + i + 3)->Base(), vLight0[3]);
  883. StoreAlignedSIMD( (bLights + i + 4)->Base(), vLight1[0]);
  884. StoreAlignedSIMD( (bLights + i + 5)->Base(), vLight1[1]);
  885. StoreAlignedSIMD( (bLights + i + 6)->Base(), vLight1[2]);
  886. StoreAlignedSIMD( (bLights + i + 7)->Base(), vLight1[3]);
  887. }
  888. }
  889. // just like XMLoadByte4 only no asserts - loads a vector from
  890. // a struct { char v[4] }
  891. #ifdef _X360
  892. FORCEINLINE XMVECTOR LoadSignedByte4NoAssert ( CONST XMBYTE4* pSource )
  893. {
  894. XMVECTOR V;
  895. V = __lvlx(pSource, 0);
  896. V = __vupkhsb(V);
  897. V = __vupkhsh(V);
  898. V = __vcfsx(V, 0);
  899. return V;
  900. }
  901. FORCEINLINE XMVECTOR LoadUnsignedByte4( ColorRGBExp32* pSource )
  902. {
  903. return XMLoadUByte4(reinterpret_cast<XMUBYTE4 *>(pSource));
  904. }
  905. #elif defined(_PS3)
  906. typedef struct _XMBYTE4 {
  907. union {
  908. struct {
  909. CHAR x;
  910. CHAR y;
  911. CHAR z;
  912. CHAR w;
  913. };
  914. UINT v;
  915. };
  916. } XMBYTE4;
  917. FORCEINLINE fltx4 LoadSignedByte4NoAssert ( const XMBYTE4* pSource )
  918. {
  919. fltx4 V;
  920. /*
  921. V = vec_lvlx(pSource, 0);
  922. V = vec_vupkhsb(V);
  923. V = vec_vupkhsh(V);
  924. V = vec_vcfsx(V, 0);
  925. return V;
  926. */
  927. return vec_vcfsx( vec_vupkhsh( vec_vupkhsb( (vector signed char) vec_lvlx(0, reinterpret_cast<const vec_float4 *>(pSource)) ) ) , 0);
  928. }
  929. FORCEINLINE fltx4 LoadUnsignedByte4( ColorRGBExp32* pSource )
  930. {
  931. // this mask moves four consecutive bytes in the x word of a vec reg
  932. // into the respective four words of a vreg.
  933. const static vector unsigned int PermuteMask = { 0x00000010, 0x00000011, 0x00000012, 0x00000013 };
  934. fltx4 V = vec_lvlx( 0, reinterpret_cast<const vec_float4 *>(pSource) );
  935. V = vec_perm( LoadZeroSIMD(), V, (vec_uchar16) PermuteMask );
  936. return vec_vcfux( (vector unsigned int) V, 0 );
  937. }
  938. #else
  939. #error No implementation of LoadSignedByte4NoAssert for this platform
  940. #endif
  941. FORCEINLINE fltx4 StompW( fltx4 V ) // force w word of a vector to zero
  942. {
  943. #ifdef _X360
  944. return __vrlimi(V, Four_Zeros, 1, 0);
  945. #elif defined(_PS3)
  946. const static bi32x4 mask = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0 };
  947. return vec_and( V, mask );
  948. #else
  949. #error Wrong platform!
  950. #endif
  951. }
  952. //-----------------------------------------------------------------------------
  953. // Compute the portion of the lightmap generated from lightstyles
  954. //-----------------------------------------------------------------------------
  955. static void AccumulateLightstyles( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, fltx4 vScalar )
  956. {
  957. Assert( pLightmap );
  958. VPROF_2( "AccumulateLightstyles" , VPROF_BUDGETGROUP_OTHER_UNACCOUNTED, false, BUDGETFLAG_CLIENT);
  959. // crush w of the scalar to zero (so we don't overwrite blocklight[x][y][3] in the madds)
  960. vScalar = StompW(vScalar);
  961. int lightmapSizeEightAligned = lightmapSize & (~0x07);
  962. // crunch as many groups of eight as possible, then deal with the remainder
  963. AccumulateLightstyles_EightAtAtime(pLightmap, pLightmapExtraData, lightmapSizeEightAligned, vScalar, blocklights[0]);
  964. // handle remainders
  965. for (int i = lightmapSizeEightAligned; i < lightmapSize ; ++i )
  966. {
  967. // load four blockLights entries, and four colors
  968. fltx4 vLight;
  969. fltx4 colorLightMap;
  970. vLight = LoadAlignedSIMD(blocklights[0][i].Base());
  971. // unpack the color light maps
  972. // load the unsigned bytes
  973. colorLightMap = LoadUnsignedByte4(pLightmap + i);
  974. // fish out the exponent component from a signed load
  975. fltx4 exponentiator = Exp2EstSIMD(SplatWSIMD(LoadSignedByte4NoAssert(reinterpret_cast<XMBYTE4 *>(pLightmap + i))));
  976. // scale each of the color light channels by the exponent
  977. colorLightMap = MulSIMD( MulSIMD(colorLightMap, vOneOverTwoFiftyFive ), exponentiator );
  978. #ifdef _DEBUG
  979. float tltl_r = TexLightToLinear( pLightmap[i].r, pLightmap[i].exponent );
  980. float tltl_g = TexLightToLinear( pLightmap[i].g, pLightmap[i].exponent );
  981. float tltl_b = TexLightToLinear( pLightmap[i].b, pLightmap[i].exponent );
  982. #endif
  983. Assert( SubFloat(colorLightMap,0) == TexLightToLinear( pLightmap[i].r, pLightmap[i].exponent ) &&
  984. SubFloat(colorLightMap,1) == TexLightToLinear( pLightmap[i].g, pLightmap[i].exponent ) &&
  985. SubFloat(colorLightMap,2) == TexLightToLinear( pLightmap[i].b, pLightmap[i].exponent ) );
  986. // accumulate onto blocklights
  987. vLight = MaddSIMD(vScalar, colorLightMap, vLight);
  988. StoreAlignedSIMD(blocklights[0][i].Base(), vLight);
  989. }
  990. }
  991. static void AccumulateLightstylesFlat( ColorRGBExp32* pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, fltx4 vScalar )
  992. {
  993. Assert( pLightmap );
  994. VPROF( "AccumulateLightstylesFlat" );
  995. // this isn't a terribly fast way of doing things, but
  996. // this function doesn't seem to be called much (so
  997. // it's not worth the trouble of custom loop scheduling)
  998. fltx4 colorLightMap;
  999. // unpack the color light maps
  1000. // load the unsigned bytes
  1001. colorLightMap = LoadUnsignedByte4(pLightmap);
  1002. // fish out the exponent component from a signed load
  1003. fltx4 exponentiator = Exp2EstSIMD(SplatWSIMD(LoadSignedByte4NoAssert(reinterpret_cast<XMBYTE4 *>(pLightmap))));
  1004. // scale each of the color light channels by the exponent
  1005. colorLightMap = MulSIMD( MulSIMD(colorLightMap, vOneOverTwoFiftyFive ), exponentiator );
  1006. for (int i = 0; i < lightmapSize ; ++i )
  1007. {
  1008. // load four blockLights entries, and four colors
  1009. fltx4 vLight;
  1010. vLight = LoadAlignedSIMD(blocklights[0][i].Base());
  1011. // accumulate onto blocklights
  1012. vLight = MaddSIMD(vScalar, colorLightMap, vLight);
  1013. StoreAlignedSIMD(blocklights[0][i].Base(), vLight);
  1014. }
  1015. }
  1016. static void AccumulateBumpedLightstyles( ColorRGBExp32* RESTRICT pLightmap, unsigned char *pLightmapExtraData, int lightmapSize, fltx4 vScalar )
  1017. {
  1018. COMPILE_TIME_ASSERT(sizeof(ColorRGBExp32) == 4); // This function is carefully scheduled around four-byte colors
  1019. VPROF_2( "AccumulateBumpedLightstyles" , VPROF_BUDGETGROUP_OTHER_UNACCOUNTED, false, BUDGETFLAG_CLIENT);
  1020. // crush w of the scalar to zero (so we don't overwrite blocklight[x][y][3] in the madds)
  1021. vScalar = vScalar = StompW(vScalar);
  1022. /*
  1023. ColorRGBExp32 * RESTRICT pBumpedLightmaps[3];
  1024. pBumpedLightmaps[1] = pLightmap + lightmapSize;
  1025. pBumpedLightmaps[2] = pLightmap + 2 * lightmapSize;
  1026. pBumpedLightmaps[3] = pLightmap + 3 * lightmapSize;
  1027. */
  1028. // assert word (not vector) alignment
  1029. AssertMsg( ((reinterpret_cast<unsigned int>(pLightmap) & 0x03 ) == 0), "Lightmap was not word-aligned: AccumulateBumpedLightstyles must fail." );
  1030. // assert vector alignment
  1031. AssertMsg( (reinterpret_cast<unsigned int>(blocklights) & 0x0F ) == 0, "Blocklights is not vector-aligned. You're doomed." );
  1032. AssertMsg( (reinterpret_cast<unsigned int>(blocklights) & 127 ) == 0, "Blocklights is not cache-aligned. Performance will suffer." );
  1033. #if 0 // reference: This is the simple version -- four-way accumulate (no interleaving)
  1034. for (int i = 0 ; i < lightmapSize ; i+= 4)
  1035. {
  1036. // load four blockLights entries, and four colors
  1037. fltx4 vLight[4];
  1038. fltx4 colorLightMap[4];
  1039. vLight[0] = LoadUnalignedSIMD(&blocklights[0][i]);
  1040. vLight[1] = LoadUnalignedSIMD(&blocklights[0][i+1]);
  1041. vLight[2] = LoadUnalignedSIMD(&blocklights[0][i+2]);
  1042. vLight[3] = LoadUnalignedSIMD(&blocklights[0][i+3]);
  1043. // unpack the color light maps
  1044. {
  1045. fltx4 zero = Four_Zeros;
  1046. fltx4 colorLightmap = LoadUnalignedSIMD(pLightmap+i); // because each colorrgbexp is actually a 32-bit struct,
  1047. // this loads four of them into one vector -- they are ubytes for rgb and sbyte for e
  1048. // unpack rgbe 0 and 1:
  1049. // like an unsigned unpack: { 0x00, colorLightMap[0].r, 0x00, colorLightMap[0].g, 0x00, colorLightMap[0].b, 0x00, colorLightMap[0].e,
  1050. // 0x00, colorLightMap[1].r, 0x00, colorLightMap[1].g, 0x00, colorLightMap[1].b, 0x00, colorLightMap[1].e}
  1051. fltx4 unsignedUnpackHi = __vmrghb(zero, colorLightMap);
  1052. fltx4 unsignedUnpackLo = __vmrghb(zero, colorLightMap); // rgbe words 2 and 3
  1053. fltx4 signedUnpackHi = __vupkhsb(colorLightMap); // signed unpack of words 0 and 1, like the unsigned unpack but repl 0x00 w/ sign extension
  1054. fltx4 signedUnpackLo = __vupklsb(colorLightMap);
  1055. // merge the signed and unsigned unpacks together to make the full halfwords
  1056. unsignedUnpackHi = MaskedAssign(vHalfWordMask, signedUnpackHi, unsignedUnpackHi );
  1057. unsignedUnpackLo = MaskedAssign(vHalfWordMask, signedUnpackLo, unsignedUnpackLo );
  1058. // now complete the unpack from halfwords to words (we can just use signed because there are 0x00's above the rgb channels)
  1059. // and convert to float
  1060. colorLightMap[0] = __vcfsx( __vupkhsh(unsignedUnpackHi), 0);
  1061. colorLightMap[1] = __vcfsx( __vupklsh(unsignedUnpackHi), 0);
  1062. colorLightMap[2] = __vcfsx( __vupkhsh(unsignedUnpackLo), 0);
  1063. colorLightMap[3] = __vcfsx( __vupklsh(unsignedUnpackLo), 0);
  1064. }
  1065. // scale each of the color channels by the exponent channel
  1066. colorLightMap[0] = XMVectorExpEst( XMVectorSplatW(colorLightMap[0]) );
  1067. colorLightMap[1] = XMVectorExpEst( XMVectorSplatW(colorLightMap[1]) );
  1068. colorLightMap[2] = XMVectorExpEst( XMVectorSplatW(colorLightMap[2]) );
  1069. colorLightMap[3] = XMVectorExpEst( XMVectorSplatW(colorLightMap[3]) );
  1070. // accumulate into blocklights
  1071. vLight[0] = XMVectorMultiplyAdd(vScalar, colorLightMap[0], vLight[0]);
  1072. vLight[1] = XMVectorMultiplyAdd(vScalar, colorLightMap[1], vLight[1]);
  1073. vLight[2] = XMVectorMultiplyAdd(vScalar, colorLightMap[2], vLight[2]);
  1074. vLight[3] = XMVectorMultiplyAdd(vScalar, colorLightMap[3], vLight[3]);
  1075. // save
  1076. XMStoreVector4(&blocklights[0][i], vLight[0]);
  1077. XMStoreVector4(&blocklights[1][i], vLight[1]);
  1078. XMStoreVector4(&blocklights[2][i], vLight[2]);
  1079. XMStoreVector4(&blocklights[3][i], vLight[3]);
  1080. }
  1081. #endif
  1082. int lightmapSizeEightAligned = lightmapSize & (~0x07);
  1083. // crunch each of the lightmap groups.
  1084. for (int mapGroup = 0 ; mapGroup <= 3 ; ++mapGroup, pLightmap += lightmapSize )
  1085. {
  1086. // process the base lightmap
  1087. if ( lightmapSizeEightAligned )
  1088. {
  1089. // start loading the first couple of cache lines for the *next* group of blocklights.
  1090. if ( mapGroup < 3 )
  1091. {
  1092. PREFETCH_128( blocklights[mapGroup+1], 0 );
  1093. PREFETCH_128( blocklights[mapGroup+1], 128 );
  1094. PREFETCH_128( pLightmap + lightmapSize, 0 );
  1095. }
  1096. AccumulateLightstyles_EightAtAtime(pLightmap, pLightmapExtraData, lightmapSizeEightAligned, vScalar, blocklights[mapGroup]);
  1097. }
  1098. // handle remainders
  1099. for (int i = lightmapSizeEightAligned; i < lightmapSize ; ++i )
  1100. {
  1101. // load four blockLights entries, and four colors
  1102. fltx4 vLight;
  1103. fltx4 colorLightMap;
  1104. vLight = LoadAlignedSIMD(blocklights[mapGroup][i].Base());
  1105. // unpack the color light maps
  1106. // load the unsigned bytes
  1107. colorLightMap = LoadUnsignedByte4(pLightmap + i);
  1108. // fish out the exponent component from a signed load
  1109. fltx4 exponentiator = Exp2EstSIMD(SplatWSIMD(LoadSignedByte4NoAssert(reinterpret_cast<XMBYTE4 *>(pLightmap + i))));
  1110. // scale each of the color light channels by the exponent
  1111. colorLightMap = MulSIMD( MulSIMD(colorLightMap, vOneOverTwoFiftyFive ), exponentiator );
  1112. Assert( SubFloat(colorLightMap,0) == TexLightToLinear( pLightmap[i].r, pLightmap[i].exponent ) &&
  1113. SubFloat(colorLightMap,1) == TexLightToLinear( pLightmap[i].g, pLightmap[i].exponent ) &&
  1114. SubFloat(colorLightMap,2) == TexLightToLinear( pLightmap[i].b, pLightmap[i].exponent ) );
  1115. // accumulate onto blocklights
  1116. vLight = MaddSIMD(vScalar, colorLightMap, vLight);
  1117. StoreAlignedSIMD(blocklights[mapGroup][i].Base(), vLight);
  1118. }
  1119. // note: pLightmap is incremented as well.
  1120. }
  1121. }
  1122. #endif
  1123. //-----------------------------------------------------------------------------
  1124. // Compute the portion of the lightmap generated from lightstyles
  1125. //-----------------------------------------------------------------------------
  1126. static void ComputeLightmapFromLightstyle( msurfacelighting_t *pLighting, bool computeLightmap,
  1127. bool computeBumpmap, int lightmapSize, bool hasBumpmapLightmapData )
  1128. {
  1129. VPROF( "ComputeLightmapFromLightstyle" );
  1130. ColorRGBExp32 *pLightmap = pLighting->m_pSamples;
  1131. // This data should only exist on the PC. We strip out the data and clear the flag in makegamedata for consoles.
  1132. unsigned char *pLightmapExtraData = NULL;
  1133. if ( g_bHasLightmapAlphaData )
  1134. {
  1135. pLightmapExtraData = ( unsigned char * )&( pLighting->m_pSamples[ hasBumpmapLightmapData ? lightmapSize * ( NUM_BUMP_VECTS + 1 ) : lightmapSize ] );
  1136. }
  1137. // Compute iteration range
  1138. int minmap, maxmap;
  1139. #ifdef USE_CONVARS
  1140. if( r_lightmap.GetInt() != -1 )
  1141. {
  1142. minmap = r_lightmap.GetInt();
  1143. maxmap = minmap + 1;
  1144. }
  1145. else
  1146. #endif
  1147. {
  1148. minmap = 0; maxmap = MAXLIGHTMAPS;
  1149. }
  1150. for (int maps = minmap; maps < maxmap && pLighting->m_nStyles[maps] != 255; ++maps)
  1151. {
  1152. if( r_lightstyle.GetInt() != -1 && pLighting->m_nStyles[maps] != r_lightstyle.GetInt())
  1153. {
  1154. continue;
  1155. }
  1156. float fscalar = LightStyleValue( pLighting->m_nStyles[maps] );
  1157. // hack - don't know why we are getting negative values here.
  1158. // if (scalar > 0.0f && maps > 0 )
  1159. if (fscalar > 0.0f)
  1160. {
  1161. //#ifdef PLATFORM_PPC
  1162. #if 0 // 7LS
  1163. fltx4 scalar = ReplicateX4(fscalar); // we use SIMD versions of these functions on 360
  1164. #else
  1165. const float &scalar = fscalar;
  1166. #endif
  1167. if( computeBumpmap )
  1168. {
  1169. // don't accumulate alpha for other lightstyles
  1170. if ( maps == 0 )
  1171. {
  1172. AccumulateBumpedLightstyles( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1173. }
  1174. else
  1175. {
  1176. AccumulateBumpedLightstylesNoAlpha( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1177. }
  1178. }
  1179. else if( computeLightmap )
  1180. {
  1181. if (r_avglightmap.GetInt())
  1182. {
  1183. pLightmap = pLighting->AvgLightColor(maps);
  1184. // don't accumulate alpha for other lightstyles
  1185. if ( maps == 0 )
  1186. {
  1187. AccumulateLightstylesFlat( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1188. }
  1189. else
  1190. {
  1191. AccumulateLightstylesFlatNoAlpha( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1192. }
  1193. }
  1194. else
  1195. {
  1196. // don't accumulate alpha for other lightstyles
  1197. if ( maps == 0 )
  1198. {
  1199. AccumulateLightstyles( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1200. }
  1201. else
  1202. {
  1203. AccumulateLightstylesNoAlpha( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1204. }
  1205. }
  1206. }
  1207. }
  1208. // skip to next lightmap. If we store bump lightmap data, we need to jump forward 5 (1 x regular lmap, 3 x bump lmaps, 1 x extra alpha csm data)
  1209. // otherwise 2 (1 x regular lmap, 1 x extra alpha csm data)
  1210. pLightmap += hasBumpmapLightmapData ? lightmapSize * ( NUM_BUMP_VECTS + 2 ) : ( lightmapSize * 2 );
  1211. }
  1212. }
  1213. //-----------------------------------------------------------------------------
  1214. // Version of above to support old lightmap lump layout (before lightstyles were fixed)
  1215. // Added to avoid modders re-baking maps that used 'broken' lightstyle data in a manner that worked for them (i.e. without CSMs)
  1216. //-----------------------------------------------------------------------------
  1217. static void ComputeLightmapFromLightstyleOLD( msurfacelighting_t *pLighting, bool computeLightmap,
  1218. bool computeBumpmap, int lightmapSize, bool hasBumpmapLightmapData )
  1219. {
  1220. VPROF( "ComputeLightmapFromLightstyleOLD" );
  1221. ColorRGBExp32 *pLightmap = pLighting->m_pSamples;
  1222. // This data should only exist on the PC. We strip out the data and clear the flag in makegamedata for consoles.
  1223. unsigned char *pLightmapExtraData = NULL;
  1224. if ( g_bHasLightmapAlphaData )
  1225. {
  1226. pLightmapExtraData = ( unsigned char * )&( pLighting->m_pSamples[ hasBumpmapLightmapData ? lightmapSize * ( NUM_BUMP_VECTS + 1 ) : lightmapSize ] );
  1227. }
  1228. // Compute iteration range
  1229. int minmap, maxmap;
  1230. #ifdef USE_CONVARS
  1231. if ( r_lightmap.GetInt() != -1 )
  1232. {
  1233. minmap = r_lightmap.GetInt();
  1234. maxmap = minmap + 1;
  1235. }
  1236. else
  1237. #endif
  1238. {
  1239. minmap = 0; maxmap = MAXLIGHTMAPS;
  1240. }
  1241. for ( int maps = minmap; maps < maxmap && pLighting->m_nStyles[ maps ] != 255; ++maps )
  1242. {
  1243. if ( r_lightstyle.GetInt() != -1 && pLighting->m_nStyles[ maps ] != r_lightstyle.GetInt() )
  1244. {
  1245. continue;
  1246. }
  1247. float fscalar = LightStyleValue( pLighting->m_nStyles[ maps ] );
  1248. // hack - don't know why we are getting negative values here.
  1249. // if (scalar > 0.0f && maps > 0 )
  1250. if ( fscalar > 0.0f )
  1251. {
  1252. //#ifdef PLATFORM_PPC
  1253. #if 0 // 7LS
  1254. fltx4 scalar = ReplicateX4( fscalar ); // we use SIMD versions of these functions on 360
  1255. #else
  1256. const float &scalar = fscalar;
  1257. #endif
  1258. if ( computeBumpmap )
  1259. {
  1260. AccumulateBumpedLightstyles( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1261. }
  1262. else if ( computeLightmap )
  1263. {
  1264. if ( r_avglightmap.GetInt() )
  1265. {
  1266. pLightmap = pLighting->AvgLightColor( maps );
  1267. AccumulateLightstylesFlat( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1268. }
  1269. else
  1270. {
  1271. AccumulateLightstyles( pLightmap, pLightmapExtraData, lightmapSize, scalar );
  1272. }
  1273. }
  1274. }
  1275. // skip to next lightmap. If we store lightmap data, we need to jump forward 4
  1276. pLightmap += hasBumpmapLightmapData ? lightmapSize * ( NUM_BUMP_VECTS + 1 ) : lightmapSize;
  1277. }
  1278. }
  1279. // instrumentation to measure locks
  1280. /*
  1281. static CUtlVector<int> g_LightmapLocks;
  1282. static int g_Lastdlightframe = -1;
  1283. static int g_lastlock = -1;
  1284. static int g_unsorted = 0;
  1285. void MarkPage( int pageID )
  1286. {
  1287. if ( g_Lastdlightframe != r_framecount )
  1288. {
  1289. int total = 0;
  1290. int locks = 0;
  1291. for ( int i = 0; i < g_LightmapLocks.Count(); i++ )
  1292. {
  1293. int count = g_LightmapLocks[i];
  1294. if ( count )
  1295. {
  1296. total++;
  1297. locks += count;
  1298. }
  1299. g_LightmapLocks[i] = 0;
  1300. }
  1301. g_Lastdlightframe = r_framecount;
  1302. g_lastlock = -1;
  1303. if ( locks )
  1304. Msg("Total pages %d, locks %d, unsorted locks %d\n", total, locks, g_unsorted );
  1305. g_unsorted = 0;
  1306. }
  1307. if ( pageID != g_lastlock )
  1308. {
  1309. g_lastlock = pageID;
  1310. g_unsorted++;
  1311. }
  1312. g_LightmapLocks.EnsureCount(pageID+1);
  1313. g_LightmapLocks[pageID]++;
  1314. }
  1315. */
  1316. //-----------------------------------------------------------------------------
  1317. // Update the lightmaps...
  1318. //-----------------------------------------------------------------------------
  1319. static void UpdateLightmapTextures( SurfaceHandle_t surfID, bool needsBumpmap )
  1320. {
  1321. ASSERT_SURF_VALID( surfID );
  1322. if( materialSortInfoArray )
  1323. {
  1324. int lightmapSize[2];
  1325. int offsetIntoLightmapPage[2];
  1326. lightmapSize[0] = ( MSurf_LightmapExtents( surfID )[0] ) + 1;
  1327. lightmapSize[1] = ( MSurf_LightmapExtents( surfID )[1] ) + 1;
  1328. offsetIntoLightmapPage[0] = MSurf_OffsetIntoLightmapPage( surfID )[0];
  1329. offsetIntoLightmapPage[1] = MSurf_OffsetIntoLightmapPage( surfID )[1];
  1330. Assert( MSurf_MaterialSortID( surfID ) >= 0 &&
  1331. MSurf_MaterialSortID( surfID ) < g_WorldStaticMeshes.Count() );
  1332. // FIXME: Should differentiate between bumped and unbumped since the perf characteristics
  1333. // are completely different?
  1334. // MarkPage( materialSortInfoArray[MSurf_MaterialSortID( surfID )].lightmapPageID );
  1335. if( needsBumpmap )
  1336. {
  1337. materials->UpdateLightmap( materialSortInfoArray[MSurf_MaterialSortID( surfID )].lightmapPageID,
  1338. lightmapSize, offsetIntoLightmapPage,
  1339. &blocklights[0][0][0], &blocklights[1][0][0], &blocklights[2][0][0], &blocklights[3][0][0] );
  1340. }
  1341. else
  1342. {
  1343. materials->UpdateLightmap( materialSortInfoArray[MSurf_MaterialSortID( surfID )].lightmapPageID,
  1344. lightmapSize, offsetIntoLightmapPage,
  1345. &blocklights[0][0][0], NULL, NULL, NULL );
  1346. }
  1347. }
  1348. }
  1349. unsigned int R_UpdateDlightState( dlight_t *pLights, SurfaceHandle_t surfID, const matrix3x4_t& entityToWorld, bool bOnlyUseLightStyles, bool bLightmap )
  1350. {
  1351. unsigned int dlightMask = 0;
  1352. // Mark the surface with the particular cached light values...
  1353. msurfacelighting_t *pLighting = SurfaceLighting( surfID );
  1354. // Retire dlights that are no longer active
  1355. pLighting->m_fDLightBits &= r_dlightactive;
  1356. pLighting->m_nLastComputedFrame = r_framecount;
  1357. // Here, it's got the data it needs. So use it!
  1358. if ( !bOnlyUseLightStyles )
  1359. {
  1360. // add all the dynamic lights
  1361. if( bLightmap && ( pLighting->m_nDLightFrame == r_framecount ) )
  1362. {
  1363. dlightMask = R_ComputeDynamicLightMask( pLights, surfID, pLighting, entityToWorld );
  1364. }
  1365. if ( !dlightMask || !pLighting->m_fDLightBits )
  1366. {
  1367. pLighting->m_fDLightBits = 0;
  1368. MSurf_Flags(surfID) &= ~SURFDRAW_HASDLIGHT;
  1369. }
  1370. }
  1371. return dlightMask;
  1372. }
  1373. //-----------------------------------------------------------------------------
  1374. // Purpose: Build the blocklights array for a given surface and copy to dest
  1375. // Combine and scale multiple lightmaps into the 8.8 format in blocklights
  1376. // Input : *psurf - surface to rebuild
  1377. // *dest - texture pointer to receive copy in lightmap texture format
  1378. // stride - stride of *dest memory
  1379. //-----------------------------------------------------------------------------
  1380. void R_BuildLightMapGuts( dlight_t *pLights, SurfaceHandle_t surfID, const matrix3x4_t& entityToWorld, unsigned int dlightMask, bool needsBumpmap, bool needsLightmap )
  1381. {
  1382. VPROF_("R_BuildLightMapGuts", 1, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0);
  1383. int bumpID;
  1384. // Lightmap data can be dumped to save memory - this precludes any dynamic lighting on the world
  1385. Assert( !host_state.worldbrush->m_bUnloadedAllLightmaps );
  1386. // Mark the surface with the particular cached light values...
  1387. msurfacelighting_t *pLighting = SurfaceLighting( surfID );
  1388. int size = ComputeLightmapSize( surfID );
  1389. if (size == 0)
  1390. return;
  1391. bool hasBumpmap = SurfHasBumpedLightmaps( surfID );
  1392. bool hasLightmap = SurfHasLightmap( surfID );
  1393. extern bool g_bLightstylesWithCSM;
  1394. // clear to no light
  1395. if( needsLightmap )
  1396. {
  1397. // set to full bright if no light data
  1398. InitLMSamples( blocklights[0], size, hasLightmap ? 0.0f : mat_defaultlightmap.GetFloat() );
  1399. }
  1400. if( needsBumpmap )
  1401. {
  1402. // set to full bright if no light data
  1403. for( bumpID = 1; bumpID < NUM_BUMP_VECTS + 1; bumpID++ )
  1404. {
  1405. InitLMSamples( blocklights[bumpID], size, hasBumpmap ? 0.0f : mat_defaultlightmap.GetFloat() );
  1406. }
  1407. }
  1408. // add all the lightmaps
  1409. // Here, it's got the data it needs. So use it!
  1410. if( ( hasLightmap && needsLightmap ) || ( hasBumpmap && needsBumpmap ) )
  1411. {
  1412. if ( g_bLightstylesWithCSM )
  1413. {
  1414. ComputeLightmapFromLightstyle( pLighting, ( hasLightmap && needsLightmap ),
  1415. ( hasBumpmap && needsBumpmap ), size, hasBumpmap );
  1416. }
  1417. else
  1418. {
  1419. ComputeLightmapFromLightstyleOLD( pLighting, ( hasLightmap && needsLightmap ),
  1420. ( hasBumpmap && needsBumpmap ), size, hasBumpmap );
  1421. }
  1422. }
  1423. else if( !hasBumpmap && needsBumpmap && hasLightmap )
  1424. {
  1425. // make something up for the bumped lights if you need them but don't have the data
  1426. // if you have a lightmap, use that, otherwise fullbright
  1427. if ( g_bLightstylesWithCSM )
  1428. {
  1429. ComputeLightmapFromLightstyle( pLighting, true, false, size, hasBumpmap );
  1430. }
  1431. else
  1432. {
  1433. ComputeLightmapFromLightstyleOLD( pLighting, true, false, size, hasBumpmap );
  1434. }
  1435. for( bumpID = 0; bumpID < ( hasBumpmap ? ( NUM_BUMP_VECTS + 1 ) : 1 ); bumpID++ )
  1436. {
  1437. for (int i=0 ; i<size ; i++)
  1438. {
  1439. blocklights[bumpID][i].AsVector3D() = blocklights[0][i].AsVector3D();
  1440. }
  1441. }
  1442. }
  1443. else if( needsBumpmap && !hasLightmap )
  1444. {
  1445. // set to full bright if no light data
  1446. InitLMSamples( blocklights[1], size, 0.0f );
  1447. InitLMSamples( blocklights[2], size, 0.0f );
  1448. InitLMSamples( blocklights[3], size, 0.0f );
  1449. }
  1450. else if( !needsBumpmap && !needsLightmap )
  1451. {
  1452. }
  1453. else if( needsLightmap && !hasLightmap )
  1454. {
  1455. }
  1456. else
  1457. {
  1458. Assert( 0 );
  1459. }
  1460. // add all the dynamic lights
  1461. if ( dlightMask && (needsLightmap || needsBumpmap) )
  1462. {
  1463. R_AddDynamicLights( pLights, surfID, entityToWorld, needsBumpmap, dlightMask );
  1464. }
  1465. // Update the texture state
  1466. UpdateLightmapTextures( surfID, needsBumpmap );
  1467. }
  1468. void R_BuildLightMap( dlight_t *pLights, ICallQueue *pCallQueue, SurfaceHandle_t surfID, const matrix3x4_t &entityToWorld, bool bOnlyUseLightStyles )
  1469. {
  1470. bool needsBumpmap = SurfNeedsBumpedLightmaps( surfID );
  1471. bool needsLightmap = SurfNeedsLightmap( surfID );
  1472. if( !needsBumpmap && !needsLightmap )
  1473. return;
  1474. if( materialSortInfoArray )
  1475. {
  1476. Assert( MSurf_MaterialSortID( surfID ) >= 0 &&
  1477. MSurf_MaterialSortID( surfID ) < g_WorldStaticMeshes.Count() );
  1478. if (( materialSortInfoArray[MSurf_MaterialSortID( surfID )].lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ) ||
  1479. ( materialSortInfoArray[MSurf_MaterialSortID( surfID )].lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP ) )
  1480. {
  1481. return;
  1482. }
  1483. }
  1484. bool bDlightsInLightmap = needsLightmap || needsBumpmap;
  1485. unsigned int dlightMask = R_UpdateDlightState( pLights, surfID, entityToWorld, bOnlyUseLightStyles, bDlightsInLightmap );
  1486. // update the state, but don't render any dlights if only lightstyles requested
  1487. if ( bOnlyUseLightStyles )
  1488. dlightMask = 0;
  1489. if ( !pCallQueue )
  1490. {
  1491. R_BuildLightMapGuts( pLights, surfID, entityToWorld, dlightMask, needsBumpmap, needsLightmap );
  1492. }
  1493. else
  1494. {
  1495. pCallQueue->QueueCall( R_BuildLightMapGuts, pLights, surfID, RefToVal( entityToWorld ), dlightMask, needsBumpmap, needsLightmap );
  1496. }
  1497. }
  1498. //-----------------------------------------------------------------------------
  1499. // Purpose: Save off the average light values, and dump the rest of the lightmap data.
  1500. // Can be used to save memory, at the expense of dynamic lights and lightstyles.
  1501. //-----------------------------------------------------------------------------
  1502. void CacheAndUnloadLightmapData()
  1503. {
  1504. Assert( !g_bHunkAllocLightmaps );
  1505. if ( g_bHunkAllocLightmaps )
  1506. {
  1507. // for safety, can't discard if lighting data is hunk allocated
  1508. return;
  1509. }
  1510. worldbrushdata_t *pBrushData = host_state.worldbrush;
  1511. msurfacelighting_t *pLighting = pBrushData->surfacelighting;
  1512. int numSurfaces = pBrushData->numsurfaces;
  1513. // This will allocate more data than necessary, but only 1-2K max
  1514. byte *pDestBase = (byte*)malloc( numSurfaces * MAXLIGHTMAPS * sizeof( ColorRGBExp32 ) );
  1515. byte *pDest = pDestBase;
  1516. for ( int i = 0; i < numSurfaces; ++i, ++pLighting )
  1517. {
  1518. int nStyleCt = 0;
  1519. for ( int map = 0 ; map < MAXLIGHTMAPS; ++map )
  1520. {
  1521. if ( pLighting->m_nStyles[map] != 255 )
  1522. ++nStyleCt;
  1523. }
  1524. const int nHdrBytes = nStyleCt * sizeof( ColorRGBExp32 );
  1525. byte *pHdr = (byte*)pLighting->m_pSamples - nHdrBytes;
  1526. // Copy just the 0-4 average color entries
  1527. Q_memcpy( pDest, pHdr, nHdrBytes );
  1528. // m_pSamples needs to stay pointing AFTER the average color data
  1529. // other code expects to back up and find it there
  1530. pDest += nHdrBytes;
  1531. pLighting->m_pSamples = (ColorRGBExp32*)pDest;
  1532. }
  1533. // discard previous and update the lightdata
  1534. DeallocateLightingData( host_state.worldbrush );
  1535. host_state.worldbrush->lightdata = (ColorRGBExp32*)pDestBase;
  1536. // track this specific hack
  1537. host_state.worldbrush->m_bUnloadedAllLightmaps = true;
  1538. }
  1539. class SurfaceLessFunc
  1540. {
  1541. public:
  1542. // ascending sort the lighting pointers
  1543. bool Less( const int &src1, const int &src2, void *pCtx )
  1544. {
  1545. msurfacelighting_t *pLighting = (msurfacelighting_t *)pCtx;
  1546. return ( ( ( uintp )pLighting[src1].m_pSamples ) < ( ( uintp )pLighting[src2].m_pSamples ) );
  1547. }
  1548. };
  1549. //-----------------------------------------------------------------------------
  1550. // All lightmaps should have been uploaded, can now compact portions of all
  1551. // the lighting data, fixup those surfaces, and decommit the unused portion
  1552. // of the lighting data.
  1553. //-----------------------------------------------------------------------------
  1554. void DiscardStaticLightmapData()
  1555. {
  1556. Assert( !g_bHunkAllocLightmaps );
  1557. if ( g_bHunkAllocLightmaps )
  1558. {
  1559. // for safety, can't discard if lighting data is hunk allocated
  1560. return;
  1561. }
  1562. worldbrushdata_t *pBrushData = host_state.worldbrush;
  1563. msurfacelighting_t *pLighting = pBrushData->surfacelighting;
  1564. int numSurfaces = pBrushData->numsurfaces;
  1565. if ( !numSurfaces || !pBrushData->m_pLightingDataStack )
  1566. return;
  1567. // sort all the surfaces lighting pointers
  1568. // want the pointers to be numerically ascending
  1569. int *pSurfaceIndexes = (int *)stackalloc( numSurfaces * sizeof( int ) );
  1570. CUtlSortVector< int, SurfaceLessFunc > surfaceSort( pSurfaceIndexes, numSurfaces );
  1571. surfaceSort.SetLessContext( pLighting );
  1572. for ( int i = 0; i < numSurfaces; i++ )
  1573. {
  1574. surfaceSort.InsertNoSort( i );
  1575. }
  1576. surfaceSort.RedoSort();
  1577. // for saftey, validate the pointers are sorted as expected, otherwise memory corruption
  1578. ColorRGBExp32 *pLast = pLighting[surfaceSort[0]].m_pSamples;
  1579. for ( int i = 1; i < numSurfaces; i++ )
  1580. {
  1581. ColorRGBExp32 *pCurrent = pLighting[surfaceSort[i]].m_pSamples;
  1582. if ( pCurrent && pLast && (uintp)pCurrent == (uintp)pLast )
  1583. {
  1584. // the lighting data pointers cannot be pointing to the same valid location
  1585. // abandon compaction, memory corruption would occur
  1586. DevMsg( "DiscardStaticLightmapData: Surface Lighting data aliased.\n" );
  1587. Assert( 0 );
  1588. return;
  1589. }
  1590. else if ( (uintp)pCurrent < (uintp)pLast )
  1591. {
  1592. // the lighting data pointers must be in ascending order
  1593. // abandon compaction, memory corruption would occur
  1594. DevMsg( "DiscardStaticLightmapData: Surface Lighting data out of order.\n" );
  1595. Assert( 0 );
  1596. return;
  1597. }
  1598. pLast = pCurrent;
  1599. }
  1600. // iterate through sorted surfaces, compacting surface lighting by shifting over discarded regions
  1601. ColorRGBExp32 *pTarget = pBrushData->lightdata;
  1602. for ( int i = 0; i < numSurfaces; i++ )
  1603. {
  1604. int nSortedIndex = surfaceSort[i];
  1605. SurfaceHandle_t surfID = SurfaceHandleFromIndex( nSortedIndex );
  1606. if ( !SurfHasLightmap( surfID ) )
  1607. {
  1608. // not a candidate
  1609. continue;
  1610. }
  1611. int offset = ComputeLightmapSize( surfID );
  1612. if ( SurfHasBumpedLightmaps( surfID ) )
  1613. {
  1614. offset *= ( NUM_BUMP_VECTS + 1 );
  1615. }
  1616. // count this surface's number of lightmaps
  1617. int nNumMaps;
  1618. for ( nNumMaps = 0; nNumMaps < MAXLIGHTMAPS && pLighting[nSortedIndex].m_nStyles[nNumMaps] != 255; nNumMaps++ )
  1619. {
  1620. }
  1621. if ( !nNumMaps )
  1622. {
  1623. // odd, marked for lightmaps, but no styles
  1624. // ignore
  1625. continue;
  1626. }
  1627. // account for the avgcolors
  1628. int nSurfaceLightSize = nNumMaps;
  1629. if ( nNumMaps > 1 && ( MSurf_Flags( surfID ) & SURFDRAW_HASLIGHTSYTLES ) )
  1630. {
  1631. // account for the lightmaps
  1632. nSurfaceLightSize += nNumMaps * offset;
  1633. }
  1634. // position the source properly
  1635. // the avgcolors are stored behind the lightmaps
  1636. ColorRGBExp32 *pSource = pLighting[nSortedIndex].m_pSamples - nNumMaps;
  1637. if ( pSource != pTarget )
  1638. {
  1639. memmove( pTarget, pSource, nSurfaceLightSize * sizeof( ColorRGBExp32 ) );
  1640. // fixup the surface to the new location
  1641. // the surface points to the data AFTER the avgcolors
  1642. pLighting[nSortedIndex].m_pSamples = pTarget + nNumMaps;
  1643. }
  1644. // advance past
  1645. pTarget += nSurfaceLightSize;
  1646. }
  1647. unsigned int nDynamicSize = size_cast< unsigned int >( (uintp)pTarget - (uintp)pBrushData->lightdata );
  1648. // shrink the original allocation in place
  1649. pBrushData->m_pLightingDataStack->FreeToAllocPoint( nDynamicSize );
  1650. const char *mapName = modelloader->GetName( host_state.worldmodel );
  1651. Msg( "(%s) Original Full Lighting Data: %.2f MB\n", mapName, (float)pBrushData->m_nLightingDataSize / ( 1024.0f * 1024.0f ) );
  1652. Msg( "(%s) Reduced To Only Dynamic Lighting Data: %.2f MB\n", mapName, (float)nDynamicSize / ( 1024.0f * 1024.0f ) );
  1653. }
  1654. //sorts the surfaces in place
  1655. static void SortSurfacesByLightmapID( SurfaceHandle_t *pToSort, int iSurfaceCount )
  1656. {
  1657. SurfaceHandle_t *pSortTemp = (SurfaceHandle_t *)stackalloc( sizeof( SurfaceHandle_t ) * iSurfaceCount );
  1658. //radix sort
  1659. for( int radix = 0; radix != 4; ++radix )
  1660. {
  1661. //swap the inputs for the next pass
  1662. {
  1663. SurfaceHandle_t *pTemp = pToSort;
  1664. pToSort = pSortTemp;
  1665. pSortTemp = pTemp;
  1666. }
  1667. int iCounts[256] = { 0 };
  1668. int iBitOffset = radix * 8;
  1669. for( int i = 0; i != iSurfaceCount; ++i )
  1670. {
  1671. uint8 val = (materialSortInfoArray[MSurf_MaterialSortID( pSortTemp[i] )].lightmapPageID >> iBitOffset) & 0xFF;
  1672. ++iCounts[val];
  1673. }
  1674. int iOffsetTable[256];
  1675. iOffsetTable[0] = 0;
  1676. for( int i = 0; i != 255; ++i )
  1677. {
  1678. iOffsetTable[i + 1] = iOffsetTable[i] + iCounts[i];
  1679. }
  1680. for( int i = 0; i != iSurfaceCount; ++i )
  1681. {
  1682. uint8 val = (materialSortInfoArray[MSurf_MaterialSortID( pSortTemp[i] )].lightmapPageID >> iBitOffset) & 0xFF;
  1683. int iWriteIndex = iOffsetTable[val];
  1684. pToSort[iWriteIndex] = pSortTemp[i];
  1685. ++iOffsetTable[val];
  1686. }
  1687. }
  1688. }
  1689. void R_RedownloadAllLightmaps()
  1690. {
  1691. #ifdef _DEBUG
  1692. static bool initializedBlockLights = false;
  1693. if ( !initializedBlockLights )
  1694. {
  1695. memset( &blocklights[0][0][0], 0, MAX_LIGHTMAP_DIM_INCLUDING_BORDER * MAX_LIGHTMAP_DIM_INCLUDING_BORDER * (NUM_BUMP_VECTS + 1) * sizeof( Vector ) );
  1696. initializedBlockLights = true;
  1697. }
  1698. #endif
  1699. double st = Sys_FloatTime();
  1700. if ( !host_state.worldbrush->m_bUnloadedAllLightmaps )
  1701. {
  1702. bool bOnlyUseLightStyles = false;
  1703. if ( r_dynamic.GetInt() == 0 || r_keepstyledlightmapsonly.GetBool() )
  1704. {
  1705. bOnlyUseLightStyles = true;
  1706. }
  1707. // Can't build lightmaps if the source data has been dumped
  1708. CMatRenderContextPtr pRenderContext( materials );
  1709. ICallQueue *pCallQueue = pRenderContext->GetCallQueue();
  1710. int iSurfaceCount = host_state.worldbrush->numsurfaces;
  1711. SurfaceHandle_t *pSortedSurfaces = (SurfaceHandle_t *)stackalloc( sizeof( SurfaceHandle_t ) * iSurfaceCount );
  1712. for( int surfaceIndex = 0; surfaceIndex < iSurfaceCount; surfaceIndex++ )
  1713. {
  1714. SurfaceHandle_t surfID = SurfaceHandleFromIndex( surfaceIndex );
  1715. pSortedSurfaces[surfaceIndex] = surfID;
  1716. }
  1717. SortSurfacesByLightmapID( pSortedSurfaces, iSurfaceCount ); //sorts in place, so now the array really is sorted
  1718. if( pCallQueue )
  1719. pCallQueue->QueueCall( materials, &IMaterialSystem::BeginUpdateLightmaps );
  1720. else
  1721. materials->BeginUpdateLightmaps();
  1722. matrix3x4_t xform;
  1723. SetIdentityMatrix(xform);
  1724. for( int surfaceIndex = 0; surfaceIndex < iSurfaceCount; surfaceIndex++ )
  1725. {
  1726. SurfaceHandle_t surfID = pSortedSurfaces[surfaceIndex];
  1727. ASSERT_SURF_VALID( surfID );
  1728. R_BuildLightMap( &cl_dlights[0], pCallQueue, surfID, xform, bOnlyUseLightStyles );
  1729. }
  1730. if( pCallQueue )
  1731. pCallQueue->QueueCall( materials, &IMaterialSystem::EndUpdateLightmaps );
  1732. else
  1733. materials->EndUpdateLightmaps();
  1734. if ( !g_bHunkAllocLightmaps )
  1735. {
  1736. if ( r_unloadlightmaps.GetInt() == 1 )
  1737. {
  1738. // Delete the lightmap data from memory
  1739. if ( !pCallQueue )
  1740. {
  1741. CacheAndUnloadLightmapData();
  1742. }
  1743. else
  1744. {
  1745. pCallQueue->QueueCall( CacheAndUnloadLightmapData );
  1746. }
  1747. }
  1748. else if ( r_keepstyledlightmapsonly.GetBool() )
  1749. {
  1750. if ( !pCallQueue )
  1751. {
  1752. DiscardStaticLightmapData();
  1753. }
  1754. else
  1755. {
  1756. pCallQueue->QueueCall( DiscardStaticLightmapData );
  1757. }
  1758. }
  1759. }
  1760. }
  1761. float elapsed = ( float )( Sys_FloatTime() - st ) * 1000.0;
  1762. DevMsg( "R_RedownloadAllLightmaps took %.3f msec!\n", elapsed );
  1763. g_RebuildLightmaps = false;
  1764. }
  //-----------------------------------------------------------------------------
  // Purpose: request a rebuild of the lightmaps (e.g. after a gamma change).
  //          GL_RebuildLightmaps only raises the flag; R_RedownloadAllLightmaps
  //          clears it again once it has run.
  //-----------------------------------------------------------------------------
  bool g_RebuildLightmaps = false;

  void GL_RebuildLightmaps()
  {
      g_RebuildLightmaps = true;
  }
  //-----------------------------------------------------------------------------
  // Test-only cvar, compiled in when UPDATE_LIGHTSTYLES_EVERY_FRAME is defined.
  // R_CheckForLightmapUpdates reads it to treat lightstyled surfaces as changed
  // on every frame (for measuring lightstyle performance).
  //-----------------------------------------------------------------------------
  #ifdef UPDATE_LIGHTSTYLES_EVERY_FRAME
  ConVar mat_updatelightstyleseveryframe( "mat_updatelightstyleseveryframe", "0" );
  #endif
  1780. int __cdecl LightmapPageCompareFunc( const void *pElem0, const void *pElem1 )
  1781. {
  1782. const LightmapUpdateInfo_t *pSurf0 = (const LightmapUpdateInfo_t *)pElem0;
  1783. const LightmapUpdateInfo_t *pSurf1 = (const LightmapUpdateInfo_t *)pElem1;
  1784. int page0 = materialSortInfoArray[MSurf_MaterialSortID( (pSurf0->m_SurfHandle) )].lightmapPageID;
  1785. int page1 = materialSortInfoArray[MSurf_MaterialSortID( (pSurf1->m_SurfHandle) )].lightmapPageID;
  1786. return page0 - page1;
  1787. }
  1788. void R_BuildLightmapUpdateList()
  1789. {
  1790. CMatRenderContextPtr pRenderContext( materials );
  1791. ICallQueue *pCallQueue = pRenderContext->GetCallQueue();
  1792. dlight_t *pLights = &cl_dlights[0];
  1793. // only do the copy when there are valid dlights to process and threading is on
  1794. if ( g_bActiveDlights && pCallQueue )
  1795. {
  1796. // keep a copy of the current dlight state around for the thread to work on
  1797. // in parallel. This way the main thread can continue to modify this state without
  1798. // generating any bad results
  1799. static dlight_t threadDlights[MAX_DLIGHTS*2];
  1800. static int threadFrameCount = 0;
  1801. pLights = &threadDlights[MAX_DLIGHTS*threadFrameCount];
  1802. Q_memcpy( pLights, cl_dlights, sizeof(dlight_t) * MAX_DLIGHTS );
  1803. threadFrameCount = (threadFrameCount+1) & 1;
  1804. }
  1805. qsort( g_LightmapUpdateList.Base(), g_LightmapUpdateList.Count(), sizeof(g_LightmapUpdateList.Element(0)), LightmapPageCompareFunc );
  1806. for ( int i = 0; i < g_LightmapUpdateList.Count(); i++ )
  1807. {
  1808. const LightmapUpdateInfo_t &info = g_LightmapUpdateList.Element(i);
  1809. if ( !pCallQueue )
  1810. {
  1811. R_BuildLightMapGuts( pLights, info.m_SurfHandle, g_LightmapTransformList[info.m_nTransformIndex].xform,
  1812. info.m_nDlightMask, info.m_bNeedsBumpmap, info.m_bNeedsLightmap );
  1813. }
  1814. else
  1815. {
  1816. pCallQueue->QueueCall( R_BuildLightMapGuts, pLights, info.m_SurfHandle, RefToVal( g_LightmapTransformList[info.m_nTransformIndex].xform ),
  1817. info.m_nDlightMask, info.m_bNeedsBumpmap, info.m_bNeedsLightmap );
  1818. }
  1819. }
  1820. }
  1821. void R_CheckForLightmapUpdates( SurfaceHandle_t surfID, int nTransformIndex )
  1822. {
  1823. msurfacelighting_t *pLighting = SurfaceLighting( surfID );
  1824. if ( pLighting->m_nLastComputedFrame != r_framecount )
  1825. {
  1826. int nFlags = MSurf_Flags( surfID );
  1827. if( nFlags & SURFDRAW_NOLIGHT )
  1828. return;
  1829. // check for lightmap modification
  1830. bool bChanged = false;
  1831. if( nFlags & SURFDRAW_HASLIGHTSYTLES )
  1832. {
  1833. #ifdef UPDATE_LIGHTSTYLES_EVERY_FRAME
  1834. if( mat_updatelightstyleseveryframe.GetBool() && ( pLighting->m_nStyles[0] != 0 || pLighting->m_nStyles[1] != 255 ) )
  1835. {
  1836. bChanged = true;
  1837. }
  1838. #endif
  1839. for( int maps = 0; maps < MAXLIGHTMAPS && pLighting->m_nStyles[maps] != 255; maps++ )
  1840. {
  1841. if( d_lightstyleframe[pLighting->m_nStyles[maps]] > pLighting->m_nLastComputedFrame )
  1842. {
  1843. bChanged = true;
  1844. break;
  1845. }
  1846. }
  1847. }
  1848. // was it dynamic this frame (pLighting->m_nDLightFrame == r_framecount)
  1849. // or dynamic previously (pLighting->m_fDLightBits)
  1850. bool bDLightChanged = ( pLighting->m_nDLightFrame == r_framecount ) || pLighting->m_fDLightBits;
  1851. bool bOnlyUseLightStyles = false;
  1852. if ( r_dynamic.GetInt() == 0 || r_keepstyledlightmapsonly.GetBool() )
  1853. {
  1854. bOnlyUseLightStyles = true;
  1855. }
  1856. else
  1857. {
  1858. bChanged |= bDLightChanged;
  1859. }
  1860. if ( bChanged )
  1861. {
  1862. bool bNeedsBumpmap = SurfNeedsBumpedLightmaps( surfID );
  1863. bool bNeedsLightmap = SurfNeedsLightmap( surfID );
  1864. if( !bNeedsBumpmap && !bNeedsLightmap )
  1865. return;
  1866. if( materialSortInfoArray )
  1867. {
  1868. int nSortID = MSurf_MaterialSortID( surfID );
  1869. Assert( nSortID >= 0 && nSortID < g_WorldStaticMeshes.Count() );
  1870. if (( materialSortInfoArray[nSortID].lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE ) ||
  1871. ( materialSortInfoArray[nSortID].lightmapPageID == MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP ) )
  1872. {
  1873. return;
  1874. }
  1875. }
  1876. bool bDlightsInLightmap = bNeedsLightmap || bNeedsBumpmap;
  1877. unsigned int nDlightMask = R_UpdateDlightState( cl_dlights, surfID, g_LightmapTransformList[nTransformIndex].xform, bOnlyUseLightStyles, bDlightsInLightmap );
  1878. int nIndex = g_LightmapUpdateList.AddToTail();
  1879. g_LightmapUpdateList[nIndex].m_SurfHandle = surfID;
  1880. g_LightmapUpdateList[nIndex].m_nTransformIndex = nTransformIndex;
  1881. g_LightmapUpdateList[nIndex].m_nDlightMask= nDlightMask;
  1882. g_LightmapUpdateList[nIndex].m_bNeedsLightmap = bNeedsLightmap;
  1883. g_LightmapUpdateList[nIndex].m_bNeedsBumpmap = bNeedsBumpmap;
  1884. }
  1885. }
  1886. }