Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2968 lines
96 KiB

  1. //========= Copyright © 1996-2008, Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. //=====================================================================================//
  6. #include "studiorender.h"
  7. #include "studio.h"
  8. #include "materialsystem/imesh.h"
  9. #include "materialsystem/imaterialsystemhardwareconfig.h"
  10. #include "materialsystem/imaterialvar.h"
  11. #include "materialsystem/imorph.h"
  12. #include "materialsystem/itexture.h"
  13. #include "materialsystem/imaterial.h"
  14. #include "optimize.h"
  15. #include "mathlib/mathlib.h"
  16. #include "mathlib/vector.h"
  17. #include "mathlib/vmatrix.h"
  18. #include "studiorendercontext.h"
  19. #include "tier2/tier2.h"
  20. #include "tier0/vprof.h"
  21. #include "filesystem.h"
  22. //#include "tier0/miniprofiler.h"
  23. //#define PROFILE_STUDIO VPROF
  24. #define PROFILE_STUDIO
  25. // memdbgon must be the last include file in a .cpp file!!!
  26. #include "tier0/memdbgon.h"
  // Function-pointer type for a software mesh-processing routine. The parameter
  // names in the signature describe what each call receives; the routines that
  // match this signature are not visible in this part of the file.
  27. typedef void (*SoftwareProcessMeshFunc_t)( const mstudio_meshvertexdata_t *, matrix3x4_t *pPoseToWorld,
  28. CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask,
  29. IMaterial *pMaterial);
  // Local name for the material system's MATERIAL_VERTEX_FORMAT_MODEL.
  30. #define VERTEX_FORMAT_STANDARD MATERIAL_VERTEX_FORMAT_MODEL
  // Position + normal + a 4-wide userdata channel, with the "use exact format"
  // and "pad pos/norm" flags set.
  // NOTE(review): the SUBDQUAD name suggests subdivision quads -- confirm against the users of this macro.
  31. #define VERTEX_FORMAT_SUBDQUAD ( VERTEX_POSITION | VERTEX_NORMAL | VERTEX_USERDATA_SIZE( 4 ) | VERTEX_FORMAT_USE_EXACT_FORMAT | VERTEX_FORMAT_PAD_POS_NORM )
  32. //-----------------------------------------------------------------------------
  33. // Forward declarations
  34. //-----------------------------------------------------------------------------
  35. class IClientEntity;
  // Face table for debug boxes: for each of the six faces, the four indices into
  // an 8-entry corner array that form that face's quad. R_StudioDrawHulls and
  // R_StudioDrawBones both emit one quad per row of this table.
  // Corner numbering as built in R_StudioDrawHulls: bit 0/1/2 of the corner index
  // CLEAR selects bbmax on x/y/z, SET selects bbmin -- so the "+X" row lists the
  // four corners whose bit 0 is clear.
  36. static int boxpnt[6][4] =
  37. {
  38. { 0, 4, 6, 2 }, // +X
  39. { 0, 1, 5, 4 }, // +Y
  40. { 0, 2, 3, 1 }, // +Z
  41. { 7, 5, 1, 3 }, // -X
  42. { 7, 3, 2, 6 }, // -Y
  43. { 7, 6, 4, 5 }, // -Z
  44. };
  // RGB tints for debug hitbox hulls. R_StudioDrawHulls picks an entry with
  // (hitbox group % 8) and writes it (scaled by 0.2 in translucent mode) into
  // the hull material's "$color" var. Entries 0 and 7 are both white.
  45. static TableVector hullcolor[8] =
  46. {
  47. { 1.0, 1.0, 1.0 },
  48. { 1.0, 0.5, 0.5 },
  49. { 0.5, 1.0, 0.5 },
  50. { 1.0, 1.0, 0.5 },
  51. { 0.5, 0.5, 1.0 },
  52. { 1.0, 0.5, 1.0 },
  53. { 0.5, 1.0, 1.0 },
  54. { 1.0, 1.0, 1.0 }
  55. };
  56. //-----------------------------------------------------------------------------
  57. //
  58. //-----------------------------------------------------------------------------
  59. static unsigned int s_nTranslucentModelHullCache = 0;
  60. static unsigned int s_nSolidModelHullCache = 0;
  61. void CStudioRender::R_StudioDrawHulls( int hitboxset, bool translucent )
  62. {
  63. int i, j;
  64. // float lv;
  65. Vector tmp;
  66. Vector p[8];
  67. mstudiobbox_t *pbbox;
  68. IMaterialVar *colorVar;
  69. mstudiohitboxset_t *s = m_pStudioHdr->pHitboxSet( hitboxset );
  70. if ( !s )
  71. return;
  72. pbbox = s->pHitbox( 0 );
  73. if ( !pbbox )
  74. return;
  75. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  76. if( translucent )
  77. {
  78. pRenderContext->Bind( m_pMaterialTranslucentModelHulls );
  79. colorVar = m_pMaterialTranslucentModelHulls->FindVarFast( "$color", &s_nTranslucentModelHullCache );
  80. }
  81. else
  82. {
  83. pRenderContext->Bind( m_pMaterialSolidModelHulls );
  84. colorVar = m_pMaterialSolidModelHulls->FindVarFast( "$color", &s_nSolidModelHullCache );
  85. }
  86. for (i = 0; i < s->numhitboxes; i++)
  87. {
  88. for (j = 0; j < 8; j++)
  89. {
  90. tmp[0] = (j & 1) ? pbbox[i].bbmin[0] : pbbox[i].bbmax[0];
  91. tmp[1] = (j & 2) ? pbbox[i].bbmin[1] : pbbox[i].bbmax[1];
  92. tmp[2] = (j & 4) ? pbbox[i].bbmin[2] : pbbox[i].bbmax[2];
  93. VectorTransform( tmp, m_pBoneToWorld[pbbox[i].bone], p[j] );
  94. }
  95. j = (pbbox[i].group % 8);
  96. g_pMaterialSystem->Flush();
  97. if( colorVar )
  98. {
  99. if( translucent )
  100. {
  101. colorVar->SetVecValue( 0.2f * hullcolor[j].x, 0.2f * hullcolor[j].y, 0.2f * hullcolor[j].z );
  102. }
  103. else
  104. {
  105. colorVar->SetVecValue( hullcolor[j].x, hullcolor[j].y, hullcolor[j].z );
  106. }
  107. }
  108. for (j = 0; j < 6; j++)
  109. {
  110. #if 0
  111. tmp[0] = tmp[1] = tmp[2] = 0;
  112. tmp[j % 3] = (j < 3) ? 1.0 : -1.0;
  113. // R_StudioLighting( &lv, pbbox[i].bone, 0, tmp ); // BUG: not updated
  114. #endif
  115. IMesh* pMesh = pRenderContext->GetDynamicMesh();
  116. CMeshBuilder meshBuilder;
  117. meshBuilder.Begin( pMesh, MATERIAL_QUADS, 1 );
  118. for (int k = 0; k < 4; ++k)
  119. {
  120. meshBuilder.Position3fv( p[boxpnt[j][k]].Base() );
  121. meshBuilder.AdvanceVertex();
  122. }
  123. meshBuilder.End();
  124. pMesh->Draw();
  125. }
  126. }
  127. }
  128. void CStudioRender::R_StudioDrawBones (void)
  129. {
  130. int i, j, k;
  131. // float lv;
  132. Vector tmp;
  133. Vector p[8];
  134. Vector up, right, forward;
  135. Vector a1;
  136. const mstudiobone_t *pbones;
  137. Vector positionArray[4];
  138. pbones = m_pStudioHdr->pBone( 0 );
  139. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  140. for (i = 0; i < m_pStudioHdr->numbones; i++)
  141. {
  142. if (pbones[i].parent == -1)
  143. continue;
  144. k = pbones[i].parent;
  145. a1[0] = a1[1] = a1[2] = 1.0;
  146. up[0] = m_pBoneToWorld[i][0][3] - m_pBoneToWorld[k][0][3];
  147. up[1] = m_pBoneToWorld[i][1][3] - m_pBoneToWorld[k][1][3];
  148. up[2] = m_pBoneToWorld[i][2][3] - m_pBoneToWorld[k][2][3];
  149. if (up[0] > up[1])
  150. if (up[0] > up[2])
  151. a1[0] = 0.0;
  152. else
  153. a1[2] = 0.0;
  154. else
  155. if (up[1] > up[2])
  156. a1[1] = 0.0;
  157. else
  158. a1[2] = 0.0;
  159. CrossProduct( up, a1, right );
  160. VectorNormalize( right );
  161. CrossProduct( up, right, forward );
  162. VectorNormalize( forward );
  163. VectorScale( right, 2.0, right );
  164. VectorScale( forward, 2.0, forward );
  165. for (j = 0; j < 8; j++)
  166. {
  167. p[j][0] = m_pBoneToWorld[k][0][3];
  168. p[j][1] = m_pBoneToWorld[k][1][3];
  169. p[j][2] = m_pBoneToWorld[k][2][3];
  170. if (j & 1)
  171. {
  172. VectorSubtract( p[j], right, p[j] );
  173. }
  174. else
  175. {
  176. VectorAdd( p[j], right, p[j] );
  177. }
  178. if (j & 2)
  179. {
  180. VectorSubtract( p[j], forward, p[j] );
  181. }
  182. else
  183. {
  184. VectorAdd( p[j], forward, p[j] );
  185. }
  186. if (j & 4)
  187. {
  188. }
  189. else
  190. {
  191. VectorAdd( p[j], up, p[j] );
  192. }
  193. }
  194. VectorNormalize( up );
  195. VectorNormalize( right );
  196. VectorNormalize( forward );
  197. pRenderContext->Bind( m_pMaterialModelBones );
  198. for (j = 0; j < 6; j++)
  199. {
  200. switch( j)
  201. {
  202. case 0: VectorCopy( right, tmp ); break;
  203. case 1: VectorCopy( forward, tmp ); break;
  204. case 2: VectorCopy( up, tmp ); break;
  205. case 3: VectorScale( right, -1, tmp ); break;
  206. case 4: VectorScale( forward, -1, tmp ); break;
  207. case 5: VectorScale( up, -1, tmp ); break;
  208. }
  209. // R_StudioLighting( &lv, -1, 0, tmp ); // BUG: not updated
  210. IMesh* pMesh = pRenderContext->GetDynamicMesh();
  211. CMeshBuilder meshBuilder;
  212. meshBuilder.Begin( pMesh, MATERIAL_QUADS, 1 );
  213. for (int k = 0; k < 4; ++k)
  214. {
  215. meshBuilder.Position3fv( p[boxpnt[j][k]].Base() );
  216. meshBuilder.AdvanceVertex();
  217. }
  218. meshBuilder.End();
  219. pMesh->Draw();
  220. }
  221. }
  222. }
  223. int CStudioRender::R_StudioRenderModel( IMatRenderContext *pRenderContext, int skin,
  224. int body, int hitboxset, void /*IClientEntity*/ *pEntity,
  225. IMaterial **ppMaterials, int *pMaterialFlags, int flags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes )
  226. {
  227. VPROF("CStudioRender::R_StudioRenderModel");
  228. int nDrawGroup = flags & STUDIORENDER_DRAW_GROUP_MASK;
  229. if ( m_pRC->m_Config.drawEntities == 2 )
  230. {
  231. if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY )
  232. {
  233. R_StudioDrawBones( );
  234. }
  235. return 0;
  236. }
  237. if ( m_pRC->m_Config.drawEntities == 3 )
  238. {
  239. if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY )
  240. {
  241. R_StudioDrawHulls( hitboxset, false );
  242. }
  243. return 0;
  244. }
  245. // BUG: This method is crap, though less crap than before. It should just sort
  246. // the materials though it'll need to sort at render time as "skin"
  247. // can change what materials a given mesh may use
  248. int numFacesRendered = 0, numPasses = 0;
  249. // Build list of submodels
  250. BodyPartInfo_t *pBodyPartInfo = (BodyPartInfo_t*)stackalloc( m_pStudioHdr->numbodyparts * sizeof(BodyPartInfo_t) );
  251. for ( int i=0 ; i < m_pStudioHdr->numbodyparts; ++i )
  252. {
  253. pBodyPartInfo[i].m_nSubModelIndex = R_StudioSetupModel( i, body, &pBodyPartInfo[i].m_pSubModel, m_pStudioHdr );
  254. }
  255. // mark possible translucent meshes
  256. if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY )
  257. {
  258. // we're going to render the opaque meshes, so these will get counted in that pass
  259. m_bSkippedMeshes = false;
  260. m_bDrawTranslucentSubModels = false;
  261. numFacesRendered += R_StudioRenderFinal( pRenderContext, skin, m_pStudioHdr->numbodyparts, pBodyPartInfo,
  262. pEntity, ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes );
  263. numPasses++;
  264. }
  265. else
  266. {
  267. m_bSkippedMeshes = true;
  268. }
  269. if ( m_bSkippedMeshes && nDrawGroup != STUDIORENDER_DRAW_OPAQUE_ONLY )
  270. {
  271. m_bDrawTranslucentSubModels = true;
  272. numFacesRendered += R_StudioRenderFinal( pRenderContext, skin, m_pStudioHdr->numbodyparts, pBodyPartInfo,
  273. pEntity, ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes );
  274. numPasses++;
  275. }
  276. #ifndef _CERT
  277. static ConVarRef mat_rendered_faces_count( "mat_rendered_faces_count" );
  278. static ConVarRef mat_print_top_model_vert_counts( "mat_print_top_model_vert_counts" );
  279. if ( numPasses && ( mat_rendered_faces_count.GetBool() || mat_print_top_model_vert_counts.GetBool() ) )
  280. {
  281. // Each model counts how many rendered faces it accounts for each frame:
  282. m_pStudioHWData->UpdateFacesRenderedCount( m_pStudioHdr, m_ModelFaceCountHash, lod, 1, numFacesRendered );
  283. }
  284. #endif // !_CERT
  285. return numFacesRendered;
  286. }
  287. //-----------------------------------------------------------------------------
  288. // Generate morph accumulator
  289. //-----------------------------------------------------------------------------
  290. void CStudioRender::GenerateMorphAccumulator( mstudiomodel_t *pSubModel )
  291. {
  292. // Deal with all flexes
  293. // FIXME: HW Morphing doesn't work with translucent models yet
  294. if ( !m_pRC->m_Config.m_bEnableHWMorph || !m_pRC->m_Config.bFlex || m_bDrawTranslucentSubModels ||
  295. !g_pMaterialSystemHardwareConfig->HasFastVertexTextures() )
  296. return;
  297. int nActiveMeshCount = 0;
  298. mstudiomesh_t *ppMeshes[512];
  299. // First, build the list of meshes that need morphing
  300. for ( int i = 0; i < pSubModel->nummeshes; ++i )
  301. {
  302. mstudiomesh_t *pMesh = pSubModel->pMesh(i);
  303. studiomeshdata_t *pMeshData = &m_pStudioMeshes[pMesh->meshid];
  304. Assert( pMeshData );
  305. int nFlexCount = pMesh->numflexes;
  306. if ( !nFlexCount )
  307. continue;
  308. for ( int j = 0; j < pMeshData->m_NumGroup; ++j )
  309. {
  310. studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j];
  311. bool bIsDeltaFlexed = (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) != 0;
  312. if ( !bIsDeltaFlexed )
  313. continue;
  314. ppMeshes[nActiveMeshCount++] = pMesh;
  315. Assert( nActiveMeshCount < 512 );
  316. break;
  317. }
  318. }
  319. if ( nActiveMeshCount == 0 )
  320. return;
  321. // HACK - Just turn off scissor for this model if it is doing morph accumulation
  322. // DisableScissor();
  323. // Next, accumulate morphs for appropriate meshes
  324. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  325. pRenderContext->BeginMorphAccumulation();
  326. for ( int i = 0; i < nActiveMeshCount; ++i )
  327. {
  328. mstudiomesh_t *pMesh = ppMeshes[i];
  329. studiomeshdata_t *pMeshData = &m_pStudioMeshes[pMesh->meshid];
  330. int nFlexCount = pMesh->numflexes;
  331. MorphWeight_t *pWeights = (MorphWeight_t*)stackalloc( nFlexCount * sizeof(MorphWeight_t) );
  332. ComputeFlexWeights( nFlexCount, pMesh->pFlex(0), pWeights );
  333. for ( int j = 0; j < pMeshData->m_NumGroup; ++j )
  334. {
  335. studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j];
  336. if ( !pGroup->m_pMorph )
  337. continue;
  338. pRenderContext->AccumulateMorph( pGroup->m_pMorph, nFlexCount, pWeights );
  339. }
  340. }
  341. pRenderContext->EndMorphAccumulation();
  342. }
  343. //-----------------------------------------------------------------------------
  344. // Computes eyeball state
  345. //-----------------------------------------------------------------------------
  346. void CStudioRender::ComputeEyelidStateFACS( mstudiomodel_t *pSubModel )
  347. {
  348. for ( int j = 0; j < pSubModel->numeyeballs; j++ )
  349. {
  350. // FIXME: This might not be necessary...
  351. R_StudioEyeballPosition( pSubModel->pEyeball( j ), &m_pEyeballState[ j ] );
  352. R_StudioEyelidFACS( pSubModel->pEyeball(j), &m_pEyeballState[j] );
  353. }
  354. }
  355. /*
  356. ================
  357. R_StudioRenderFinal
  358. inputs:
  359. outputs: returns the number of triangles rendered.
  360. ================
  361. */
  362. int CStudioRender::R_StudioRenderFinal( IMatRenderContext *pRenderContext,
  363. int skin, int nBodyPartCount, BodyPartInfo_t *pBodyPartInfo, void /*IClientEntity*/ *pClientEntity,
  364. IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes )
  365. {
  366. VPROF("CStudioRender::R_StudioRenderFinal");
  367. int numFacesRendered = 0;
  368. for ( int i=0 ; i < nBodyPartCount; i++ )
  369. {
  370. m_pSubModel = pBodyPartInfo[i].m_pSubModel;
  371. // NOTE: This has to run here because it effects flex targets,
  372. // so therefore it must happen prior to GenerateMorphAccumulator.
  373. ComputeEyelidStateFACS( m_pSubModel );
  374. GenerateMorphAccumulator( m_pSubModel );
  375. // Set up SW flex
  376. m_VertexCache.SetBodyPart( i );
  377. m_VertexCache.SetModel( pBodyPartInfo[i].m_nSubModelIndex );
  378. numFacesRendered += R_StudioDrawPoints( pRenderContext, skin, pClientEntity,
  379. ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes );
  380. }
  381. return numFacesRendered;
  382. }
  383. ConVar r_flashlightscissor( "r_flashlightscissor", "0", FCVAR_MATERIAL_SYSTEM_THREAD );
  384. void CStudioRender::PushScissor( FlashlightState_t *state )
  385. {
  386. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  387. // Only scissor into the backbuffer
  388. if ( r_flashlightscissor.GetBool() && state->DoScissor() && ( pRenderContext->GetRenderTarget() == NULL ) )
  389. {
  390. pRenderContext->PushScissorRect( state->GetLeft(), state->GetTop(), state->GetRight(), state->GetBottom() );
  391. }
  392. }
  393. void CStudioRender::PopScissor( FlashlightState_t *state )
  394. {
  395. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  396. // Only scissor into the backbuffer
  397. if ( r_flashlightscissor.GetBool() && state->DoScissor() && ( pRenderContext->GetRenderTarget() == NULL ) )
  398. {
  399. pRenderContext->PopScissorRect();
  400. }
  401. }
  402. //-----------------------------------------------------------------------------
  403. // Draw shadows
  404. //-----------------------------------------------------------------------------
  405. void CStudioRender::DrawShadows( const DrawModelInfo_t& info, int flags, int boneMask )
  406. {
  407. if ( !m_ShadowState.Count() )
  408. return;
  409. VPROF("CStudioRender::DrawShadows");
  410. IMaterial* pForcedMat = m_pRC->m_pForcedMaterial[ 0 ];
  411. OverrideType_t nForcedType = m_pRC->m_nForcedMaterialType;
  412. // Here, we have to redraw the model one time for each flashlight
  413. // Having a material of NULL means that we are a light source.
  414. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  415. // Bail if we're using single-pass flashlight
  416. if ( IsGameConsole() || pRenderContext->SinglePassFlashlightModeEnabled() )
  417. return;
  418. pRenderContext->SetFlashlightMode( true );
  419. int i;
  420. for (i = 0; i < m_ShadowState.Count(); ++i )
  421. {
  422. if( !m_ShadowState[i].m_pMaterial )
  423. {
  424. Assert( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture );
  425. if ( ( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture ) )
  426. {
  427. pRenderContext->SetFlashlightStateEx( *m_ShadowState[i].m_pFlashlightState, *m_ShadowState[i].m_pWorldToTexture, m_ShadowState[i].m_pFlashlightDepthTexture );
  428. m_pCurrentFlashlight = m_ShadowState[i].m_pFlashlightState;
  429. PushScissor( m_ShadowState[i].m_pFlashlightState );
  430. R_StudioRenderModel( pRenderContext, info.m_Skin, info.m_Body, info.m_HitboxSet, info.m_pClientEntity,
  431. info.m_pHardwareData->m_pLODs[info.m_Lod].ppMaterials,
  432. info.m_pHardwareData->m_pLODs[info.m_Lod].pMaterialFlags, flags, boneMask, info.m_Lod, info.m_pColorMeshes );
  433. PopScissor( m_ShadowState[i].m_pFlashlightState );
  434. m_pCurrentFlashlight = NULL;
  435. }
  436. }
  437. }
  438. pRenderContext->SetFlashlightMode( false );
  439. // Here, we have to redraw the model one time for each shadow
  440. for (int i = 0; i < m_ShadowState.Count(); ++i )
  441. {
  442. if( m_ShadowState[i].m_pMaterial )
  443. {
  444. m_pRC->m_pForcedMaterial[ 0 ] = m_ShadowState[i].m_pMaterial;
  445. m_pRC->m_nForcedMaterialType = OVERRIDE_NORMAL;
  446. R_StudioRenderModel( pRenderContext, 0, info.m_Body, 0, m_ShadowState[i].m_pProxyData,
  447. NULL, NULL, flags, boneMask, info.m_Lod, NULL );
  448. }
  449. }
  450. // Restore the previous forced material
  451. m_pRC->m_pForcedMaterial[ 0 ] = pForcedMat;
  452. m_pRC->m_nForcedMaterialType = nForcedType;
  453. }
  454. void CStudioRender::DrawStaticPropShadows( const DrawModelInfo_t &info, const StudioRenderContext_t &rc, const matrix3x4_t& rootToWorld, int flags )
  455. {
  456. memcpy( &m_StaticPropRootToWorld, &rootToWorld, sizeof(matrix3x4_t) );
  457. memcpy( &m_PoseToWorld[0], &rootToWorld, sizeof(matrix3x4_t) );
  458. m_pRC = const_cast< StudioRenderContext_t* >( &rc );
  459. m_pBoneToWorld = &m_StaticPropRootToWorld;
  460. m_pStudioHdr = info.m_pStudioHdr;
  461. m_pStudioMeshes = info.m_pHardwareData->m_pLODs[info.m_Lod].m_pMeshData;
  462. m_pStudioHWData = info.m_pHardwareData;
  463. DrawShadows( info, flags, BONE_USED_BY_ANYTHING );
  464. m_pRC = NULL;
  465. m_pBoneToWorld = NULL;
  466. m_pStudioHdr = NULL;
  467. m_pStudioMeshes = NULL;
  468. m_pStudioHWData = NULL;
  469. }
  470. // Draw flashlight lighting on decals.
  471. void CStudioRender::DrawFlashlightDecals( const DrawModelInfo_t& info, int lod )
  472. {
  473. if ( !m_ShadowState.Count() || IsGameConsole() ) // game console implies single pass flashlight
  474. return;
  475. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  476. pRenderContext->SetFlashlightMode( true );
  477. int i;
  478. for (i = 0; i < m_ShadowState.Count(); ++i )
  479. {
  480. // This isn't clear. This means that this is a flashlight if the material is NULL. FLASHLIGHTFIXME
  481. if( !m_ShadowState[i].m_pMaterial )
  482. {
  483. Assert( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture );
  484. if ( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture )
  485. {
  486. pRenderContext->SetFlashlightStateEx( *m_ShadowState[i].m_pFlashlightState, *m_ShadowState[i].m_pWorldToTexture, m_ShadowState[i].m_pFlashlightDepthTexture );
  487. PushScissor( m_ShadowState[i].m_pFlashlightState );
  488. DrawDecal( info, lod, info.m_Body );
  489. PopScissor( m_ShadowState[i].m_pFlashlightState );
  490. }
  491. }
  492. }
  493. pRenderContext->SetFlashlightMode( false );
  494. }
  495. matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix )
  496. {
  497. float flWeight0, flWeight1, flWeight2;
  498. switch( boneweights.numbones )
  499. {
  500. default:
  501. case 1:
  502. return &pPoseToWorld[boneweights.bone[0]];
  503. case 2:
  504. {
  505. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  506. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  507. flWeight0 = boneweights.weight[0];
  508. flWeight1 = boneweights.weight[1];
  509. // NOTE: Inlining here seems to make a fair amount of difference
  510. scratchMatrix[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1;
  511. scratchMatrix[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1;
  512. scratchMatrix[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1;
  513. scratchMatrix[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1;
  514. scratchMatrix[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1;
  515. scratchMatrix[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1;
  516. scratchMatrix[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1;
  517. scratchMatrix[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1;
  518. scratchMatrix[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1;
  519. scratchMatrix[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1;
  520. scratchMatrix[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1;
  521. scratchMatrix[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1;
  522. }
  523. return &scratchMatrix;
  524. case 3:
  525. {
  526. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  527. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  528. matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
  529. flWeight0 = boneweights.weight[0];
  530. flWeight1 = boneweights.weight[1];
  531. flWeight2 = boneweights.weight[2];
  532. scratchMatrix[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1 + boneMat2[0][0] * flWeight2;
  533. scratchMatrix[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1 + boneMat2[0][1] * flWeight2;
  534. scratchMatrix[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1 + boneMat2[0][2] * flWeight2;
  535. scratchMatrix[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1 + boneMat2[0][3] * flWeight2;
  536. scratchMatrix[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1 + boneMat2[1][0] * flWeight2;
  537. scratchMatrix[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1 + boneMat2[1][1] * flWeight2;
  538. scratchMatrix[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1 + boneMat2[1][2] * flWeight2;
  539. scratchMatrix[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1 + boneMat2[1][3] * flWeight2;
  540. scratchMatrix[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1 + boneMat2[2][0] * flWeight2;
  541. scratchMatrix[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1 + boneMat2[2][1] * flWeight2;
  542. scratchMatrix[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1 + boneMat2[2][2] * flWeight2;
  543. scratchMatrix[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1 + boneMat2[2][3] * flWeight2;
  544. }
  545. return &scratchMatrix;
  546. case 4:
  547. {
  548. Assert( 0 ); // results undefined for numbones == 4, as MAX_NUM_BONES_PER_VERT is 3
  549. }
  550. return &scratchMatrix;
  551. }
  552. Assert(0);
  553. return NULL;
  554. }
  555. static void ComputeSkinMatrixToMemory( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result )
  556. {
  557. float flWeight0, flWeight1, flWeight2;
  558. switch( boneweights.numbones )
  559. {
  560. default:
  561. case 1:
  562. memcpy( &result, &pPoseToWorld[boneweights.bone[0]], sizeof(matrix3x4_t) );
  563. return;
  564. case 2:
  565. {
  566. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  567. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  568. flWeight0 = boneweights.weight[0];
  569. flWeight1 = boneweights.weight[1];
  570. // NOTE: Inlining here seems to make a fair amount of difference
  571. result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1;
  572. result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1;
  573. result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1;
  574. result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1;
  575. result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1;
  576. result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1;
  577. result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1;
  578. result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1;
  579. result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1;
  580. result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1;
  581. result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1;
  582. result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1;
  583. }
  584. return;
  585. case 3:
  586. {
  587. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  588. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  589. matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
  590. flWeight0 = boneweights.weight[0];
  591. flWeight1 = boneweights.weight[1];
  592. flWeight2 = boneweights.weight[2];
  593. result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1 + boneMat2[0][0] * flWeight2;
  594. result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1 + boneMat2[0][1] * flWeight2;
  595. result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1 + boneMat2[0][2] * flWeight2;
  596. result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1 + boneMat2[0][3] * flWeight2;
  597. result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1 + boneMat2[1][0] * flWeight2;
  598. result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1 + boneMat2[1][1] * flWeight2;
  599. result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1 + boneMat2[1][2] * flWeight2;
  600. result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1 + boneMat2[1][3] * flWeight2;
  601. result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1 + boneMat2[2][0] * flWeight2;
  602. result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1 + boneMat2[2][1] * flWeight2;
  603. result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1 + boneMat2[2][2] * flWeight2;
  604. result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1 + boneMat2[2][3] * flWeight2;
  605. }
  606. return;
  607. case 4:
  608. {
  609. Assert( 0 ); // results undefined for numbones == 4, as MAX_NUM_BONES_PER_VERT is 3
  610. }
  611. return;
  612. }
  613. Assert(0);
  614. }
  615. void ComputeSkinMatrixToMemorySSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result )
  616. {
  617. // NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization
  618. #if defined( _WIN32 ) && !defined( _WIN64 ) && !defined( _X360 )
  619. switch( boneweights.numbones )
  620. {
  621. default:
  622. case 1:
  623. memcpy( &result, &pPoseToWorld[boneweights.bone[0]], sizeof(matrix3x4_t) );
  624. return;
  625. case 2:
  626. {
  627. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  628. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  629. float *pWeights = boneweights.weight;
  630. _asm
  631. {
  632. mov eax, DWORD PTR [pWeights]
  633. movss xmm6, dword ptr[eax] ; boneweights.weight[0]
  634. movss xmm7, dword ptr[eax + 4] ; boneweights.weight[1]
  635. mov eax, DWORD PTR [boneMat0]
  636. mov ecx, DWORD PTR [boneMat1]
  637. mov edi, DWORD PTR [result]
  638. // Fill xmm6, and 7 with all the bone weights
  639. shufps xmm6, xmm6, 0
  640. shufps xmm7, xmm7, 0
  641. // Load up all rows of the three matrices
  642. movaps xmm0, XMMWORD PTR [eax]
  643. movaps xmm1, XMMWORD PTR [ecx]
  644. movaps xmm2, XMMWORD PTR [eax + 16]
  645. movaps xmm3, XMMWORD PTR [ecx + 16]
  646. movaps xmm4, XMMWORD PTR [eax + 32]
  647. movaps xmm5, XMMWORD PTR [ecx + 32]
  648. // Multiply the rows by the weights
  649. mulps xmm0, xmm6
  650. mulps xmm1, xmm7
  651. mulps xmm2, xmm6
  652. mulps xmm3, xmm7
  653. mulps xmm4, xmm6
  654. mulps xmm5, xmm7
  655. addps xmm0, xmm1
  656. addps xmm2, xmm3
  657. addps xmm4, xmm5
  658. movaps XMMWORD PTR [edi], xmm0
  659. movaps XMMWORD PTR [edi + 16], xmm2
  660. movaps XMMWORD PTR [edi + 32], xmm4
  661. }
  662. }
  663. case 3:
  664. {
  665. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  666. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  667. matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
  668. float *pWeights = boneweights.weight;
  669. _asm
  670. {
  671. mov eax, DWORD PTR [pWeights]
  672. movss xmm5, dword ptr[eax] ; boneweights.weight[0]
  673. movss xmm6, dword ptr[eax + 4] ; boneweights.weight[1]
  674. movss xmm7, dword ptr[eax + 8] ; boneweights.weight[2]
  675. mov eax, DWORD PTR [boneMat0]
  676. mov ecx, DWORD PTR [boneMat1]
  677. mov edx, DWORD PTR [boneMat2]
  678. mov edi, DWORD PTR [result]
  679. // Fill xmm5, 6, and 7 with all the bone weights
  680. shufps xmm5, xmm5, 0
  681. shufps xmm6, xmm6, 0
  682. shufps xmm7, xmm7, 0
  683. // Load up the first row of the three matrices
  684. movaps xmm0, XMMWORD PTR [eax]
  685. movaps xmm1, XMMWORD PTR [ecx]
  686. movaps xmm2, XMMWORD PTR [edx]
  687. // Multiply the rows by the weights
  688. mulps xmm0, xmm5
  689. mulps xmm1, xmm6
  690. mulps xmm2, xmm7
  691. addps xmm0, xmm1
  692. addps xmm0, xmm2
  693. movaps XMMWORD PTR [edi], xmm0
  694. // Load up the second row of the three matrices
  695. movaps xmm0, XMMWORD PTR [eax + 16]
  696. movaps xmm1, XMMWORD PTR [ecx + 16]
  697. movaps xmm2, XMMWORD PTR [edx + 16]
  698. // Multiply the rows by the weights
  699. mulps xmm0, xmm5
  700. mulps xmm1, xmm6
  701. mulps xmm2, xmm7
  702. addps xmm0, xmm1
  703. addps xmm0, xmm2
  704. movaps XMMWORD PTR [edi + 16], xmm0
  705. // Load up the third row of the three matrices
  706. movaps xmm0, XMMWORD PTR [eax + 32]
  707. movaps xmm1, XMMWORD PTR [ecx + 32]
  708. movaps xmm2, XMMWORD PTR [edx + 32]
  709. // Multiply the rows by the weights
  710. mulps xmm0, xmm5
  711. mulps xmm1, xmm6
  712. mulps xmm2, xmm7
  713. addps xmm0, xmm1
  714. addps xmm0, xmm2
  715. movaps XMMWORD PTR [edi + 32], xmm0
  716. }
  717. }
  718. case 4:
  719. {
  720. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  721. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  722. matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
  723. matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]];
  724. float *pWeights = boneweights.weight;
  725. _asm
  726. {
  727. mov eax, DWORD PTR [pWeights]
  728. movss xmm4, dword ptr[eax] ; boneweights.weight[0]
  729. movss xmm5, dword ptr[eax + 4] ; boneweights.weight[1]
  730. movss xmm6, dword ptr[eax + 8] ; boneweights.weight[2]
  731. movss xmm7, dword ptr[eax + 12] ; boneweights.weight[3]
  732. mov eax, DWORD PTR [boneMat0]
  733. mov ecx, DWORD PTR [boneMat1]
  734. mov edx, DWORD PTR [boneMat2]
  735. mov esi, DWORD PTR [boneMat3]
  736. mov edi, DWORD PTR [result]
  737. // Fill xmm5, 6, and 7 with all the bone weights
  738. shufps xmm4, xmm4, 0
  739. shufps xmm5, xmm5, 0
  740. shufps xmm6, xmm6, 0
  741. shufps xmm7, xmm7, 0
  742. // Load up the first row of the four matrices
  743. movaps xmm0, XMMWORD PTR [eax]
  744. movaps xmm1, XMMWORD PTR [ecx]
  745. movaps xmm2, XMMWORD PTR [edx]
  746. movaps xmm3, XMMWORD PTR [esi]
  747. // Multiply the rows by the weights
  748. mulps xmm0, xmm4
  749. mulps xmm1, xmm5
  750. mulps xmm2, xmm6
  751. mulps xmm3, xmm7
  752. addps xmm0, xmm1
  753. addps xmm2, xmm3
  754. addps xmm0, xmm2
  755. movaps XMMWORD PTR [edi], xmm0
  756. // Load up the second row of the three matrices
  757. movaps xmm0, XMMWORD PTR [eax + 16]
  758. movaps xmm1, XMMWORD PTR [ecx + 16]
  759. movaps xmm2, XMMWORD PTR [edx + 16]
  760. movaps xmm3, XMMWORD PTR [esi + 16]
  761. // Multiply the rows by the weights
  762. mulps xmm0, xmm4
  763. mulps xmm1, xmm5
  764. mulps xmm2, xmm6
  765. mulps xmm3, xmm7
  766. addps xmm0, xmm1
  767. addps xmm2, xmm3
  768. addps xmm0, xmm2
  769. movaps XMMWORD PTR [edi + 16], xmm0
  770. // Load up the third row of the three matrices
  771. movaps xmm0, XMMWORD PTR [eax + 32]
  772. movaps xmm1, XMMWORD PTR [ecx + 32]
  773. movaps xmm2, XMMWORD PTR [edx + 32]
  774. movaps xmm3, XMMWORD PTR [esi + 32]
  775. // Multiply the rows by the weights
  776. mulps xmm0, xmm4
  777. mulps xmm1, xmm5
  778. mulps xmm2, xmm6
  779. mulps xmm3, xmm7
  780. addps xmm0, xmm1
  781. addps xmm2, xmm3
  782. addps xmm0, xmm2
  783. movaps XMMWORD PTR [edi + 32], xmm0
  784. }
  785. }
  786. }
  787. #elif POSIX || _WIN64
  788. ComputeSkinMatrixToMemory( boneweights, pPoseToWorld, result );
  789. #elif defined( _X360 )
  790. ComputeSkinMatrixToMemory( boneweights, pPoseToWorld, result );
  791. #endif
  792. }
  //-----------------------------------------------------------------------------
  // Computes the weighted blend of the bone matrices referenced by a vertex.
  //
  //   boneweights   - per-vertex bone indices (bone[]), weights (weight[]) and count (numbones)
  //   pPoseToWorld  - array of bone matrices, indexed by boneweights.bone[i]
  //   scratchMatrix - destination for the blended matrix when more than one bone is used
  //
  // Returns a pointer to the matrix to skin with:
  //   - numbones == 1 (and any value not in 2..4, via 'default'): a pointer straight
  //     into pPoseToWorld; scratchMatrix is not written.
  //   - numbones == 2, 3 or 4: &scratchMatrix, holding sum( weight[i] * pPoseToWorld[bone[i]] ).
  //
  // On 32-bit Windows the blend is done with inline SSE assembly; every other
  // configuration forwards to ComputeSkinMatrix(). The assembly uses movaps for all
  // matrix loads/stores, which requires the bone matrices and scratchMatrix to be
  // 16-byte aligned. Each matrix is processed as three 16-byte rows (offsets 0, 16, 32).
  //-----------------------------------------------------------------------------
  793. matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix )
  794. {
  795. // NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization
  796. #if defined( _WIN32 ) && !defined( _WIN64 ) && !defined( _X360 )
  797. switch( boneweights.numbones )
  798. {
  799. default:
  800. case 1:
  // Single bone: no blending needed, hand back the bone matrix itself.
  801. return &pPoseToWorld[boneweights.bone[0]];
  802. case 2:
  803. {
  804. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  805. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  806. float *pWeights = boneweights.weight;
  807. _asm
  808. {
  809. mov eax, DWORD PTR [pWeights]
  810. movss xmm6, dword ptr[eax] ; boneweights.weight[0]
  811. movss xmm7, dword ptr[eax + 4] ; boneweights.weight[1]
  812. mov eax, DWORD PTR [boneMat0]
  813. mov ecx, DWORD PTR [boneMat1]
  814. mov edi, DWORD PTR [scratchMatrix]
  815. // Broadcast weight[0] across xmm6 and weight[1] across xmm7
  816. shufps xmm6, xmm6, 0
  817. shufps xmm7, xmm7, 0
  818. // Load up all three rows of the two matrices
  819. movaps xmm0, XMMWORD PTR [eax]
  820. movaps xmm1, XMMWORD PTR [ecx]
  821. movaps xmm2, XMMWORD PTR [eax + 16]
  822. movaps xmm3, XMMWORD PTR [ecx + 16]
  823. movaps xmm4, XMMWORD PTR [eax + 32]
  824. movaps xmm5, XMMWORD PTR [ecx + 32]
  825. // Multiply the rows by the weights
  826. mulps xmm0, xmm6
  827. mulps xmm1, xmm7
  828. mulps xmm2, xmm6
  829. mulps xmm3, xmm7
  830. mulps xmm4, xmm6
  831. mulps xmm5, xmm7
  // Sum the two weighted matrices row by row and store the result
  832. addps xmm0, xmm1
  833. addps xmm2, xmm3
  834. addps xmm4, xmm5
  835. movaps XMMWORD PTR [edi], xmm0
  836. movaps XMMWORD PTR [edi + 16], xmm2
  837. movaps XMMWORD PTR [edi + 32], xmm4
  838. }
  839. }
  840. return &scratchMatrix;
  841. case 3:
  842. {
  843. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  844. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  845. matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
  846. float *pWeights = boneweights.weight;
  847. _asm
  848. {
  849. mov eax, DWORD PTR [pWeights]
  850. movss xmm5, dword ptr[eax] ; boneweights.weight[0]
  851. movss xmm6, dword ptr[eax + 4] ; boneweights.weight[1]
  852. movss xmm7, dword ptr[eax + 8] ; boneweights.weight[2]
  853. mov eax, DWORD PTR [boneMat0]
  854. mov ecx, DWORD PTR [boneMat1]
  855. mov edx, DWORD PTR [boneMat2]
  856. mov edi, DWORD PTR [scratchMatrix]
  857. // Fill xmm5, 6, and 7 with all the bone weights
  858. shufps xmm5, xmm5, 0
  859. shufps xmm6, xmm6, 0
  860. shufps xmm7, xmm7, 0
  861. // Load up the first row of the three matrices
  862. movaps xmm0, XMMWORD PTR [eax]
  863. movaps xmm1, XMMWORD PTR [ecx]
  864. movaps xmm2, XMMWORD PTR [edx]
  865. // Multiply the rows by the weights
  866. mulps xmm0, xmm5
  867. mulps xmm1, xmm6
  868. mulps xmm2, xmm7
  869. addps xmm0, xmm1
  870. addps xmm0, xmm2
  871. movaps XMMWORD PTR [edi], xmm0
  872. // Load up the second row of the three matrices
  873. movaps xmm0, XMMWORD PTR [eax + 16]
  874. movaps xmm1, XMMWORD PTR [ecx + 16]
  875. movaps xmm2, XMMWORD PTR [edx + 16]
  876. // Multiply the rows by the weights
  877. mulps xmm0, xmm5
  878. mulps xmm1, xmm6
  879. mulps xmm2, xmm7
  880. addps xmm0, xmm1
  881. addps xmm0, xmm2
  882. movaps XMMWORD PTR [edi + 16], xmm0
  883. // Load up the third row of the three matrices
  884. movaps xmm0, XMMWORD PTR [eax + 32]
  885. movaps xmm1, XMMWORD PTR [ecx + 32]
  886. movaps xmm2, XMMWORD PTR [edx + 32]
  887. // Multiply the rows by the weights
  888. mulps xmm0, xmm5
  889. mulps xmm1, xmm6
  890. mulps xmm2, xmm7
  891. addps xmm0, xmm1
  892. addps xmm0, xmm2
  893. movaps XMMWORD PTR [edi + 32], xmm0
  894. }
  895. }
  896. return &scratchMatrix;
  897. case 4:
  898. {
  899. matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
  900. matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
  901. matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
  902. matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]];
  903. float *pWeights = boneweights.weight;
  904. _asm
  905. {
  906. mov eax, DWORD PTR [pWeights]
  907. movss xmm4, dword ptr[eax] ; boneweights.weight[0]
  908. movss xmm5, dword ptr[eax + 4] ; boneweights.weight[1]
  909. movss xmm6, dword ptr[eax + 8] ; boneweights.weight[2]
  910. movss xmm7, dword ptr[eax + 12] ; boneweights.weight[3]
  911. mov eax, DWORD PTR [boneMat0]
  912. mov ecx, DWORD PTR [boneMat1]
  913. mov edx, DWORD PTR [boneMat2]
  914. mov esi, DWORD PTR [boneMat3]
  915. mov edi, DWORD PTR [scratchMatrix]
  916. // Fill xmm4, 5, 6, and 7 with all the bone weights
  917. shufps xmm4, xmm4, 0
  918. shufps xmm5, xmm5, 0
  919. shufps xmm6, xmm6, 0
  920. shufps xmm7, xmm7, 0
  921. // Load up the first row of the four matrices
  922. movaps xmm0, XMMWORD PTR [eax]
  923. movaps xmm1, XMMWORD PTR [ecx]
  924. movaps xmm2, XMMWORD PTR [edx]
  925. movaps xmm3, XMMWORD PTR [esi]
  926. // Multiply the rows by the weights
  927. mulps xmm0, xmm4
  928. mulps xmm1, xmm5
  929. mulps xmm2, xmm6
  930. mulps xmm3, xmm7
  931. addps xmm0, xmm1
  932. addps xmm2, xmm3
  933. addps xmm0, xmm2
  934. movaps XMMWORD PTR [edi], xmm0
  935. // Load up the second row of the four matrices
  936. movaps xmm0, XMMWORD PTR [eax + 16]
  937. movaps xmm1, XMMWORD PTR [ecx + 16]
  938. movaps xmm2, XMMWORD PTR [edx + 16]
  939. movaps xmm3, XMMWORD PTR [esi + 16]
  940. // Multiply the rows by the weights
  941. mulps xmm0, xmm4
  942. mulps xmm1, xmm5
  943. mulps xmm2, xmm6
  944. mulps xmm3, xmm7
  945. addps xmm0, xmm1
  946. addps xmm2, xmm3
  947. addps xmm0, xmm2
  948. movaps XMMWORD PTR [edi + 16], xmm0
  949. // Load up the third row of the four matrices
  950. movaps xmm0, XMMWORD PTR [eax + 32]
  951. movaps xmm1, XMMWORD PTR [ecx + 32]
  952. movaps xmm2, XMMWORD PTR [edx + 32]
  953. movaps xmm3, XMMWORD PTR [esi + 32]
  954. // Multiply the rows by the weights
  955. mulps xmm0, xmm4
  956. mulps xmm1, xmm5
  957. mulps xmm2, xmm6
  958. mulps xmm3, xmm7
  959. addps xmm0, xmm1
  960. addps xmm2, xmm3
  961. addps xmm0, xmm2
  962. movaps XMMWORD PTR [edi + 32], xmm0
  963. }
  964. }
  965. return &scratchMatrix;
  966. }
  967. #else
  968. #ifndef LINUX
  969. #pragma message("ComputeSkinMatrixSSE C implementation only")
  970. #endif
  // No inline-asm path for this target: use the plain C++ implementation.
  971. return ComputeSkinMatrix( boneweights, pPoseToWorld, scratchMatrix );
  972. #endif
  // Not reached by either branch above as written (every switch case and the
  // #else path return); kept as a safety net.
  973. Assert( 0 );
  974. return NULL;
  975. }
  976. //-----------------------------------------------------------------------------
  977. // Designed for inter-module draw optimized calling, requires R_InitLightEffectWorld3()
  978. // Compute the lighting at a point and normal
  979. // Uses the set function pointer
  980. // Final lighting is in gamma space
  981. //-----------------------------------------------------------------------------
  982. static lightpos_t lightpos[MAXLOCALLIGHTS];
  983. inline void CStudioRender::R_ComputeLightAtPoint3( const Vector &pos, const Vector &normal, Vector &color )
  984. {
  985. if ( m_pRC->m_Config.fullbright )
  986. {
  987. color.Init( 1.0f, 1.0f, 1.0f );
  988. return;
  989. }
  990. // Set up lightpos[i].dot, lightpos[i].falloff, and lightpos[i].delta for all lights
  991. R_LightStrengthWorld( pos, m_pRC->m_NumLocalLights, m_pRC->m_LocalLights, lightpos );
  992. // calculate ambient values from the ambient cube given a normal.
  993. R_LightAmbient_4D( normal, m_pRC->m_LightBoxColors, color );
  994. // Calculate color given lightpos_t lightpos, a normal, and the ambient
  995. // color from the ambient cube calculated above.
  996. Assert(R_LightEffectsWorld3);
  997. R_LightEffectsWorld3( m_pRC->m_LocalLights, lightpos, normal, color );
  998. }
  999. // Define SPECIAL_SSE_MESH_PROCESSOR to enable code which contains a special optimized SSE lighting loop, significantly
  1000. // improving software vertex processing performance.
  1001. #if defined( _WIN32 ) && !defined( _X360 )
  1002. #define SPECIAL_SSE_MESH_PROCESSOR
  1003. #endif
  1004. #ifdef SPECIAL_SSE_MESH_PROCESSOR
  // Uncomment to cross-check the SSE lighting results against the scalar lighting path.
  1005. //#define VERIFY_SSE_LIGHTING
  1006. // false: MAX(0,L*N) true: .5*(L.N)+.5. set based on material
  1007. static bool SSELightingHalfLambert;
  1008. // These variables are used by the special SSE lighting path. The
  1009. // lighting path calculates them every time it processes a mesh, so there
  1010. // is no need to keep them in sync with changes to the other light variables.
  1011. static fltx4 OneOver_ThetaDot_Minus_PhiDot[MAXLOCALLIGHTS]; // 1/(theta-phi)
  1012. void CStudioRender::R_MouthLighting( fltx4 fIllum, const FourVectors& normal, const FourVectors& forward, FourVectors &light )
  1013. {
  1014. fltx4 dot = SubSIMD(Four_Zeros,normal*forward);
  1015. dot=MaxSIMD(Four_Zeros,dot);
  1016. dot=MulSIMD(fIllum,dot);
  1017. light *= dot;
  1018. }
  1019. inline void CStudioRender::R_ComputeLightAtPoints3( const FourVectors &pos, const FourVectors &normal, FourVectors &color )
  1020. {
  1021. if ( m_pRC->m_Config.fullbright )
  1022. {
  1023. color.DuplicateVector( Vector( 1.0f, 1.0f, 1.0f ) );
  1024. return;
  1025. }
  1026. R_LightAmbient_4D( normal, m_pRC->m_LightBoxColors, color );
  1027. // now, add in contribution from all lights
  1028. for ( int i = 0; i < m_pRC->m_NumLocalLights; i++)
  1029. {
  1030. FourVectors delta;
  1031. LightDesc_t const *wl = m_pRC->m_LocalLights+i;
  1032. Assert((wl->m_Type==MATERIAL_LIGHT_POINT) || (wl->m_Type==MATERIAL_LIGHT_SPOT) || (wl->m_Type==MATERIAL_LIGHT_DIRECTIONAL));
  1033. switch (wl->m_Type)
  1034. {
  1035. case MATERIAL_LIGHT_POINT:
  1036. case MATERIAL_LIGHT_SPOT:
  1037. delta.DuplicateVector(wl->m_Position);
  1038. delta-=pos;
  1039. break;
  1040. case MATERIAL_LIGHT_DIRECTIONAL:
  1041. delta.DuplicateVector(wl->m_Direction);
  1042. delta*=-1.0;
  1043. break;
  1044. }
  1045. fltx4 falloff = R_WorldLightDistanceFalloff( wl, delta);
  1046. delta.VectorNormalizeFast();
  1047. fltx4 strength=delta*normal;
  1048. if (SSELightingHalfLambert)
  1049. {
  1050. strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives);
  1051. }
  1052. else
  1053. strength=MaxSIMD(Four_Zeros,delta*normal);
  1054. switch(wl->m_Type)
  1055. {
  1056. case MATERIAL_LIGHT_POINT:
  1057. // half-lambert
  1058. break;
  1059. case MATERIAL_LIGHT_SPOT:
  1060. {
  1061. fltx4 dot2=SubSIMD(Four_Zeros,delta*wl->m_Direction); // dot position with spot light dir for cone falloff
  1062. fltx4 cone_falloff_scale=MulSIMD(OneOver_ThetaDot_Minus_PhiDot[i],
  1063. SubSIMD(dot2,ReplicateX4(wl->m_PhiDot)));
  1064. cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones);
  1065. if ((wl->m_Falloff!=0.0) && (wl->m_Falloff!=1.0))
  1066. {
  1067. // !!speed!! could compute integer exponent needed by powsimd and store in light
  1068. cone_falloff_scale=PowSIMD(cone_falloff_scale,wl->m_Falloff);
  1069. }
  1070. strength=MulSIMD(cone_falloff_scale,strength);
  1071. // now, zero out lighting where dot2<phidot. This will mask out any invalid results
  1072. // from pow function, etc
  1073. fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(wl->m_PhiDot)); // outside light cone?
  1074. strength=AndSIMD(OutsideMask,strength);
  1075. }
  1076. break;
  1077. case MATERIAL_LIGHT_DIRECTIONAL:
  1078. break;
  1079. }
  1080. strength=MulSIMD(strength,falloff);
  1081. color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(wl->m_Color.x)));
  1082. color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(wl->m_Color.y)));
  1083. color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(wl->m_Color.z)));
  1084. }
  1085. }
  1086. #endif // SPECIAL_SSE_MESH_PROCESSOR
  1087. //-----------------------------------------------------------------------------
  1088. // Optimized for low-end hardware
  1089. //-----------------------------------------------------------------------------
  1090. #pragma warning (disable:4701)
  1091. // NOTE: I'm using this crazy wrapper because using straight template functions
  1092. // doesn't appear to work with function tables
  1093. template< int nHasTangentSpace, int nDoFlex, int nLighting >
  1094. class CProcessMeshWrapper
  1095. {
  1096. public:
  1097. static void R_TransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, const Vector4D *pSrcTangentS,
  1098. matrix3x4_t *pSkinMat, VectorAligned &pos, Vector &norm, Vector4DAligned &tangentS )
  1099. {
  1100. // NOTE: Could add SSE stuff here, if we knew what SSE stuff could make it faster
  1101. pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3];
  1102. norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2];
  1103. pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3];
  1104. norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2];
  1105. pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3];
  1106. norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2];
  1107. if ( nHasTangentSpace )
  1108. {
  1109. tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2];
  1110. tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2];
  1111. tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2];
  1112. tangentS.w = pSrcTangentS->w;
  1113. }
  1114. }
  1115. static void R_StudioSoftwareProcessMesh( const mstudio_meshvertexdata_t *vertData, matrix3x4_t *pPoseToWorld,
  1116. CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask,
  1117. IMaterial* pMaterial)
  1118. {
  1119. Vector color;
  1120. Vector4D *pStudioTangentS;
  1121. Vector4DAligned tangentS;
  1122. Vector *pSrcPos;
  1123. Vector *pSrcNorm;
  1124. Vector4D *pSrcTangentS = NULL;
  1125. ALIGN16 ModelVertexDX8_t dstVertex;
  1126. dstVertex.m_vecUserData.Init( 1.0f, 0.0f, 0.0f, 1.0f );
  1127. ALIGN16 matrix3x4_t temp;
  1128. ALIGN16 matrix3x4_t *pSkinMat;
  1129. int ntemp[PREFETCH_VERT_COUNT];
  1130. Assert( numVertices > 0 );
  1131. mstudiovertex_t *pVertices = vertData->Vertex( 0 );
  1132. if (nHasTangentSpace)
  1133. {
  1134. pStudioTangentS = vertData->TangentS( 0 );
  1135. Assert( pStudioTangentS->w == -1.0f || pStudioTangentS->w == 1.0f );
  1136. }
  1137. // Mouth related stuff...
  1138. float fIllum = 1.0f;
  1139. Vector forward;
  1140. if (nLighting == LIGHTING_MOUTH)
  1141. {
  1142. g_StudioRender.R_MouthComputeLightingValues( fIllum, forward );
  1143. }
  1144. if ((nLighting == LIGHTING_MOUTH) || (nLighting == LIGHTING_SOFTWARE))
  1145. {
  1146. g_StudioRender.R_InitLightEffectsWorld3();
  1147. }
  1148. #ifdef _DEBUG
  1149. // In debug, clear it out to ensure we aren't accidentially calling
  1150. // the last setup for R_ComputeLightForPoint3.
  1151. else
  1152. {
  1153. g_StudioRender.R_LightEffectsWorld3 = NULL;
  1154. }
  1155. #endif
  1156. #if defined( _WIN32 ) && !defined( _X360 )
  1157. // Precaches the data
  1158. _mm_prefetch( (char*)((int)pGroupToMesh & (~0x1F)), _MM_HINT_NTA );
  1159. #endif
  1160. for ( int i = 0; i < PREFETCH_VERT_COUNT; ++i )
  1161. {
  1162. ntemp[i] = pGroupToMesh[i];
  1163. #if defined( _WIN32 ) && !defined( _X360 )
  1164. char *pMem = (char*)&pVertices[ntemp[i]];
  1165. _mm_prefetch( pMem, _MM_HINT_NTA );
  1166. _mm_prefetch( pMem + 32, _MM_HINT_NTA );
  1167. if ( nHasTangentSpace )
  1168. {
  1169. _mm_prefetch( (char*)&pStudioTangentS[ntemp[i]], _MM_HINT_NTA );
  1170. }
  1171. #endif
  1172. }
  1173. int n, idx;
  1174. for ( int j=0; j < numVertices; ++j )
  1175. {
  1176. #if defined( _WIN32 ) && !defined( _X360 )
  1177. char *pMem = (char*)&pGroupToMesh[j + PREFETCH_VERT_COUNT + 1];
  1178. _mm_prefetch( (char*)((int)pMem & (~0x1F)), _MM_HINT_NTA );
  1179. #endif
  1180. idx = j & (PREFETCH_VERT_COUNT-1);
  1181. n = ntemp[idx];
  1182. mstudiovertex_t &vert = pVertices[n];
  1183. ntemp[idx] = pGroupToMesh[j + PREFETCH_VERT_COUNT];
  1184. // Compute the skinning matrix
  1185. pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp );
  1186. // transform into world space
  1187. if ( nDoFlex && vertexCache.IsVertexFlexed(n) )
  1188. {
  1189. CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex(n);
  1190. pSrcPos = &pFlexedVertex->m_Position.AsVector3D();
  1191. pSrcNorm = &pFlexedVertex->m_Normal.AsVector3D();
  1192. if ( nHasTangentSpace )
  1193. {
  1194. pSrcTangentS = &pFlexedVertex->m_TangentS;
  1195. Assert( pSrcTangentS->w == -1.0f || pSrcTangentS->w == 1.0f );
  1196. }
  1197. }
  1198. else
  1199. {
  1200. pSrcPos = &vert.m_vecPosition;
  1201. pSrcNorm = &vert.m_vecNormal;
  1202. if ( nHasTangentSpace )
  1203. {
  1204. pSrcTangentS = &pStudioTangentS[n];
  1205. Assert( pSrcTangentS->w == -1.0f || pSrcTangentS->w == 1.0f );
  1206. }
  1207. }
  1208. // Transform the vert into world space
  1209. R_TransformVert( pSrcPos, pSrcNorm, pSrcTangentS, pSkinMat,
  1210. *(VectorAligned*)&dstVertex.m_vecPosition, dstVertex.m_vecNormal, *(Vector4DAligned*)&dstVertex.m_vecUserData );
  1211. #if defined( _WIN32 ) && !defined( _X360 )
  1212. _mm_prefetch( (char*)&pVertices[ntemp[idx]], _MM_HINT_NTA);
  1213. _mm_prefetch( (char*)&pVertices[ntemp[idx]] + 32, _MM_HINT_NTA );
  1214. if ( nHasTangentSpace )
  1215. {
  1216. _mm_prefetch( (char*)&pStudioTangentS[ntemp[idx]], _MM_HINT_NTA );
  1217. }
  1218. #endif
  1219. dstVertex.m_vecTexCoord = vert.m_vecTexCoord;
  1220. #if !defined( _X360 )
  1221. Assert( dstVertex.m_vecUserData.w == -1.0f || dstVertex.m_vecUserData.w == 1.0f );
  1222. meshBuilder.FastVertexSSE( dstVertex );
  1223. #else
  1224. meshBuilder.VertexDX8ToX360( dstVertex );
  1225. #endif
  1226. }
  1227. meshBuilder.FastAdvanceNVertices( numVertices );
  1228. }
  1229. #ifdef SPECIAL_SSE_MESH_PROCESSOR
  1230. #ifdef VERIFY_SSE_LIGHTING
  1231. static int NotCloseEnough( float a, float b )
  1232. {
  1233. // check if 2 linear lighting values are close enough between the sse and non see lighting model
  1234. // no point being more precise than 1% since it all maps to 8 bit anyway
  1235. float thresh=0.1f*fabs( a );
  1236. if ( thresh < 0.1f )
  1237. thresh = 0.1f;
  1238. return ( fabs( a-b ) > thresh );
  1239. }
  1240. #endif
  1241. // this special version of the vertex processor does 4 vertices at once, so that they can be lit using SSE instructions. This provides
  1242. // a >2x speedup in the lit case
  1243. static void R_PerformVectorizedLightingSSE( const FourVectors &forward, fltx4 fIllum, ModelVertexDX8_t *dst, unsigned int nAlphaMask)
  1244. {
  1245. if ( nLighting == LIGHTING_SOFTWARE )
  1246. {
  1247. #ifdef VERIFY_SSE_LIGHTING
  1248. // if ( (g_StudioRender.m_NumLocalLights==1) &&
  1249. // ( (g_StudioRender.m_LocalLights[0].m_Type==MATERIAL_LIGHT_SPOT)))
  1250. // {
  1251. // // ihvtest doesn't use different exponents for its spots,
  1252. // // so i mess with the exponents when testing
  1253. // static int ctr=0;
  1254. // static float exps[8]={0,1,2,3,4,4.5,5.25,2.5};
  1255. // ctr=(ctr+1)&7;
  1256. // g_StudioRender.m_LocalLights[0].m_Falloff=exps[ctr];
  1257. // }
  1258. #endif
  1259. FourVectors Position;
  1260. Position.LoadAndSwizzleAligned(dst[0].m_vecPosition,dst[1].m_vecPosition,dst[2].m_vecPosition,dst[3].m_vecPosition);
  1261. FourVectors Normal(dst[0].m_vecNormal,dst[1].m_vecNormal,dst[2].m_vecNormal,dst[3].m_vecNormal);
  1262. FourVectors Color;
  1263. g_StudioRender.R_ComputeLightAtPoints3( Position, Normal, Color);
  1264. for (int i=0; i<4; i++)
  1265. {
  1266. Vector color;
  1267. #ifdef VERIFY_SSE_LIGHTING
  1268. // debug - check sse version against "real" version
  1269. g_StudioRender.R_ComputeLightAtPoint3( dst[i].m_vecPosition,dst[i].m_vecNormal, color );
  1270. if ( NotCloseEnough(color.x,Color.X(i)) ||
  1271. NotCloseEnough(color.y,Color.Y(i)) ||
  1272. NotCloseEnough(color.z,Color.Z(i)))
  1273. {
  1274. Assert(0);
  1275. // recompute so can step in debugger
  1276. g_StudioRender.R_ComputeLightAtPoints3( Position,Normal,Color);
  1277. g_StudioRender.R_ComputeLightAtPoint3( dst[i].m_vecPosition,dst[i].m_vecNormal, color );
  1278. }
  1279. #endif
  1280. unsigned char r = LinearToLightmap( Color.X(i) );
  1281. unsigned char g = LinearToLightmap( Color.Y(i) );
  1282. unsigned char b = LinearToLightmap( Color.Z(i) );
  1283. dst[i].m_nColor = b | (g << 8) | (r << 16) | nAlphaMask;
  1284. }
  1285. }
  1286. else if ( nLighting == LIGHTING_MOUTH )
  1287. {
  1288. FourVectors Position;
  1289. Position.LoadAndSwizzleAligned(dst[0].m_vecPosition,dst[1].m_vecPosition,dst[2].m_vecPosition,dst[3].m_vecPosition);
  1290. FourVectors Normal(dst[0].m_vecNormal,dst[1].m_vecNormal,dst[2].m_vecNormal,dst[3].m_vecNormal);
  1291. FourVectors Color;
  1292. g_StudioRender.R_ComputeLightAtPoints3( Position, Normal, Color);
  1293. g_StudioRender.R_MouthLighting( fIllum, Normal, forward, Color );
  1294. for (int i=0; i<4; i++)
  1295. {
  1296. unsigned char r = LinearToLightmap( Color.X(i) );
  1297. unsigned char g = LinearToLightmap( Color.Y(i) );
  1298. unsigned char b = LinearToLightmap( Color.Z(i) );
  1299. dst[i].m_nColor = b | (g << 8) | (r << 16) | nAlphaMask;
  1300. }
  1301. }
  1302. }
  1303. #endif // SPECIAL_SSE_MESH_PROCESSOR
  1304. };
  1305. //-----------------------------------------------------------------------------
  1306. // Draws the mesh using software vertex transformation
  1307. //-----------------------------------------------------------------------------
  // One instantiation of CProcessMeshWrapper per combination of
  // <has tangent space, do flex, lighting mode>. The typedef suffix encodes the
  // combination: two digits for tangent space / flex, then H, S or M for
  // LIGHTING_HARDWARE, LIGHTING_SOFTWARE or LIGHTING_MOUTH.
  1308. typedef CProcessMeshWrapper< false, false, LIGHTING_HARDWARE > ProcessMesh00H_t;
  1309. typedef CProcessMeshWrapper< false, false, LIGHTING_SOFTWARE > ProcessMesh00S_t;
  1310. typedef CProcessMeshWrapper< false, false, LIGHTING_MOUTH > ProcessMesh00M_t;
  1311. typedef CProcessMeshWrapper< false, true, LIGHTING_HARDWARE > ProcessMesh01H_t;
  1312. typedef CProcessMeshWrapper< false, true, LIGHTING_SOFTWARE > ProcessMesh01S_t;
  1313. typedef CProcessMeshWrapper< false, true, LIGHTING_MOUTH > ProcessMesh01M_t;
  1314. typedef CProcessMeshWrapper< true, false, LIGHTING_HARDWARE > ProcessMesh10H_t;
  1315. typedef CProcessMeshWrapper< true, false, LIGHTING_SOFTWARE > ProcessMesh10S_t;
  1316. typedef CProcessMeshWrapper< true, false, LIGHTING_MOUTH > ProcessMesh10M_t;
  1317. typedef CProcessMeshWrapper< true, true, LIGHTING_HARDWARE > ProcessMesh11H_t;
  1318. typedef CProcessMeshWrapper< true, true, LIGHTING_SOFTWARE > ProcessMesh11S_t;
  1319. typedef CProcessMeshWrapper< true, true, LIGHTING_MOUTH > ProcessMesh11M_t;
  // Dispatch table for the software mesh processors. The entry order must match
  // the index computed by CStudioRender::R_StudioSoftwareProcessMesh:
  //   bNeedsTangentSpace * 6 + doFlex * 3 + lighting
  // NOTE(review): this presumably relies on LIGHTING_HARDWARE, LIGHTING_SOFTWARE
  // and LIGHTING_MOUTH having the values 0, 1 and 2 -- confirm against the enum.
  1320. static SoftwareProcessMeshFunc_t g_SoftwareProcessMeshFunc[] =
  1321. {
  1322. ProcessMesh00H_t::R_StudioSoftwareProcessMesh,
  1323. ProcessMesh00S_t::R_StudioSoftwareProcessMesh,
  1324. ProcessMesh00M_t::R_StudioSoftwareProcessMesh,
  1325. ProcessMesh01H_t::R_StudioSoftwareProcessMesh,
  1326. ProcessMesh01S_t::R_StudioSoftwareProcessMesh,
  1327. ProcessMesh01M_t::R_StudioSoftwareProcessMesh,
  1328. ProcessMesh10H_t::R_StudioSoftwareProcessMesh,
  1329. ProcessMesh10S_t::R_StudioSoftwareProcessMesh,
  1330. ProcessMesh10M_t::R_StudioSoftwareProcessMesh,
  1331. ProcessMesh11H_t::R_StudioSoftwareProcessMesh,
  1332. ProcessMesh11S_t::R_StudioSoftwareProcessMesh,
  1333. ProcessMesh11M_t::R_StudioSoftwareProcessMesh,
  1334. };
  1335. inline const mstudio_meshvertexdata_t * GetFatVertexData( mstudiomesh_t * pMesh, studiohdr_t * pStudioHdr )
  1336. {
  1337. if ( !pMesh->pModel()->CacheVertexData( pStudioHdr ) )
  1338. {
  1339. // not available yet
  1340. return NULL;
  1341. }
  1342. const mstudio_meshvertexdata_t *pVertData = pMesh->GetVertexData( pStudioHdr );
  1343. Assert( pVertData );
  1344. if ( !pVertData )
  1345. {
  1346. static unsigned int warnCount = 0;
  1347. if ( warnCount++ < 20 )
  1348. Warning( "ERROR: model verts have been compressed or you don't have them in memory on a console, cannot render! (use \"-no_compressed_vvds\")" );
  1349. }
  1350. return pVertData;
  1351. }
  1352. void CStudioRender::R_StudioSoftwareProcessMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder,
  1353. int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend,
  1354. bool bNeedsTangentSpace, IMaterial *pMaterial )
  1355. {
  1356. unsigned int nAlphaMask = RoundFloatToInt( r_blend * 255.0f );
  1357. nAlphaMask = clamp( nAlphaMask, (uint)0, (uint)255 );
  1358. nAlphaMask <<= 24;
  1359. // FIXME: Use function pointers to simplify this?!?
  1360. int idx = bNeedsTangentSpace * 6 + doFlex * 3 + lighting;
  1361. const mstudio_meshvertexdata_t *pVertData = GetFatVertexData( pmesh, m_pStudioHdr );
  1362. if ( pVertData )
  1363. {
  1364. // invoke the software mesh processing handler
  1365. g_SoftwareProcessMeshFunc[idx]( pVertData, m_PoseToWorld, m_VertexCache, meshBuilder, numVertices, pGroupToMesh, nAlphaMask, pMaterial );
  1366. }
  1367. }
  1368. static void R_SlowTransformVert( const Vector *pSrcPos, const Vector *pSrcNorm,
  1369. matrix3x4_t *pSkinMat, VectorAligned &pos, VectorAligned &norm )
  1370. {
  1371. pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3];
  1372. norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2];
  1373. pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3];
  1374. norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2];
  1375. pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3];
  1376. norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2];
  1377. }
  1378. static void R_SlowTransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, const Vector4D *pSrcTangentS,
  1379. matrix3x4_t *pSkinMat, VectorAligned &pos, VectorAligned &norm, VectorAligned &tangentS )
  1380. {
  1381. pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3];
  1382. norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2];
  1383. tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2];
  1384. pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3];
  1385. norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2];
  1386. tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2];
  1387. pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3];
  1388. norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2];
  1389. tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2];
  1390. }
  1391. void CStudioRender::R_StudioSoftwareProcessMesh_NormalsBatched(IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshgroup_t* pGroup,
  1392. StudioModelLighting_t lighting, bool doFlex, float r_blend, bool bShowNormals, bool bShowTangent )
  1393. {
  1394. //Batch up and render normals and tangents so that we don't blow the maximum vertex buffer size.
  1395. CMeshBuilder meshBuilder;
  1396. IMesh* pMesh = pRenderContext->GetDynamicMesh( false );
  1397. int nMaxVertices, nMaxIndices;
  1398. pRenderContext->GetMaxToRender( pMesh, false, &nMaxVertices, &nMaxIndices );
  1399. int numPrimativesPerVertex = ( (bShowNormals ? 1 : 0) + (bShowTangent ? 2 : 0) );
  1400. if ( numPrimativesPerVertex == 0 )
  1401. {
  1402. return;
  1403. }
  1404. int numLineSegVertsPerVertex = 2 * numPrimativesPerVertex;
  1405. const int maxVertsPerPass = nMaxVertices / numLineSegVertsPerVertex;
  1406. int startVertex = 0;
  1407. int numVertsLeftToDraw = pGroup->m_NumVertices;
  1408. while ( numVertsLeftToDraw > 0 )
  1409. {
  1410. int numVertsInThisPass = numVertsLeftToDraw;
  1411. if ( numVertsInThisPass > maxVertsPerPass )
  1412. {
  1413. numVertsInThisPass = maxVertsPerPass;
  1414. }
  1415. meshBuilder.Begin( pMesh, MATERIAL_LINES, numVertsInThisPass * numPrimativesPerVertex );
  1416. R_StudioSoftwareProcessMesh_Normals( pmesh, meshBuilder, startVertex, numVertsInThisPass, pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, bShowNormals, bShowTangent, bShowTangent );
  1417. meshBuilder.End( );
  1418. pMesh->Draw();
  1419. numVertsLeftToDraw -= numVertsInThisPass;
  1420. startVertex += numVertsInThisPass;
  1421. }
  1422. }
  1423. void CStudioRender::R_StudioSoftwareProcessMesh_Normals( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, int startVertex,
  1424. int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend,
  1425. bool bShowNormals, bool bShowTangentS, bool bShowTangentT )
  1426. {
  1427. ALIGN16 matrix3x4_t temp;
  1428. ALIGN16 matrix3x4_t *pSkinMat;
  1429. Vector *pSrcPos = NULL;
  1430. Vector *pSrcNorm = NULL;
  1431. Vector4D *pSrcTangentS = NULL;
  1432. VectorAligned norm, pos, tangentS, tangentT;
  1433. // Gets at the vertex data
  1434. const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr );
  1435. if ( !vertData )
  1436. {
  1437. // not available
  1438. return;
  1439. }
  1440. // Don't even try to show tangent data if we don't have any
  1441. if ( !vertData->HasTangentData() )
  1442. {
  1443. bShowTangentS = bShowTangentT = false;
  1444. }
  1445. mstudiovertex_t *pVertices = vertData->Vertex( 0 );
  1446. Vector4D *pTangentS = NULL;
  1447. Vector4D tang;
  1448. if ( bShowTangentS || bShowTangentT )
  1449. {
  1450. pTangentS = vertData->TangentS( 0 );
  1451. }
  1452. for ( int j=startVertex; j < startVertex + numVertices; j++ )
  1453. {
  1454. int n = pGroupToMesh[j];
  1455. mstudiovertex_t &vert = pVertices[n];
  1456. if ( bShowTangentS || bShowTangentT )
  1457. {
  1458. tang = pTangentS[n];
  1459. }
  1460. pSkinMat = ComputeSkinMatrix( vert.m_BoneWeights, m_PoseToWorld, temp );
  1461. // transform into world space
  1462. if ( m_VertexCache.IsVertexFlexed(n) )
  1463. {
  1464. CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n);
  1465. pSrcPos = &pFlexedVertex->m_Position.AsVector3D();
  1466. pSrcNorm = &pFlexedVertex->m_Normal.AsVector3D();
  1467. if ( bShowTangentS || bShowTangentT )
  1468. {
  1469. pSrcTangentS = &pFlexedVertex->m_TangentS;
  1470. }
  1471. }
  1472. else
  1473. {
  1474. pSrcPos = &vert.m_vecPosition;
  1475. pSrcNorm = &vert.m_vecNormal;
  1476. if ( bShowTangentS || bShowTangentT )
  1477. {
  1478. pSrcTangentS = &tang;
  1479. }
  1480. }
  1481. // Transform the vert into world space
  1482. if ( ( bShowTangentS || bShowTangentT ) && ( pSrcTangentS != NULL ) )
  1483. {
  1484. R_SlowTransformVert( pSrcPos, pSrcNorm, pSrcTangentS, pSkinMat, pos, norm, tangentS );
  1485. }
  1486. else
  1487. {
  1488. R_SlowTransformVert( pSrcPos, pSrcNorm, pSkinMat, pos, norm );
  1489. }
  1490. if ( bShowNormals )
  1491. {
  1492. meshBuilder.Position3fv( pos.Base() );
  1493. meshBuilder.Color3f( 0.0f, 0.0f, 1.0f );
  1494. meshBuilder.AdvanceVertex();
  1495. Vector normalPos;
  1496. normalPos = pos + norm * 0.5f;
  1497. meshBuilder.Position3fv( normalPos.Base() );
  1498. meshBuilder.Color3f( 0.0f, 0.0f, 1.0f );
  1499. meshBuilder.AdvanceVertex();
  1500. }
  1501. if ( ( bShowTangentS || bShowTangentT ) && ( pSrcTangentS != NULL) )
  1502. {
  1503. if ( bShowTangentS )
  1504. {
  1505. meshBuilder.Position3fv( pos.Base() );
  1506. meshBuilder.Color3f( 1.0f, 0.0f, 0.0f );
  1507. meshBuilder.AdvanceVertex();
  1508. Vector vTangentSPos;
  1509. vTangentSPos = pos + tangentS * 0.5f;
  1510. meshBuilder.Position3fv( vTangentSPos.Base() );
  1511. meshBuilder.Color3f( 1.0f, 0.0f, 0.0f );
  1512. meshBuilder.AdvanceVertex();
  1513. }
  1514. if ( bShowTangentT )
  1515. {
  1516. meshBuilder.Position3fv( pos.Base() );
  1517. meshBuilder.Color3f( 0.0f, 1.0f, 0.0f );
  1518. meshBuilder.AdvanceVertex();
  1519. // Compute tangentT from normal and tangentS
  1520. CrossProduct( norm, tangentS, tangentT );
  1521. Vector vTangentTPos;
  1522. vTangentTPos = pos + tangentT * 0.5f;
  1523. meshBuilder.Position3fv( vTangentTPos.Base() );
  1524. meshBuilder.Color3f( 0.0f, 1.0f, 0.0f );
  1525. meshBuilder.AdvanceVertex();
  1526. }
  1527. } // end tacking on tangentS and tangetT line segments
  1528. }
  1529. }
  1530. #pragma warning (default:4701)
  1531. static int r_studioProcess_maxVerts = 100;
  1532. //DLL_IMPORT CLinkedMiniProfiler *g_pOtherMiniProfilers;
  1533. //CLinkedMiniProfiler g_mp_flexV("flexV", &g_pOtherMiniProfilers);
  1534. //CLinkedMiniProfiler g_mp_flexW("flexW", &g_pOtherMiniProfilers);
  1535. void CStudioRender::R_StudioProcessFlexedMesh_StreamOffset( mstudiomesh_t* pmesh, int lod )
  1536. {
  1537. VPROF_BUDGET( "ProcessFlexedMesh_SO", _T("HW_Morphing") );
  1538. if ( m_VertexCache.IsFlexComputationDone() )
  1539. return;
  1540. int vertCount = pmesh->vertexdata.numLODVertexes[lod];
  1541. m_VertexCache.SetupComputation( pmesh, true );
  1542. mstudioflex_t *pflex = pmesh->pFlex( 0 );
  1543. for (int i = 0; i < pmesh->numflexes; i++)
  1544. {
  1545. float w1 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexdesc ] );
  1546. float w2 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexdesc ] );
  1547. float w3, w4;
  1548. if ( pflex[i].flexpair != 0)
  1549. {
  1550. w3 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexpair ] );
  1551. w4 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexpair ] );
  1552. }
  1553. else
  1554. {
  1555. w3 = w1;
  1556. w4 = w2;
  1557. }
  1558. // Move on if the weights for this flex are sufficiently small
  1559. if (w1 > -0.001 && w1 < 0.001 && w2 > -0.001 && w2 < 0.001)
  1560. {
  1561. if (w3 > -0.001 && w3 < 0.001 && w4 > -0.001 && w4 < 0.001)
  1562. {
  1563. continue;
  1564. }
  1565. }
  1566. if ( pflex[i].vertanimtype == STUDIO_VERT_ANIM_NORMAL )
  1567. {
  1568. //CMiniProfilerGuard mpguard(&g_mp_flexV,pflex[i].numverts);
  1569. // the most likely path
  1570. mstudiovertanim_t *pvanim = pflex[i].pVertanim( 0 );
  1571. #if defined(TEST_DUMP_BIG_FLEXES)
  1572. if(pflex[i].numverts > r_studioProcess_maxVerts)
  1573. {
  1574. int numVerts = pflex[i].numverts;
  1575. r_studioProcess_maxVerts = numVerts;
  1576. char szFileName[64];
  1577. sprintf(szFileName, "d:\\BigFlex%u.gl", r_studioProcess_maxVerts);
  1578. FileHandle_t fh = g_pFullFileSystem->Open(szFileName, "wt");
  1579. const mstudio_meshvertexdata_t * pVertData = pmesh->GetVertexData(m_pStudioHdr);
  1580. g_pFullFileSystem->FPrintf(fh, "// %d vertices, here goes:\n", numVerts);
  1581. for(int i = 0; i < numVerts; ++i)
  1582. {
  1583. int vertIndex = pvanim[i].index;
  1584. Vector pos = *pVertData->Position(vertIndex);
  1585. Vector flexPos = pos + pvanim[i].GetDeltaFloat() + *(pVertData->Normal(vertIndex)) * 0.1f;
  1586. g_pFullFileSystem->FPrintf(
  1587. fh,
  1588. "2\n%g %g %g 1 1 1\n%g %g %g 1 1 0\n",
  1589. pos.x,pos.y,pos.z,
  1590. flexPos.x, flexPos.y, flexPos.z
  1591. );
  1592. }
  1593. g_pFullFileSystem->Close(fh);
  1594. }
  1595. #endif
  1596. m_VertexCache.ComputeFlexedVertex_StreamOffset_Optimized( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 );
  1597. }
  1598. else
  1599. {
  1600. //CMiniProfilerGuard mpguard(&g_mp_flexW,pflex[i].numverts);
  1601. mstudiovertanim_wrinkle_t *pvanim = pflex[i].pVertanimWrinkle( 0 );
  1602. m_VertexCache.ComputeFlexedVertexWrinkle_StreamOffset_Optimized( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 );
  1603. }
  1604. }
  1605. }
  1606. //-----------------------------------------------------------------------------
  1607. // Purpose:
  1608. //
  1609. // ** Only execute this function if device supports stream offset **
  1610. //
  1611. // Input : pGroup - pointer to a studio mesh group
  1612. // Output : none
  1613. //-----------------------------------------------------------------------------
  1614. void CStudioRender::R_StudioFlexMeshGroup( studiomeshgroup_t *pGroup )
  1615. {
  1616. VPROF_BUDGET( "R_StudioFlexMeshGroup", VPROF_BUDGETGROUP_MODEL_RENDERING );
  1617. CMeshBuilder meshBuilder;
  1618. int nVertexOffsetInBytes = 0;
  1619. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  1620. IMesh *pMesh = pRenderContext->GetFlexMesh();
  1621. meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0, &nVertexOffsetInBytes );
  1622. // Just pos and norm deltas (tangents use same deltas as normals)
  1623. for ( int j=0; j < pGroup->m_NumVertices; j++)
  1624. {
  1625. int n = pGroup->m_pGroupIndexToMeshIndex[j];
  1626. if ( m_VertexCache.IsThinVertexFlexed(n) )
  1627. {
  1628. CachedPosNorm_t *pIn = m_VertexCache.GetThinFlexVertex(n);
  1629. meshBuilder.Position3fv( pIn->m_Position.Base() );
  1630. meshBuilder.NormalDelta3fv( pIn->m_Normal.Base() );
  1631. meshBuilder.Wrinkle1f( pIn->m_Position.w );
  1632. }
  1633. else
  1634. {
  1635. meshBuilder.Position3f( 0.0f, 0.0f, 0.0f );
  1636. meshBuilder.NormalDelta3f( 0.0f, 0.0f, 0.0f );
  1637. meshBuilder.Wrinkle1f( 0.0f );
  1638. }
  1639. meshBuilder.AdvanceVertexF<VTX_HAVEPOS | VTX_HAVENORMAL, 0>();
  1640. }
  1641. meshBuilder.End( false, false );
  1642. pGroup->m_pMesh->SetFlexMesh( pMesh, nVertexOffsetInBytes );
  1643. }
  1644. //-----------------------------------------------------------------------------
  1645. // Processes a flexed mesh to be hw skinned
  1646. //-----------------------------------------------------------------------------
  1647. void CStudioRender::R_StudioProcessFlexedMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder,
  1648. int numVertices, unsigned short* pGroupToMesh )
  1649. {
  1650. PROFILE_STUDIO("FlexMeshBuilder");
  1651. Vector4D *pStudioTangentS;
  1652. // get the vertex data
  1653. const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr );
  1654. if ( !vertData )
  1655. {
  1656. // not available
  1657. return;
  1658. }
  1659. mstudiovertex_t *pVertices = vertData->Vertex( 0 );
  1660. if ( vertData->HasTangentData() )
  1661. {
  1662. pStudioTangentS = vertData->TangentS( 0 );
  1663. Assert( pStudioTangentS->w == -1.0f || pStudioTangentS->w == 1.0f );
  1664. for ( int j=0; j < numVertices ; j++)
  1665. {
  1666. int n = pGroupToMesh[j];
  1667. mstudiovertex_t &vert = pVertices[n];
  1668. // FIXME: For now, flexed hw-skinned meshes can only have one bone
  1669. // The data must exist in the 0th hardware matrix
  1670. // Here, we are doing HW skinning, so we need to simply copy over the flex
  1671. if ( m_VertexCache.IsVertexFlexed(n) )
  1672. {
  1673. CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n);
  1674. meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() );
  1675. meshBuilder.BoneWeight( 0, 1.0f );
  1676. meshBuilder.BoneWeight( 1, 0.0f );
  1677. meshBuilder.BoneWeight( 2, 0.0f );
  1678. meshBuilder.BoneWeight( 3, 0.0f );
  1679. meshBuilder.BoneMatrix( 0, 0 );
  1680. meshBuilder.BoneMatrix( 1, 0 );
  1681. meshBuilder.BoneMatrix( 2, 0 );
  1682. meshBuilder.BoneMatrix( 3, 0 );
  1683. meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() );
  1684. meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() );
  1685. Assert( pFlexedVertex->m_TangentS.w == -1.0f || pFlexedVertex->m_TangentS.w == 1.0f );
  1686. meshBuilder.UserData( pFlexedVertex->m_TangentS.Base() );
  1687. }
  1688. else
  1689. {
  1690. meshBuilder.Position3fv( vert.m_vecPosition.Base() );
  1691. meshBuilder.BoneWeight( 0, 1.0f );
  1692. meshBuilder.BoneWeight( 1, 0.0f );
  1693. meshBuilder.BoneWeight( 2, 0.0f );
  1694. meshBuilder.BoneWeight( 3, 0.0f );
  1695. meshBuilder.BoneMatrix( 0, 0 );
  1696. meshBuilder.BoneMatrix( 1, 0 );
  1697. meshBuilder.BoneMatrix( 2, 0 );
  1698. meshBuilder.BoneMatrix( 3, 0 );
  1699. meshBuilder.Normal3fv( vert.m_vecNormal.Base() );
  1700. meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() );
  1701. Assert( pStudioTangentS[n].w == -1.0f || pStudioTangentS[n].w == 1.0f );
  1702. meshBuilder.UserData( pStudioTangentS[n].Base() );
  1703. }
  1704. meshBuilder.AdvanceVertexF<VTX_HAVEPOS | VTX_HAVENORMAL, 1>();
  1705. }
  1706. }
  1707. else
  1708. {
  1709. // no TangentS, replicated code to save inner conditional
  1710. for ( int j=0; j < numVertices ; j++)
  1711. {
  1712. int n = pGroupToMesh[j];
  1713. mstudiovertex_t &vert = pVertices[n];
  1714. // FIXME: For now, flexed hw-skinned meshes can only have one bone
  1715. // The data must exist in the 0th hardware matrix
  1716. // Here, we are doing HW skinning, so we need to simply copy over the flex
  1717. if ( m_VertexCache.IsVertexFlexed(n) )
  1718. {
  1719. CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n);
  1720. meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() );
  1721. meshBuilder.BoneWeight( 0, 1.0f );
  1722. meshBuilder.BoneWeight( 1, 0.0f );
  1723. meshBuilder.BoneWeight( 2, 0.0f );
  1724. meshBuilder.BoneWeight( 3, 0.0f );
  1725. meshBuilder.BoneMatrix( 0, 0 );
  1726. meshBuilder.BoneMatrix( 1, 0 );
  1727. meshBuilder.BoneMatrix( 2, 0 );
  1728. meshBuilder.BoneMatrix( 3, 0 );
  1729. meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() );
  1730. }
  1731. else
  1732. {
  1733. meshBuilder.Position3fv( vert.m_vecPosition.Base() );
  1734. meshBuilder.BoneWeight( 0, 1.0f );
  1735. meshBuilder.BoneWeight( 1, 0.0f );
  1736. meshBuilder.BoneWeight( 2, 0.0f );
  1737. meshBuilder.BoneWeight( 3, 0.0f );
  1738. meshBuilder.BoneMatrix( 0, 0 );
  1739. meshBuilder.BoneMatrix( 1, 0 );
  1740. meshBuilder.BoneMatrix( 2, 0 );
  1741. meshBuilder.BoneMatrix( 3, 0 );
  1742. meshBuilder.Normal3fv( vert.m_vecNormal.Base() );
  1743. }
  1744. meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() );
  1745. meshBuilder.AdvanceVertexF<VTX_HAVEPOS | VTX_HAVENORMAL, 1>();
  1746. }
  1747. }
  1748. }
  1749. //-----------------------------------------------------------------------------
  1750. // Restores the static mesh
  1751. //-----------------------------------------------------------------------------
  1752. template<VertexCompressionType_t T> void CStudioRender::R_StudioRestoreMesh( mstudiomesh_t* pmesh, studiomeshgroup_t* pMeshData )
  1753. {
  1754. #ifdef IS_WINDOWS_PC
  1755. Vector4D *pStudioTangentS;
  1756. // get at the vertex data
  1757. const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr );
  1758. if ( !vertData )
  1759. {
  1760. // not available
  1761. return;
  1762. }
  1763. mstudiovertex_t *pVertices = vertData->Vertex( 0 );
  1764. if (vertData->HasTangentData())
  1765. {
  1766. pStudioTangentS = vertData->TangentS( 0 );
  1767. }
  1768. else
  1769. {
  1770. pStudioTangentS = NULL;
  1771. }
  1772. CMeshBuilder meshBuilder;
  1773. meshBuilder.BeginModify( pMeshData->m_pMesh );
  1774. meshBuilder.SetCompressionType( T );
  1775. for ( int j=0; j < meshBuilder.VertexCount() ; j++)
  1776. {
  1777. meshBuilder.SelectVertex(j);
  1778. int n = pMeshData->m_pGroupIndexToMeshIndex[j];
  1779. mstudiovertex_t &vert = pVertices[n];
  1780. meshBuilder.Position3fv( vert.m_vecPosition.Base() );
  1781. meshBuilder.CompressedNormal3fv<T>( vert.m_vecNormal.Base() );
  1782. meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() );
  1783. if (pStudioTangentS)
  1784. {
  1785. Assert( pStudioTangentS[n].w == -1.0f || pStudioTangentS[n].w == 1.0f );
  1786. meshBuilder.CompressedUserData<T>( pStudioTangentS[n].Base() );
  1787. }
  1788. meshBuilder.Color4ub( 255, 255, 255, 255 );
  1789. }
  1790. meshBuilder.EndModify();
  1791. #endif
  1792. }
  1793. //-----------------------------------------------------------------------------
  1794. // Draws a mesh using hardware + software skinning
  1795. //-----------------------------------------------------------------------------
  1796. int CStudioRender::R_StudioDrawGroupHWSkin( IMatRenderContext *pRenderContext, studiomeshgroup_t* pGroup, IMesh* pMesh, ColorMeshInfo_t * pColorMeshInfo )
  1797. {
  1798. PROFILE_STUDIO("HwSkin");
  1799. int numFacesRendered = 0;
  1800. #if PIX_ENABLE
  1801. char szPIXEventName[128];
  1802. sprintf( szPIXEventName, "R_StudioDrawGroupHWSkin (%s)", m_pStudioHdr->name ); // PIX
  1803. PIXEVENT( pRenderContext, szPIXEventName );
  1804. #endif
  1805. if ( m_pStudioHdr->numbones == 1 )
  1806. {
  1807. pRenderContext->MatrixMode( MATERIAL_MODEL );
  1808. pRenderContext->LoadMatrix( m_PoseToWorld[0] );
  1809. // a single bone means all verts rigidly assigned
  1810. // any bonestatechange would needlessly re-load the same matrix
  1811. // xbox can skip further hw skinning, seems ok for pc too
  1812. pRenderContext->SetNumBoneWeights( 0 );
  1813. }
  1814. if ( pColorMeshInfo )
  1815. pMesh->SetColorMesh( pColorMeshInfo->m_pMesh, pColorMeshInfo->m_nVertOffsetInBytes );
  1816. else
  1817. pMesh->SetColorMesh( NULL, 0 );
  1818. Vector4D vecDiffuseModulation;
  1819. ComputeDiffuseModulation( &vecDiffuseModulation );
  1820. for (int j = 0; j < pGroup->m_NumStrips; ++j)
  1821. {
  1822. OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j];
  1823. if ( m_pStudioHdr->numbones > 1 )
  1824. {
  1825. // Reset bone state if we're hardware skinning
  1826. pRenderContext->SetNumBoneWeights( pStrip->numBones );
  1827. for (int k = 0; k < pStrip->numBoneStateChanges; ++k)
  1828. {
  1829. OptimizedModel::BoneStateChangeHeader_t* pStateChange = pStrip->pBoneStateChange(k);
  1830. if ( pStateChange->newBoneID < 0 )
  1831. break;
  1832. pRenderContext->LoadBoneMatrix( pStateChange->hardwareID, m_PoseToWorld[pStateChange->newBoneID] );
  1833. }
  1834. }
  1835. pMesh->SetPrimitiveType( GetPrimitiveTypeForStripHeaderFlags( pStrip->flags ) );
  1836. pMesh->DrawModulated( vecDiffuseModulation, pStrip->indexOffset, pStrip->numIndices );
  1837. numFacesRendered += pGroup->m_pUniqueFaces[j];
  1838. }
  1839. pMesh->SetColorMesh( NULL, 0 );
  1840. return numFacesRendered;
  1841. }
  1842. int CStudioRender::R_StudioDrawGroupSWSkin( studiomeshgroup_t* pGroup, IMesh* pMesh )
  1843. {
  1844. int numFacesRendered = 0;
  1845. CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
  1846. // Disable skinning
  1847. pRenderContext->SetNumBoneWeights( 0 );
  1848. Vector4D vecDiffuseModulation;
  1849. ComputeDiffuseModulation( &vecDiffuseModulation );
  1850. for (int j = 0; j < pGroup->m_NumStrips; ++j)
  1851. {
  1852. OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j];
  1853. // Choose our primitive type
  1854. pMesh->SetPrimitiveType( GetPrimitiveTypeForStripHeaderFlags( pStrip->flags ) );
  1855. pMesh->DrawModulated( vecDiffuseModulation, pStrip->indexOffset, pStrip->numIndices );
  1856. numFacesRendered += pGroup->m_pUniqueFaces[j];
  1857. }
  1858. return numFacesRendered;
  1859. }
  1860. //-----------------------------------------------------------------------------
  1861. // Sets up the hw flex mesh
  1862. //-----------------------------------------------------------------------------
  1863. void CStudioRender::ComputeFlexWeights( int nFlexCount, mstudioflex_t *pFlex, MorphWeight_t *pWeights )
  1864. {
  1865. for ( int i = 0; i < nFlexCount; ++i, ++pFlex )
  1866. {
  1867. MorphWeight_t &weight = pWeights[i];
  1868. weight.m_pWeight[MORPH_WEIGHT] = RampFlexWeight( *pFlex, m_pFlexWeights[ pFlex->flexdesc ] );
  1869. weight.m_pWeight[MORPH_WEIGHT_LAGGED] = RampFlexWeight( *pFlex, m_pFlexDelayedWeights[ pFlex->flexdesc ] );
  1870. if ( pFlex->flexpair != 0 )
  1871. {
  1872. weight.m_pWeight[MORPH_WEIGHT_STEREO] = RampFlexWeight( *pFlex, m_pFlexWeights[ pFlex->flexpair ] );
  1873. weight.m_pWeight[MORPH_WEIGHT_STEREO_LAGGED] = RampFlexWeight( *pFlex, m_pFlexDelayedWeights[ pFlex->flexpair ] );
  1874. }
  1875. else
  1876. {
  1877. weight.m_pWeight[MORPH_WEIGHT_STEREO] = weight.m_pWeight[MORPH_WEIGHT];
  1878. weight.m_pWeight[MORPH_WEIGHT_STEREO_LAGGED] = weight.m_pWeight[MORPH_WEIGHT_LAGGED];
  1879. }
  1880. }
  1881. }
  1882. //-----------------------------------------------------------------------------
  1883. // Draws the mesh as tristrips using hardware
  1884. //-----------------------------------------------------------------------------
  1885. int CStudioRender::R_StudioDrawStaticMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh,
  1886. studiomeshgroup_t* pGroup, StudioModelLighting_t lighting,
  1887. float r_blend, IMaterial* pMaterial, int lod, ColorMeshInfo_t *pColorMeshes )
  1888. {
  1889. MatSysQueueMark( g_pMaterialSystem, "R_StudioDrawStaticMesh\n" );
  1890. VPROF( "R_StudioDrawStaticMesh" );
  1891. int numFacesRendered = 0;
  1892. bool bDoSoftwareLighting = !pColorMeshes &&
  1893. ((m_pRC->m_Config.bSoftwareSkin != 0) || m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame ||
  1894. (pMaterial ? pMaterial->NeedsSoftwareSkinning() : false) ||
  1895. (m_pRC->m_Config.bSoftwareLighting != 0) ||
  1896. ((lighting != LIGHTING_HARDWARE) && (lighting != LIGHTING_MOUTH) ));
  1897. // software lighting case
  1898. if ( bDoSoftwareLighting )
  1899. {
  1900. if ( m_pRC->m_Config.bNoSoftware )
  1901. return 0;
  1902. bool bTangentSpace = pMaterial ? pMaterial->NeedsTangentSpace() : false;
  1903. pRenderContext->MatrixMode( MATERIAL_MODEL );
  1904. pRenderContext->LoadIdentity();
  1905. // Hardcode the vertex format to a well-known format to make sw skin code faster
  1906. VertexFormat_t fmt = VERTEX_FORMAT_STANDARD;
  1907. Assert( ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) == 0 );
  1908. CMeshBuilder meshBuilder;
  1909. IMesh* pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh );
  1910. meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 );
  1911. R_StudioSoftwareProcessMesh( pmesh, meshBuilder,
  1912. pGroup->m_NumVertices, pGroup->m_pGroupIndexToMeshIndex,
  1913. lighting, false, r_blend, bTangentSpace, pMaterial );
  1914. meshBuilder.End();
  1915. numFacesRendered = R_StudioDrawGroupSWSkin( pGroup, pMesh );
  1916. MatSysQueueMark( g_pMaterialSystem, "END R_StudioDrawStaticMesh\n" );
  1917. return numFacesRendered;
  1918. }
  1919. // Needed when we switch back and forth between hardware + software lighting
  1920. #ifdef IS_WINDOWS_PC
  1921. if ( IsPC() && pGroup->m_MeshNeedsRestore )
  1922. {
  1923. VertexCompressionType_t compressionType = CompressionType( pGroup->m_pMesh->GetVertexFormat() );
  1924. switch ( compressionType )
  1925. {
  1926. case VERTEX_COMPRESSION_ON:
  1927. R_StudioRestoreMesh<VERTEX_COMPRESSION_ON>( pmesh, pGroup );
  1928. break;
  1929. case VERTEX_COMPRESSION_NONE:
  1930. default:
  1931. R_StudioRestoreMesh<VERTEX_COMPRESSION_NONE>( pmesh, pGroup );
  1932. break;
  1933. }
  1934. pGroup->m_MeshNeedsRestore = false;
  1935. }
  1936. #endif
  1937. // Build separate flex stream containing deltas, which will get copied into another vertex stream
  1938. bool bUseHWFlex = m_pRC->m_Config.m_bEnableHWMorph && pGroup->m_pMorph && !m_bDrawTranslucentSubModels;
  1939. bool bUseSOFlex = g_pMaterialSystemHardwareConfig->SupportsStreamOffset() && !bUseHWFlex;
  1940. if ( (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) && m_pRC->m_Config.bFlex )
  1941. {
  1942. PIXEVENT( pRenderContext, "Delta Flex Processing" );
  1943. if ( bUseHWFlex )
  1944. {
  1945. pRenderContext->BindMorph( pGroup->m_pMorph );
  1946. }
  1947. if ( bUseSOFlex )
  1948. {
  1949. R_StudioProcessFlexedMesh_StreamOffset( pmesh, lod );
  1950. R_StudioFlexMeshGroup( pGroup );
  1951. }
  1952. }
  1953. // Draw it baby
  1954. if ( pColorMeshes && ( pGroup->m_ColorMeshID != -1 ) )
  1955. {
  1956. // draw using specified color mesh
  1957. numFacesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pGroup->m_pMesh, &(pColorMeshes[pGroup->m_ColorMeshID]) );
  1958. }
  1959. else
  1960. {
  1961. numFacesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pGroup->m_pMesh, NULL );
  1962. }
  1963. if ( ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) && m_pRC->m_Config.bFlex )
  1964. {
  1965. if ( bUseHWFlex )
  1966. {
  1967. pRenderContext->BindMorph( NULL );
  1968. }
  1969. if ( bUseSOFlex )
  1970. {
  1971. pGroup->m_pMesh->DisableFlexMesh(); // clear flex stream
  1972. }
  1973. }
  1974. MatSysQueueMark( g_pMaterialSystem, "END2 R_StudioDrawStaticMesh\n" );
  1975. return numFacesRendered;
  1976. }
  1977. //-----------------------------------------------------------------------------
  1978. // Draws a dynamic mesh
  1979. //-----------------------------------------------------------------------------
  1980. int CStudioRender::R_StudioDrawDynamicMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh,
  1981. studiomeshgroup_t* pGroup, StudioModelLighting_t lighting,
  1982. float r_blend, IMaterial* pMaterial, int lod )
  1983. {
  1984. VPROF( "R_StudioDrawDynamicMesh" );
  1985. bool bDoFlex = ((pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) != 0) && m_pRC->m_Config.bFlex;
  1986. bool bQuadList = ( pGroup->m_pStripData[0].flags & OptimizedModel::STRIP_IS_QUADLIST_EXTRA ) ||
  1987. ( pGroup->m_pStripData[0].flags & OptimizedModel::STRIP_IS_QUADLIST_REG ) != 0;
  1988. bool bDoSoftwareLighting = (m_pRC->m_Config.bSoftwareLighting != 0) ||
  1989. ((lighting != LIGHTING_HARDWARE) && (lighting != LIGHTING_MOUTH) );
  1990. bool bSWSkin = bDoSoftwareLighting || m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame ||
  1991. ((pGroup->m_Flags & MESHGROUP_IS_HWSKINNED) == 0) ||
  1992. m_pRC->m_Config.bSoftwareSkin || bQuadList ||
  1993. ( pMaterial ? pMaterial->NeedsSoftwareSkinning() : false );
  1994. if ( !bDoFlex && !bSWSkin )
  1995. {
  1996. return R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, r_blend, pMaterial, lod, NULL );
  1997. }
  1998. // ---- Drawers before this might not need the vertices, so don't pay the penalty of getting them ----
  1999. // -------- Everybody else past this point (flex and/or sw skinning) expects to read vertices --------
  2000. const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr );
  2001. if ( !vertData )
  2002. {
  2003. return 0; // not available
  2004. }
  2005. MatSysQueueMark( g_pMaterialSystem, "R_StudioDrawDynamicMesh\n" );
  2006. int numFacesRendered = 0;
  2007. #ifdef _DEBUG
  2008. const char *pDebugMaterialName = NULL;
  2009. if ( pMaterial )
  2010. {
  2011. pDebugMaterialName = pMaterial->GetName();
  2012. }
  2013. #endif
  2014. pRenderContext->MatrixMode( MATERIAL_MODEL );
  2015. pRenderContext->LoadIdentity();
  2016. // Software flex verts (not a delta stream)
  2017. if ( bDoFlex )
  2018. {
  2019. R_StudioFlexVerts( pmesh, lod, bQuadList );
  2020. }
  2021. // Map quad mesh to Bicubic Bezier Patches
  2022. if ( bQuadList )
  2023. {
  2024. GenerateBicubicPatches( pmesh, pGroup, bDoFlex );
  2025. }
  2026. IMesh* pMesh;
  2027. bool bTangentSpace = pMaterial ? pMaterial->NeedsTangentSpace() : false;
  2028. VertexFormat_t fmt = bQuadList ? VERTEX_FORMAT_SUBDQUAD : VERTEX_FORMAT_STANDARD;
  2029. CMeshBuilder meshBuilder;
  2030. pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh);
  2031. if ( bQuadList )
  2032. {
  2033. int TotalFaces = 0;
  2034. for ( int s=0; s<pGroup->m_NumStrips; ++s )
  2035. {
  2036. TotalFaces += pGroup->m_pUniqueFaces[s];
  2037. }
  2038. // We're de-indexing the quad mesh, so we need to multiply the number of vertices by 4 here
  2039. meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, TotalFaces * 4, 0 );
  2040. SoftwareProcessQuadMesh( pmesh, meshBuilder, TotalFaces,
  2041. pGroup->m_pGroupIndexToMeshIndex,
  2042. pGroup->m_pTopologyIndices, bTangentSpace, bDoFlex );
  2043. }
  2044. else if ( bSWSkin )
  2045. {
  2046. meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 );
  2047. R_StudioSoftwareProcessMesh( pmesh, meshBuilder, pGroup->m_NumVertices,
  2048. pGroup->m_pGroupIndexToMeshIndex, lighting, bDoFlex,
  2049. r_blend, bTangentSpace, pMaterial );
  2050. }
  2051. else if ( bDoFlex )
  2052. {
  2053. meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 );
  2054. R_StudioProcessFlexedMesh( pmesh, meshBuilder, pGroup->m_NumVertices,
  2055. pGroup->m_pGroupIndexToMeshIndex );
  2056. }
  2057. meshBuilder.End();
  2058. if ( !bSWSkin )
  2059. {
  2060. numFacesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pMesh );
  2061. }
  2062. else
  2063. {
  2064. numFacesRendered = R_StudioDrawGroupSWSkin( pGroup, pMesh );
  2065. }
  2066. pRenderContext->SetNumBoneWeights( 0 );
  2067. pRenderContext->Bind( m_pMaterialTangentFrame );
  2068. R_StudioSoftwareProcessMesh_NormalsBatched( pRenderContext, pmesh, pGroup, lighting, bDoFlex, r_blend, m_pRC->m_Config.bDrawNormals, m_pRC->m_Config.bDrawTangentFrame );
  2069. pRenderContext->Bind( pMaterial );
  2070. MatSysQueueMark( g_pMaterialSystem, "END R_StudioDrawDynamicMesh\n" );
  2071. return numFacesRendered;
  2072. }
  2073. //-----------------------------------------------------------------------------
  2074. // Sets the material vars for the eye vertex shader
  2075. //-----------------------------------------------------------------------------
  2076. static unsigned int eyeOriginCache = 0;
  2077. static unsigned int eyeUpCache = 0;
  2078. static unsigned int irisUCache = 0;
  2079. static unsigned int irisVCache = 0;
  2080. static unsigned int glintUCache = 0;
  2081. static unsigned int glintVCache = 0;
  2082. void CStudioRender::SetEyeMaterialVars( IMaterial* pMaterial, mstudioeyeball_t* peyeball,
  2083. Vector const& eyeOrigin, const matrix3x4_t& irisTransform, const matrix3x4_t& glintTransform )
  2084. {
  2085. if ( !pMaterial )
  2086. return;
  2087. IMaterialVar* pVar = pMaterial->FindVarFast( "$eyeorigin", &eyeOriginCache );
  2088. if (pVar)
  2089. {
  2090. pVar->SetVecValue( eyeOrigin.Base(), 3 );
  2091. }
  2092. pVar = pMaterial->FindVarFast( "$eyeup", &eyeUpCache );
  2093. if (pVar)
  2094. {
  2095. pVar->SetVecValue( peyeball->up.Base(), 3 );
  2096. }
  2097. pVar = pMaterial->FindVarFast( "$irisu", &irisUCache );
  2098. if (pVar)
  2099. {
  2100. pVar->SetVecValue( irisTransform[0], 4 );
  2101. }
  2102. pVar = pMaterial->FindVarFast( "$irisv", &irisVCache );
  2103. if (pVar)
  2104. {
  2105. pVar->SetVecValue( irisTransform[1], 4 );
  2106. }
  2107. pVar = pMaterial->FindVarFast( "$glintu", &glintUCache );
  2108. if (pVar)
  2109. {
  2110. pVar->SetVecValue( glintTransform[0], 4 );
  2111. }
  2112. pVar = pMaterial->FindVarFast( "$glintv", &glintVCache );
  2113. if (pVar)
  2114. {
  2115. pVar->SetVecValue( glintTransform[1], 4 );
  2116. }
  2117. }
  2118. //-----------------------------------------------------------------------------
  2119. // Specialized routine to draw the eyeball
  2120. //-----------------------------------------------------------------------------
  2121. static unsigned int glintCache = 0;
  2122. int CStudioRender::R_StudioDrawEyeball( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData,
  2123. StudioModelLighting_t lighting, IMaterial *pMaterial, int lod )
  2124. {
  2125. if ( !m_pRC->m_Config.bEyes )
  2126. {
  2127. return 0;
  2128. }
  2129. int j;
  2130. int numFacesRendered = 0;
  2131. // See if any meshes in the group want to go down the static path...
  2132. bool bFlexStatic = false;
  2133. bool bIsHardwareSkinnedData = false;
  2134. bool bQuadList = false;
  2135. for (j = 0; j < pMeshData->m_NumGroup; ++j)
  2136. {
  2137. studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j];
  2138. if ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED )
  2139. bFlexStatic = true;
  2140. if ( pGroup->m_Flags & MESHGROUP_IS_HWSKINNED )
  2141. bIsHardwareSkinnedData = true;
  2142. if ( pGroup->m_pStripData[0].flags & OptimizedModel::STRIP_IS_QUADLIST_EXTRA ||
  2143. pGroup->m_pStripData[0].flags & OptimizedModel::STRIP_IS_QUADLIST_REG )
  2144. {
  2145. bIsHardwareSkinnedData = false;
  2146. bQuadList = true;
  2147. }
  2148. }
  2149. // Take the static path for new flexed models on DX9 hardware
  2150. bool bShouldHardwareSkin = bIsHardwareSkinnedData && bFlexStatic &&
  2151. ( lighting != LIGHTING_SOFTWARE ) && ( !m_pRC->m_Config.bSoftwareSkin );
  2152. // EXPLICITLY DISABLING NEED FOR CPU SIDE VERTS ON CONSOLES!!!!
  2153. // PORTAL2 CONSOLE: Vertex/Index data will never be read again (no model decals or load-time lighting), so discard the VVD data and create a new header
  2154. // If we ever have a flexed eye vert on a model on the console, badness will ensue (ie. won't flex).
  2155. if ( IsGameConsole() )
  2156. {
  2157. bShouldHardwareSkin = true;
  2158. bFlexStatic = false;
  2159. }
  2160. pRenderContext->MatrixMode( MATERIAL_MODEL );
  2161. pRenderContext->LoadIdentity();
  2162. // Software flex eyeball verts (not a delta stream)
  2163. if ( bFlexStatic && !bShouldHardwareSkin )
  2164. {
  2165. R_StudioFlexVerts( pmesh, lod, bQuadList );
  2166. }
  2167. mstudioeyeball_t *peyeball = m_pSubModel->pEyeball(pmesh->materialparam);
  2168. // We'll need this to compute normals
  2169. Vector org;
  2170. VectorTransform( peyeball->org, m_pBoneToWorld[peyeball->bone], org );
  2171. // Compute the glint projection
  2172. matrix3x4_t glintMat;
  2173. ComputeGlintTextureProjection( &m_pEyeballState[pmesh->materialparam], m_pRC->m_ViewRight, m_pRC->m_ViewUp, glintMat );
  2174. if ( !m_pRC->m_Config.bWireframe )
  2175. {
  2176. // Compute the glint procedural texture
  2177. IMaterialVar* pGlintVar = pMaterial->FindVarFast( "$glint", &glintCache );
  2178. if (pGlintVar)
  2179. {
  2180. R_StudioEyeballGlint( &m_pEyeballState[pmesh->materialparam], pGlintVar, m_pRC->m_ViewRight, m_pRC->m_ViewUp, m_pRC->m_ViewOrigin );
  2181. }
  2182. SetEyeMaterialVars( pMaterial, peyeball, org, m_pEyeballState[pmesh->materialparam].mat, glintMat );
  2183. }
  2184. if ( bShouldHardwareSkin )
  2185. {
  2186. for ( j = 0; j < pMeshData->m_NumGroup; ++j )
  2187. {
  2188. studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j];
  2189. numFacesRendered += R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod, NULL );
  2190. }
  2191. return numFacesRendered;
  2192. }
  2193. // FIXME: We could compile a static vertex buffer in this case
  2194. // if there's no flexed verts.
  2195. const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr );
  2196. if ( !vertData )
  2197. {
  2198. // not available
  2199. return 0;
  2200. }
  2201. mstudiovertex_t *pVertices = vertData->Vertex( 0 );
  2202. pRenderContext->SetNumBoneWeights( 0 );
  2203. m_VertexCache.SetupComputation( pmesh );
  2204. int nAlpnaInt = RoundFloatToInt( m_pRC->m_AlphaMod * 255 );
  2205. unsigned char a = clamp( nAlpnaInt, 0, 255 );
  2206. Vector position, normal, color;
  2207. // setup the call
  2208. R_InitLightEffectsWorld3();
  2209. Vector4D vecDiffuseModulation;
  2210. ComputeDiffuseModulation( &vecDiffuseModulation );
  2211. // Render the puppy
  2212. CMeshBuilder meshBuilder;
  2213. bool bTangentSpace = pMaterial ? pMaterial->NeedsTangentSpace() : false;
  2214. VertexFormat_t fmt = bQuadList ? VERTEX_FORMAT_SUBDQUAD : VERTEX_FORMAT_STANDARD;
  2215. bool useHWLighting = m_pRC->m_Config.m_bSupportsVertexAndPixelShaders && !m_pRC->m_Config.bSoftwareLighting;
  2216. // Draw all the various mesh groups...
  2217. for ( j = 0; j < pMeshData->m_NumGroup; ++j )
  2218. {
  2219. studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j];
  2220. IMesh* pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh );
  2221. if ( bQuadList )
  2222. {
  2223. int TotalFaces = 0;
  2224. for ( int s=0; s<pGroup->m_NumStrips; ++s )
  2225. {
  2226. TotalFaces += pGroup->m_pUniqueFaces[s];
  2227. }
  2228. // Map quad mesh to Bicubic Bezier Patches
  2229. GenerateBicubicPatches( pmesh, pGroup, bFlexStatic );
  2230. meshBuilder.Begin( pMesh, MATERIAL_SUBD_QUADS_EXTRA, TotalFaces, 0 );
  2231. SoftwareProcessQuadMesh( pmesh, meshBuilder, TotalFaces,
  2232. pGroup->m_pGroupIndexToMeshIndex,
  2233. pGroup->m_pTopologyIndices, bTangentSpace, bFlexStatic );
  2234. }
  2235. else
  2236. {
  2237. // garymcthack! need to look at the strip flags to figure out what it is.
  2238. meshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, pmesh->numvertices, 0 );
  2239. // meshBuilder.Begin( pMesh, MATERIAL_TRIANGLE_STRIP, pmesh->numvertices, 0 );
  2240. //VPROF_INCREMENT_COUNTER( "TransformFlexVerts", pGroup->m_NumVertices );
  2241. for ( int i=0; i < pGroup->m_NumVertices; ++i)
  2242. {
  2243. int n = pGroup->m_pGroupIndexToMeshIndex[i];
  2244. mstudiovertex_t &vert = pVertices[n];
  2245. CachedPosNorm_t* pWorldVert = m_VertexCache.CreateWorldVertex(n);
  2246. // transform into world space
  2247. if ( m_VertexCache.IsVertexFlexed(n) )
  2248. {
  2249. CachedPosNormTan_t* pFlexVert = m_VertexCache.GetFlexVertex(n);
  2250. R_StudioTransform( pFlexVert->m_Position.AsVector3D(), &vert.m_BoneWeights, m_PoseToWorld, pWorldVert->m_Position.AsVector3D() );
  2251. R_StudioRotate( pFlexVert->m_Normal.AsVector3D(), &vert.m_BoneWeights, m_PoseToWorld, pWorldVert->m_Normal.AsVector3D() );
  2252. Assert( pWorldVert->m_Normal.x >= -1.05f && pWorldVert->m_Normal.x <= 1.05f );
  2253. Assert( pWorldVert->m_Normal.y >= -1.05f && pWorldVert->m_Normal.y <= 1.05f );
  2254. Assert( pWorldVert->m_Normal.z >= -1.05f && pWorldVert->m_Normal.z <= 1.05f );
  2255. }
  2256. else
  2257. {
  2258. R_StudioTransform( vert.m_vecPosition, &vert.m_BoneWeights, m_PoseToWorld, pWorldVert->m_Position.AsVector3D() );
  2259. R_StudioRotate( vert.m_vecNormal, &vert.m_BoneWeights, m_PoseToWorld, pWorldVert->m_Normal.AsVector3D() );
  2260. Assert( pWorldVert->m_Normal.x >= -1.05f && pWorldVert->m_Normal.x <= 1.05f );
  2261. Assert( pWorldVert->m_Normal.y >= -1.05f && pWorldVert->m_Normal.y <= 1.05f );
  2262. Assert( pWorldVert->m_Normal.z >= -1.05f && pWorldVert->m_Normal.z <= 1.05f );
  2263. }
  2264. // Don't bother to light in software when we've got vertex + pixel shaders.
  2265. meshBuilder.Position3fv( pWorldVert->m_Position.Base() );
  2266. if (useHWLighting)
  2267. {
  2268. meshBuilder.Normal3fv( pWorldVert->m_Normal.Base() );
  2269. }
  2270. else
  2271. {
  2272. R_StudioEyeballNormal( peyeball, org, pWorldVert->m_Position.AsVector3D(), pWorldVert->m_Normal.AsVector3D() );
  2273. // This isn't really used, but since the meshbuilder checks for messed up
  2274. // normals, let's do this here in debug mode.
  2275. // WRONGO YOU FRIGGIN IDIOT!!!!!!!!!!
  2276. // DX7 needs these for the flashlight.
  2277. meshBuilder.Normal3fv( pWorldVert->m_Normal.Base() );
  2278. R_ComputeLightAtPoint3( pWorldVert->m_Position.AsVector3D(), pWorldVert->m_Normal.AsVector3D(), color );
  2279. unsigned char r = LinearToLightmap( color.x );
  2280. unsigned char g = LinearToLightmap( color.y );
  2281. unsigned char b = LinearToLightmap( color.z );
  2282. meshBuilder.Color4ub( r, g, b, a );
  2283. }
  2284. meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() );
  2285. meshBuilder.AdvanceVertexF<VTX_HAVEPOS | VTX_HAVENORMAL | VTX_HAVECOLOR, 1>();
  2286. }
  2287. }
  2288. meshBuilder.End();
  2289. pMesh->DrawModulated( vecDiffuseModulation );
  2290. for ( int k=0; k<pGroup->m_NumStrips; k++ )
  2291. {
  2292. numFacesRendered += pGroup->m_pUniqueFaces[k];
  2293. }
  2294. pRenderContext->SetNumBoneWeights( 0 );
  2295. pRenderContext->Bind( m_pMaterialTangentFrame );
  2296. R_StudioSoftwareProcessMesh_NormalsBatched( pRenderContext, pmesh, pGroup, lighting, true, false, m_pRC->m_Config.bDrawNormals, m_pRC->m_Config.bDrawTangentFrame );
  2297. pRenderContext->Bind( pMaterial );
  2298. }
  2299. return numFacesRendered;
  2300. }
  2301. //-----------------------------------------------------------------------------
  2302. // Draws a mesh
  2303. //-----------------------------------------------------------------------------
  2304. int CStudioRender::R_StudioDrawMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData,
  2305. StudioModelLighting_t lighting, IMaterial *pMaterial,
  2306. ColorMeshInfo_t *pColorMeshes, int lod )
  2307. {
  2308. VPROF( "R_StudioDrawMesh" );
  2309. int numFacesRendered = 0;
  2310. // Draw all the various mesh groups...
  2311. for ( int j = 0; j < pMeshData->m_NumGroup; ++j )
  2312. {
  2313. studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j];
  2314. // Use the hardware if the mesh is hw skinned and we can put flexes on another stream
  2315. // Otherwise, we gotta do some expensive locks
  2316. bool bIsHardwareSkinnedData = ( pGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) != 0;
  2317. bool bIsQuadMesh = ( pMeshData->m_pMeshGroup->m_pStripData[0].flags & OptimizedModel::STRIP_IS_QUADLIST_EXTRA ) ||
  2318. ( pMeshData->m_pMeshGroup->m_pStripData[0].flags & OptimizedModel::STRIP_IS_QUADLIST_REG ) != 0;
  2319. bool bShouldHardwareSkin = bIsHardwareSkinnedData && !bIsQuadMesh && ( lighting != LIGHTING_SOFTWARE );
  2320. if ( bShouldHardwareSkin && !m_pRC->m_Config.bDrawNormals && !m_pRC->m_Config.bDrawTangentFrame && !m_pRC->m_Config.bWireframe )
  2321. {
  2322. if ( !m_pRC->m_Config.bNoHardware )
  2323. {
  2324. numFacesRendered += R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod, pColorMeshes );
  2325. }
  2326. }
  2327. else
  2328. {
  2329. if ( !m_pRC->m_Config.bNoSoftware )
  2330. {
  2331. numFacesRendered += R_StudioDrawDynamicMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod );
  2332. }
  2333. }
  2334. }
  2335. return numFacesRendered;
  2336. }
  2337. //-----------------------------------------------------------------------------
  2338. // Inserts translucent mesh into list
  2339. //-----------------------------------------------------------------------------
  2340. template< class T >
  2341. void InsertRenderable( int mesh, T val, int count, int* pIndices, T* pValList )
  2342. {
  2343. // Compute insertion point...
  2344. int i;
  2345. for ( i = count; --i >= 0; )
  2346. {
  2347. if (val < pValList[i])
  2348. break;
  2349. // Shift down
  2350. pIndices[i + 1] = pIndices[i];
  2351. pValList[i+1] = pValList[i];
  2352. }
  2353. // Insert at insertion point
  2354. ++i;
  2355. pValList[i] = val;
  2356. pIndices[i] = mesh;
  2357. }
  2358. //-----------------------------------------------------------------------------
  2359. // Sorts the meshes
  2360. //-----------------------------------------------------------------------------
  2361. int CStudioRender::SortMeshes( int* pIndices, IMaterial **ppMaterials,
  2362. short* pskinref, Vector const& vforward, Vector const& r_origin )
  2363. {
  2364. int numMeshes = 0;
  2365. if (m_bDrawTranslucentSubModels)
  2366. {
  2367. // float* pDist = (float*)stackalloc( m_pSubModel->nummeshes * sizeof(float) );
  2368. // Sort each model piece by it's center, if it's translucent
  2369. for (int i = 0; i < m_pSubModel->nummeshes; ++i)
  2370. {
  2371. // Don't add opaque materials
  2372. mstudiomesh_t* pmesh = m_pSubModel->pMesh(i);
  2373. IMaterial *pMaterial = ppMaterials[pskinref[pmesh->material]];
  2374. if( !pMaterial || !pMaterial->IsTranslucent() )
  2375. continue;
  2376. // FIXME: put the "center" of the mesh into delta
  2377. // Vector delta;
  2378. // VectorSubtract( delta, r_origin, delta );
  2379. // float dist = DotProduct( delta, vforward );
  2380. // Add it to our lists
  2381. // InsertRenderable( i, dist, numMeshes, pIndices, pDist );
  2382. // One more mesh
  2383. ++numMeshes;
  2384. }
  2385. }
  2386. else
  2387. {
  2388. IMaterial** ppMat = (IMaterial**)stackalloc( m_pSubModel->nummeshes * sizeof(IMaterial*) );
  2389. // Sort by material type
  2390. for (int i = 0; i < m_pSubModel->nummeshes; ++i)
  2391. {
  2392. mstudiomesh_t* pmesh = m_pSubModel->pMesh(i);
  2393. IMaterial *pMaterial = ppMaterials[pskinref[pmesh->material]];
  2394. if( !pMaterial )
  2395. continue;
  2396. // Don't add translucent materials
  2397. if (( !m_pRC->m_Config.bWireframe ) && pMaterial->IsTranslucent() )
  2398. continue;
  2399. // Add it to our lists
  2400. InsertRenderable( i, pMaterial, numMeshes, pIndices, ppMat );
  2401. // One more mesh
  2402. ++numMeshes;
  2403. }
  2404. }
  2405. return numMeshes;
  2406. }
  2407. //-----------------------------------------------------------------------------
  2408. // R_StudioDrawPoints
  2409. //
  2410. // Returns the number of triangles rendered.
  2411. //-----------------------------------------------------------------------------
  2412. #pragma warning (disable:4189)
  2413. int CStudioRender::R_StudioDrawPoints( IMatRenderContext *pRenderContext, int skin, void /*IClientEntity*/ *pClientEntity,
  2414. IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes )
  2415. {
  2416. VPROF( "R_StudioDrawPoints" );
  2417. int i;
  2418. int numFacesRendered = 0;
  2419. #if 0 // garymcthack
  2420. if ( m_pSubModel->numfaces == 0 )
  2421. return 0;
  2422. #endif
  2423. // happens when there's a model load failure
  2424. if ( m_pStudioMeshes == 0 )
  2425. return 0;
  2426. if ( m_pRC->m_Config.bWireframe && m_bDrawTranslucentSubModels )
  2427. return 0;
  2428. // ConDMsg("%d: %d %d\n", pimesh->numFaces, pimesh->numVertices, pimesh->numNormals );
  2429. if ( m_pRC->m_Config.skin )
  2430. {
  2431. skin = m_pRC->m_Config.skin;
  2432. if ( skin >= m_pStudioHdr->numskinfamilies )
  2433. {
  2434. skin = 0;
  2435. }
  2436. }
  2437. // get skinref array
  2438. short *pskinref = m_pStudioHdr->pSkinref( 0 );
  2439. if ( skin > 0 && skin < m_pStudioHdr->numskinfamilies )
  2440. {
  2441. pskinref += ( skin * m_pStudioHdr->numskinref );
  2442. }
  2443. // FIXME: Activate sorting on a mesh level
  2444. // int* pIndices = (int*)stackalloc( m_pSubModel->nummeshes * sizeof(int) );
  2445. // int numMeshes = SortMeshes( pIndices, ppMaterials, pskinref, vforward, r_origin );
  2446. bool bHasMaterialOverride = ( m_pRC->m_pForcedMaterial[ 0 ] || ( m_pRC->m_nForcedMaterialType == OVERRIDE_DEPTH_WRITE ) );
  2447. // draw each mesh
  2448. for ( i = 0; i < m_pSubModel->nummeshes; ++i)
  2449. {
  2450. mstudiomesh_t *pmesh = m_pSubModel->pMesh(i);
  2451. studiomeshdata_t *pMeshData = &m_pStudioMeshes[pmesh->meshid];
  2452. Assert( pMeshData );
  2453. if ( !pMeshData->m_NumGroup )
  2454. continue;
  2455. if ( !pMaterialFlags )
  2456. continue;
  2457. StudioModelLighting_t lighting = LIGHTING_HARDWARE;
  2458. int materialFlags = pMaterialFlags[pskinref[pmesh->material]];
  2459. IMaterial* pMaterial = R_StudioSetupSkinAndLighting( pRenderContext, pskinref[ pmesh->material ], ppMaterials, materialFlags, pClientEntity, pColorMeshes, lighting );
  2460. if ( !pMaterial )
  2461. continue;
  2462. #ifdef _DEBUG
  2463. char const *materialName = pMaterial->GetName();
  2464. #endif
  2465. // Set up flex data - this is the CPU flex cache...do we really need this at all if we're morphing?
  2466. m_VertexCache.SetMesh( i );
  2467. // The following are special cases that can't be covered with the normal static/dynamic methods due to optimization reasons
  2468. // NOTE: If we have a material override, we don't need to do eyeballs differently
  2469. int nType = bHasMaterialOverride ? 0 : pmesh->materialtype;
  2470. switch( nType )
  2471. {
  2472. case 1: // eyeballs
  2473. numFacesRendered += R_StudioDrawEyeball( pRenderContext, pmesh, pMeshData, lighting, pMaterial, lod );
  2474. break;
  2475. default:
  2476. numFacesRendered += R_StudioDrawMesh( pRenderContext, pmesh, pMeshData, lighting, pMaterial, pColorMeshes, lod );
  2477. break;
  2478. }
  2479. }
  2480. // Reset this state so it doesn't hose other parts of rendering
  2481. pRenderContext->SetNumBoneWeights( 0 );
  2482. return numFacesRendered;
  2483. }
  2484. #pragma warning (default:4189)