Counter Strike: Global Offensive Source Code


//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======//
//
// Purpose: Support for mapping from a quad mesh to Bicubic Patches, as a means
// of rendering approximate Catmull-Clark subdivision surfaces
//
//===========================================================================//

#include "studio.h"
#include "studiorendercontext.h"
#include "materialsystem/imaterialsystem.h"
#include "materialsystem/imaterial.h"
#include "materialsystem/imaterialvar.h"
#include "materialsystem/itexture.h"
#include "materialsystem/imesh.h"
#include "mathlib/mathlib.h"
#include "studiorender.h"
#include "optimize.h"
#include "tier1/convar.h"
#include "tier1/keyvalues.h"
#include "tier0/vprof.h"

// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"

#define R_STUDIOSUBD
#include "r_studiosubd_patches.h"

#ifdef _DEBUG
// Temporary debug arrays
extern CUtlVector<Vector4D> g_DebugCornerPositions;
extern CUtlVector<Vector4D> g_DebugEdgePositions;
extern CUtlVector<Vector4D> g_DebugInteriorPositions;
#endif

//
// Check out CL# 584588 for an SSE-ized version of the older versions of these
// routines, which came from an older MS doc, by way of the DX10 SDK
//
static void R_TransformVert( const Vector *pSrcPos, matrix3x4_t *pSkinMat, Vector4DAligned &pos )
{
	VPROF_BUDGET( "R_TransformVert", _T("SubD Rendering") );

	// NOTE: Could add SSE stuff here, if we knew what SSE stuff could make it faster
	pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3];
	pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3];
	pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3];
	pos.w = 1.0f;
}
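
// The NOTE above asks for an SSE version; here is a minimal sketch of one,
// assuming SSE4.1 ( <smmintrin.h> ) and 16-byte-aligned matrix rows. It is
// illustrative only and not part of the original file, hence disabled.
#if 0
static void R_TransformVertSSE41( const Vector *pSrcPos, matrix3x4_t *pSkinMat, Vector4DAligned &pos )
{
	// Lanes 0..3 hold ( x, y, z, 1 ), so a dot product against a matrix row
	// computes one transformed component, translation term included
	__m128 src = _mm_set_ps( 1.0f, pSrcPos->z, pSrcPos->y, pSrcPos->x );
	pos.x = _mm_cvtss_f32( _mm_dp_ps( _mm_load_ps( (*pSkinMat)[0] ), src, 0xF1 ) );
	pos.y = _mm_cvtss_f32( _mm_dp_ps( _mm_load_ps( (*pSkinMat)[1] ), src, 0xF1 ) );
	pos.z = _mm_cvtss_f32( _mm_dp_ps( _mm_load_ps( (*pSkinMat)[2] ), src, 0xF1 ) );
	pos.w = 1.0f;
}
#endif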
// NOTE: This duplicates the scalar ComputeSkinMatrix code path elsewhere in the renderer
static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix )
{
	VPROF_BUDGET( "ComputeSkinMatrixSSE", _T("SubD Rendering") );

	// NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization
#if defined( _WIN32 ) && !defined( WIN64 ) && !defined( _X360 )
	switch( boneweights.numbones )
	{
	default:
	case 1:
		return &pPoseToWorld[boneweights.bone[0]];

	case 2:
		{
			matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
			matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
			float *pWeights = boneweights.weight;

			_asm
			{
				mov		eax, DWORD PTR [pWeights]
				movss	xmm6, dword ptr[eax]		; boneweights.weight[0]
				movss	xmm7, dword ptr[eax + 4]	; boneweights.weight[1]

				mov		eax, DWORD PTR [boneMat0]
				mov		ecx, DWORD PTR [boneMat1]
				mov		edi, DWORD PTR [scratchMatrix]

				// Fill xmm6 and xmm7 with the bone weights
				shufps	xmm6, xmm6, 0
				shufps	xmm7, xmm7, 0

				// Load up all rows of the two matrices
				movaps	xmm0, XMMWORD PTR [eax]
				movaps	xmm1, XMMWORD PTR [ecx]
				movaps	xmm2, XMMWORD PTR [eax + 16]
				movaps	xmm3, XMMWORD PTR [ecx + 16]
				movaps	xmm4, XMMWORD PTR [eax + 32]
				movaps	xmm5, XMMWORD PTR [ecx + 32]

				// Multiply the rows by the weights
				mulps	xmm0, xmm6
				mulps	xmm1, xmm7
				mulps	xmm2, xmm6
				mulps	xmm3, xmm7
				mulps	xmm4, xmm6
				mulps	xmm5, xmm7

				addps	xmm0, xmm1
				addps	xmm2, xmm3
				addps	xmm4, xmm5

				movaps	XMMWORD PTR [edi], xmm0
				movaps	XMMWORD PTR [edi + 16], xmm2
				movaps	XMMWORD PTR [edi + 32], xmm4
			}
		}
		return &scratchMatrix;

	case 3:
		{
			matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
			matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
			matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
			float *pWeights = boneweights.weight;

			_asm
			{
				mov		eax, DWORD PTR [pWeights]
				movss	xmm5, dword ptr[eax]		; boneweights.weight[0]
				movss	xmm6, dword ptr[eax + 4]	; boneweights.weight[1]
				movss	xmm7, dword ptr[eax + 8]	; boneweights.weight[2]

				mov		eax, DWORD PTR [boneMat0]
				mov		ecx, DWORD PTR [boneMat1]
				mov		edx, DWORD PTR [boneMat2]
				mov		edi, DWORD PTR [scratchMatrix]

				// Fill xmm5, 6, and 7 with the bone weights
				shufps	xmm5, xmm5, 0
				shufps	xmm6, xmm6, 0
				shufps	xmm7, xmm7, 0

				// Load up the first row of the three matrices
				movaps	xmm0, XMMWORD PTR [eax]
				movaps	xmm1, XMMWORD PTR [ecx]
				movaps	xmm2, XMMWORD PTR [edx]

				// Multiply the rows by the weights
				mulps	xmm0, xmm5
				mulps	xmm1, xmm6
				mulps	xmm2, xmm7

				addps	xmm0, xmm1
				addps	xmm0, xmm2
				movaps	XMMWORD PTR [edi], xmm0

				// Load up the second row of the three matrices
				movaps	xmm0, XMMWORD PTR [eax + 16]
				movaps	xmm1, XMMWORD PTR [ecx + 16]
				movaps	xmm2, XMMWORD PTR [edx + 16]

				// Multiply the rows by the weights
				mulps	xmm0, xmm5
				mulps	xmm1, xmm6
				mulps	xmm2, xmm7

				addps	xmm0, xmm1
				addps	xmm0, xmm2
				movaps	XMMWORD PTR [edi + 16], xmm0

				// Load up the third row of the three matrices
				movaps	xmm0, XMMWORD PTR [eax + 32]
				movaps	xmm1, XMMWORD PTR [ecx + 32]
				movaps	xmm2, XMMWORD PTR [edx + 32]

				// Multiply the rows by the weights
				mulps	xmm0, xmm5
				mulps	xmm1, xmm6
				mulps	xmm2, xmm7

				addps	xmm0, xmm1
				addps	xmm0, xmm2
				movaps	XMMWORD PTR [edi + 32], xmm0
			}
		}
		return &scratchMatrix;

	case 4:
		{
			matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]];
			matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]];
			matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]];
			matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]];
			float *pWeights = boneweights.weight;

			_asm
			{
				mov		eax, DWORD PTR [pWeights]
				movss	xmm4, dword ptr[eax]		; boneweights.weight[0]
				movss	xmm5, dword ptr[eax + 4]	; boneweights.weight[1]
				movss	xmm6, dword ptr[eax + 8]	; boneweights.weight[2]
				movss	xmm7, dword ptr[eax + 12]	; boneweights.weight[3]

				mov		eax, DWORD PTR [boneMat0]
				mov		ecx, DWORD PTR [boneMat1]
				mov		edx, DWORD PTR [boneMat2]
				mov		esi, DWORD PTR [boneMat3]
				mov		edi, DWORD PTR [scratchMatrix]

				// Fill xmm4, 5, 6, and 7 with the bone weights
				shufps	xmm4, xmm4, 0
				shufps	xmm5, xmm5, 0
				shufps	xmm6, xmm6, 0
				shufps	xmm7, xmm7, 0

				// Load up the first row of the four matrices
				movaps	xmm0, XMMWORD PTR [eax]
				movaps	xmm1, XMMWORD PTR [ecx]
				movaps	xmm2, XMMWORD PTR [edx]
				movaps	xmm3, XMMWORD PTR [esi]

				// Multiply the rows by the weights
				mulps	xmm0, xmm4
				mulps	xmm1, xmm5
				mulps	xmm2, xmm6
				mulps	xmm3, xmm7

				addps	xmm0, xmm1
				addps	xmm2, xmm3
				addps	xmm0, xmm2
				movaps	XMMWORD PTR [edi], xmm0

				// Load up the second row of the four matrices
				movaps	xmm0, XMMWORD PTR [eax + 16]
				movaps	xmm1, XMMWORD PTR [ecx + 16]
				movaps	xmm2, XMMWORD PTR [edx + 16]
				movaps	xmm3, XMMWORD PTR [esi + 16]

				// Multiply the rows by the weights
				mulps	xmm0, xmm4
				mulps	xmm1, xmm5
				mulps	xmm2, xmm6
				mulps	xmm3, xmm7

				addps	xmm0, xmm1
				addps	xmm2, xmm3
				addps	xmm0, xmm2
				movaps	XMMWORD PTR [edi + 16], xmm0

				// Load up the third row of the four matrices
				movaps	xmm0, XMMWORD PTR [eax + 32]
				movaps	xmm1, XMMWORD PTR [ecx + 32]
				movaps	xmm2, XMMWORD PTR [edx + 32]
				movaps	xmm3, XMMWORD PTR [esi + 32]

				// Multiply the rows by the weights
				mulps	xmm0, xmm4
				mulps	xmm1, xmm5
				mulps	xmm2, xmm6
				mulps	xmm3, xmm7

				addps	xmm0, xmm1
				addps	xmm2, xmm3
				addps	xmm0, xmm2
				movaps	XMMWORD PTR [edi + 32], xmm0
			}
		}
		return &scratchMatrix;
	}
#else
#ifndef LINUX
#pragma message( "ComputeSkinMatrixSSE C implementation only" )
#endif
	extern matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix );
	return ComputeSkinMatrix( boneweights, pPoseToWorld, scratchMatrix );
#endif

	Assert( 0 );
	return NULL;
}
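
// For reference, the weighted blend the asm above performs is just a per-row,
// per-column weighted sum of the bone matrices. A minimal scalar sketch
// (illustrative only, not the actual ComputeSkinMatrix implementation):
#if 0
static void BlendBoneMatrices( matrix3x4_t **ppBoneMats, const float *pWeights, int nBones, matrix3x4_t &result )
{
	for ( int r = 0; r < 3; ++r )
	{
		for ( int c = 0; c < 4; ++c )
		{
			float flSum = 0.0f;
			for ( int b = 0; b < nBones; ++b )
			{
				flSum += pWeights[b] * (*ppBoneMats[b])[r][c];
			}
			result[r][c] = flSum;
		}
	}
}
#endif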
#ifdef _DEBUG
static ConVar mat_tess_dump( "mat_tess_dump", "0", FCVAR_CHEAT );
#endif

void CStudioRender::SkinSubDCage( mstudiovertex_t *pVertices, int nNumVertices,
								  matrix3x4_t *pPoseToWorld, CCachedRenderData &vertexCache,
								  unsigned short* pGroupToMesh, fltx4 *vOutput, bool bDoFlex )
{
	VPROF_BUDGET( "CStudioRender::SkinSubDCage", _T("SubD Rendering") );
	Vector *pSrcPos;
	ALIGN16 matrix3x4_t *pSkinMat, temp ALIGN16_POST;

	Assert( nNumVertices > 0 );
	for ( int j=0; j < nNumVertices; ++j )
	{
		mstudiovertex_t &vert = pVertices[pGroupToMesh[j]];
		pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp );

		if ( bDoFlex && vertexCache.IsVertexFlexed( pGroupToMesh[j] ) )
		{
			CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex( pGroupToMesh[j] );
			pSrcPos = &pFlexedVertex->m_Position.AsVector3D();

			// Copy strange signed, 0..3 wrinkle tangent-flip encoding over to tangent.w
			pFlexedVertex->m_TangentS.w = pFlexedVertex->m_Position.w;
		}
		else // non-flexed case
		{
			pSrcPos = &vert.m_vecPosition;
		}

		// Transform into world space
		Vector4DAligned vTemp;
		R_TransformVert( pSrcPos, pSkinMat, vTemp );
		vOutput[j] = LoadAlignedSIMD( (float *) &vTemp );
	}
}
inline unsigned short *InitializeTopologyIndexStruct( TopologyIndexStruct &quad, unsigned short *topologyIndex )
{
	quad.vtx1RingSize = topologyIndex;				topologyIndex += 4;
	quad.vtx1RingCenterQuadOffset = topologyIndex;	topologyIndex += 4;
	quad.valences = topologyIndex;					topologyIndex += 4;
	quad.minOneRingOffset = topologyIndex;			topologyIndex += 4;
	quad.bndVtx = topologyIndex;					topologyIndex += 4;
	quad.bndEdge = topologyIndex;					topologyIndex += 4;
	quad.cornerVtx = topologyIndex;					topologyIndex += 4;
	quad.loopGapAngle = topologyIndex;				topologyIndex += 4;
	quad.nbCornerVtx = topologyIndex;				topologyIndex += 4;
	quad.edgeBias = topologyIndex;					topologyIndex += 8;
	quad.vUV0 = topologyIndex;						topologyIndex += 4;
	quad.vUV1 = topologyIndex;						topologyIndex += 4;
	quad.vUV2 = topologyIndex;						topologyIndex += 4;
	quad.vUV3 = topologyIndex;						topologyIndex += 4;

	// The variable-length one-ring neighborhood comes last; its total size is
	// the sum of the four per-corner ring sizes read above
	quad.oneRing = topologyIndex;
	topologyIndex += quad.vtx1RingSize[0] + quad.vtx1RingSize[1] + quad.vtx1RingSize[2] + quad.vtx1RingSize[3];

	return topologyIndex;
}
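
// Usage pattern (as seen in GenerateWorldSpacePatches and SoftwareProcessQuadMesh
// below): because each patch's topology block is variable-length, callers walk
// consecutive patches by re-initializing the struct from the returned pointer:
//
//	TopologyIndexStruct quad;
//	unsigned short *pIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices );
//	for ( int p = 0; p < nNumPatches; ++p )
//	{
//		// ... consume quad for patch p ...
//		pIndices = InitializeTopologyIndexStruct( quad, pIndices );
//	}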
static ConVar mat_tessellation_update_buffers( "mat_tessellation_update_buffers", "1", FCVAR_CHEAT );
static ConVar mat_tessellation_cornertangents( "mat_tessellation_cornertangents", "1", FCVAR_CHEAT );
static ConVar mat_tessellation_accgeometrytangents( "mat_tessellation_accgeometrytangents", "0", FCVAR_CHEAT );

#ifdef _DEBUG
// Returns true for position pairs that are close but not bitwise equal,
// i.e. likely crack-causing near-duplicates
bool NotQuiteEqual( Vector4D &vA, Vector4D &vB )
{
	float flEpsilon = 0.05f;
	Vector4D vDelta = vA - vB;
	float flDist = sqrt( vDelta.x * vDelta.x + vDelta.y * vDelta.y + vDelta.z * vDelta.z );
	bool bSameVector = ( vA.x == vB.x ) && ( vA.y == vB.y ) && ( vA.z == vB.z );
	return ( flDist < flEpsilon ) && !bSameVector;
}

void DumpDebugPositions()
{
	for ( int i=0; i < g_DebugCornerPositions.Count(); i++ )
	{
		bool bCrack = false;
		for ( int j=0; j < g_DebugCornerPositions.Count(); j++ )
		{
			if ( NotQuiteEqual( g_DebugCornerPositions[i], g_DebugCornerPositions[j] ) )
			{
				bCrack = true;
				Assert(0);
			}
		}
		DevMsg( "%s C - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugCornerPositions[i].x, g_DebugCornerPositions[i].y, g_DebugCornerPositions[i].z );
	}

	for ( int i=0; i < g_DebugEdgePositions.Count(); i++ )
	{
		bool bCrack = false;
		for ( int j=0; j < g_DebugEdgePositions.Count(); j++ )
		{
			if ( NotQuiteEqual( g_DebugEdgePositions[i], g_DebugEdgePositions[j] ) )
			{
				bCrack = true;
			}
		}
		DevMsg( "%s E - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugEdgePositions[i].x, g_DebugEdgePositions[i].y, g_DebugEdgePositions[i].z );
	}

	for ( int i=0; i < g_DebugInteriorPositions.Count(); i++ )
	{
		bool bCrack = false;
		for ( int j=0; j < g_DebugInteriorPositions.Count(); j++ )
		{
			if ( NotQuiteEqual( g_DebugInteriorPositions[i], g_DebugInteriorPositions[j] ) )
			{
				bCrack = true;
			}
		}
		DevMsg( "%s I - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugInteriorPositions[i].x, g_DebugInteriorPositions[i].y, g_DebugInteriorPositions[i].z );
	}
}
#endif // _DEBUG
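
//-----------------------------------------------------------------------------------
// Converts skinned world-space cage vertices into packed Bezier patch records.
// Each patch writes 120 floats: 16 geometry control points, then 12 U-tangent
// and 12 V-tangent control points, each stored as packed xyz (no w).
//-----------------------------------------------------------------------------------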
void GenerateWorldSpacePatches( float *pSubDBuff, int nNumPatches, unsigned short *pTopologyIndices, fltx4 *pWSVertices, bool bRegularPatch )
{
	VPROF_BUDGET( "CStudioRender::GenerateWorldSpacePatches", _T("SubD Rendering") );

	TopologyIndexStruct quad;
	unsigned short *nextPatchIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices );

	set_ShowACCGeometryTangents( mat_tessellation_accgeometrytangents.GetBool() );
	set_UseCornerTangents( mat_tessellation_cornertangents.GetBool() );

	ALIGN16 Vector4D Geo[16] ALIGN16_POST;
	ALIGN16 Vector4D TanU[12] ALIGN16_POST;
	ALIGN16 Vector4D TanV[12] ALIGN16_POST;

#ifdef _DEBUG
	if ( mat_tess_dump.GetBool() )
	{
		// Debug Arrays
		g_DebugCornerPositions.EnsureCapacity( nNumPatches * 4 );
		g_DebugEdgePositions.EnsureCapacity( nNumPatches * 8 );
		g_DebugInteriorPositions.EnsureCapacity( nNumPatches * 4 );

		// Empty the arrays this time around
		g_DebugCornerPositions.RemoveAll();
		g_DebugEdgePositions.RemoveAll();
		g_DebugInteriorPositions.RemoveAll();
	}
#endif

	for ( int p = 0; p < nNumPatches; p++ )
	{
#if defined( USE_OPT )
		ComputeACCAllPatches( pWSVertices, &quad, Geo, TanU, TanV, bRegularPatch );
#else
		ComputeACCGeometryPatch( pWSVertices, &quad, Geo );
		ComputeACCTangentPatches( pWSVertices, &quad, Geo, TanU, TanV );
#endif

		// 16 geometry control points (xyz) at float offset 0
		for ( int i=0; i < 16; i++ )
		{
			pSubDBuff[ i * 3 + 0 ] = Geo[i].x;
			pSubDBuff[ i * 3 + 1 ] = Geo[i].y;
			pSubDBuff[ i * 3 + 2 ] = Geo[i].z;
		}

		// 12 U-tangent control points (xyz) at float offset 48
		for ( int i=0; i < 12; i++ )
		{
			pSubDBuff[ i * 3 + 0 + 48 ] = TanU[ i ].x;
			pSubDBuff[ i * 3 + 1 + 48 ] = TanU[ i ].y;
			pSubDBuff[ i * 3 + 2 + 48 ] = TanU[ i ].z;
		}

		// 12 V-tangent control points (xyz) at float offset 84
		for ( int i=0; i < 12; i++ )
		{
			pSubDBuff[ i * 3 + 0 + 84 ] = TanV[ i ].x;
			pSubDBuff[ i * 3 + 1 + 84 ] = TanV[ i ].y;
			pSubDBuff[ i * 3 + 2 + 84 ] = TanV[ i ].z;
		}

		pSubDBuff += 120;	// ( 16 + 12 + 12 ) control points * 3 floats = 120 floats per patch
		nextPatchIndices = InitializeTopologyIndexStruct( quad, nextPatchIndices );
	}

#ifdef _DEBUG
	if ( mat_tess_dump.GetBool() )
	{
		// These should be a particular size
		Assert( g_DebugCornerPositions.Count() == ( nNumPatches * 4 ) );
		Assert( g_DebugEdgePositions.Count() == ( nNumPatches * 8 ) );
		Assert( g_DebugInteriorPositions.Count() == ( nNumPatches * 4 ) );

		DumpDebugPositions();
		mat_tess_dump.SetValue( 0 );	// Turn back off
	}
#endif
}
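
// Illustrative only (not part of the original file): given the packed layout
// above, a hypothetical helper to read back control point i of patch p:
#if 0
inline const float *PatchControlPointXYZ( const float *pSubDBuff, int nPatch, int nControlPoint )
{
	// 120 floats per patch; geometry points first, then TanU, then TanV
	return pSubDBuff + nPatch * 120 + nControlPoint * 3;
}
#endif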
//-----------------------------------------------------------------------------------
// Top level function for mapping a quad mesh to an array of Bicubic Bezier patches
//-----------------------------------------------------------------------------------
void CStudioRender::GenerateBicubicPatches( mstudiomesh_t* pmesh, studiomeshgroup_t* pGroup, bool bDoFlex )
{
#if defined( LINUX )
	Assert(0);
#else
	VPROF_BUDGET( "CStudioRender::GenerateBicubicPatches", _T("SubD Rendering") );
	FillTables();	// This only does work the first time through

	Assert( pmesh );
	Assert( pGroup );
	const mstudio_meshvertexdata_t *vertData = pmesh->GetVertexData( m_pStudioHdr );
	Assert( vertData );
	mstudiovertex_t *pVertices = vertData->Vertex( 0 );

	m_vSkinnedSubDVertices.SetCount( pGroup->m_NumVertices );

	// First, apply software flexing and skinning to the vertices
	SkinSubDCage( pVertices, pGroup->m_NumVertices, m_PoseToWorld,
				  m_VertexCache, pGroup->m_pGroupIndexToMeshIndex, m_vSkinnedSubDVertices.Base(), bDoFlex );

	// Early out
	if ( mat_tessellation_update_buffers.GetBool() == false )
		return;

	// Lock the subd buffers
	int nNumPatches = 0;
	for ( int s=0; s < pGroup->m_NumStrips; ++s )
	{
		nNumPatches += pGroup->m_pUniqueFaces[s];
	}
	CMatRenderContextPtr pRenderContext( g_pMaterialSystem );
	float *pSubDBuff = pRenderContext->LockSubDBuffer( nNumPatches );

	// Now that we're in world space, we can map to an array of Bicubic patches
	int totalIndices = 0;
	float *pCurrentPtr = pSubDBuff;
	for ( int s=0; s < pGroup->m_NumStrips; ++s )
	{
		OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[s];
		int StripFaces = pGroup->m_pUniqueFaces[s];
		GenerateWorldSpacePatches( pCurrentPtr, StripFaces, &pGroup->m_pTopologyIndices[totalIndices], m_vSkinnedSubDVertices.Base(), ( pStrip->flags & OptimizedModel::STRIP_IS_QUADLIST_REG ) != 0 );
		totalIndices += pStrip->numTopologyIndices;
		pCurrentPtr += StripFaces * 120;	// 120 floats per patch
	}

	// Unlock subd buffers
	pRenderContext->UnlockSubDBuffer( );
#endif // !LINUX
}
// Transform a tangent vector (rotation only; the translation column is not applied)
static void R_TransformTangent( const Vector4D *pSrcTangentS, matrix3x4_t *pSkinMat, Vector4DAligned &tangentS )
{
	VPROF_BUDGET( "R_TransformTangent", _T("SubD Rendering") );

	tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2];
	tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2];
	tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2];
	tangentS.w = pSrcTangentS->w;	// Preserve the binormal sign / wrinkle encoding
}
// Transforms per-vertex tangent vectors and copies texture coordinates etc. into the dynamic VB
void CStudioRender::SoftwareProcessQuadMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder,
											 int numFaces, unsigned short* pGroupToMesh,
											 unsigned short *pTopologyIndices, bool bTangentSpace, bool bDoFlex )
{
	VPROF_BUDGET( "CStudioRender::SoftwareProcessQuadMesh", _T("SubD Rendering") );
	Vector4D *pStudioTangentS = NULL;
	ALIGN16 QuadTessVertex_t quadVertex ALIGN16_POST;

	// QuadTessVertex_t currently has the following map:
	// +-----------------------------------+
	// | tanX | tanY | tanZ | sBWrnk |  <- Tangent in .xyz, Binormal sign flip bit plus wrinkle in .w
	// +-----------------------------------+
	// | tcU0 | tcV0 | tcU1 | tcV1   |  <- Interior TC, Parametric V Edge TC
	// +-----------------------------------+
	// | tcU2 | tcV2 | tcU3 | tcV3   |  <- Parametric U Edge TC, Corner TC
	// +-----------------------------------+

	quadVertex.m_vTangent.Init( 1.0f, 0.0f, 0.0f, 1.0f );

	ALIGN16 matrix3x4_t *pSkinMat, matTemp ALIGN16_POST;
	Assert( numFaces > 0 );

	const mstudio_meshvertexdata_t *pVertData = pmesh->GetVertexData( m_pStudioHdr );
	Assert( pVertData );
	if ( !pVertData )
		return;

	mstudiovertex_t *pVertices = pVertData->Vertex( 0 );
	if ( bTangentSpace )
	{
		pStudioTangentS = pVertData->TangentS( 0 );
	}

	TopologyIndexStruct quad;
	unsigned short *nextPatchIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices );

	for ( int i=0; i < numFaces; ++i )	// Run over faces
	{
		int patchCorner = 0;

#if 0
		Vector4D debugTangent[4];
		for ( int j=0; j < 4; ++j )
		{
			int idx = quad.oneRing[patchCorner];
			memcpy( &debugTangent[j], &pStudioTangentS[idx], sizeof( Vector4D ) );
			patchCorner += quad.vtx1RingSize[j];
		}

		// These should be the same sign for a given patch.  If they're not, that's bad
		Assert( ( debugTangent[0].w == debugTangent[1].w ) &&
				( debugTangent[1].w == debugTangent[2].w ) &&
				( debugTangent[2].w == debugTangent[3].w ) );
		patchCorner = 0;
#endif

		for ( int j=0; j < 4; ++j )	// Four verts per face
		{
			int idx = quad.oneRing[patchCorner];
			mstudiovertex_t &vert = pVertices[idx];

			if ( bTangentSpace )
			{
				pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, m_PoseToWorld, matTemp );

				if ( bDoFlex && m_VertexCache.IsVertexFlexed( idx ) )
				{
					CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex( idx );
					R_TransformTangent( &(pFlexedVertex->m_TangentS), pSkinMat, *(Vector4DAligned*)&quadVertex.m_vTangent );
				}
				else // non-flexed case
				{
					R_TransformTangent( &pStudioTangentS[idx], pSkinMat, *(Vector4DAligned*)&quadVertex.m_vTangent );
					quadVertex.m_vTangent.w *= 2;	// non-flexed vertex should have wrinkle of -2 or +2
				}
			}

			// Store 4 texcoords per quad corner
			quadVertex.m_vUV01.x = pVertices[ quad.vUV0[j] ].m_vecTexCoord.x;
			quadVertex.m_vUV01.y = pVertices[ quad.vUV0[j] ].m_vecTexCoord.y;
			quadVertex.m_vUV01.z = pVertices[ quad.vUV1[j] ].m_vecTexCoord.x;
			quadVertex.m_vUV01.w = pVertices[ quad.vUV1[j] ].m_vecTexCoord.y;

			quadVertex.m_vUV23.x = pVertices[ quad.vUV2[j] ].m_vecTexCoord.x;
			quadVertex.m_vUV23.y = pVertices[ quad.vUV2[j] ].m_vecTexCoord.y;
			quadVertex.m_vUV23.z = pVertices[ quad.vUV3[j] ].m_vecTexCoord.x;
			quadVertex.m_vUV23.w = pVertices[ quad.vUV3[j] ].m_vecTexCoord.y;

			meshBuilder.FastQuadVertexSSE( quadVertex );
			patchCorner += quad.vtx1RingSize[j];
		}

		nextPatchIndices = InitializeTopologyIndexStruct( quad, nextPatchIndices );
	}

	meshBuilder.FastAdvanceNVertices( numFaces * 4 );
}