//===== Copyright (c) 1996-2008, Valve Corporation, All rights reserved. ======// // // Purpose: Support for mapping from a quad mesh to Bicubic Patches, as a means // of rendering approximate Catmull-Clark subdivision surfaces // //===========================================================================// #include "studio.h" #include "studiorendercontext.h" #include "materialsystem/imaterialsystem.h" #include "materialsystem/imaterial.h" #include "materialsystem/imaterialvar.h" #include "materialsystem/itexture.h" #include "materialsystem/imesh.h" #include "mathlib/mathlib.h" #include "studiorender.h" #include "optimize.h" #include "tier1/convar.h" #include "tier1/keyvalues.h" #include "tier0/vprof.h" // memdbgon must be the last include file in a .cpp file!!! #include "tier0/memdbgon.h" #define R_STUDIOSUBD #include "r_studiosubd_patches.h" #ifdef _DEBUG // Temporary debug arrays extern CUtlVector<Vector4D> g_DebugCornerPositions; extern CUtlVector<Vector4D> g_DebugEdgePositions; extern CUtlVector<Vector4D> g_DebugInteriorPositions; #endif // // Check out CL# 584588 for an SSE-ized version of the older versions of these // routines, which came from an older MS doc, by way of the DX10 SDK // static void R_TransformVert( const Vector *pSrcPos, matrix3x4_t *pSkinMat, Vector4DAligned &pos ) { VPROF_BUDGET( "R_TransformVert", _T("SubD Rendering") ); // NOTE: Could add SSE stuff here, if we knew what SSE stuff could make it faster pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; pos.w = 1.0f; } // This function is duplicate code **** static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix ) { VPROF_BUDGET( "ComputeSkinMatrixSSE", _T("SubD Rendering") ); // NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization #if defined( _WIN32 ) && !defined( WIN64 ) && !defined( _X360 ) switch( boneweights.numbones ) { default: case 1: return &pPoseToWorld[boneweights.bone[0]]; case 2: { matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; float *pWeights = boneweights.weight; _asm { mov eax, DWORD PTR [pWeights] movss xmm6, dword ptr[eax] ; boneweights.weight[0] movss xmm7, dword ptr[eax + 4] ; boneweights.weight[1] mov eax, DWORD PTR [boneMat0] mov ecx, DWORD PTR [boneMat1] mov edi, DWORD PTR [scratchMatrix] // Fill xmm6, and 7 with all the bone weights shufps xmm6, xmm6, 0 shufps xmm7, xmm7, 0 // Load up all rows of the three matrices movaps xmm0, XMMWORD PTR [eax] movaps xmm1, XMMWORD PTR [ecx] movaps xmm2, XMMWORD PTR [eax + 16] movaps xmm3, XMMWORD PTR [ecx + 16] movaps xmm4, XMMWORD PTR [eax + 32] movaps xmm5, XMMWORD PTR [ecx + 32] // Multiply the rows by the weights mulps xmm0, xmm6 mulps xmm1, xmm7 mulps xmm2, xmm6 mulps xmm3, xmm7 mulps xmm4, xmm6 mulps xmm5, xmm7 addps xmm0, xmm1 addps xmm2, xmm3 addps xmm4, xmm5 movaps XMMWORD PTR [edi], xmm0 movaps XMMWORD PTR [edi + 16], xmm2 movaps XMMWORD PTR [edi + 32], xmm4 } } return &scratchMatrix; case 3: { matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; float *pWeights = boneweights.weight; _asm { mov eax, DWORD PTR [pWeights] movss xmm5, dword ptr[eax] ; boneweights.weight[0] movss xmm6, dword ptr[eax + 4] ; boneweights.weight[1] movss xmm7, dword ptr[eax + 8] ; boneweights.weight[2] mov eax, DWORD PTR [boneMat0] mov ecx, DWORD PTR [boneMat1] mov edx, DWORD PTR [boneMat2] mov edi, DWORD PTR [scratchMatrix] // Fill xmm5, 6, and 7 with all the bone weights shufps xmm5, xmm5, 0 shufps xmm6, xmm6, 0 shufps xmm7, xmm7, 0 // Load up the first row of the three matrices movaps xmm0, XMMWORD PTR [eax] movaps xmm1, XMMWORD PTR [ecx] movaps xmm2, XMMWORD PTR [edx] // Multiply the rows by the weights mulps xmm0, xmm5 mulps xmm1, xmm6 mulps xmm2, xmm7 addps xmm0, xmm1 addps xmm0, xmm2 movaps XMMWORD PTR [edi], xmm0 // Load up the second row of the three matrices movaps xmm0, XMMWORD PTR [eax + 16] movaps xmm1, XMMWORD PTR [ecx + 16] movaps xmm2, XMMWORD PTR [edx + 16] // Multiply the rows by the weights mulps xmm0, xmm5 mulps xmm1, xmm6 mulps xmm2, xmm7 addps xmm0, xmm1 addps xmm0, xmm2 movaps XMMWORD PTR [edi + 16], xmm0 // Load up the third row of the three matrices movaps xmm0, XMMWORD PTR [eax + 32] movaps xmm1, XMMWORD PTR [ecx + 32] movaps xmm2, XMMWORD PTR [edx + 32] // Multiply the rows by the weights mulps xmm0, xmm5 mulps xmm1, xmm6 mulps xmm2, xmm7 addps xmm0, xmm1 addps xmm0, xmm2 movaps XMMWORD PTR [edi + 32], xmm0 } } return &scratchMatrix; case 4: { matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]]; float *pWeights = boneweights.weight; _asm { mov eax, DWORD PTR [pWeights] movss xmm4, dword ptr[eax] ; boneweights.weight[0] movss xmm5, dword ptr[eax + 4] ; boneweights.weight[1] movss xmm6, dword ptr[eax + 8] ; boneweights.weight[2] movss xmm7, dword ptr[eax + 12] ; boneweights.weight[3] mov eax, DWORD PTR [boneMat0] mov ecx, DWORD PTR [boneMat1] mov edx, DWORD PTR [boneMat2] mov esi, DWORD PTR [boneMat3] mov edi, DWORD PTR [scratchMatrix] // Fill xmm5, 6, and 7 with all the bone weights shufps xmm4, xmm4, 0 shufps xmm5, xmm5, 0 shufps xmm6, xmm6, 0 shufps xmm7, xmm7, 0 // Load up the first row of the four matrices movaps xmm0, XMMWORD PTR [eax] movaps xmm1, XMMWORD PTR [ecx] movaps xmm2, XMMWORD PTR [edx] movaps xmm3, XMMWORD PTR [esi] // Multiply the rows by the weights mulps xmm0, xmm4 mulps xmm1, xmm5 mulps xmm2, xmm6 mulps xmm3, xmm7 addps xmm0, xmm1 addps xmm2, xmm3 addps xmm0, xmm2 movaps XMMWORD PTR [edi], xmm0 // Load up the second row of the three matrices movaps xmm0, XMMWORD PTR [eax + 16] movaps xmm1, XMMWORD PTR [ecx + 16] movaps xmm2, XMMWORD PTR [edx + 16] movaps xmm3, XMMWORD PTR [esi + 16] // Multiply the rows by the weights mulps xmm0, xmm4 mulps xmm1, xmm5 mulps xmm2, xmm6 mulps xmm3, xmm7 addps xmm0, xmm1 addps xmm2, xmm3 addps xmm0, xmm2 movaps XMMWORD PTR [edi + 16], xmm0 // Load up the third row of the three matrices movaps xmm0, XMMWORD PTR [eax + 32] movaps xmm1, XMMWORD PTR [ecx + 32] movaps xmm2, XMMWORD PTR [edx + 32] movaps xmm3, XMMWORD PTR [esi + 32] // Multiply the rows by the weights mulps xmm0, xmm4 mulps xmm1, xmm5 mulps xmm2, xmm6 mulps xmm3, xmm7 addps xmm0, xmm1 addps xmm2, xmm3 addps xmm0, xmm2 movaps XMMWORD PTR [edi + 32], xmm0 } } return &scratchMatrix; } #else #ifndef LINUX #pragma message( "ComputeSkinMatrixSSE C implementation only" ) #endif extern matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &scratchMatrix ); return ComputeSkinMatrix( boneweights, pPoseToWorld, scratchMatrix ); #endif Assert( 0 ); return NULL; } #ifdef _DEBUG static ConVar mat_tess_dump( "mat_tess_dump", "0", FCVAR_CHEAT ); #endif void CStudioRender::SkinSubDCage( mstudiovertex_t *pVertices, int nNumVertices, matrix3x4_t *pPoseToWorld, CCachedRenderData &vertexCache, unsigned short* pGroupToMesh, fltx4 *vOutput, bool bDoFlex ) { VPROF_BUDGET( "CStudioRender::SkinSubDCage", _T("SubD Rendering") ); Vector *pSrcPos; ALIGN16 matrix3x4_t *pSkinMat, temp ALIGN16_POST; Assert( nNumVertices > 0 ); for ( int j=0; j < nNumVertices; ++j ) { mstudiovertex_t &vert = pVertices[pGroupToMesh[j]]; pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp ); if ( bDoFlex && vertexCache.IsVertexFlexed( pGroupToMesh[j] ) ) { CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex( pGroupToMesh[j] ); pSrcPos = &pFlexedVertex->m_Position.AsVector3D(); // Copy strange signed, 0..3 wrinkle tangent-flip encoding over to tangent.w pFlexedVertex->m_TangentS.w = pFlexedVertex->m_Position.w; } else // non-flexed case { pSrcPos = &vert.m_vecPosition; } // Transform into world space Vector4DAligned vTemp; R_TransformVert( pSrcPos, pSkinMat, *(Vector4DAligned*)&vTemp ); vOutput[j] = LoadAlignedSIMD( (float *) &vTemp ); } } inline unsigned short *InitializeTopologyIndexStruct( TopologyIndexStruct &quad, unsigned short *topologyIndex ) { quad.vtx1RingSize = topologyIndex; topologyIndex += 4; quad.vtx1RingCenterQuadOffset = topologyIndex; topologyIndex += 4; quad.valences = topologyIndex; topologyIndex += 4; quad.minOneRingOffset = topologyIndex; topologyIndex += 4; quad.bndVtx = topologyIndex; topologyIndex += 4; quad.bndEdge = topologyIndex; topologyIndex += 4; quad.cornerVtx = topologyIndex; topologyIndex += 4; quad.loopGapAngle = topologyIndex; topologyIndex += 4; quad.nbCornerVtx = topologyIndex; topologyIndex += 4; quad.edgeBias = topologyIndex; topologyIndex += 8; quad.vUV0 = topologyIndex; topologyIndex += 4; quad.vUV1 = topologyIndex; topologyIndex += 4; quad.vUV2 = topologyIndex; topologyIndex += 4; quad.vUV3 = topologyIndex; topologyIndex += 4; quad.oneRing = topologyIndex; topologyIndex += quad.vtx1RingSize[0]+quad.vtx1RingSize[1]+quad.vtx1RingSize[2]+quad.vtx1RingSize[3]; return topologyIndex; } static ConVar mat_tessellation_update_buffers( "mat_tessellation_update_buffers", "1", FCVAR_CHEAT ); static ConVar mat_tessellation_cornertangents( "mat_tessellation_cornertangents", "1", FCVAR_CHEAT ); static ConVar mat_tessellation_accgeometrytangents( "mat_tessellation_accgeometrytangents", "0", FCVAR_CHEAT ); #ifdef _DEBUG bool NotQuiteEqual( Vector4D &vA, Vector4D &vB ) { float flEpsilon = 0.05f; Vector4D vDelta = vA - vB; float flDist = sqrt( vDelta.x * vDelta.x + vDelta.y * vDelta.y + vDelta.z * vDelta.z ); bool bSameVector = ( vA.x == vB.x ) && ( vA.y == vB.y ) && ( vA.z == vB.z ); return ( flDist < flEpsilon ) && !bSameVector; } void DumpDebugPositions() { for ( int i=0; i< g_DebugCornerPositions.Count(); i++ ) { bool bCrack = false; for ( int j=0; j< g_DebugCornerPositions.Count(); j++ ) { if ( NotQuiteEqual( g_DebugCornerPositions[i], g_DebugCornerPositions[j] ) ) { bCrack = true; Assert(0); } } DevMsg( "%s C - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugCornerPositions[i].x, g_DebugCornerPositions[i].y, g_DebugCornerPositions[i].z ); } for ( int i=0; i< g_DebugEdgePositions.Count(); i++ ) { bool bCrack = false; for ( int j=0; j< g_DebugEdgePositions.Count(); j++ ) { if ( NotQuiteEqual( g_DebugEdgePositions[i], g_DebugEdgePositions[j] ) ) { bCrack = true; } } DevMsg( "%s E - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugEdgePositions[i].x, g_DebugEdgePositions[i].y, g_DebugEdgePositions[i].z ); } for ( int i=0; i< g_DebugInteriorPositions.Count(); i++ ) { bool bCrack = false; for ( int j=0; j< g_DebugInteriorPositions.Count(); j++ ) { if ( NotQuiteEqual( g_DebugInteriorPositions[i], g_DebugInteriorPositions[j] ) ) { bCrack = true; } } DevMsg( "%s I - %.15f, %.15f, %.15f\n", bCrack ? "*** " : " ", g_DebugInteriorPositions[i].x, g_DebugInteriorPositions[i].y, g_DebugInteriorPositions[i].z ); } } #endif // _DEBUG void GenerateWorldSpacePatches( float *pSubDBuff, int nNumPatches, unsigned short *pTopologyIndices, fltx4 *pWSVertices, bool bRegularPatch ) { VPROF_BUDGET( "CStudioRender::GenerateWorldSpacePatches", _T("SubD Rendering") ); TopologyIndexStruct quad; unsigned short *nextPatchIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices ); set_ShowACCGeometryTangents(mat_tessellation_accgeometrytangents.GetBool()); set_UseCornerTangents(mat_tessellation_cornertangents.GetBool()); ALIGN16 Vector4D Geo[16] ALIGN16_POST; ALIGN16 Vector4D TanU[12] ALIGN16_POST; ALIGN16 Vector4D TanV[12] ALIGN16_POST; #ifdef _DEBUG if ( mat_tess_dump.GetBool() ) { // Debug Arrays g_DebugCornerPositions.EnsureCapacity( nNumPatches * 4 ); g_DebugEdgePositions.EnsureCapacity( nNumPatches * 8 ); g_DebugInteriorPositions.EnsureCapacity( nNumPatches * 4 ); // Empty the arrays this time around g_DebugCornerPositions.RemoveAll(); g_DebugEdgePositions.RemoveAll(); g_DebugInteriorPositions.RemoveAll(); } #endif for( int p = 0; p < nNumPatches; p++ ) { #if defined( USE_OPT ) ComputeACCAllPatches( pWSVertices, &quad, Geo, TanU, TanV, bRegularPatch ); #else ComputeACCGeometryPatch( pWSVertices, &quad, Geo ); ComputeACCTangentPatches( pWSVertices, &quad, Geo, TanU, TanV ); #endif for ( int i=0; i < 16; i++ ) { pSubDBuff[ i * 3 + 0 ] = Geo[i].x; pSubDBuff[ i * 3 + 1 ] = Geo[i].y; pSubDBuff[ i * 3 + 2 ] = Geo[i].z; } for ( int i=0; i<12; i++ ) { pSubDBuff[ i * 3 + 0 + 48 ] = TanU[ i ].x; pSubDBuff[ i * 3 + 1 + 48 ] = TanU[ i ].y; pSubDBuff[ i * 3 + 2 + 48 ] = TanU[ i ].z; } for ( int i=0; i<12; i++ ) { pSubDBuff[ i * 3 + 0 + 84 ] = TanV[ i ].x; pSubDBuff[ i * 3 + 1 + 84 ] = TanV[ i ].y; pSubDBuff[ i * 3 + 2 + 84 ] = TanV[ i ].z; } pSubDBuff += 120; // 30 * sizeof( float ) nextPatchIndices = InitializeTopologyIndexStruct( quad, nextPatchIndices ); } #ifdef _DEBUG if ( mat_tess_dump.GetBool() ) { // These should be a particular size Assert( g_DebugCornerPositions.Count() == ( nNumPatches * 4 ) ); Assert( g_DebugEdgePositions.Count() == ( nNumPatches * 8 ) ); Assert( g_DebugInteriorPositions.Count() == ( nNumPatches * 4 ) ); DumpDebugPositions(); mat_tess_dump.SetValue( 0 ); // Turn back off } #endif } //----------------------------------------------------------------------------------- // Top level function for mapping a quad mesh to an array of Bicubic Bezier patches //----------------------------------------------------------------------------------- void CStudioRender::GenerateBicubicPatches( mstudiomesh_t* pmesh, studiomeshgroup_t* pGroup, bool bDoFlex ) { #if defined( LINUX ) Assert(0); #else VPROF_BUDGET( "CStudioRender::GenerateBicubicPatches", _T("SubD Rendering") ); FillTables(); // This only does work the first time through Assert( pmesh ); Assert( pGroup ); const mstudio_meshvertexdata_t *vertData = pmesh->GetVertexData( m_pStudioHdr ); Assert( vertData ); mstudiovertex_t *pVertices = vertData->Vertex( 0 ); m_vSkinnedSubDVertices.SetCount( pGroup->m_NumVertices ); // First, apply software flexing and skinning to the vertices SkinSubDCage( pVertices, pGroup->m_NumVertices, m_PoseToWorld, m_VertexCache, pGroup->m_pGroupIndexToMeshIndex, m_vSkinnedSubDVertices.Base(), bDoFlex ); // Early out if ( mat_tessellation_update_buffers.GetBool() == false ) return; // Lock the subd buffers int nNumPatches = 0; for ( int s=0; s<pGroup->m_NumStrips; ++s ) { nNumPatches += pGroup->m_pUniqueFaces[s]; } CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); float *pSubDBuff = pRenderContext->LockSubDBuffer( nNumPatches ); // Now we are in world space, we can map to array of Bicubic patches int totalIndices = 0; float *pCurrentPtr = pSubDBuff; for ( int s=0; s<pGroup->m_NumStrips; ++s ) { OptimizedModel::StripHeader_t *pStrip = &pGroup->m_pStripData[s]; int StripFaces = pGroup->m_pUniqueFaces[s]; GenerateWorldSpacePatches( pCurrentPtr, StripFaces, &pGroup->m_pTopologyIndices[totalIndices], m_vSkinnedSubDVertices.Base(), ( pStrip->flags & OptimizedModel::STRIP_IS_QUADLIST_REG ) != 0 ); totalIndices += pStrip->numTopologyIndices; pCurrentPtr += StripFaces * 120; } // Unlock subd buffers pRenderContext->UnlockSubDBuffer( ); #endif // !LINUX } // Transform Tangent vector static void R_TransformTangent( const Vector4D *pSrcTangentS, matrix3x4_t *pSkinMat, Vector4DAligned &tangentS ) { VPROF_BUDGET( "R_TransformTangent", _T("SubD Rendering") ); tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2]; tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2]; tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2]; tangentS.w = pSrcTangentS->w; } // Transforms per-vertex tangent vector, copies texture coordinates etc into dynamic VB void CStudioRender::SoftwareProcessQuadMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, int numFaces, unsigned short* pGroupToMesh, unsigned short *pTopologyIndices, bool bTangentSpace, bool bDoFlex ) { VPROF_BUDGET( "CStudioRender::SoftwareProcessQuadMesh", _T("SubD Rendering") ); Vector4D *pStudioTangentS = NULL; ALIGN16 QuadTessVertex_t quadVertex ALIGN16_POST; // QuadTessVertex_t currently has the following map: // +-----------------------------------+ // | tanX | tanY | tanZ | sBWrnk | <- Tangent in .xyz, Binormal sign flip bit plus wrinkle in .w // +-----------------------------------+ // | tcU0 | tcV0 | tcU1 | tcV1 | <- Interior TC, Parametric V Edge TC // +-----------------------------------+ // | tcU2 | tcV2 | tcU3 | tcV3 | <- Parametric U Edge TC, Corner TC // +-----------------------------------+ quadVertex.m_vTangent.Init( 1.0f, 0.0f, 0.0f, 1.0f ); ALIGN16 matrix3x4_t *pSkinMat, matTemp ALIGN16_POST; Assert( numFaces > 0 ); const mstudio_meshvertexdata_t *pVertData = pmesh->GetVertexData( m_pStudioHdr ); Assert( pVertData ); if ( !pVertData ) return; mstudiovertex_t *pVertices = pVertData->Vertex( 0 ); if ( bTangentSpace ) { pStudioTangentS = pVertData->TangentS( 0 ); } TopologyIndexStruct quad; unsigned short *nextPatchIndices = InitializeTopologyIndexStruct( quad, pTopologyIndices ); for ( int i=0; i < numFaces; ++i ) // Run over faces { int patchCorner = 0; #if 0 Vector4D debugTangent[4]; for ( int j=0; j < 4; ++j ) { int idx = quad.oneRing[patchCorner]; memcpy( &debugTangent[j], &pStudioTangentS[idx], sizeof( Vector4D ) ); patchCorner += quad.vtx1RingSize[j]; } // These should be the same sign for a given patch. // If they're not, that's bad Assert( ( debugTangent[0].w == debugTangent[1].w ) && ( debugTangent[1].w == debugTangent[2].w ) && ( debugTangent[2].w == debugTangent[3].w ) ); patchCorner = 0; #endif for ( int j=0; j < 4; ++j ) // Four verts per face { int idx = quad.oneRing[patchCorner]; mstudiovertex_t &vert = pVertices[idx]; if ( bTangentSpace ) { pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, m_PoseToWorld, matTemp ); if ( bDoFlex && m_VertexCache.IsVertexFlexed( idx ) ) { CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex( idx ); R_TransformTangent( &(pFlexedVertex->m_TangentS), pSkinMat, *(Vector4DAligned*)&quadVertex.m_vTangent ); } else // non-flexed case { R_TransformTangent( &pStudioTangentS[idx], pSkinMat, *(Vector4DAligned*)&quadVertex.m_vTangent ); quadVertex.m_vTangent.w *= 2; // non-flexed vertex should have wrinkle of -2 or +2 } } // Store 4 texcoords per quad corner quadVertex.m_vUV01.x = pVertices[ quad.vUV0[j] ].m_vecTexCoord.x; quadVertex.m_vUV01.y = pVertices[ quad.vUV0[j] ].m_vecTexCoord.y; quadVertex.m_vUV01.z = pVertices[ quad.vUV1[j] ].m_vecTexCoord.x; quadVertex.m_vUV01.w = pVertices[ quad.vUV1[j] ].m_vecTexCoord.y; quadVertex.m_vUV23.x = pVertices[ quad.vUV2[j] ].m_vecTexCoord.x; quadVertex.m_vUV23.y = pVertices[ quad.vUV2[j] ].m_vecTexCoord.y; quadVertex.m_vUV23.z = pVertices[ quad.vUV3[j] ].m_vecTexCoord.x; quadVertex.m_vUV23.w = pVertices[ quad.vUV3[j] ].m_vecTexCoord.y; meshBuilder.FastQuadVertexSSE( quadVertex ); patchCorner += quad.vtx1RingSize[j]; } nextPatchIndices = InitializeTopologyIndexStruct( quad, nextPatchIndices ); } meshBuilder.FastAdvanceNVertices( numFaces * 4 ); }