|
|
//===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======//
//
// Purpose:
//
// $NoKeywords: $
//
//===========================================================================//
#include "tier0/dbg.h"
#include "mathlib/mathlib.h"
#include "bone_setup.h"
#include <string.h>
#ifdef POSIX
#define _rotl(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
#endif
#include "collisionutils.h"
#include "vstdlib/random.h"
#include "tier0/vprof.h"
#include "bone_accessor.h"
#include "mathlib/ssequaternion.h"
#include "bitvec.h"
#include "datamanager.h"
#include "convar.h"
#include "tier0/tslist.h"
#include "vphysics_interface.h"
#include "datacache/idatacache.h"
#include "mathlib/capsule.h"
#include "tier0/miniprofiler.h"
#ifdef CLIENT_DLL
#include "posedebugger.h"
#endif
#include "engine/ivdebugoverlay.h"
#include "bone_utils.h"
// memdbgon must be the last include file in a .cpp file!!!
#include "tier0/memdbgon.h"
// -----------------------------------------------------------------
// Memory pools for bone-setup scratch data, one pool per element type.
// Within this file g_MatrixPool supplies the temporary bone-to-world matrix
// arrays used by WorldSpaceSlerp().
CBoneSetupMemoryPool<BoneQuaternionAligned> g_QuaternionPool;
CBoneSetupMemoryPool<BoneVector> g_VectorPool;
CBoneSetupMemoryPool<matrix3x4a_t> g_MatrixPool;
// -----------------------------------------------------------------
//-----------------------------------------------------------------------------
// Purpose: Builds a new bone cache for the bones selected by params.boneMask.
//          Bone 0 is always cached, whether or not its flags match the mask.
// Input  : params - supplies the studio header, bone mask, bone-to-world
//                   matrices and timestamp (all forwarded to Init).
// Output : Newly allocated cache; DestroyResource() frees it.
//
// The cache lives in one 16-byte aligned allocation laid out as:
//   [CBoneCache][studio->cached table][cached->studio table][pad to 16][matrices]
//-----------------------------------------------------------------------------
CBoneCache *CBoneCache::CreateResource( const bonecacheparams_t &params )
{
	BONE_PROFILE_FUNC();

	const int nStudioBones = params.pStudioHdr->numbones();
	// The two index tables below are fixed-size stack arrays.
	Assert( nStudioBones <= MAXSTUDIOBONES );

	short studioToCachedIndex[MAXSTUDIOBONES];
	short cachedToStudioIndex[MAXSTUDIOBONES];
	int cachedBoneCount = 0;
	for ( int i = 0; i < nStudioBones; i++ )
	{
		// skip bones that aren't part of the boneMask (and aren't the root bone)
		if ( i != 0 && !( params.pStudioHdr->boneFlags( i ) & params.boneMask ) )
		{
			studioToCachedIndex[i] = -1;
			continue;
		}
		studioToCachedIndex[i] = cachedBoneCount;
		cachedToStudioIndex[cachedBoneCount] = i;
		cachedBoneCount++;
	}

	int tableSizeStudio = sizeof(short) * nStudioBones;
	int tableSizeCached = sizeof(short) * cachedBoneCount;
	int matrixSize = sizeof(matrix3x4_t) * cachedBoneCount;
	// Header plus both tables are padded to 16 bytes so the matrices start aligned.
	size_t size = AlignValue( sizeof(CBoneCache) + tableSizeStudio + tableSizeCached, 16 ) + matrixSize;

	CBoneCache *pMem = (CBoneCache *)MemAlloc_AllocAligned( size, 16 );
	Construct( pMem );
	Assert( size == ( uint )size ); // make sure we're not trimming the int in 64bit
	pMem->Init( params, size, studioToCachedIndex, cachedToStudioIndex, cachedBoneCount );
	return pMem;
}
unsigned int CBoneCache::EstimatedSize( const bonecacheparams_t ¶ms ) { // conservative estimate - max size
return ( params.pStudioHdr->numbones() * (sizeof(short) + sizeof(short) + sizeof(matrix3x4_t)) + 3 ) & ~3; }
// Frees the aligned allocation that holds this cache (header, tables and
// matrices); pairs with the MemAlloc_AllocAligned() call in CreateResource().
void CBoneCache::DestroyResource()
{
	MemAlloc_FreeAligned( this );
}
// Starts out as an empty cache; Init() fills in the real size and bone count.
CBoneCache::CBoneCache()
{
	m_cachedBoneCount = 0;
	m_size = 0;
}
void CBoneCache::Init( const bonecacheparams_t ¶ms, unsigned int size, short *pStudioToCached, short *pCachedToStudio, int cachedBoneCount ) { BONE_PROFILE_FUNC(); m_cachedBoneCount = cachedBoneCount; m_size = size; m_timeValid = params.curtime; m_boneMask = params.boneMask;
int studioTableSize = params.pStudioHdr->numbones() * sizeof(short); m_cachedToStudioOffset = studioTableSize; memcpy( StudioToCached(), pStudioToCached, studioTableSize );
int cachedTableSize = cachedBoneCount * sizeof(short); memcpy( CachedToStudio(), pCachedToStudio, cachedTableSize );
m_matrixOffset = AlignValue( sizeof(CBoneCache) + m_cachedToStudioOffset + cachedTableSize, 16 ); UpdateBones( params.pBoneToWorld, params.pStudioHdr->numbones(), params.curtime ); }
void CBoneCache::UpdateBones( const matrix3x4a_t *pBoneToWorld, int numbones, float curtime ) { BONE_PROFILE_FUNC(); matrix3x4a_t *pBones = BoneArray(); const short *pCachedToStudio = CachedToStudio();
for ( int i = 0; i < m_cachedBoneCount; i++ ) { int index = pCachedToStudio[i]; //MatrixCopy( pBoneToWorld[index], pBones[i] );
const float *pInput = pBoneToWorld[index].Base(); float *pOutput = pBones[i].Base();
fltx4 fl4Tmp0 = LoadAlignedSIMD( pInput ); StoreAlignedSIMD( pOutput, fl4Tmp0 ); fltx4 fl4Tmp1 = LoadAlignedSIMD( pInput + 4 ); StoreAlignedSIMD( pOutput+4, fl4Tmp1 ); fltx4 fl4Tmp2 = LoadAlignedSIMD( pInput + 8 ); StoreAlignedSIMD( pOutput+8, fl4Tmp2 ); } m_timeValid = curtime; }
// Returns the cached matrix for a studio bone index, or NULL when that bone
// was not cached (its table entry is negative).
matrix3x4a_t *CBoneCache::GetCachedBone( int studioIndex )
{
	BONE_PROFILE_FUNC();

	const int iCached = StudioToCached()[studioIndex];
	if ( iCached < 0 )
	{
		return NULL;
	}
	return BoneArray() + iCached;
}
//-----------------------------------------------------------------------------
// Purpose: Copies every cached matrix back into a studio-indexed array.
//          Slots belonging to bones that are not cached are left untouched.
// Input  : pBoneToWorld - destination array indexed by studio bone
//-----------------------------------------------------------------------------
void CBoneCache::ReadCachedBones( matrix3x4a_t *pBoneToWorld )
{
	BONE_PROFILE_FUNC();

	matrix3x4a_t *pCachedBones = BoneArray();
	const short *pStudioIndexOf = CachedToStudio();

	for ( int iCached = 0; iCached < m_cachedBoneCount; ++iCached )
	{
		// SIMD stand-in for MatrixCopy( pCachedBones[iCached], pBoneToWorld[studio] ):
		// three aligned 4-float transfers cover the 12 floats of the matrix.
		const float *pSrc = pCachedBones[iCached].Base();
		float *pDst = pBoneToWorld[ pStudioIndexOf[iCached] ].Base();
		for ( int iRow = 0; iRow < 3; ++iRow )
		{
			const fltx4 fl4Row = LoadAlignedSIMD( pSrc + 4 * iRow );
			StoreAlignedSIMD( pDst + 4 * iRow, fl4Row );
		}
	}
}
//-----------------------------------------------------------------------------
// Purpose: Fills a studio-indexed table with pointers into the cache.
//          Entries for bones that are not cached end up NULL.
// Input  : bones    - table to fill
//          numbones - number of entries in the table that get cleared first
//-----------------------------------------------------------------------------
void CBoneCache::ReadCachedBonePointers( matrix3x4_t **bones, int numbones )
{
	BONE_PROFILE_FUNC();

	// Clear everything; only the cached bones are assigned below.
	memset( bones, 0, sizeof(matrix3x4_t *) * numbones );

	matrix3x4a_t *pCachedMatrix = BoneArray();
	const short *pStudioIndexOf = CachedToStudio();
	for ( int iCached = 0; iCached < m_cachedBoneCount; ++iCached, ++pCachedMatrix )
	{
		bones[ pStudioIndexOf[iCached] ] = pCachedMatrix;
	}
}
// True while the cache's timestamp is at most dt behind curtime.
bool CBoneCache::IsValid( float curtime, float dt )
{
	const float flAge = curtime - m_timeValid;
	return ( flAge <= dt );
}
// private functions
// Start of the cached matrices: m_matrixOffset bytes past the start of this object.
matrix3x4a_t *CBoneCache::BoneArray()
{
	byte *pBase = (byte *)( this );
	return (matrix3x4a_t *)( pBase + m_matrixOffset );
}
// Studio-index -> cached-index table; stored immediately after this object.
short *CBoneCache::StudioToCached()
{
	char *pAfterObject = (char *)( this + 1 );
	return (short *)pAfterObject;
}
// Cached-index -> studio-index table; stored m_cachedToStudioOffset bytes
// past the end of this object (i.e. right after the studio->cached table).
short *CBoneCache::CachedToStudio()
{
	char *pAfterObject = (char *)( this + 1 );
	return (short *)( pAfterObject + m_cachedToStudioOffset );
}
// Construct a singleton
// File-local manager for all bone caches, constructed with 128 * 1024
// (presumably the manager's target size in bytes - confirm against CDataManager).
static CDataManager<CBoneCache, bonecacheparams_t, CBoneCache *, CThreadFastMutex> g_StudioBoneCache( 128 * 1024L );
// Acquires the bone cache manager's mutex; release with Studio_UnlockBoneCache().
void Studio_LockBoneCache()
{
	g_StudioBoneCache.AccessMutex().Lock();
}
// Releases the bone cache manager's mutex taken by Studio_LockBoneCache().
void Studio_UnlockBoneCache()
{
	g_StudioBoneCache.AccessMutex().Unlock();
}
//-----------------------------------------------------------------------------
// Purpose: Looks up the bone cache for a handle while holding the manager mutex.
// Input  : cacheHandle - handle returned by Studio_CreateBoneCache()
//          bLock       - true: fetch through LockResource();
//                        false: fetch through GetResource_NoLock()
//-----------------------------------------------------------------------------
CBoneCache *Studio_GetBoneCache( memhandle_t cacheHandle, bool bLock )
{
	AUTO_LOCK( g_StudioBoneCache.AccessMutex() );

	if ( bLock )
	{
		return g_StudioBoneCache.LockResource( cacheHandle );
	}
	return g_StudioBoneCache.GetResource_NoLock( cacheHandle );
}
// Unlocks the resource behind cacheHandle, then asks the manager to flush
// down to its target size.
void Studio_ReleaseBoneCache( memhandle_t cacheHandle )
{
	g_StudioBoneCache.UnlockResource( cacheHandle );
	g_StudioBoneCache.FlushToTargetSize();
}
// Creates a bone cache from params through the manager, under the manager
// mutex, and returns the handle the manager assigns to it.
memhandle_t Studio_CreateBoneCache( bonecacheparams_t &params )
{
	AUTO_LOCK( g_StudioBoneCache.AccessMutex() );
	return g_StudioBoneCache.CreateResource( params );
}
// Destroys the bone cache behind cacheHandle while holding the manager mutex.
void Studio_DestroyBoneCache( memhandle_t cacheHandle )
{
	AUTO_LOCK( g_StudioBoneCache.AccessMutex() );
	g_StudioBoneCache.DestroyResource( cacheHandle );
}
//-----------------------------------------------------------------------------
// Purpose: If the cache exists and its timestamp differs from flTimeValid,
//          resets the timestamp to -1.0f. Does nothing for a missing cache or
//          a matching timestamp.
//-----------------------------------------------------------------------------
void Studio_InvalidateBoneCacheIfNotMatching( memhandle_t cacheHandle, float flTimeValid )
{
	AUTO_LOCK( g_StudioBoneCache.AccessMutex() );

	CBoneCache *pCache = g_StudioBoneCache.GetResource_NoLock( cacheHandle );
	if ( !pCache )
	{
		return;
	}
	if ( pCache->m_timeValid == flTimeValid )
	{
		return;
	}
	pCache->m_timeValid = -1.0f;
}
//-----------------------------------------------------------------------------
// Purpose:
//-----------------------------------------------------------------------------
void BuildBoneChain( const CStudioHdr *pStudioHdr, const matrix3x4a_t &rootxform, const BoneVector pos[], const BoneQuaternion q[], int iBone, matrix3x4a_t *pBoneToWorld ) { CBoneBitList boneComputed; BuildBoneChainPartial( pStudioHdr, rootxform, pos, q, iBone, pBoneToWorld, boneComputed, -1 ); return; }
//-----------------------------------------------------------------------------
// Purpose: build boneToWorld transforms for a specific bone
//-----------------------------------------------------------------------------
// Same as the overload without boneComputed, but the caller supplies the
// computed-bones list so bones already marked are not rebuilt across calls.
void BuildBoneChain(
	const CStudioHdr *pStudioHdr,
	const matrix3x4a_t &rootxform,
	const BoneVector pos[],
	const BoneQuaternion q[],
	int	iBone,
	matrix3x4a_t *pBoneToWorld,
	CBoneBitList &boneComputed )
{
	const int iNoPartialRoot = -1;
	BuildBoneChainPartial( pStudioHdr, rootxform, pos, q, iBone, pBoneToWorld, boneComputed, iNoPartialRoot );
}
void BuildBoneChainPartial( const CStudioHdr *pStudioHdr, const matrix3x4_t &rootxform, const BoneVector pos[], const BoneQuaternion q[], int iBone, matrix3x4_t *pBoneToWorld, CBoneBitList &boneComputed, int iRoot ) { if ( boneComputed.IsBoneMarked(iBone) ) return;
matrix3x4_t bonematrix; QuaternionMatrix( q[iBone], pos[iBone], bonematrix );
int parent = pStudioHdr->boneParent( iBone ); if (parent == -1 || iBone == iRoot) { ConcatTransforms( rootxform, bonematrix, pBoneToWorld[iBone] ); } else { // evil recursive!!!
BuildBoneChainPartial( pStudioHdr, rootxform, pos, q, parent, pBoneToWorld, boneComputed, iRoot ); ConcatTransforms( pBoneToWorld[parent], bonematrix, pBoneToWorld[iBone]); }
boneComputed.MarkBone(iBone); }
//-----------------------------------------------------------------------------
// Purpose: qt = ( s * p ) * q
//-----------------------------------------------------------------------------
void QuaternionSM( float s, const Quaternion &p, const Quaternion &q, Quaternion &qt ) { Quaternion p1, q1;
QuaternionScale( p, s, p1 ); QuaternionMult( p1, q, q1 ); QuaternionNormalize( q1 ); qt[0] = q1[0]; qt[1] = q1[1]; qt[2] = q1[2]; qt[3] = q1[3]; }
#if ALLOW_SIMD_QUATERNION_MATH
// SIMD form of QuaternionSM: scales p by s, multiplies the result by q and
// normalizes. s is passed as a full SIMD register.
FORCEINLINE fltx4 QuaternionSMSIMD( const fltx4 &s, const fltx4 &p, const fltx4 &q )
{
	fltx4 p1, q1, result;
	p1 = QuaternionScaleSIMD( p, s );
	q1 = QuaternionMultSIMD( p1, q );
	result = QuaternionNormalizeSIMD( q1 );
	return result;
}

// Convenience overload: replicates the scalar s across a SIMD register first.
FORCEINLINE fltx4 QuaternionSMSIMD( float s, const fltx4 &p, const fltx4 &q )
{
	return QuaternionSMSIMD( ReplicateX4(s), p, q );
}
#endif
//-----------------------------------------------------------------------------
// Purpose: qt = p * ( s * q )
//-----------------------------------------------------------------------------
void QuaternionMA( const Quaternion &p, float s, const Quaternion &q, Quaternion &qt ) { Quaternion p1, q1;
QuaternionScale( q, s, q1 ); QuaternionMult( p, q1, p1 ); QuaternionNormalize( p1 ); qt[0] = p1[0]; qt[1] = p1[1]; qt[2] = p1[2]; qt[3] = p1[3]; }
#if ALLOW_SIMD_QUATERNION_MATH
// SIMD form of QuaternionMA: scales q by s, multiplies p by the result and
// normalizes. s is passed as a full SIMD register.
FORCEINLINE fltx4 QuaternionMASIMD( const fltx4 &p, const fltx4 &s, const fltx4 &q )
{
	fltx4 p1, q1, result;
	q1 = QuaternionScaleSIMD( q, s );
	p1 = QuaternionMultSIMD( p, q1 );
	result = QuaternionNormalizeSIMD( p1 );
	return result;
}

// Convenience overload: replicates the scalar s across a SIMD register first.
FORCEINLINE fltx4 QuaternionMASIMD( const fltx4 &p, float s, const fltx4 &q )
{
	return QuaternionMASIMD( p, ReplicateX4(s), q );
}
#endif
//-----------------------------------------------------------------------------
// Purpose: qt = p + s * q
//-----------------------------------------------------------------------------
void QuaternionAccumulate( const Quaternion &p, float s, const Quaternion &q, Quaternion &qt ) { Quaternion q2; QuaternionAlign( p, q, q2 );
qt[0] = p[0] + s * q2[0]; qt[1] = p[1] + s * q2[1]; qt[2] = p[2] + s * q2[2]; qt[3] = p[3] + s * q2[3]; }
#if ALLOW_SIMD_QUATERNION_MATH
// SIMD form of QuaternionAccumulate: aligns q against p, then returns
// MaddSIMD( s, alignedQ, p ). The result is not normalized.
FORCEINLINE fltx4 QuaternionAccumulateSIMD( const fltx4 &p, float s, const fltx4 &q )
{
	fltx4 q2, s4, result;
	q2 = QuaternionAlignSIMD( p, q );
	s4 = ReplicateX4( s );
	result = MaddSIMD( s4, q2, p );
	return result;
}
#endif
//-----------------------------------------------------------------------------
// Purpose: blend together in world space q1,pos1 with q2,pos2. Return result in q1,pos1.
// 0 returns q1, pos1. 1 returns q2, pos2
//-----------------------------------------------------------------------------
void WorldSpaceSlerp( 
	const CStudioHdr *pStudioHdr,
	BoneQuaternion q1[MAXSTUDIOBONES], 
	BoneVector pos1[MAXSTUDIOBONES], 
	mstudioseqdesc_t &seqdesc, 
	int sequence, 
	const BoneQuaternion q2[MAXSTUDIOBONES], 
	const BoneVector pos2[MAXSTUDIOBONES], 
	float s,
	int boneMask )
{
	// Parameters:
	//   q1, pos1  - local-space pose that is blended in place (also the result)
	//   seqdesc   - supplies the per-bone weights for q2/pos2
	//   sequence  - used to look up the sequence group's bone map, if any
	//   q2, pos2  - local-space pose being blended in
	//   s         - overall blend amount, multiplied by each bone's weight
	//   boneMask  - bones whose flags do not intersect this mask are skipped
	BONE_PROFILE_FUNC();
	int			i, j;
	float		s1; // weight of parent for q2, pos2
	float		s2; // weight for q2, pos2

	// make fake root transform
	matrix3x4a_t rootXform;
	SetIdentityMatrix( rootXform );

	// matrices for q2, pos2
	matrix3x4a_t *srcBoneToWorld = g_MatrixPool.Alloc();
	CBoneBitList srcBoneComputed;

	// matrices for q1, pos1
	matrix3x4a_t *destBoneToWorld = g_MatrixPool.Alloc();
	CBoneBitList destBoneComputed;

	// matrices for the blended result
	matrix3x4a_t *targetBoneToWorld = g_MatrixPool.Alloc();

	virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
	const virtualgroup_t *pSeqGroup = NULL;
	if (pVModel)
	{
		pSeqGroup = pVModel->pSeqGroup( sequence );
	}

	const mstudiobone_t *pbone = pStudioHdr->pBone( 0 );

	for (i = 0; i < pStudioHdr->numbones(); i++)
	{
		// skip unused bones
		if (!(pStudioHdr->boneFlags(i) & boneMask))
		{
			continue;
		}

		int n = pbone[i].parent;
		s1 = 0.0;
		if (pSeqGroup)
		{
			j = pSeqGroup->boneMap[i];
			if (j >= 0)
			{
				s2 = s * seqdesc.weight( j );	// blend in based on this bones weight
				if (n != -1)
				{
					// The parent can be unmapped in this group just like the bone
					// itself; never index the weight list with a negative entry.
					const int nParentMapped = pSeqGroup->boneMap[n];
					if ( nParentMapped >= 0 )
					{
						s1 = s * seqdesc.weight( nParentMapped );
					}
				}
			}
			else
			{
				s2 = 0.0;
			}
		}
		else
		{
			s2 = s * seqdesc.weight( i );	// blend in based on this bones weight
			if (n != -1)
			{
				s1 = s * seqdesc.weight( n );
			}
		}

		if ( s2 > 0.0 || s1 > 0.0 )
		{
			Quaternion srcQ, destQ;
			Vector srcPos, destPos;
			Quaternion targetQ;
			Vector tmp;

			// World-space transforms of this bone in both poses.
			BuildBoneChain( pStudioHdr, rootXform, pos1, q1, i, destBoneToWorld, destBoneComputed );
			BuildBoneChain( pStudioHdr, rootXform, pos2, q2, i, srcBoneToWorld, srcBoneComputed );

			MatrixAngles( destBoneToWorld[i], destQ, destPos );
			MatrixAngles( srcBoneToWorld[i], srcQ, srcPos );

			// Blend the world-space rotation only; the world position stays at pos1's.
			QuaternionSlerp( destQ, srcQ, s2, targetQ );
			AngleMatrix( RadianEuler(targetQ), destPos, targetBoneToWorld[i] );

			// back solve
			if (n == -1)
			{
				MatrixAngles( targetBoneToWorld[i], q1[i], tmp );
			}
			else
			{
				// NOTE(review): this reads targetBoneToWorld[n], which is only
				// written when the parent itself passed the weight test above -
				// confirm that is always the case for the data this runs on.
				matrix3x4a_t worldToBone;
				MatrixInvert( targetBoneToWorld[n], worldToBone );

				matrix3x4a_t local;
				ConcatTransforms_Aligned( worldToBone, targetBoneToWorld[i], local );
				MatrixAngles( local, q1[i], tmp );

				// blend bone lengths (local space)
				//pos1[i] = Lerp( s2, pos1[i], pos2[i] );
				pos1[i] = pos1[i] + (pos2[i] - pos1[i]) * s2;
			}
		}
	}

	g_MatrixPool.Free( srcBoneToWorld );
	g_MatrixPool.Free( destBoneToWorld );
	g_MatrixPool.Free( targetBoneToWorld );
}
#define PARANOID_SIMD_DOUBLECHECK 0 // set this to one to perform both SIMD and scalar bones every frame,
// then compare the results.
#define PARANOID_SIMD_TIMING_TEST 0 // enable to allow running many iterations of SlerpBones per frame
// for timing purposes
#ifdef _X360
// SIMD bone setup is a perf win on 360
static ConVar cl_simdbones( "cl_simdbones", "1", FCVAR_REPLICATED, "Use SIMD bone setup." ); #else
// SIMD bone setup is a perf loss on the PC
static ConVar cl_simdbones( "cl_simdbones", "0", FCVAR_REPLICATED, "Use SIMD bone setup." ); #endif
void SlerpBonesSpeedy( const CStudioHdr *pStudioHdr, BoneQuaternionAligned q1[MAXSTUDIOBONES], BoneVector pos1[MAXSTUDIOBONES], mstudioseqdesc_t &seqdesc, // source of q2 and pos2
int sequence, const BoneQuaternionAligned q2[MAXSTUDIOBONES], const BoneVector pos2[MAXSTUDIOBONES], float s, int boneMask ); volatile int iForBreakpoint;
//-----------------------------------------------------------------------------
// Purpose: blend together q1,pos1 with q2,pos2. Return result in q1,pos1.
// 0 returns q1, pos1. 1 returns q2, pos2
//-----------------------------------------------------------------------------
#if PARANOID_SIMD_TIMING_TEST
static ConVar cl_bones_simd_timing_version( "cl_bones_simd_timing_version", "0", FCVAR_REPLICATED, "0 = scalar version, 1 = simd version." ); void SlerpBonesSlow( #else
void SlerpBones( #endif
const CStudioHdr *pStudioHdr, BoneQuaternion * RESTRICT q1, BoneVector * RESTRICT pos1, mstudioseqdesc_t &seqdesc, // source of q2 and pos2
int sequence, const BoneQuaternionAligned * RESTRICT q2, // [MAXSTUDIOBONES],
const BoneVector * RESTRICT pos2, // [MAXSTUDIOBONES],
float s, int boneMask ) { BONE_PROFILE_FUNC(); SNPROF_ANIM("SlerpBones"); #if PARANOID_SIMD_DOUBLECHECK
// copy off the input arrays so we can do them twice
static CThreadFastMutex m_mutex; AUTO_LOCK( m_mutex ); static BoneQuaternionAligned doublecheckQuat[MAXSTUDIOBONES]; static BoneQuaternionAligned doublecheckOriginalQuat[MAXSTUDIOBONES]; static BoneVector doublecheckPos[MAXSTUDIOBONES]; static BoneVector doublecheckOriginalPos[MAXSTUDIOBONES]; #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
BoneVector *originalPosPointer = pos1; BoneQuaternion *originalQuatPointer = q1; #endif
{ memcpy( doublecheckQuat, q1, MAXSTUDIOBONES * sizeof(BoneQuaternionAligned) ); memcpy( doublecheckOriginalQuat, q1, MAXSTUDIOBONES * sizeof(BoneQuaternionAligned) ); memcpy( doublecheckPos, pos1, MAXSTUDIOBONES * sizeof(BoneVector) ); memcpy( doublecheckOriginalPos, pos1, MAXSTUDIOBONES * sizeof(BoneVector) ); } #endif
// Test for 16-byte alignment, and if present, use the speedy SIMD version.
if ( (reinterpret_cast<uintp>(q1) & 0x0F) == 0 && (reinterpret_cast<uintp>(q2) & 0x0F) == 0 ) { // Msg("Aligned\n");
if ( cl_simdbones.GetBool() #if PARANOID_SIMD_TIMING_TEST
&& (cl_bones_simd_timing_version.GetInt() != 0) #endif
) {
#if ( PARANOID_SIMD_DOUBLECHECK == 1 ) // do simd into sep array, scalar into original, then compare
// if double checking, write to static arrays
// then do things the ordinary way
// then check up at the end.
SlerpBonesSpeedy(pStudioHdr, reinterpret_cast<BoneQuaternionAligned *>(doublecheckQuat), doublecheckPos, seqdesc, sequence, q2, pos2, s, boneMask ); #elif ( PARANOID_SIMD_DOUBLECHECK == 2 )
// if double checking, write to static arrays
// then do things the ordinary way
// then check up at the end.
SlerpBonesSpeedy(pStudioHdr, reinterpret_cast<BoneQuaternionAligned *>(q1), pos1, seqdesc, sequence, q2, pos2, s, boneMask ); pos1 = doublecheckPos; q1 = doublecheckQuat; #else
return SlerpBonesSpeedy(pStudioHdr, reinterpret_cast<BoneQuaternionAligned *>(q1), pos1, seqdesc, sequence, q2, pos2, s, boneMask ); #endif
} } else { // Msg("misaligned\n");
}
if (s <= 0.0f) return; if (s > 1.0f) { s = 1.0f; }
if ( (seqdesc.flags & STUDIO_WORLD) || (seqdesc.flags & STUDIO_WORLD_AND_RELATIVE) ) { WorldSpaceSlerp( pStudioHdr, q1, pos1, seqdesc, sequence, q2, pos2, s, boneMask ); if (seqdesc.flags & STUDIO_WORLD) return; }
int i, j; virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel(); const virtualgroup_t *pSeqGroup = NULL; if (pVModel) { pSeqGroup = pVModel->pSeqGroup( sequence ); }
// Build weightlist for all bones
int nBoneCount = pStudioHdr->numbones(); float *pS2 = (float*)stackalloc( nBoneCount * sizeof(float) ); for (i = 0; i < nBoneCount; i++) { // skip unused bones
if (!(pStudioHdr->boneFlags(i) & boneMask)) { pS2[i] = 0.0f; continue; }
if ( !pSeqGroup ) { pS2[i] = s * seqdesc.weight( i ); // blend in based on this bones weight
continue; }
j = pSeqGroup->boneMap[i]; if ( j >= 0 ) { pS2[i] = s * seqdesc.weight( j ); // blend in based on this bones weight
} else { pS2[i] = 0.0; } }
float s1, s2; if ( seqdesc.flags & STUDIO_DELTA ) { for ( i = 0; i < nBoneCount; i++ ) { s2 = pS2[i]; if ( s2 <= 0.0f ) continue;
if ( seqdesc.flags & STUDIO_POST ) { #ifndef _X360
QuaternionMA( q1[i], s2, q2[i], q1[i] ); #else
fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() ); fltx4 q2simd = LoadAlignedSIMD( q2[i] ); fltx4 result = QuaternionMASIMD( q1simd, s2, q2simd ); StoreUnalignedSIMD( q1[i].Base(), result ); #endif
} else { #ifndef _X360
QuaternionSM( s2, q2[i], q1[i], q1[i] ); #else
fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() ); fltx4 q2simd = LoadAlignedSIMD( q2[i] ); fltx4 result = QuaternionSMSIMD( s2, q2simd, q1simd ); StoreUnalignedSIMD( q1[i].Base(), result ); #endif
} // do this explicitly to make the scheduling better
// (otherwise it might think pos1 and pos2 overlap,
// and thus save one before starting the next)
float x,y,z; x = pos1[i][0] + pos2[i][0] * s2; y = pos1[i][1] + pos2[i][1] * s2; z = pos1[i][2] + pos2[i][2] * s2; pos1[i][0] = x; pos1[i][1] = y; pos1[i][2] = z; } return; }
BoneQuaternionAligned q3; for (i = 0; i < nBoneCount; i++) { s2 = pS2[i]; if ( s2 <= 0.0f ) continue;
s1 = 1.0 - s2;
#ifdef _X360
fltx4 q1simd, q2simd, result; q1simd = LoadUnalignedSIMD( q1[i].Base() ); q2simd = LoadAlignedSIMD( q2[i] ); #endif
if ( pStudioHdr->boneFlags(i) & BONE_FIXED_ALIGNMENT ) { #ifndef _X360
QuaternionSlerpNoAlign( q2[i], q1[i], s1, q3 ); #else
result = QuaternionSlerpNoAlignSIMD( q2simd, q1simd, s1 ); #endif
} else { #ifndef _X360
QuaternionSlerp( q2[i], q1[i], s1, q3 ); #else
result = QuaternionSlerpSIMD( q2simd, q1simd, s1 ); #endif
}
#ifndef _X360
q1[i][0] = q3[0]; q1[i][1] = q3[1]; q1[i][2] = q3[2]; q1[i][3] = q3[3]; #else
StoreUnalignedSIMD( q1[i].Base(), result ); #endif
pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * s2; pos1[i][1] = pos1[i][1] * s1 + pos2[i][1] * s2; pos1[i][2] = pos1[i][2] * s1 + pos2[i][2] * s2; }
#if PARANOID_SIMD_DOUBLECHECK
// check everything
if (cl_simdbones.GetBool()) { #if ( PARANOID_SIMD_DOUBLECHECK == 2)
pos1 = originalPosPointer ; q1 = originalQuatPointer ; #endif
for (i = 0 ; i < nBoneCount ; ++i) { static volatile int PARANOID_II = i; if ( pS2[i] <= 0.0f ) { // these aren't used, but test them to make sure they haven't been overwritten.
// it's important that the garbage there remain garbage, for some reason.
const unsigned int *ORIGINAL_Q1, *SCALAR_Q1, *SIMD_Q1, *Q2; #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
SCALAR_Q1 = reinterpret_cast<const unsigned int *>(doublecheckQuat[i].Base()); SIMD_Q1 = reinterpret_cast<const unsigned int *>(q1[i].Base()); #else
SIMD_Q1 = reinterpret_cast<const unsigned int *>(doublecheckQuat[i].Base()); SCALAR_Q1 = reinterpret_cast<const unsigned int *>(q1[i].Base()); #endif
ORIGINAL_Q1 = reinterpret_cast<const unsigned int *>(doublecheckOriginalQuat[i].Base()); Q2 = reinterpret_cast<const unsigned int *>(q2[i].Base()); if(!( SIMD_Q1[0] == SCALAR_Q1[0] && SIMD_Q1[1] == SCALAR_Q1[1] && SIMD_Q1[2] == SCALAR_Q1[2] && SIMD_Q1[3] == SCALAR_Q1[3] )) { AssertMsg(false,"Wrote invalid quats\n"); ++iForBreakpoint; }
const unsigned int *ORIGINAL_V1, *SCALAR_V1, *SIMD_V1, *V2; #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
SCALAR_V1 = reinterpret_cast<const unsigned int *>(doublecheckPos[i].Base()); SIMD_V1 = reinterpret_cast<const unsigned int *>(pos1[i].Base()); #else
SIMD_V1 = reinterpret_cast<const unsigned int *>(doublecheckPos[i].Base()); SCALAR_V1 = reinterpret_cast<const unsigned int *>(pos1[i].Base()); #endif
ORIGINAL_V1 = reinterpret_cast<const unsigned int *>(doublecheckOriginalPos[i].Base()); V2 = reinterpret_cast<const unsigned int *>(pos2[i].Base()); if(!( SIMD_V1[0] == SCALAR_V1[0] && SIMD_V1[1] == SCALAR_V1[1] && SIMD_V1[2] == SCALAR_V1[2] )) { AssertMsg(false,"Wrote invalid pos\n"); ++iForBreakpoint; }
} else { // test quaternions, unless they were slerped from opposite directions
if ( !(QuaternionDotProduct(doublecheckQuat[i], q1[i]) > 0.99f) && !(QuaternionDotProduct(doublecheckQuat[i], q1[i]) < -0.99f) ) { BoneQuaternionAligned ORIGINAL_Q1, SCALAR_Q1, SIMD_Q1, Q2; #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
SCALAR_Q1 = doublecheckQuat[i]; SIMD_Q1 = q1[i]; #else
SIMD_Q1 = doublecheckQuat[i]; SCALAR_Q1 = q1[i]; #endif
ORIGINAL_Q1 = doublecheckOriginalQuat[i]; Q2 = q2[i];
AssertMsg( false, "SIMD and scalar SlerpBones quats do not match up.\n" ); }
// test positions, unless they were slerped from opposite directions
BoneVector posDiff; posDiff = pos1[i] - doublecheckPos[i]; if ( !posDiff.IsZero() ) { BoneVector ORIGINAL_V1, SCALAR_V1, SIMD_V1, V2; #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
SCALAR_V1 = doublecheckPos[i]; SIMD_V1 = pos1[i]; #else
SIMD_V1 = doublecheckPos[i]; SCALAR_V1 = pos1[i]; #endif
ORIGINAL_V1 = doublecheckOriginalPos[i]; V2 = pos2[i];
AssertMsg( false, "SIMD and scalar SlerpBones pos do not match up.\n" ); } } } // compare the slack space in the array -- did we overwrite unused bones?
for ( i ; i < MAXSTUDIOBONES ; ++ i) { if ( memcmp(pos1+i, doublecheckOriginalPos+i, sizeof(BoneVector)) != 0) { AssertMsg(false, "slack positions overwritten\n"); ++iForBreakpoint; } if ( memcmp(q1+i, doublecheckOriginalQuat+i, sizeof(BoneVector)) != 0) { AssertMsg(false, "slack quats overwritten\n"); ++iForBreakpoint; } }
#if ( PARANOID_SIMD_DOUBLECHECK == 1 )
// dupe SIMD version back over, becaus ewe wrote it into this other array
memcpy(q1, doublecheckQuat, nBoneCount * sizeof(BoneQuaternionAligned) ); memcpy(pos1, doublecheckPos, nBoneCount * sizeof(BoneVector) ); #elif ( PARANOID_SIMD_DOUBLECHECK == 2 )
memcpy(pos1, doublecheckPos, nBoneCount * sizeof(BoneVector) ); #endif
} #endif
}
// Replicated console variable; help text: "1 use SIMD bones 0 use scalar bones."
ConVar cl_use_simd_bones( "cl_use_simd_bones", "1", FCVAR_REPLICATED, "1 use SIMD bones 0 use scalar bones." );

//-----------------------------------------------------------------------------
// Purpose: blend together q1,pos1 with q2,pos2. Return result in q1,pos1.
// Uses four-at-a-time SIMD.
//-----------------------------------------------------------------------------
void SlerpBonesSpeedy( const CStudioHdr * RESTRICT pStudioHdr, BoneQuaternionAligned q1[MAXSTUDIOBONES], BoneVector pos1[MAXSTUDIOBONES], mstudioseqdesc_t &seqdesc, // source of q2 and pos2
int sequence, const BoneQuaternionAligned q2[MAXSTUDIOBONES], const BoneVector pos2[MAXSTUDIOBONES], float s, int boneMask ) { BONE_PROFILE_FUNC(); // ex: x360: 1.2ms
// Assert 16-byte alignment of in and out arrays.
AssertMsg( ((reinterpret_cast<uintp>(q1) & 0x0F)==0) && ((reinterpret_cast<uintp>(q2) & 0x0F)==0) , "Input arrays to SlerpBones are not aligned! Catastrophe is inevitable.\n");
// Test for overlapping buffers
#if PARANOID_SIMD_DOUBLECHECK
{ int nBoneCount = pStudioHdr->numbones();
int qbot = reinterpret_cast<int>(q1); int qtop = reinterpret_cast<int>(q1 + nBoneCount);
int pbot = reinterpret_cast<int>(pos1); int ptop = reinterpret_cast<int>(pos1 + nBoneCount);
if ( ((pbot >= qbot) && (pbot <= qtop)) || ((ptop >= qbot) && (ptop <= qtop)) || ((qbot >= pbot) && (qbot <= ptop)) || ((qtop >= pbot) && (qtop <= ptop)) ) { DebuggerBreak(); } } #endif
if (s <= 0.0f) return; if (s > 1.0f) { s = 1.0f; }
if ( (seqdesc.flags & STUDIO_WORLD) || (seqdesc.flags & STUDIO_WORLD_AND_RELATIVE) ) { WorldSpaceSlerp( pStudioHdr, q1, pos1, seqdesc, sequence, q2, pos2, s, boneMask ); if (seqdesc.flags & STUDIO_WORLD) return; }
// haul the input arrays into cache if they're not there already
PREFETCH360(q1,0); PREFETCH360(pos1,0); PREFETCH360(q2,0); PREFETCH360(pos2,0);
int i; virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel(); const virtualgroup_t * RESTRICT pSeqGroup = NULL; if (pVModel) { pSeqGroup = pVModel->pSeqGroup( sequence ); }
// Build weightlist for all bones
int nBoneCount = pStudioHdr->numbones(); float * RESTRICT pS2 = (float*)stackalloc( nBoneCount * sizeof(float) ); // 16-byte aligned
if ( pSeqGroup ) // hoist this branch outside of the inner loop for speed (even correctly predicted branches are an eight cycle latency)
{ for (i = 0; i < nBoneCount; i++) { // skip unused bones
if (!(pStudioHdr->boneFlags(i) & boneMask) || pSeqGroup->boneMap[i] < 0 ) { pS2[i] = 0.0f; } else { // boneMap[i] is not a float, don't be lured by the siren call of fcmp
pS2[i] = s * seqdesc.weight( pSeqGroup->boneMap[i] ); } } } else // !pSeqGroup
{ for (i = 0; i < nBoneCount; i++) { // skip unused bones
if (!(pStudioHdr->boneFlags(i) & boneMask)) { pS2[i] = 0.0f; } else { pS2[i] = s * seqdesc.weight( i ); // blend in based on this bones weight
} } }
float weight; int nBoneCountRoundedFour = ( nBoneCount ) & (~(3)); if ( seqdesc.flags & STUDIO_DELTA ) { // do as many as we can four at a time, then take care of stragglers.
for ( i = 0; i < nBoneCountRoundedFour; i+=4 ) { // drag the next cache line in
PREFETCH360(q1,i*16 + 128); PREFETCH360(pos1,i*16 + 128); PREFETCH360(q2,i*16 + 128); PREFETCH360(pos2,i*16 + 128);
fltx4 weightfour = LoadAlignedSIMD(pS2+i); // four weights
FourQuaternions q1four, q2four; FourQuaternions result;
q1four.LoadAndSwizzleAligned(q1+i); // four quaternions
q2four.LoadAndSwizzleAligned(q2+i); // four quaternions
if ( seqdesc.flags & STUDIO_POST ) {
// result = q1 * ( weight * q2 )
result = q1four.MulAc(weightfour, q2four); } else {
// result = ( s * q1 ) * q2
result = q2four.ScaleMul(weightfour, q1four); }
// mask out unused channels, replacing them with original data
{ bi32x4 tinyScales = CmpLeSIMD( weightfour, Four_Zeros ); result.x = MaskedAssign(tinyScales, q1four.x, result.x); result.y = MaskedAssign(tinyScales, q1four.y, result.y); result.z = MaskedAssign(tinyScales, q1four.z, result.z); result.w = MaskedAssign(tinyScales, q1four.w, result.w); }
result.SwizzleAndStoreAlignedMasked(q1+i, CmpGtSIMD(weightfour,Four_Zeros) );
fltx4 originalpos1simd[4], pos1simd[4], pos2simd[4]; originalpos1simd[0] = pos1simd[0] = LoadUnalignedSIMD(pos1[i+0].Base()); originalpos1simd[1] = pos1simd[1] = LoadUnalignedSIMD(pos1[i+1].Base()); originalpos1simd[2] = pos1simd[2] = LoadUnalignedSIMD(pos1[i+2].Base()); originalpos1simd[3] = pos1simd[3] = LoadUnalignedSIMD(pos1[i+3].Base()); pos2simd[0] = LoadUnalignedSIMD(pos2[i+0].Base()); pos2simd[1] = LoadUnalignedSIMD(pos2[i+1].Base()); pos2simd[2] = LoadUnalignedSIMD(pos2[i+2].Base()); pos2simd[3] = LoadUnalignedSIMD(pos2[i+3].Base()); fltx4 splatweights[4] = { SplatXSIMD(weightfour), SplatYSIMD(weightfour), SplatZSIMD(weightfour), SplatWSIMD(weightfour) };
fltx4 Zero = Four_Zeros; pos1simd[0] = MaddSIMD(pos2simd[0], splatweights[0], pos1simd[0] ); splatweights[0] = ( fltx4 ) CmpGtSIMD(splatweights[0], Zero); pos1simd[1] = MaddSIMD(pos2simd[1], splatweights[1], pos1simd[1] ); splatweights[1] = ( fltx4 ) CmpGtSIMD(splatweights[1], Zero); pos1simd[2] = MaddSIMD(pos2simd[2], splatweights[2], pos1simd[2] ); splatweights[2] = ( fltx4 ) CmpGtSIMD(splatweights[2], Zero); pos1simd[3] = MaddSIMD(pos2simd[3], splatweights[3], pos1simd[3] ); splatweights[3] = ( fltx4 ) CmpGtSIMD(splatweights[3], Zero);
// mask out unweighted bones
/*
if (pS2[i+0] > 0) StoreUnaligned3SIMD( pos1[i + 0].Base(), pos1simd[0] ); if (pS2[i+1] > 0) StoreUnaligned3SIMD( pos1[i + 1].Base(), pos1simd[1] ); if (pS2[i+2] > 0) StoreUnaligned3SIMD( pos1[i + 2].Base(), pos1simd[2] ); if (pS2[i+3] > 0) StoreUnaligned3SIMD( pos1[i + 3].Base(), pos1simd[3] ); */ StoreUnaligned3SIMD( pos1[i + 0].Base(), MaskedAssign( ( bi32x4 ) splatweights[0], pos1simd[0], originalpos1simd[0] ) ); StoreUnaligned3SIMD( pos1[i + 1].Base(), MaskedAssign( ( bi32x4 ) splatweights[1], pos1simd[1], originalpos1simd[1] ) ); StoreUnaligned3SIMD( pos1[i + 2].Base(), MaskedAssign( ( bi32x4 ) splatweights[2], pos1simd[2], originalpos1simd[2] ) ); StoreUnaligned3SIMD( pos1[i + 3].Base(), MaskedAssign( ( bi32x4 ) splatweights[3], pos1simd[3], originalpos1simd[3] ) );
}
// take care of stragglers
for ( false ; i < nBoneCount; i++ ) { weight = pS2[i]; if ( weight <= 0.0f ) continue;
if ( seqdesc.flags & STUDIO_POST ) { #ifndef _X360
QuaternionMA( q1[i], weight, q2[i], q1[i] ); #else
fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() ); fltx4 q2simd = LoadAlignedSIMD( q2[i] ); fltx4 result = QuaternionMASIMD( q1simd, weight, q2simd ); StoreUnalignedSIMD( q1[i].Base(), result ); #endif
// FIXME: are these correct?
pos1[i][0] = pos1[i][0] + pos2[i][0] * weight; pos1[i][1] = pos1[i][1] + pos2[i][1] * weight; pos1[i][2] = pos1[i][2] + pos2[i][2] * weight; } else { #ifndef _X360
QuaternionSM( weight, q2[i], q1[i], q1[i] ); #else
fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() ); fltx4 q2simd = LoadAlignedSIMD( q2[i] ); fltx4 result = QuaternionSMSIMD( weight, q2simd, q1simd ); StoreUnalignedSIMD( q1[i].Base(), result ); #endif
// FIXME: are these correct?
pos1[i][0] = pos1[i][0] + pos2[i][0] * weight; pos1[i][1] = pos1[i][1] + pos2[i][1] * weight; pos1[i][2] = pos1[i][2] + pos2[i][2] * weight; } } return; }
//// SLERP PHASE
// Some bones need to be slerped with alignment.
// Others do not.
// Some need to be ignored altogether.
// Build arrays indicating which are which.
// This is the corral approach. Another approach
// would be to compute both the aligned and unaligned
// slerps of each bone in the first pass through the
// array, and then do a masked selection of each
// based on the masks. However there really isn't
// a convenient way to turn the int flags that
// specify which approach to take, into fltx4 masks.
// float * RESTRICT pS2 = (float*)stackalloc( nBoneCount * sizeof(float) );
int * RESTRICT aBonesSlerpAlign = (int *)stackalloc(nBoneCount * sizeof(int)); float * RESTRICT aBonesSlerpAlignWeights = (float *)stackalloc(nBoneCount * sizeof(float)); int * RESTRICT aBonesSlerpNoAlign = (int *)stackalloc(nBoneCount * sizeof(int)); float * RESTRICT aBonesSlerpNoAlignWeights = (float *)stackalloc(nBoneCount * sizeof(float)); int numBonesSlerpAlign = 0; int numBonesSlerpNoAlign = 0; // BoneQuaternionAligned * RESTRICT testOutput = (BoneQuaternionAligned *)stackalloc(nBoneCount * sizeof(BoneQuaternionAligned));
// sweep forward through the array and determine where to corral each bone.
for ( i = 0 ; i < nBoneCount ; ++i ) { float weight = pS2[i]; if (weight == 1.0f) { q1[i] = q2[i]; pos1[i] = pos2[i]; } else if (weight > 0.0f) // ignore small bones
{ if ( pStudioHdr->boneFlags(i) & BONE_FIXED_ALIGNMENT ) { aBonesSlerpNoAlign[numBonesSlerpNoAlign] = i; aBonesSlerpNoAlignWeights[numBonesSlerpNoAlign] = weight; ++numBonesSlerpNoAlign; } else { aBonesSlerpAlign[numBonesSlerpAlign] = i; aBonesSlerpAlignWeights[numBonesSlerpAlign] = weight; ++numBonesSlerpAlign; } } }
// okay, compute all the aligned, and all the unaligned bones, four at
// a time if possible.
const fltx4 One = Four_Ones; /////////////////
// // // Aligned!
nBoneCountRoundedFour = (numBonesSlerpAlign) & ~3; for (i = 0 ; i < nBoneCountRoundedFour ; i+=4 ) { // drag the next cache line in
PREFETCH360(q1, i*16 + 128); PREFETCH360(pos1, i*sizeof(*pos1) + 128); PREFETCH360(q2, i*16 + 128); PREFETCH360(pos2, i*sizeof(*pos2) + 128);
fltx4 weights = LoadAlignedSIMD( aBonesSlerpAlignWeights+i ); fltx4 oneMinusWeight = SubSIMD(One, weights);
// position component:
// pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * weight;
fltx4 pos1simd[4]; fltx4 pos2simd[4]; pos1simd[0] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+0]].Base()); pos1simd[1] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+1]].Base()); pos1simd[2] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+2]].Base()); pos1simd[3] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+3]].Base()); pos2simd[0] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+0]].Base()); pos2simd[1] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+1]].Base()); pos2simd[2] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+2]].Base()); pos2simd[3] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+3]].Base());
pos1simd[0] = MulSIMD( SplatXSIMD(oneMinusWeight) , pos1simd[0] ); pos1simd[1] = MulSIMD( SplatYSIMD(oneMinusWeight) , pos1simd[1] ); pos1simd[2] = MulSIMD( SplatZSIMD(oneMinusWeight) , pos1simd[2] ); pos1simd[3] = MulSIMD( SplatWSIMD(oneMinusWeight) , pos1simd[3] );
fltx4 posWriteMasks[4]; // don't overwrite where there was zero weight
{ fltx4 splatweights[4]; fltx4 Zero = Four_Zeros; splatweights[0] = SplatXSIMD(weights); splatweights[1] = SplatYSIMD(weights); splatweights[2] = SplatZSIMD(weights); splatweights[3] = SplatWSIMD(weights);
pos1simd[0] = MaddSIMD( splatweights[0] , pos2simd[0], pos1simd[0] ); posWriteMasks[0] = ( fltx4 ) CmpGtSIMD(splatweights[0], Zero); pos1simd[1] = MaddSIMD( splatweights[1] , pos2simd[1], pos1simd[1] ); posWriteMasks[1] = ( fltx4 ) CmpGtSIMD(splatweights[1], Zero); pos1simd[2] = MaddSIMD( splatweights[2] , pos2simd[2], pos1simd[2] ); posWriteMasks[2] = ( fltx4 ) CmpGtSIMD(splatweights[2], Zero); pos1simd[3] = MaddSIMD( splatweights[3] , pos2simd[3], pos1simd[3] ); posWriteMasks[3] = ( fltx4 ) CmpGtSIMD(splatweights[3], Zero); }
FourQuaternions q1four, q2four, result; q1four.LoadAndSwizzleAligned( q1 + aBonesSlerpAlign[i+0], q1 + aBonesSlerpAlign[i+1], q1 + aBonesSlerpAlign[i+2], q1 + aBonesSlerpAlign[i+3] );
#if 0
// FIXME: the SIMD slerp doesn't handle quaternions that have opposite signs
q2four.LoadAndSwizzleAligned( q2 + aBonesSlerpAlign[i+0], q2 + aBonesSlerpAlign[i+1], q2 + aBonesSlerpAlign[i+2], q2 + aBonesSlerpAlign[i+3] ); result = q2four.Slerp(q1four, oneMinusWeight); #else
// force the quaternions to be the same sign (< 180 degree separation)
BoneQuaternionAligned q20, q21, q22, q23; QuaternionAlign( q1[aBonesSlerpAlign[i+0]], q2[aBonesSlerpAlign[i+0]], q20 ); QuaternionAlign( q1[aBonesSlerpAlign[i+1]], q2[aBonesSlerpAlign[i+1]], q21 ); QuaternionAlign( q1[aBonesSlerpAlign[i+2]], q2[aBonesSlerpAlign[i+2]], q22 ); QuaternionAlign( q1[aBonesSlerpAlign[i+3]], q2[aBonesSlerpAlign[i+3]], q23 ); q2four.LoadAndSwizzleAligned( &q20, &q21, &q22, &q23 ); result = q2four.SlerpNoAlign(q1four, oneMinusWeight); #endif
result.SwizzleAndStoreAligned( q1 + aBonesSlerpAlign[i+0], q1 + aBonesSlerpAlign[i+1], q1 + aBonesSlerpAlign[i+2], q1 + aBonesSlerpAlign[i+3] );
StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+0]].Base(), pos1simd[0] ); StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+1]].Base(), pos1simd[1] ); StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+2]].Base(), pos1simd[2] ); StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+3]].Base(), pos1simd[3] ); }
// handle stragglers
for ( i ; i < numBonesSlerpAlign ; ++i ) { BoneQuaternionAligned q3; weight = aBonesSlerpAlignWeights[i]; int k = aBonesSlerpAlign[i];
float s1 = 1.0 - weight;
#ifdef _X360
fltx4 q1simd, q2simd, result; q1simd = LoadAlignedSIMD( q1[k].Base() ); q2simd = LoadAlignedSIMD( q2[k] ); #endif
#ifndef _X360
QuaternionSlerp( q2[k], q1[k], s1, q3 ); #else
result = QuaternionSlerpSIMD( q2simd, q1simd, s1 ); #endif
#ifndef _X360
q1[k][0] = q3[0]; q1[k][1] = q3[1]; q1[k][2] = q3[2]; q1[k][3] = q3[3]; #else
StoreAlignedSIMD( q1[k].Base(), result ); #endif
pos1[k][0] = pos1[k][0] * s1 + pos2[k][0] * weight; pos1[k][1] = pos1[k][1] * s1 + pos2[k][1] * weight; pos1[k][2] = pos1[k][2] * s1 + pos2[k][2] * weight; } ///////////////////
// // // Unaligned!
nBoneCountRoundedFour = (numBonesSlerpNoAlign) & ~3; for (i = 0 ; i < nBoneCountRoundedFour ; i+=4 ) { // drag the next cache line in
PREFETCH360(q1, i*16 + 128); PREFETCH360(pos1, i*sizeof(*pos1) + 128); PREFETCH360(q2, i*16 + 128); PREFETCH360(pos2, i*sizeof(*pos2) + 128);
fltx4 weights = LoadAlignedSIMD( aBonesSlerpNoAlignWeights+i ); fltx4 oneMinusWeight = SubSIMD(One, weights);
// position component:
// pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * weight;
fltx4 pos1simd[4]; fltx4 pos2simd[4]; pos1simd[0] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+0]].Base()); pos1simd[1] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+1]].Base()); pos1simd[2] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+2]].Base()); pos1simd[3] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+3]].Base()); pos2simd[0] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+0]].Base()); pos2simd[1] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+1]].Base()); pos2simd[2] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+2]].Base()); pos2simd[3] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+3]].Base());
pos1simd[0] = MulSIMD( SplatXSIMD(oneMinusWeight) , pos1simd[0] ); pos1simd[1] = MulSIMD( SplatYSIMD(oneMinusWeight) , pos1simd[1] ); pos1simd[2] = MulSIMD( SplatZSIMD(oneMinusWeight) , pos1simd[2] ); pos1simd[3] = MulSIMD( SplatWSIMD(oneMinusWeight) , pos1simd[3] );
pos1simd[0] = MaddSIMD( SplatXSIMD(weights) , pos2simd[0], pos1simd[0] ); pos1simd[1] = MaddSIMD( SplatYSIMD(weights) , pos2simd[1], pos1simd[1] ); pos1simd[2] = MaddSIMD( SplatZSIMD(weights) , pos2simd[2], pos1simd[2] ); pos1simd[3] = MaddSIMD( SplatWSIMD(weights) , pos2simd[3], pos1simd[3] );
FourQuaternions q1four, q2four, result; q1four.LoadAndSwizzleAligned( q1 + aBonesSlerpNoAlign[i+0], q1 + aBonesSlerpNoAlign[i+1], q1 + aBonesSlerpNoAlign[i+2], q1 + aBonesSlerpNoAlign[i+3] ); q2four.LoadAndSwizzleAligned( q2 + aBonesSlerpNoAlign[i+0], q2 + aBonesSlerpNoAlign[i+1], q2 + aBonesSlerpNoAlign[i+2], q2 + aBonesSlerpNoAlign[i+3] );
result = q2four.SlerpNoAlign(q1four, oneMinusWeight);
result.SwizzleAndStoreAligned( q1 + aBonesSlerpNoAlign[i+0], q1 + aBonesSlerpNoAlign[i+1], q1 + aBonesSlerpNoAlign[i+2], q1 + aBonesSlerpNoAlign[i+3] );
StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+0]].Base(), pos1simd[0]); StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+1]].Base(), pos1simd[1]); StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+2]].Base(), pos1simd[2]); StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+3]].Base(), pos1simd[3]); } // handle stragglers
for ( i ; i < numBonesSlerpNoAlign ; ++i ) { weight = aBonesSlerpNoAlignWeights[i]; int k = aBonesSlerpNoAlign[i];
float s1 = 1.0 - weight;
#ifdef _X360
fltx4 q1simd, q2simd, result; q1simd = LoadAlignedSIMD( q1[k].Base() ); q2simd = LoadAlignedSIMD( q2[k] ); #endif
#ifndef _X360
BoneQuaternionAligned q3; QuaternionSlerpNoAlign( q2[k], q1[k], s1, q3 ); #else
result = QuaternionSlerpNoAlignSIMD( q2simd, q1simd, s1 ); #endif
#ifndef _X360
q1[k][0] = q3[0]; q1[k][1] = q3[1]; q1[k][2] = q3[2]; q1[k][3] = q3[3]; #else
StoreAlignedSIMD( q1[k].Base(), result ); #endif
pos1[k][0] = pos1[k][0] * s1 + pos2[k][0] * weight; pos1[k][1] = pos1[k][1] * s1 + pos2[k][1] * weight; pos1[k][2] = pos1[k][2] * s1 + pos2[k][2] * weight; }
}
#if PARANOID_SIMD_TIMING_TEST
static ConVar cl_bones_simd_timing_iter( "cl_bones_simd_timing_iter", "100", FCVAR_REPLICATED, "number of times to run SlerpBones." );

//-----------------------------------------------------------------------------
// Purpose: Timing harness around the two slerp implementations. Only compiled
//			when PARANOID_SIMD_TIMING_TEST is non-zero.
//			The implementation picked by cl_bones_simd_timing_version (not
//			defined in this part of the file) is first run
//			cl_bones_simd_timing_iter times against scratch copies of q1/pos1,
//			and then once against the caller's real arrays, so only that last
//			run is visible to the caller.
//-----------------------------------------------------------------------------
void SlerpBones(
	const CStudioHdr *pStudioHdr,
	Quaternion q1[MAXSTUDIOBONES],
	BoneVector pos1[MAXSTUDIOBONES],
	mstudioseqdesc_t &seqdesc, // source of q2 and pos2
	int sequence,
	const BoneQuaternionAligned q2[MAXSTUDIOBONES],
	const BoneVector pos2[MAXSTUDIOBONES],
	float s,
	int boneMask )
{
	BONE_PROFILE_FUNC();

	const int nBoneCount = pStudioHdr->numbones();

	// throwaway copies of the in/out arrays; the timing passes only ever write here
	BoneQuaternionAligned scratchRotations[MAXSTUDIOBONES];
	BoneVector scratchPositions[MAXSTUDIOBONES];

	// 1 = simd 0 = scalar
	const bool bUseSimd = cl_bones_simd_timing_version.GetBool();

	// burn through the requested number of passes; their results are discarded
	const int nTimingPasses = cl_bones_simd_timing_iter.GetInt();
	for ( int nPass = 0; nPass < nTimingPasses; ++nPass )
	{
		// start every pass from the caller's untouched input
		memcpy( scratchRotations, q1, nBoneCount * sizeof(Quaternion) );
		memcpy( scratchPositions, pos1, nBoneCount * sizeof(BoneVector) );

		if ( !bUseSimd )
		{
			SlerpBonesSlow( pStudioHdr, scratchRotations, scratchPositions, seqdesc, sequence, q2, pos2, s, boneMask );
		}
		else
		{
			SlerpBonesSpeedy( pStudioHdr, scratchRotations, scratchPositions, seqdesc, sequence, q2, pos2, s, boneMask );
		}
	}

	// the one pass that counts: operate on the real arrays
	if ( !bUseSimd )
	{
		SlerpBonesSlow( pStudioHdr, q1, pos1, seqdesc, sequence, q2, pos2, s, boneMask );
	}
	else
	{
		SlerpBonesSpeedy( pStudioHdr, static_cast<BoneQuaternionAligned *>(q1), pos1, seqdesc, sequence, q2, pos2, s, boneMask );
	}
}
#endif
//-----------------------------------------------------------------------------
// Purpose: Compile-time log2 of a single-bit constant, e.g. to turn a flag
//			value into the shift that produces it:
//				GetLog2_t<0x00100000>::kLog2 == 20
//
//			Works for every positive power of two. For any other N (zero,
//			negative, or more than one bit set) the primary template is
//			selected, which deliberately has no kLog2 member, so misuse is a
//			compile error rather than a silently wrong shift.
//
//			The second template parameter is an implementation detail and
//			should be left at its default.
//-----------------------------------------------------------------------------
template <int N, bool bIsPowerOfTwo = ( ( N > 0 ) && ( ( N & ( N - 1 ) ) == 0 ) )>
struct GetLog2_t
{
};

// N is a power of two greater than one: one more than the log2 of half of it
template <int N>
struct GetLog2_t<N, true>
{
	enum { kLog2 = GetLog2_t< ( N >> 1 ) >::kLog2 + 1 };
};

// recursion terminator: log2(1) == 0
template <>
struct GetLog2_t<1, true>
{
	enum { kLog2 = 0 };
};
// Function-call wrapper around the Assert() macro.
// Because this is a real function taking a bool, the caller's argument
// expression is evaluated at the call site and then handed to Assert().
// What happens when the condition is false depends entirely on how Assert is
// defined for the current build; that definition is not visible in this file.
inline void AlwaysAssert(bool condition) { Assert(condition); }
//-----------------------------------------------------------------------------
// Purpose: Linear search of the half-open int range [pBegin, pEnd).
// Output : true if some element equals value; false otherwise, including when
//			the range is empty.
//-----------------------------------------------------------------------------
bool IsInList( int value, const int *pBegin, const int *pEnd )
{
	const int *pCursor = pBegin;
	while ( pCursor < pEnd )
	{
		if ( *pCursor == value )
		{
			return true;
		}
		++pCursor;
	}
	return false;
}
//CLinkedMiniProfiler g_lmp_BlendBones1("BlendBones1",&g_pPhysicsMiniProfilers);
//CLinkedMiniProfiler g_lmp_BlendBones2("BlendBones2",&g_pPhysicsMiniProfilers);
// Selects the BlendBones() implementation. BlendBones() reads this with
// GetInt(): in builds where DEDICATED is not defined, any non-zero value takes
// the SIMD path that first compacts the active bones into a list; a value of 0
// (or any build with DEDICATED defined) uses the per-bone scalar loop.
// Defaults to "2".
ConVar g_cv_BlendBonesMode("BlendBonesMode", "2", FCVAR_REPLICATED);
//---------------------------------------------------------------------
// Make sure quaternions are within 180 degrees of one another, if not, reverse q
// Returns q unchanged when Dot4SIMD(p,q) is not below zero, otherwise the
// negated q. p is only used for the test.
//---------------------------------------------------------------------
FORCEINLINE fltx4 BoneQuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
{
	// a negative dot product flags q as pointing the "other way" relative to p
	const fltx4 dotPQ = Dot4SIMD( p, q );
	const bi32x4 bNeedsFlip = CmpLtSIMD( dotPQ, Four_Zeros );

	// take -q where the test fired, q otherwise
	return MaskedAssign( bNeedsFlip, NegSIMD( q ), q );
}
// SSE + X360 implementation
// Scales q by ReciprocalSqrtSIMD( Dot4SIMD(q,q) ). When that dot product
// compares equal to zero the scaled value is discarded and q is returned
// as it came in.
FORCEINLINE fltx4 BoneQuaternionNormalizeSIMD( const fltx4 &q )
{
	const fltx4 lengthSqr = Dot4SIMD( q, q );

	// mask is set iff lengthSqr == 0
	const bi32x4 bZeroLength = CmpEqSIMD( lengthSqr, Four_Zeros );

	const fltx4 scaled = MulSIMD( ReciprocalSqrtSIMD( lengthSqr ), q );

	// zero-length input: hand back q itself instead of the scaled value
	return MaskedAssign( bZeroLength, q, scaled );
}
//-----------------------------------------------------------------------------
// Purpose: Inter-animation blend. Assumes both types are identical.
// blend together q1,pos1 with q2,pos2. Return result in q1,pos1.
// 0 returns q1, pos1. 1 returns q2, pos2
//
// A bone is only touched when all of the following hold:
//   - its boneFlags intersect boneMask
//   - its index mapped through the sequence group's boneMap is >= 0
//     (the bone's own index is used when the model has no virtual model)
//   - seqdesc's weight for the mapped index is > 0
//
// Inputs:
//   q1, pos1  - pose blended in place; receives the result
//   q2, pos2  - pose blended in; read only
//   s         - blend factor; expected to be strictly between 0 and 1
//               (both end cases hit Assert(0))
//   sequence  - used to look up the sequence group in the virtual model
// All four arrays must start on a 16-byte boundary (checked on entry).
//
// Two implementations live here, chosen by the BlendBonesMode convar:
//   - non-zero, and DEDICATED not defined: SIMD path. Builds a compacted list
//     of qualifying bones, then for each one sign-aligns q1 to q2, forms
//     s1*q1 + s2*q2, normalizes it, and lerps the positions with the same
//     weights.
//   - otherwise: scalar loop calling QuaternionBlend / QuaternionBlendNoAlign.
// NOTE(review): only the scalar loop checks BONE_FIXED_ALIGNMENT; the SIMD
// path sign-aligns every bone. See the NOTE further down - confirm this is
// still intended.
//-----------------------------------------------------------------------------
void BlendBones( const CStudioHdr *pStudioHdr, BoneQuaternionAligned q1[MAXSTUDIOBONES], BoneVector pos1[MAXSTUDIOBONES], mstudioseqdesc_t &seqdesc, int sequence, const BoneQuaternionAligned q2[MAXSTUDIOBONES], const BoneVector pos2[MAXSTUDIOBONES], float s, int boneMask ) { AlwaysAssert(0 == ((uintp(q1)|uintp(pos1)|uintp(q2)|uintp(pos2)) & 0xF)); BONE_PROFILE_FUNC(); // in: x360: up to 1.67 ms
int i, j; Quaternion q3;
// pSeqGroup stays NULL when the model has no virtual model; bone indices are then used unmapped
virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel(); const virtualgroup_t *pSeqGroup = NULL; if (pVModel) { pSeqGroup = pVModel->pSeqGroup( sequence ); }
// Degenerate blend factors. s <= 0 leaves q1/pos1 untouched; s >= 1 copies
// q2/pos2 over q1/pos1 for every qualifying bone. Both assert first.
if (s <= 0) { Assert(0); // shouldn't have been called
return; } else if (s >= 1.0) { //CMiniProfilerGuard mpguard(&g_lmp_BlendBones1, pStudioHdr->numbones());
Assert(0); // shouldn't have been called
for (i = 0; i < pStudioHdr->numbones(); i++) { // skip unused bones
if (!(pStudioHdr->boneFlags(i) & boneMask)) { continue; }
if (pSeqGroup) { j = pSeqGroup->boneMap[i]; } else { j = i; }
if (j >= 0 && seqdesc.weight( j ) > 0.0) { q1[i] = q2[i]; pos1[i] = pos2[i]; } }
return; }
// s2 weights the q2/pos2 pose, s1 = 1 - s2 weights the q1/pos1 pose
float s2 = s; float s1 = 1.0 - s2;
//CMiniProfilerGuard mpguard(&g_lmp_BlendBones2,pStudioHdr->numbones()); // 130-180 ticks without profilers; 167-190 ticks with all profilers on
int nMode = g_cv_BlendBonesMode.GetInt(); #ifndef DEDICATED
// ---- SIMD path (any non-zero mode) ----
if(nMode) { const int numBones = pStudioHdr->numbones(); const int *RESTRICT pBonePseudoWeight = (int*)seqdesc.pBoneweight(0); // we'll treat floats as ints to check for > 0.0
// Scratch holds 2*numBones ints: slots [0, numBones) collect the indices of
// bones that survive filtering, and slot k+numBones temporarily holds the
// sequence-group mapping of the bone in slot k.
// The filters below are branch-free: each candidate is written at the current
// end of the list, and the end pointer then advances by 1 to keep it or by 0
// so that the next write overwrites it.
int *RESTRICT pActiveBones = (int*)stackalloc(numBones * sizeof(int) * 2), *RESTRICT pActiveBonesEnd = pActiveBones; { BONE_PROFILE_LOOP(BlendBoneLoop2a,numBones); // 20 ticks straight; 12-14 ticks 4 at a time; 14-19 ticks 8 at a time (compiler generated code)
i = 0; #ifdef _X360 // on PC, this is slower
for(; i+3 < numBones; i+=4) { int isBoneActiveA = pStudioHdr->boneFlags(i ) & boneMask; int isBoneActiveB = pStudioHdr->boneFlags(i+1) & boneMask; int isBoneActiveC = pStudioHdr->boneFlags(i+2) & boneMask; int isBoneActiveD = pStudioHdr->boneFlags(i+3) & boneMask; isBoneActiveA = isBoneActiveA | -isBoneActiveA; // the high bit is now 1 iff the flags check
isBoneActiveB = isBoneActiveB | -isBoneActiveB; // the high bit is now 1 iff the flags check
isBoneActiveC = isBoneActiveC | -isBoneActiveC; // the high bit is now 1 iff the flags check
isBoneActiveD = isBoneActiveD | -isBoneActiveD; // the high bit is now 1 iff the flags check
isBoneActiveA = _rotl(isBoneActiveA,1) & 1; // now it's either 0 or 1
isBoneActiveB = _rotl(isBoneActiveB,1) & 1; // now it's either 0 or 1
isBoneActiveC = _rotl(isBoneActiveC,1) & 1; // now it's either 0 or 1
isBoneActiveD = _rotl(isBoneActiveD,1) & 1; // now it's either 0 or 1
*pActiveBonesEnd = i+0; pActiveBonesEnd += isBoneActiveA; *pActiveBonesEnd = i+1; pActiveBonesEnd += isBoneActiveB; *pActiveBonesEnd = i+2; pActiveBonesEnd += isBoneActiveC; *pActiveBonesEnd = i+3; pActiveBonesEnd += isBoneActiveD; } #endif
// one bone at a time: the whole pass when _X360 is not defined, otherwise the
// leftover bones after the unrolled loop
for(; i < numBones; ++i) { *pActiveBonesEnd = i; int isBoneActive = pStudioHdr->boneFlags(i) & boneMask; isBoneActive = isBoneActive | -isBoneActive; // the high bit is now 1 iff the flags check
isBoneActive = _rotl(isBoneActive,1) & 1; // now it's either 0 or 1
pActiveBonesEnd += isBoneActive; } }
// now we have a list of bones whose flags & mask != 0
// With a sequence group: first drop bones whose boneMap entry is negative,
// stashing each candidate's mapped index numBones slots past its list slot;
// then drop bones whose weight at that mapped index is not positive.
if(pSeqGroup) { int *pEnd = pActiveBones; { BONE_PROFILE_LOOP(BlendBoneLoop2b,pActiveBonesEnd - pActiveBones);//21-25 straight; 16-18 4 at a time;
int *RESTRICT pActiveBone = pActiveBones; #ifdef _X360 // on PC, this is slower
for(; pActiveBone + 3 < pActiveBonesEnd; pActiveBone += 4) { int nActiveBoneA = pActiveBone[0]; int nActiveBoneB = pActiveBone[1]; int nActiveBoneC = pActiveBone[2]; int nActiveBoneD = pActiveBone[3]; int nMappedBoneA = pSeqGroup->boneMap[nActiveBoneA]; int nMappedBoneB = pSeqGroup->boneMap[nActiveBoneB]; int nMappedBoneC = pSeqGroup->boneMap[nActiveBoneC]; int nMappedBoneD = pSeqGroup->boneMap[nActiveBoneD]; pEnd[numBones] = nMappedBoneA; *pEnd = nActiveBoneA; pEnd += _rotl(~nMappedBoneA,1) & 1; // if nMappedBone < 0, don't advance the end
pEnd[numBones] = nMappedBoneB; *pEnd = nActiveBoneB; pEnd += _rotl(~nMappedBoneB,1) & 1; // if nMappedBone < 0, don't advance the end
pEnd[numBones] = nMappedBoneC; *pEnd = nActiveBoneC; pEnd += _rotl(~nMappedBoneC,1) & 1; // if nMappedBone < 0, don't advance the end
pEnd[numBones] = nMappedBoneD; *pEnd = nActiveBoneD; pEnd += _rotl(~nMappedBoneD,1) & 1; // if nMappedBone < 0, don't advance the end
} #endif
// one bone at a time (everything when _X360 is not defined, otherwise the leftovers)
for(; pActiveBone < pActiveBonesEnd; ++pActiveBone) { int nActiveBone = *pActiveBone; int nMappedBone = pSeqGroup->boneMap[nActiveBone]; pEnd[numBones] = nMappedBone; *pEnd = nActiveBone; pEnd += _rotl(~nMappedBone,1) & 1; // if nMappedBone < 0, don't advance the end
} }
pActiveBonesEnd = pEnd; // the new end of the array of active bones, with negatively-mapped bones taken out
// now get rid of non-positively-weighted bones
pEnd = pActiveBones; { BONE_PROFILE_LOOP(BlendBoneLoop2c,pActiveBonesEnd - pActiveBones);//18-23 straight; 14-17 ticks 4 at a time
int *RESTRICT pActiveBone = pActiveBones; #ifdef _X360 // on PC, this is slower
int *RESTRICT pMappedBone = pActiveBones+numBones; for(; pActiveBone+3 < pActiveBonesEnd; pActiveBone += 4, pMappedBone += 4) { int nActiveBoneA = pActiveBone[0]; int nActiveBoneB = pActiveBone[1]; int nActiveBoneC = pActiveBone[2]; int nActiveBoneD = pActiveBone[3]; int nMappedBoneA = pMappedBone[0]; int nMappedBoneB = pMappedBone[1]; int nMappedBoneC = pMappedBone[2]; int nMappedBoneD = pMappedBone[3]; int pseudoWeightA = pBonePseudoWeight[nMappedBoneA]; int pseudoWeightB = pBonePseudoWeight[nMappedBoneB]; int pseudoWeightC = pBonePseudoWeight[nMappedBoneC]; int pseudoWeightD = pBonePseudoWeight[nMappedBoneD];
*pEnd = nActiveBoneA; pEnd += _rotl(-pseudoWeightA, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
*pEnd = nActiveBoneB; pEnd += _rotl(-pseudoWeightB, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
*pEnd = nActiveBoneC; pEnd += _rotl(-pseudoWeightC, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
*pEnd = nActiveBoneD; pEnd += _rotl(-pseudoWeightD, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
} #endif
// one bone at a time; the mapped index is read back from numBones slots past the bone's list slot
for(; pActiveBone < pActiveBonesEnd; ++pActiveBone) { int nActiveBone = *pActiveBone; int nMappedBone = pActiveBone[numBones]; int pseudoWeight = pBonePseudoWeight[nMappedBone];
*pEnd = nActiveBone; pEnd += _rotl(-pseudoWeight, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
} } pActiveBonesEnd = pEnd; } else { // one mapping stage off
// now get rid of non-positively-weighted bones
int *pEnd = pActiveBones; {BONE_PROFILE_LOOP(BlendBoneLoop2d,pActiveBonesEnd-pActiveBones);//20-50
for(int *RESTRICT pActiveBone = pActiveBones; pActiveBone < pActiveBonesEnd; ++pActiveBone) { int nActiveBone = *pActiveBone; int pseudoWeight = pBonePseudoWeight[nActiveBone];
*pEnd = nActiveBone; pEnd += _rotl(-pseudoWeight, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
}} pActiveBonesEnd = pEnd; }
// NOTE(review): nBoneFixedAlignmentShift is not referenced anywhere else in this function
enum { nBoneFixedAlignmentShift = GetLog2_t<BONE_FIXED_ALIGNMENT>::kLog2 };
// NOTE: When merging back to main, enable this code because Fixed-Alignment is not used in L4D, but may be used in main
// scale1 is the q1/pos1 weight (s1) in all four lanes; scale2 = 1 - scale1 is the q2/pos2 weight
fltx4 scale1 = ReplicateX4( s1 ); fltx4 scale2 = SubSIMD( Four_Ones, scale1 ); //fltx4 maskW = LoadAlignedSIMD( (const float *)(g_SIMD_ComponentMask[3]) );
// pass through all active bones to blend them; those that need it are already aligned
{ // 120-155 ticks 4 horizontal at a time; 130 ticks with 1 dot quaternion alignment
//
BONE_PROFILE_LOOP(BlendBoneLoop2g,pActiveBonesEnd-pActiveBones);
const int *RESTRICT p = pActiveBones, *RESTRICT pNext; #if 0//ndef _X360
// (disabled by the #if 0 above)
// swizzled (vertical) 4 at a time processing
for(; (pNext = p+4) < pActiveBonesEnd; p = pNext) { int nBoneA = p[0], nBoneB = p[1], nBoneC = p[2], nBoneD = p[3];
BoneQuaternionAligned *RESTRICT pq1A = &q1[nBoneA]; BoneQuaternionAligned *RESTRICT pq1B = &q1[nBoneB]; BoneQuaternionAligned *RESTRICT pq1C = &q1[nBoneC]; BoneQuaternionAligned *RESTRICT pq1D = &q1[nBoneD];
const BoneQuaternionAligned *RESTRICT pq2A = &q2[nBoneA]; const BoneQuaternionAligned *RESTRICT pq2B = &q2[nBoneB]; const BoneQuaternionAligned *RESTRICT pq2C = &q2[nBoneC]; const BoneQuaternionAligned *RESTRICT pq2D = &q2[nBoneD];
float *pp1A = pos1[nBoneA].Base(); float *pp1B = pos1[nBoneB].Base(); float *pp1C = pos1[nBoneC].Base(); float *pp1D = pos1[nBoneD].Base();
const float *pp2A = pos2[nBoneA].Base(); const float *pp2B = pos2[nBoneB].Base(); const float *pp2C = pos2[nBoneC].Base(); const float *pp2D = pos2[nBoneD].Base();
FourQuaternions four4q1, four4q2; four4q1.LoadAndSwizzleAligned(pq1A,pq1B,pq1C,pq1D); four4q2.LoadAndSwizzleAligned(pq2A,pq2B,pq2C,pq2D);
FourVectors four4Pos1, four4Pos2; four4Pos1.LoadAndSwizzleUnaligned(pp1A,pp1B,pp1C,pp1D); four4Pos2.LoadAndSwizzleUnaligned(pp2A,pp2B,pp2C,pp2D);
four4q1 = QuaternionAlign(four4q2, four4q1);
FourQuaternions four4Blended = QuaternionNormalize(Madd( four4q1, scale1, Mul( four4q2 , scale2 ))); // now blend the linear parts
FourVectors f4PosBlended = Madd(four4Pos1, scale1, Mul(four4Pos2, scale2)); f4PosBlended.TransposeOntoUnaligned3(*(fltx4*)pp1A, *(fltx4*)pp1B, *(fltx4*)pp1C, *(fltx4*)pp1D);
four4Blended.SwizzleAndStoreAligned(pq1A,pq1B,pq1C,pq1D); } #else
// horizontal 4 at a time processing
// The loop test is a strict '<', so when exactly four bones remain they are
// left to the one-at-a-time loop that follows.
for(; (pNext = p+4) < pActiveBonesEnd; p = pNext) { int nBoneA = p[0], nBoneB = p[1], nBoneC = p[2], nBoneD = p[3]; //PREFETCH_CACHE_LINE(&q1[nBoneD+2],0);
//PREFETCH_CACHE_LINE(&q2[nBoneD+2],0);
//PREFETCH_CACHE_LINE(&pos1[nBoneD+2],0);
//PREFETCH_CACHE_LINE(&pos2[nBoneD+2],0);
// per bone: load q1/q2 (aligned) and pos1/pos2 (3 floats, unaligned), flip q1
// if it opposes q2, blend scale1*q1 + scale2*q2, normalize
float *RESTRICT pq1A = q1[nBoneA].Base(), *pp1A = pos1[nBoneA].Base(); float *RESTRICT pq1B = q1[nBoneB].Base(), *pp1B = pos1[nBoneB].Base(); float *RESTRICT pq1C = q1[nBoneC].Base(), *pp1C = pos1[nBoneC].Base(); float *RESTRICT pq1D = q1[nBoneD].Base(), *pp1D = pos1[nBoneD].Base(); const float *RESTRICT pq2A = q2[nBoneA].Base(), *pp2A = pos2[nBoneA].Base(); const float *RESTRICT pq2B = q2[nBoneB].Base(), *pp2B = pos2[nBoneB].Base(); const float *RESTRICT pq2C = q2[nBoneC].Base(), *pp2C = pos2[nBoneC].Base(); const float *RESTRICT pq2D = q2[nBoneD].Base(), *pp2D = pos2[nBoneD].Base(); fltx4 f4q1A = LoadAlignedSIMD(pq1A), f4q2A = LoadAlignedSIMD(pq2A); fltx4 f4q1B = LoadAlignedSIMD(pq1B), f4q2B = LoadAlignedSIMD(pq2B); fltx4 f4q1C = LoadAlignedSIMD(pq1C), f4q2C = LoadAlignedSIMD(pq2C); fltx4 f4q1D = LoadAlignedSIMD(pq1D), f4q2D = LoadAlignedSIMD(pq2D); fltx4 f4Pos1A = LoadUnaligned3SIMD(pp1A), f4Pos2A = LoadUnaligned3SIMD(pp2A); fltx4 f4Pos1B = LoadUnaligned3SIMD(pp1B), f4Pos2B = LoadUnaligned3SIMD(pp2B); fltx4 f4Pos1C = LoadUnaligned3SIMD(pp1C), f4Pos2C = LoadUnaligned3SIMD(pp2C); fltx4 f4Pos1D = LoadUnaligned3SIMD(pp1D), f4Pos2D = LoadUnaligned3SIMD(pp2D); f4q1A = BoneQuaternionAlignSIMD(f4q2A, f4q1A); f4q1B = BoneQuaternionAlignSIMD(f4q2B, f4q1B); f4q1C = BoneQuaternionAlignSIMD(f4q2C, f4q1C); f4q1D = BoneQuaternionAlignSIMD(f4q2D, f4q1D); fltx4 f4BlendedA = MulSIMD( scale2, f4q2A ); fltx4 f4BlendedB = MulSIMD( scale2, f4q2B ); fltx4 f4BlendedC = MulSIMD( scale2, f4q2C ); fltx4 f4BlendedD = MulSIMD( scale2, f4q2D ); f4BlendedA = MaddSIMD( scale1, f4q1A, f4BlendedA ); f4BlendedB = MaddSIMD( scale1, f4q1B, f4BlendedB ); f4BlendedC = MaddSIMD( scale1, f4q1C, f4BlendedC ); f4BlendedD = MaddSIMD( scale1, f4q1D, f4BlendedD ); f4BlendedA = BoneQuaternionNormalizeSIMD(f4BlendedA); f4BlendedB = BoneQuaternionNormalizeSIMD(f4BlendedB); f4BlendedC = BoneQuaternionNormalizeSIMD(f4BlendedC); f4BlendedD = BoneQuaternionNormalizeSIMD(f4BlendedD); // now blend the linear parts
fltx4 f4PosBlendedA = MaddSIMD(scale1, f4Pos1A, MulSIMD(scale2,f4Pos2A)); fltx4 f4PosBlendedB = MaddSIMD(scale1, f4Pos1B, MulSIMD(scale2,f4Pos2B)); fltx4 f4PosBlendedC = MaddSIMD(scale1, f4Pos1C, MulSIMD(scale2,f4Pos2C)); fltx4 f4PosBlendedD = MaddSIMD(scale1, f4Pos1D, MulSIMD(scale2,f4Pos2D)); //f4PosBlended = MaskedAssign(maskW, f4Pos1, f4PosBlended);
StoreAlignedSIMD(pq1A,f4BlendedA); StoreUnaligned3SIMD(pp1A, f4PosBlendedA); StoreAlignedSIMD(pq1B,f4BlendedB); StoreUnaligned3SIMD(pp1B, f4PosBlendedB); StoreAlignedSIMD(pq1C,f4BlendedC); StoreUnaligned3SIMD(pp1C, f4PosBlendedC); StoreAlignedSIMD(pq1D,f4BlendedD); StoreUnaligned3SIMD(pp1D, f4PosBlendedD); } #endif
// remaining active bones, one at a time, same math as the 4-wide loop
for(; p < pActiveBonesEnd; ++p) { int nBone = *p; float *RESTRICT pq1 = q1[nBone].Base(), *RESTRICT pp1 = pos1[nBone].Base(); const float *RESTRICT pq2 = q2[nBone].Base(), *RESTRICT pp2 = pos2[nBone].Base(); fltx4 f4q1 = LoadAlignedSIMD(pq1), f4q2 = LoadAlignedSIMD(pq2); fltx4 f4Pos1 = LoadUnaligned3SIMD(pp1), f4Pos2 = LoadUnaligned3SIMD(pp2); f4q1 = BoneQuaternionAlignSIMD(f4q2, f4q1); fltx4 f4Blended = MulSIMD( scale2, f4q2 ); f4Blended = MaddSIMD( scale1, f4q1, f4Blended ); f4Blended = BoneQuaternionNormalizeSIMD(f4Blended); // now blend the linear parts
fltx4 f4PosBlended = MaddSIMD(scale1, f4Pos1, MulSIMD(scale2,f4Pos2)); //f4PosBlended = MaskedAssign(maskW, f4Pos1, f4PosBlended);
StoreAlignedSIMD(pq1,f4Blended); StoreUnaligned3SIMD(pp1, f4PosBlended); } } } else #endif // !DEDICATED
// ---- scalar path: mode 0, or any build with DEDICATED defined ----
// Same bone filter as above, applied per bone. Rotation goes through
// QuaternionBlendNoAlign for BONE_FIXED_ALIGNMENT bones and QuaternionBlend
// for the rest; position is pos1*s1 + pos2*s2.
{ // 360-400 ticks per loop pass
// there are usually 40-100 bones on average in a frame
for (i = 0; i < pStudioHdr->numbones(); i++) { // skip unused bones
if (!(pStudioHdr->boneFlags(i) & boneMask)) { continue; }
if (pSeqGroup) { j = pSeqGroup->boneMap[i]; } else { j = i; }
if (j >= 0 && seqdesc.weight( j ) > 0.0) { if (pStudioHdr->boneFlags(i) & BONE_FIXED_ALIGNMENT) { QuaternionBlendNoAlign( q2[i], q1[i], s1, q3 ); } else { QuaternionBlend( q2[i], q1[i], s1, q3 ); } q1[i][0] = q3[0]; q1[i][1] = q3[1]; q1[i][2] = q3[2]; q1[i][3] = q3[3]; pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * s2; pos1[i][1] = pos1[i][1] * s1 + pos2[i][1] * s2; pos1[i][2] = pos1[i][2] * s1 + pos2[i][2] * s2; } } } }
//-----------------------------------------------------------------------------
// Purpose: Scale a set of bones.  Must be of type delta
//
//			For every bone whose flags intersect boneMask, whose index mapped
//			through the sequence group (if the model has a virtual model) is
//			non-negative, and whose sequence weight is > 0:
//			  - the rotation is passed through
//			    QuaternionIdentityBlend( q, 1 - s, q )
//			  - the position is multiplied by s
//			All other bones are left untouched.
// Input  : q1, pos1 - delta pose, modified in place
//			sequence - selects the seqdesc (weights) and the sequence group
//			s - scale factor
//-----------------------------------------------------------------------------
void ScaleBones(
	const CStudioHdr *pStudioHdr,
	BoneQuaternion q1[MAXSTUDIOBONES],
	BoneVector pos1[MAXSTUDIOBONES],
	int sequence,
	float s,
	int boneMask )
{
	BONE_PROFILE_FUNC();
	int i, j;

	mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( sequence );

	// without a virtual model there is no remapping; bone indices are used directly
	virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
	const virtualgroup_t *pSeqGroup = NULL;
	if (pVModel)
	{
		pSeqGroup = pVModel->pSeqGroup( sequence );
	}

	float s2 = s;
	float s1 = 1.0 - s2;

	for (i = 0; i < pStudioHdr->numbones(); i++)
	{
		// skip unused bones
		if (!(pStudioHdr->boneFlags(i) & boneMask))
		{
			continue;
		}

		// j is the index used to look up this bone's weight in the sequence
		if (pSeqGroup)
		{
			j = pSeqGroup->boneMap[i];
		}
		else
		{
			j = i;
		}

		if (j >= 0 && seqdesc.weight( j ) > 0.0)
		{
			QuaternionIdentityBlend( q1[i], s1, q1[i] );
			VectorScale( pos1[i], s2, pos1[i] );
		}
	}
}
//-----------------------------------------------------------------------------
// Purpose: resolve a global pose parameter to the specific setting for this sequence
//-----------------------------------------------------------------------------
int Studio_LocalPoseParameter( const CStudioHdr *pStudioHdr, const float poseParameter[], mstudioseqdesc_t &seqdesc, int iSequence, int iLocalIndex, float &flSetting ) { BONE_PROFILE_FUNC(); int iPose = pStudioHdr->GetSharedPoseParameter( iSequence, seqdesc.paramindex[iLocalIndex] );
if (iPose == -1) { flSetting = 0; return 0; }
const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iPose );
float flValue = poseParameter[iPose];
if (Pose.loop) { float wrap = (Pose.start + Pose.end) / 2.0 + Pose.loop / 2.0; float shift = Pose.loop - wrap;
flValue = flValue - Pose.loop * floor((flValue + shift) / Pose.loop); }
int nIndex = 0; if (seqdesc.posekeyindex == 0) { float flLocalStart = ((float)seqdesc.paramstart[iLocalIndex] - Pose.start) / (Pose.end - Pose.start); float flLocalEnd = ((float)seqdesc.paramend[iLocalIndex] - Pose.start) / (Pose.end - Pose.start);
// convert into local range
flSetting = (flValue - flLocalStart) / (flLocalEnd - flLocalStart);
// clamp. This shouldn't ever need to happen if it's looping.
if (flSetting < 0) flSetting = 0; if (flSetting > 1) flSetting = 1;
nIndex = 0; if (seqdesc.groupsize[iLocalIndex] > 2 ) { // estimate index
nIndex = (int)(flSetting * (seqdesc.groupsize[iLocalIndex] - 1)); if (nIndex == seqdesc.groupsize[iLocalIndex] - 1) { nIndex = seqdesc.groupsize[iLocalIndex] - 2; } flSetting = flSetting * (seqdesc.groupsize[iLocalIndex] - 1) - nIndex; } } else { flValue = flValue * (Pose.end - Pose.start) + Pose.start; nIndex = 0; // FIXME: this needs to be 2D
// FIXME: this shouldn't be a linear search
while (1) { flSetting = (flValue - seqdesc.poseKey( iLocalIndex, nIndex )) / (seqdesc.poseKey( iLocalIndex, nIndex + 1 ) - seqdesc.poseKey( iLocalIndex, nIndex )); /*
if (index > 0 && flSetting < 0.0) { index--; continue; } else */ if (nIndex < seqdesc.groupsize[iLocalIndex] - 2 && flSetting > 1.0) { nIndex++; continue; } break; }
// clamp.
if (flSetting < 0.0f) flSetting = 0.0f; if (flSetting > 1.0f) flSetting = 1.0f; } return nIndex; }
void Studio_CalcBoneToBoneTransform( const CStudioHdr *pStudioHdr, int inputBoneIndex, int outputBoneIndex, matrix3x4_t& matrixOut ) { const mstudiobone_t *pbone = pStudioHdr->pBone( inputBoneIndex );
matrix3x4a_t inputToPose; MatrixInvert( pbone->poseToBone, inputToPose ); ConcatTransforms( pStudioHdr->pBone( outputBoneIndex )->poseToBone, inputToPose, matrixOut ); }
//-----------------------------------------------------------------------------
// Purpose: Lookup a bone controller
// Output : the first controller whose inputfield equals iController, or NULL
//			when the model has none
//-----------------------------------------------------------------------------
static mstudiobonecontroller_t* FindController( const CStudioHdr *pStudioHdr, int iController)
{
	for ( int nSlot = 0; nSlot < pStudioHdr->numbonecontrollers(); ++nSlot )
	{
		mstudiobonecontroller_t *pCandidate = pStudioHdr->pBonecontroller( nSlot );
		if ( pCandidate->inputfield != iController )
		{
			continue;
		}

		// earliest match wins
		return pCandidate;
	}

	return NULL;
}
//-----------------------------------------------------------------------------
// Purpose: converts a ranged bone controller value into a 0..1 encoded value
// Output:	ctlValue contains 0..1 encoding.
//			returns clamped ranged value
//
//			ctlValue is always written. When pStudioHdr is NULL or the model
//			has no controller with this index, ctlValue is set to 0 and
//			flValue is returned unchanged.
//-----------------------------------------------------------------------------
float Studio_SetController( const CStudioHdr *pStudioHdr, int iController, float flValue, float &ctlValue )
{
	BONE_PROFILE_FUNC();
	if (! pStudioHdr)
	{
		// Give the out-parameter a defined value on this path too, matching the
		// "controller not found" case below; callers store ctlValue
		// unconditionally.
		ctlValue = 0;
		return flValue;
	}

	mstudiobonecontroller_t *pbonecontroller = FindController(pStudioHdr, iController);
	if(!pbonecontroller)
	{
		ctlValue = 0;
		return flValue;
	}

	// wrap 0..360 if it's a rotational controller
	if (pbonecontroller->type & (STUDIO_XR | STUDIO_YR | STUDIO_ZR))
	{
		// ugly hack, invert value if end < start
		if (pbonecontroller->end < pbonecontroller->start)
			flValue = -flValue;

		// does the controller not wrap?
		if (pbonecontroller->start + 359.0 >= pbonecontroller->end)
		{
			// bring the value to within 180 degrees of the middle of the range
			if (flValue > ((pbonecontroller->start + pbonecontroller->end) / 2.0) + 180)
				flValue = flValue - 360;
			if (flValue < ((pbonecontroller->start + pbonecontroller->end) / 2.0) - 180)
				flValue = flValue + 360;
		}
		else
		{
			// strip whole turns so the value lands back in 0..360
			if (flValue > 360)
				flValue = flValue - (int)(flValue / 360.0) * 360.0;
			else if (flValue < 0)
				flValue = flValue + (int)((flValue / -360.0) + 1) * 360.0;
		}
	}

	// encode as a fraction of the controller's range, clamped to 0..1
	ctlValue = (flValue - pbonecontroller->start) / (pbonecontroller->end - pbonecontroller->start);
	if (ctlValue < 0) ctlValue = 0;
	if (ctlValue > 1) ctlValue = 1;

	// decode the clamped fraction back into the controller's range for the caller
	float flReturnVal = ((1.0 - ctlValue)*pbonecontroller->start + ctlValue *pbonecontroller->end);

	// ugly hack, invert value if a rotational controller and end < start
	if (pbonecontroller->type & (STUDIO_XR | STUDIO_YR | STUDIO_ZR) &&
		pbonecontroller->end < pbonecontroller->start )
	{
		flReturnVal *= -1;
	}
	return flReturnVal;
}
//-----------------------------------------------------------------------------
// Purpose: converts a 0..1 encoded bone controller value into a ranged value
// Output: returns ranged value
//-----------------------------------------------------------------------------
float Studio_GetController( const CStudioHdr *pStudioHdr, int iController, float ctlValue ) { if (!pStudioHdr) return 0.0;
mstudiobonecontroller_t *pbonecontroller = FindController(pStudioHdr, iController); if(!pbonecontroller) return 0;
return ctlValue * (pbonecontroller->end - pbonecontroller->start) + pbonecontroller->start; }
//-----------------------------------------------------------------------------
// Purpose: Calculates default values for the pose parameters
// Output: fills in an array
//-----------------------------------------------------------------------------
void Studio_CalcDefaultPoseParameters( const CStudioHdr *pStudioHdr, float flPoseParameter[], int nCount ) { int nPoseCount = pStudioHdr->GetNumPoseParameters(); int nNumParams = MIN( nCount, MAXSTUDIOPOSEPARAM );
for ( int i = 0; i < nNumParams; ++i ) { // Default to middle of the pose parameter range
flPoseParameter[ i ] = 0.5f; if ( i < nPoseCount ) { const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( i );
// Want to try for a zero state. If one doesn't exist set it to .5 by default.
if ( Pose.start < 0.0f && Pose.end > 0.0f ) { float flPoseDelta = Pose.end - Pose.start; flPoseParameter[i] = -Pose.start / flPoseDelta; } } } }
//-----------------------------------------------------------------------------
// Purpose: converts a ranged pose parameter value into a 0..1 encoded value
// Output: ctlValue contains 0..1 encoding.
// returns clamped ranged value
//-----------------------------------------------------------------------------
float Studio_SetPoseParameter( const CStudioHdr *pStudioHdr, int iParameter, float flValue, float &ctlValue ) { if (iParameter < 0 || iParameter >= pStudioHdr->GetNumPoseParameters()) { ctlValue = 0; return 0; }
const mstudioposeparamdesc_t &PoseParam = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iParameter );
Assert( IsFinite( flValue ) );
if (PoseParam.loop) { float wrap = (PoseParam.start + PoseParam.end) / 2.0 + PoseParam.loop / 2.0; float shift = PoseParam.loop - wrap;
flValue = flValue - PoseParam.loop * floor((flValue + shift) / PoseParam.loop); }
ctlValue = (flValue - PoseParam.start) / (PoseParam.end - PoseParam.start);
if (ctlValue < 0) ctlValue = 0; if (ctlValue > 1) ctlValue = 1;
Assert( IsFinite( ctlValue ) );
return ctlValue * (PoseParam.end - PoseParam.start) + PoseParam.start; }
//-----------------------------------------------------------------------------
// Purpose: converts a 0..1 encoded pose parameter value into a ranged value
// Output: returns ranged value
//-----------------------------------------------------------------------------
float Studio_GetPoseParameter( const CStudioHdr *pStudioHdr, int iParameter, float ctlValue ) { if (iParameter < 0 || iParameter >= pStudioHdr->GetNumPoseParameters()) { return 0; }
const mstudioposeparamdesc_t &PoseParam = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iParameter );
return ctlValue * (PoseParam.end - PoseParam.start) + PoseParam.start; }
#pragma warning (disable : 4701)
static int ClipRayToCapsule( const Ray_t &ray, mstudiobbox_t *pbox, matrix3x4_t& matrix, trace_t &tr ) { BONE_PROFILE_FUNC();
Vector vecCapsuleCenters[ 2 ]; VectorTransform( pbox->bbmin, matrix, vecCapsuleCenters[0] ); VectorTransform( pbox->bbmax, matrix, vecCapsuleCenters[1] );
CShapeCastResult cast; Assert( tr.fraction >= 0 && tr.fraction <= 1.0f ); CastCapsuleRay( cast, ray.m_Start /*+start offset?*/, ray.m_Delta * tr.fraction, vecCapsuleCenters, pbox->flCapsuleRadius ); if ( cast.DidHit() ) { tr.fraction *= cast.m_flHitTime; if ( cast.m_bStartInSolid ) { tr.startsolid = true; // tr.allsolid - not computed yet
}
// tr.contents, dispFlags - not computed yet
tr.endpos = cast.m_vHitPoint; tr.plane.normal = cast.m_vHitNormal;
//extern IVDebugOverlay *debugoverlay;
//debugoverlay->AddCapsuleOverlay( vecCapsuleCenters[ 0 ], vecCapsuleCenters[ 1 ], pbox->flCapsuleRadius, 0, 255, 0, 255, 10 );
//debugoverlay->AddLineOverlay( ray.m_Start /*+offset?*/, cast.m_vHitPoint, 0, 0, 255, 200, 0.25f, 10 );
//debugoverlay->AddLineOverlay( cast.m_vHitPoint, cast.m_vHitPoint + 4 * cast.m_vHitNormal, 0, 255, 0, 200, 0.25f, 10 );
// plane.dist and others are not computed yet
return 0; // hitside is not computed (yet?)
} return -1; }
//-----------------------------------------------------------------------------
// Purpose: Clips a ray, already shortened to tr.fraction, against one hitbox.
//			Hitboxes with flCapsuleRadius > 0 are forwarded to ClipRayToCapsule.
//			Boxes are first rejected with separating-axis tests (3 box axes,
//			then 3 cross axes) and only then intersected exactly in bone space.
// Output:	-1 on a miss. On a box hit, tr.fraction is multiplied by the hit
//			fraction, tr.startsolid is copied from the box trace, and the
//			return value is the hit plane's axis index (plane.type), plus 3
//			when the plane normal's component on that axis is >= 0.
//-----------------------------------------------------------------------------
static int ClipRayToHitbox( const Ray_t &ray, mstudiobbox_t *pbox, matrix3x4_t& matrix, trace_t &tr ) { const float flProjEpsilon = 0.01f; BONE_PROFILE_FUNC();
// Capsule-shaped hitboxes take a separate path.
if ( pbox->flCapsuleRadius > 0 ) { return ClipRayToCapsule( ray, pbox, matrix, tr ); }
// scale by current t so hits shorten the ray and increase the likelihood of early outs
// delta2 is HALF of the shortened ray (note the 0.5f factor).
Vector delta2; VectorScale( ray.m_Delta, (0.5f * tr.fraction), delta2 );
// OPTIMIZE: Store this in the box instead of computing it here
// compute center in local space
// (boxextents temporarily holds the local-space center here)
Vector boxextents; boxextents.x = (pbox->bbmin.x + pbox->bbmax.x) * 0.5; boxextents.y = (pbox->bbmin.y + pbox->bbmax.y) * 0.5; boxextents.z = (pbox->bbmin.z + pbox->bbmax.z) * 0.5; // transform to world space
Vector boxCenter; VectorTransform( boxextents, matrix, boxCenter );
// calc extents from local center
boxextents.x = pbox->bbmax.x - boxextents.x; boxextents.y = pbox->bbmax.y - boxextents.y; boxextents.z = pbox->bbmax.z - boxextents.z; // OPTIMIZE: This is optimized for world space. If the transform is fast enough, it may make more
// sense to just xform and call UTIL_ClipToBox() instead. MEASURE THIS.
// save the extents of the ray along
// segmentCenter is the midpoint of the shortened ray, relative to the box center.
Vector extent, uextent; Vector segmentCenter; segmentCenter.x = ray.m_Start.x + delta2.x - boxCenter.x; segmentCenter.y = ray.m_Start.y + delta2.y - boxCenter.y; segmentCenter.z = ray.m_Start.z + delta2.z - boxCenter.z;
extent.Init();
// check box axes for separation
// Column j of 'matrix' is used as box axis j: project the half-ray (extent)
// and the segment center onto it and compare against the box half-size.
for ( int j = 0; j < 3; j++ ) { extent[j] = delta2.x * matrix[0][j] + delta2.y * matrix[1][j] + delta2.z * matrix[2][j]; uextent[j] = fabsf(extent[j]); float coord = segmentCenter.x * matrix[0][j] + segmentCenter.y * matrix[1][j] + segmentCenter.z * matrix[2][j]; coord = fabsf(coord);
if ( coord > (boxextents[j] + uextent[j]) ) return -1; }
// now check cross axes for separation
// Each threshold is floored at flProjEpsilon before the comparison.
float tmp, cextent; Vector cross; CrossProduct( delta2, segmentCenter, cross ); cextent = cross.x * matrix[0][0] + cross.y * matrix[1][0] + cross.z * matrix[2][0]; cextent = fabsf(cextent); tmp = boxextents[1]*uextent[2] + boxextents[2]*uextent[1]; tmp = MAX(tmp, flProjEpsilon); if ( cextent > tmp ) return -1;
cextent = cross.x * matrix[0][1] + cross.y * matrix[1][1] + cross.z * matrix[2][1]; cextent = fabsf(cextent); tmp = boxextents[0]*uextent[2] + boxextents[2]*uextent[0]; tmp = MAX(tmp, flProjEpsilon); if ( cextent > tmp ) return -1;
cextent = cross.x * matrix[0][2] + cross.y * matrix[1][2] + cross.z * matrix[2][2]; cextent = fabsf(cextent); tmp = boxextents[0]*uextent[1] + boxextents[1]*uextent[0]; tmp = MAX(tmp, flProjEpsilon); if ( cextent > tmp ) return -1;
// No separating axis found: run the exact ray/box intersection in bone space.
Vector start;
// Compute ray start in bone space
VectorITransform( ray.m_Start, matrix, start ); // extent is delta2 in bone space, recompute delta in bone space
VectorScale( extent, 2, extent );
// delta was prescaled by the current t, so no need to see if this intersection
// is closer
trace_t boxTrace; if ( !IntersectRayWithBox( start, extent, pbox->bbmin, pbox->bbmax, 0.0f, &boxTrace ) ) return -1;
// boxTrace.fraction is relative to the shortened ray, hence the multiply.
// Hit side encoding: axis index, +3 when the normal component on that axis is >= 0.
Assert( IsFinite(boxTrace.fraction) ); tr.fraction *= boxTrace.fraction; tr.startsolid = boxTrace.startsolid; int hitside = boxTrace.plane.type; if ( boxTrace.plane.normal[hitside] >= 0 ) { hitside += 3; } return hitside; }
#pragma warning (default : 4701)
//-----------------------------------------------------------------------------
// Purpose: Sweeps 'ray' (used by the trace functions for non-ray queries)
//			against the oriented boxes of every hitbox in the set whose bone
//			contents match fContentsMask.
// Output:	true and a filled-in trace when some hitbox was hit, else false.
//			pProps is not used here.
//-----------------------------------------------------------------------------
bool SweepBoxToStudio( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set, matrix3x4_t **hitboxbones, int fContentsMask, trace_t &tr )
{
	BONE_PROFILE_FUNC();

	tr.fraction = 1.0;
	tr.startsolid = false;

	// OPTIMIZE: Partition these?
	Ray_t shortenedRay = ray;
	int iHitIndex = -1;

	for ( int iBox = 0; iBox < set->numhitboxes; iBox++ )
	{
		mstudiobbox_t *pCandidate = set->pHitbox(iBox);

		// Filter based on contents mask
		const int fBoneContents = pStudioHdr->pBone( pCandidate->bone )->contents;
		if ( !( fBoneContents & fContentsMask ) )
			continue;

		//FIXME: Won't work with scaling!
		trace_t obbTrace;
		if ( !IntersectRayWithOBB( shortenedRay, *hitboxbones[pCandidate->bone], pCandidate->bbmin, pCandidate->bbmax, 0.0f, &obbTrace ) )
			continue;

		tr.startpos = obbTrace.startpos;
		tr.endpos = obbTrace.endpos;
		tr.plane = obbTrace.plane;
		tr.startsolid = obbTrace.startsolid;
		tr.allsolid = obbTrace.allsolid;

		// This logic here is to shorten the ray each time to get more early outs
		tr.fraction *= obbTrace.fraction;
		shortenedRay.m_Delta *= obbTrace.fraction;
		iHitIndex = iBox;

		// Starting inside a hitbox ends the search.
		if (tr.startsolid)
			break;
	}

	if ( iHitIndex < 0 )
		return false;

	// Fill in what the hit hitbox and its bone say about the surface.
	mstudiobbox_t *pHitBox = set->pHitbox(iHitIndex);
	tr.hitgroup = pHitBox->group;
	tr.hitbox = iHitIndex;

	const mstudiobone_t *pBone = pStudioHdr->pBone( pHitBox->bone );
	tr.contents = pBone->contents | CONTENTS_HITBOX;
	tr.physicsbone = pBone->physicsbone;
	tr.surface.name = "**studio**";
	tr.surface.flags = SURF_HITBOX;
	tr.surface.surfaceProps = pBone->GetSurfaceProp();

	Assert( tr.physicsbone >= 0 );
	return true;
}
//-----------------------------------------------------------------------------
// Purpose: Traces a ray against every hitbox in the set whose bone contents
//			match fContentsMask and reports the last hitbox that clipped the
//			ray (each hit shortens the ray for the hitboxes that follow).
//			Non-ray queries are handed to SweepBoxToStudio.
//			When flScale differs from 1, each bone matrix and the ray are
//			scaled by 1/flScale about vecOrigin before testing.
// Output:	true and a filled-in trace when some hitbox was hit, else false
//-----------------------------------------------------------------------------
bool TraceToStudio( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set, matrix3x4_t **hitboxbones, int fContentsMask, const Vector &vecOrigin, float flScale, trace_t &tr )
{
	BONE_PROFILE_FUNC();

	if ( !ray.m_IsRay )
	{
		return SweepBoxToStudio( pProps, ray, pStudioHdr, set, hitboxbones, fContentsMask, tr );
	}

	tr.fraction = 1.0;
	tr.startsolid = false;

	const bool bScaled = ( flScale < 1.0f-FLT_EPSILON || flScale > 1.0f+FLT_EPSILON );

	// no hit yet
	int iBestHitbox = -1;
	int nBestSide = -1;

	// OPTIMIZE: Partition these?
	for ( int iBox = 0; iBox < set->numhitboxes; iBox++ )
	{
		mstudiobbox_t *pCandidate = set->pHitbox(iBox);

		// Filter based on contents mask
		const int fBoneContents = pStudioHdr->pBone( pCandidate->bone )->contents;
		if ( !( fBoneContents & fContentsMask ) )
			continue;

		// columns are axes of the bones in world space, translation is in world space
		matrix3x4_t &boneToWorld = *hitboxbones[pCandidate->bone];

		int nSide = -1;
		if ( !bScaled )
		{
			nSide = ClipRayToHitbox( ray, pCandidate, boneToWorld, tr );
		}
		else
		{
			// Because we're sending in a matrix with scale data, and because the matrix inversion in the hitbox
			// code does not handle that case, we pre-scale the bones and ray down here and do our collision checks
			// in unscaled space. We can then rescale the results afterwards.
			matrix3x4_t matUnscaled;
			MatrixCopy( boneToWorld, matUnscaled );

			float flInvScale = 1.0f / flScale;

			// Pre-scale the origin down
			Vector vecBoneOrigin;
			MatrixGetColumn( matUnscaled, 3, vecBoneOrigin );

			Vector vecUnscaledOrigin = vecBoneOrigin - vecOrigin;
			vecUnscaledOrigin *= flInvScale;
			vecUnscaledOrigin += vecOrigin;
			MatrixSetColumn( vecUnscaledOrigin, 3, matUnscaled );

			// Scale it uniformly
			VectorScale( matUnscaled[0], flInvScale, matUnscaled[0] );
			VectorScale( matUnscaled[1], flInvScale, matUnscaled[1] );
			VectorScale( matUnscaled[2], flInvScale, matUnscaled[2] );

			// Pre-scale our ray as well
			Vector vecUnscaledStart = ray.m_Start - vecOrigin;
			vecUnscaledStart *= flInvScale;
			vecUnscaledStart += vecOrigin;

			Vector vecUnscaledDelta = ray.m_Delta * flInvScale;

			Ray_t unscaledRay;
			unscaledRay.Init( vecUnscaledStart, vecUnscaledStart + vecUnscaledDelta );

			nSide = ClipRayToHitbox( unscaledRay, pCandidate, matUnscaled, tr );
		}

		if ( nSide >= 0 )
		{
			iBestHitbox = iBox;
			nBestSide = nSide;
		}
	}

	if ( iBestHitbox < 0 )
		return false;

	mstudiobbox_t *pHitBox = set->pHitbox(iBestHitbox);

	// The end point comes from the original ray and the final fraction.
	VectorMA( ray.m_Start, tr.fraction, ray.m_Delta, tr.endpos );
	tr.hitgroup = pHitBox->group;
	tr.hitbox = iBestHitbox;

	const mstudiobone_t *pBone = pStudioHdr->pBone( pHitBox->bone );
	tr.contents = pBone->contents | CONTENTS_HITBOX;
	tr.physicsbone = pBone->physicsbone;
	tr.surface.name = "**studio**";
	tr.surface.flags = SURF_HITBOX;
	tr.surface.surfaceProps = pBone->GetSurfaceProp();

	Assert( tr.physicsbone >= 0 );

	// Sides 3..5 are the +axis faces, 0..2 the -axis faces; the normal is the
	// matching column of the (original) bone matrix, negated for the latter.
	matrix3x4_t &hitBoneToWorld = *hitboxbones[pHitBox->bone];
	const bool bPositiveFace = ( nBestSide >= 3 );
	const int iAxis = bPositiveFace ? nBestSide - 3 : nBestSide;
	for ( int k = 0; k < 3; ++k )
	{
		if ( bPositiveFace )
			tr.plane.normal[k] = hitBoneToWorld[k][iAxis];
		else
			tr.plane.normal[k] = -hitBoneToWorld[k][iAxis];
	}

	// simpler plane constant equation
	tr.plane.dist = DotProduct( tr.endpos, tr.plane.normal );
	tr.plane.type = 3;
	return true;
}
//-----------------------------------------------------------------------------
// Purpose: Variant of TraceToStudio that keeps a separate running trace per
//			hit group bucket and then returns the hit from the first bucket,
//			in EHitGroupType_t order, that recorded one. A head hit is dropped
//			when the stomach or chest bucket has a smaller fraction.
//			Non-ray queries are handed to SweepBoxToStudio.
// Output:	true and a filled-in trace when some hitbox was hit, else false
//-----------------------------------------------------------------------------
bool TraceToStudioCsgoHitgroupsPriority( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set, matrix3x4_t **hitboxbones, int fContentsMask, const Vector &vecOrigin, float flScale, trace_t &tr ) { BONE_PROFILE_FUNC(); if ( !ray.m_IsRay ) { return SweepBoxToStudio( pProps, ray, pStudioHdr, set, hitboxbones, fContentsMask, tr ); }
tr.fraction = 1.0; tr.startsolid = false;
//
// We will collect trace results depending on hit group type of hitboxes
// and prefer to hit the hitboxes in order of damage.
//
// The enumerator order below is the priority order used when picking the result.
enum EHitGroupType_t { k_EHitGroupType_Head, k_EHitGroupType_Stomach, k_EHitGroupType_Chest, k_EHitGroupType_Arms, k_EHitGroupType_General, k_EHitGroupType_Legs, k_EHitGroupType_Count };
struct HitGroupResult_t { trace_t m_trHitGroup; int m_nHitbox; // index of the hitbox hit, -1 if no hit
int m_nHitSide; // hit side
};
// We'll collect results here, initialize to nothing hit
// Each bucket starts as a copy of 'tr' (fraction 1, not startsolid).
HitGroupResult_t arrHitGroupResults[ k_EHitGroupType_Count ]; for ( int j = 0; j < Q_ARRAYSIZE( arrHitGroupResults ); ++ j ) { Q_memcpy( &arrHitGroupResults[j].m_trHitGroup, &tr, sizeof( arrHitGroupResults[j].m_trHitGroup ) ); arrHitGroupResults[j].m_nHitbox = -1; arrHitGroupResults[j].m_nHitSide = -1; }
// OPTIMIZE: Partition these?
for ( int i = 0; i < set->numhitboxes; i++ ) { mstudiobbox_t *pbox = set->pHitbox(i);
// Filter based on contents mask
int fBoneContents = pStudioHdr->pBone( pbox->bone )->contents; if ( ( fBoneContents & fContentsMask ) == 0 ) continue;
// Collect the results into appropriate hitgroup bucket
// Group ids 1, 3, 2, 4/5 and 6/7 map to Head, Stomach, Chest, Arms and Legs;
// every other id lands in General.
// NOTE(review): the numeric ids presumably mirror the HITGROUP_* constants - confirm.
HitGroupResult_t *pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_General ]; switch ( pbox->group ) { case 1: pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Head ]; break; case 3: pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Stomach ]; break; case 2: pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Chest ]; break; case 4: case 5: pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Arms ]; break; case 6: case 7: pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Legs ]; break; } Assert( IsFinite( pHitGroupResult->m_trHitGroup.fraction ) );
// columns are axes of the bones in world space, translation is in world space
matrix3x4_t& matrix = *hitboxbones[pbox->bone];
// Because we're sending in a matrix with scale data, and because the matrix inversion in the hitbox
// code does not handle that case, we pre-scale the bones and ray down here and do our collision checks
// in unscaled space. We can then rescale the results afterwards.
int side = -1; if ( flScale < 1.0f-FLT_EPSILON || flScale > 1.0f+FLT_EPSILON ) { matrix3x4_t matScaled; MatrixCopy( matrix, matScaled );
// Apply the hitbox's own orientation offset on top of the bone matrix.
// NOTE(review): MatrixMultiply is called with the same matrix as input and
// output - confirm that it tolerates aliasing.
matrix3x4_t matOrientation; AngleMatrix(pbox->angOffsetOrientation, matOrientation); MatrixMultiply(matScaled, matOrientation, matScaled);
float invScale = 1.0f / flScale;
Vector vecBoneOrigin; MatrixGetColumn( matScaled, 3, vecBoneOrigin );
// Pre-scale the origin down
Vector vecNewOrigin = vecBoneOrigin - vecOrigin; vecNewOrigin *= invScale; vecNewOrigin += vecOrigin; MatrixSetColumn( vecNewOrigin, 3, matScaled );
// Scale it uniformly
VectorScale( matScaled[0], invScale, matScaled[0] ); VectorScale( matScaled[1], invScale, matScaled[1] ); VectorScale( matScaled[2], invScale, matScaled[2] );
// Pre-scale our ray as well
Vector vecRayStart = ray.m_Start - vecOrigin; vecRayStart *= invScale; vecRayStart += vecOrigin;
Vector vecRayDelta = ray.m_Delta * invScale;
Ray_t newRay; newRay.Init( vecRayStart, vecRayStart + vecRayDelta );
// The clip updates this bucket's own trace, not 'tr'.
side = ClipRayToHitbox( newRay, pbox, matScaled, pHitGroupResult->m_trHitGroup ); } else {
// Unscaled path: work on a copy so the caller's bone matrix is left untouched.
matrix3x4_t matCopy; MatrixCopy( matrix, matCopy );
matrix3x4_t matOrientation; AngleMatrix(pbox->angOffsetOrientation, matOrientation); MatrixMultiply(matCopy, matOrientation, matCopy);
side = ClipRayToHitbox( ray, pbox, matCopy, pHitGroupResult->m_trHitGroup ); } Assert( IsFinite( pHitGroupResult->m_trHitGroup.fraction ) );
if ( side >= 0 ) { pHitGroupResult->m_nHitbox = i; pHitGroupResult->m_nHitSide = side; } }
//
// Now based on bucketing hitbox group results determine which hitbox we will return
// and copy the trace results to the output parameter.
//
int hitbox = -1; int hitside = -1; // CSGO specific hitbox computation - characters' neck hitbox is classified as a headshot, but
// it deeply interpenetrates the chest. We don't want players shooting at the middle of the chest
// to register a headshot by penetrating into neck through chest or stomach, so if we have a
// headshot trace make sure that it doesn't occur by penetrating chest or stomach.
if ( arrHitGroupResults[k_EHitGroupType_Head].m_nHitbox >= 0 ) { // We have a potential headshot, check if it's penetrating via stomach or chest
for ( int j = k_EHitGroupType_Stomach; j <= k_EHitGroupType_Chest; ++ j ) { if ( arrHitGroupResults[j].m_trHitGroup.fraction < arrHitGroupResults[k_EHitGroupType_Head].m_trHitGroup.fraction ) { // The bullet first hit the stomach/chest hitbox, so ignore the headshot
arrHitGroupResults[k_EHitGroupType_Head].m_nHitbox = -1; break; } } } // Now pick the hitbox hit with the highest priority for damage
// The first bucket in enum order with a recorded hit wins; its trace replaces 'tr'.
for ( int j = 0; j < Q_ARRAYSIZE( arrHitGroupResults ); ++ j ) { if ( arrHitGroupResults[j].m_nHitbox >= 0 ) { hitbox = arrHitGroupResults[j].m_nHitbox; hitside = arrHitGroupResults[j].m_nHitSide; Q_memcpy( &tr, &arrHitGroupResults[j].m_trHitGroup, sizeof( arrHitGroupResults[j].m_trHitGroup ) ); break; } }
// Fill in the remaining trace fields from the chosen hitbox and its bone.
if ( hitbox >= 0 ) { mstudiobbox_t *pbox = set->pHitbox(hitbox); VectorMA( ray.m_Start, tr.fraction, ray.m_Delta, tr.endpos ); tr.hitgroup = set->pHitbox(hitbox)->group; tr.hitbox = hitbox; const mstudiobone_t *pBone = pStudioHdr->pBone( pbox->bone ); tr.contents = pBone->contents | CONTENTS_HITBOX; tr.physicsbone = pBone->physicsbone; tr.surface.name = "**studio**"; tr.surface.flags = SURF_HITBOX; tr.surface.surfaceProps = pBone->GetSurfaceProp();
// NOTE(review): the plane normal below is read from the raw bone matrix, while
// the clip above used the bone matrix multiplied by angOffsetOrientation; for a
// hitbox with a non-zero orientation offset the two may disagree - confirm.
Assert( tr.physicsbone >= 0 ); matrix3x4_t& matrix = *hitboxbones[pbox->bone]; if ( hitside >= 3 ) { hitside -= 3; tr.plane.normal[0] = matrix[0][hitside]; tr.plane.normal[1] = matrix[1][hitside]; tr.plane.normal[2] = matrix[2][hitside]; //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) + pbox->bbmax[hitside];
} else { tr.plane.normal[0] = -matrix[0][hitside]; tr.plane.normal[1] = -matrix[1][hitside]; tr.plane.normal[2] = -matrix[2][hitside]; //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) - pbox->bbmin[hitside];
} // simpler plane constant equation
tr.plane.dist = DotProduct( tr.endpos, tr.plane.normal ); tr.plane.type = 3; return true; } return false; }
//-----------------------------------------------------------------------------
/**
* TERROR: Version of TraceToStudio that favors certain high-damage hitgroups such as the head */ bool TraceToStudioGrouped( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set, matrix3x4_t **hitboxbones, int fContentsMask, trace_t &tr, const CUtlVector< int > &sortedHitgroups ) { BONE_PROFILE_FUNC(); if ( !ray.m_IsRay ) { return SweepBoxToStudio( pProps, ray, pStudioHdr, set, hitboxbones, fContentsMask, tr ); }
tr.fraction = 1.0; tr.startsolid = false;
// no hit yet
int hitbox = -1; int hitside = -1;
for ( int n=0; n<sortedHitgroups.Count(); ++n ) { // OPTIMIZE: Partition these?
for ( int i = 0; i < set->numhitboxes; i++ ) { mstudiobbox_t *pbox = set->pHitbox(i); if ( pbox->group != sortedHitgroups[n] ) continue;
// Filter based on contents mask
int fBoneContents = pStudioHdr->pBone( pbox->bone )->contents; if ( ( fBoneContents & fContentsMask ) == 0 ) continue;
// columns are axes of the bones in world space, translation is in world space
matrix3x4_t& matrix = *hitboxbones[pbox->bone];
int side = ClipRayToHitbox( ray, pbox, matrix, tr ); if ( side >= 0 ) { hitbox = i; hitside = side; } }
// If a high damage hitgroup was traced, stop here (ignore closer, lower-damage hitgroups)
if ( hitbox >= 0 ) { break; } }
if ( hitbox >= 0 ) { mstudiobbox_t *pbox = set->pHitbox(hitbox); VectorMA( ray.m_Start, tr.fraction, ray.m_Delta, tr.endpos ); tr.hitgroup = set->pHitbox(hitbox)->group; tr.hitbox = hitbox; const mstudiobone_t *pBone = pStudioHdr->pBone( pbox->bone ); tr.contents = pBone->contents | CONTENTS_HITBOX; tr.physicsbone = pBone->physicsbone; tr.surface.surfaceProps = pBone->GetSurfaceProp(); tr.surface.name = "**studio**"; tr.surface.flags = SURF_HITBOX; Assert( tr.physicsbone >= 0 ); matrix3x4_t& matrix = *hitboxbones[pbox->bone]; if ( hitside >= 3 ) { hitside -= 3; tr.plane.normal[0] = matrix[0][hitside]; tr.plane.normal[1] = matrix[1][hitside]; tr.plane.normal[2] = matrix[2][hitside]; //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) + pbox->bbmax[hitside];
} else { tr.plane.normal[0] = -matrix[0][hitside]; tr.plane.normal[1] = -matrix[1][hitside]; tr.plane.normal[2] = -matrix[2][hitside]; //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) - pbox->bbmin[hitside];
} // simpler plane constant equation
tr.plane.dist = DotProduct( tr.endpos, tr.plane.normal ); tr.plane.type = 3; return true; } return false; }
//-----------------------------------------------------------------------------
// Purpose: returns array of animations and weightings for a sequence based on current pose parameters
//-----------------------------------------------------------------------------
void Studio_SeqAnims( const CStudioHdr *pStudioHdr, mstudioseqdesc_t &seqdesc, int iSequence, const float poseParameter[], mstudioanimdesc_t *panim[4], float *weight ) { BONE_PROFILE_FUNC(); #if _DEBUG
VPROF_INCREMENT_COUNTER("SEQ_ANIMS",1); #endif
if (!pStudioHdr || iSequence >= pStudioHdr->GetNumSeq()) { weight[0] = weight[1] = weight[2] = weight[3] = 0.0; return; }
float s0 = 0, s1 = 0; int i0 = Studio_LocalPoseParameter( pStudioHdr, poseParameter, seqdesc, iSequence, 0, s0 ); int i1 = Studio_LocalPoseParameter( pStudioHdr, poseParameter, seqdesc, iSequence, 1, s1 );
panim[0] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0 , i1 ) ) ); weight[0] = (1 - s0) * (1 - s1);
panim[1] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0+1, i1 ) ) ); weight[1] = (s0) * (1 - s1);
panim[2] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0 , i1+1 ) ) ); weight[2] = (1 - s0) * (s1);
panim[3] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0+1, i1+1 ) ) ); weight[3] = (s0) * (s1);
Assert( weight[0] >= 0.0f && weight[1] >= 0.0f && weight[2] >= 0.0f && weight[3] >= 0.0f ); }
//-----------------------------------------------------------------------------
// Purpose: returns max frame number for a sequence
//-----------------------------------------------------------------------------
int Studio_MaxFrame( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[] ) { mstudioanimdesc_t *panim[4]; float weight[4];
mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence ); Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
float maxFrame = 0; for (int i = 0; i < 4; i++) { if (weight[i] > 0) { maxFrame += panim[i]->numframes * weight[i]; } }
if ( maxFrame > 1 ) maxFrame -= 1;
// FIXME: why does the weights sometimes not exactly add it 1.0 and this sometimes rounds down?
return (maxFrame + 0.01); }
//-----------------------------------------------------------------------------
// Purpose: returns frames per second of a sequence
//-----------------------------------------------------------------------------
float Studio_FPS( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[] ) { mstudioanimdesc_t *panim[4]; float weight[4];
mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence ); Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
float t = 0;
for (int i = 0; i < 4; i++) { if (weight[i] > 0) { t += panim[i]->fps * weight[i]; } } return t; }
//-----------------------------------------------------------------------------
// Purpose: returns cycles per second of a sequence (cycles/second)
//-----------------------------------------------------------------------------
float Studio_CPS( const CStudioHdr *pStudioHdr, mstudioseqdesc_t &seqdesc, int iSequence, const float poseParameter[] ) { mstudioanimdesc_t *panim[4]; float weight[4];
Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
float t = 0;
for (int i = 0; i < 4; i++) { if (weight[i] > 0 && panim[i]->numframes > 1) { t += (panim[i]->fps / (panim[i]->numframes - 1)) * weight[i]; } }
// FIXME: add support for more than just start 0 and end 0 pose param layers
for (int j = 0; j < seqdesc.numautolayers; j++) { mstudioautolayer_t *pLayer = seqdesc.pAutolayer( j );
if (pLayer->flags & STUDIO_AL_LOCAL) continue;
float layerWeight = 0;
int iSequenceLocal = pStudioHdr->iRelativeSeq( iSequence, pLayer->iSequence );
if ( pLayer->start == 0 && pLayer->end == 0 && (pLayer->flags & STUDIO_AL_POSE) ) { int iPose = pStudioHdr->GetSharedPoseParameter( iSequenceLocal, pLayer->iPose ); if (iPose == -1) continue; const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iPose ); float s = poseParameter[ iPose ] * (Pose.end - Pose.start) + Pose.start;
Assert( (pLayer->tail - pLayer->peak) != 0 );
s = clamp( (s - pLayer->peak) / (pLayer->tail - pLayer->peak), 0, 1 );
if (pLayer->flags & STUDIO_AL_SPLINE) { s = SimpleSpline( s ); }
layerWeight = seqdesc.weight(0) * s; }
if ( layerWeight ) { mstudioseqdesc_t &seqdescLocal = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequenceLocal ); Studio_SeqAnims( pStudioHdr, seqdescLocal, iSequenceLocal, poseParameter, panim, weight );
float flLocalT = 0;
for (int i = 0; i < 4; i++) { if (weight[i] > 0 && panim[i]->numframes > 1) { flLocalT += (panim[i]->fps / (panim[i]->numframes - 1)) * weight[i]; } }
if ( flLocalT ) { t = Lerp( layerWeight, t, flLocalT ); } } }
return t; }
//-----------------------------------------------------------------------------
// Purpose: returns length (in seconds) of a sequence (seconds/cycle);
//			0 when the sequence's cycle rate is 0
//-----------------------------------------------------------------------------
float Studio_Duration( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[] )
{
	mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
	const float flCyclesPerSecond = Studio_CPS( pStudioHdr, seqdesc, iSequence, poseParameter );

	// Duration is the reciprocal of the cycle rate; guard the divide.
	return ( flCyclesPerSecond == 0 ) ? 0.0f : 1.0f / flCyclesPerSecond;
}
//-----------------------------------------------------------------------------
// Purpose: calculate changes in position and angle relative to the start of an animation's cycle
// Output:	updated position and angle (yaw only), relative to the origin
//			returns false if animation is not a movement animation, or if no
//			movement block reaches the requested frame
//-----------------------------------------------------------------------------
bool Studio_AnimPosition( mstudioanimdesc_t *panim, float flCycle, Vector &vecPos, QAngle &vecAngle )
{
	BONE_PROFILE_FUNC();

	vecPos.Init( );
	vecAngle.Init( );

	if (panim->nummovements == 0)
		return false;

	// Split the cycle into whole loops and a remainder.
	int nWholeLoops = 0;
	if (flCycle > 1.0)
	{
		nWholeLoops = (int)flCycle;
	}
	else if (flCycle < 0.0)
	{
		nWholeLoops = (int)flCycle - 1;
	}
	flCycle = flCycle - nWholeLoops;

	float flFrame = flCycle * (panim->numframes - 1);
	float flBlockStartFrame = 0;

	for ( int iMove = 0; iMove < panim->nummovements; iMove++ )
	{
		mstudiomovement_t *pBlock = panim->pMovement( iMove );

		if ( !( pBlock->endframe >= flFrame ) )
		{
			// Block lies entirely before the frame: take its end state and move on.
			flBlockStartFrame = pBlock->endframe;
			vecPos = pBlock->position;
			vecAngle.y = pBlock->angle;
			continue;
		}

		// Fraction of the way through this block.
		float f = (flFrame - flBlockStartFrame) / (pBlock->endframe - flBlockStartFrame);

		// Distance covered inside the block, with speed going from v0 to v1.
		float d = pBlock->v0 * f + 0.5 * (pBlock->v1 - pBlock->v0) * f * f;

		vecPos = vecPos + d * pBlock->vector;
		vecAngle.y = vecAngle.y * (1 - f) + pBlock->angle * f;

		if (nWholeLoops != 0)
		{
			// Add the total movement of the animation once per whole loop.
			mstudiomovement_t *pFinalBlock = panim->pMovement( panim->nummovements - 1 );
			vecPos = vecPos + nWholeLoops * pFinalBlock->position;
			vecAngle.y = vecAngle.y + nWholeLoops * pFinalBlock->angle;
		}
		return true;
	}

	return false;
}
//-----------------------------------------------------------------------------
// Purpose: calculate instantaneous velocity in ips at a given point
//			in the animation's cycle
// Output:	velocity vector, relative to identity orientation (written only
//			when true is returned)
//			returns false if no movement block reaches the requested frame
//-----------------------------------------------------------------------------
bool Studio_AnimVelocity( mstudioanimdesc_t *panim, float flCycle, Vector &vecVelocity )
{
	float flFrame = flCycle * (panim->numframes - 1);
	// NOTE(review): this subtracts the number of whole loops, not that number
	// times (numframes - 1) - confirm whether a full wrap was intended.
	flFrame = flFrame - (int)(flFrame / (panim->numframes - 1));

	float flBlockStartFrame = 0;
	for ( int iMove = 0; iMove < panim->nummovements; iMove++ )
	{
		mstudiomovement_t *pBlock = panim->pMovement( iMove );

		if ( !( pBlock->endframe >= flFrame ) )
		{
			flBlockStartFrame = pBlock->endframe;
			continue;
		}

		// Fraction through this block, then the speed interpolated from v0 to v1.
		float f = (flFrame - flBlockStartFrame) / (pBlock->endframe - flBlockStartFrame);
		float flSpeed = pBlock->v0 * (1 - f) + pBlock->v1 * f;

		// scale from per block to per sec velocity
		flSpeed = flSpeed * panim->fps / (pBlock->endframe - flBlockStartFrame);

		vecVelocity = pBlock->vector * flSpeed;
		return true;
	}

	return false;
}
//-----------------------------------------------------------------------------
// Purpose: calculate changes in position and angle between two points in an animation cycle
// Output:	deltaPos and deltaAngle.y, relative to CycleFrom being at the origin
//			(the other components of deltaAngle are not written)
//			returns false if animation is not a movement animation
//-----------------------------------------------------------------------------
bool Studio_AnimMovement( mstudioanimdesc_t *panim, float flCycleFrom, float flCycleTo, Vector &deltaPos, QAngle &deltaAngle )
{
	if (panim->nummovements == 0)
	{
		return false;
	}

	// Sample the accumulated movement at both ends of the interval.
	Vector vecFromPos;
	QAngle angFrom;
	Studio_AnimPosition( panim, flCycleFrom, vecFromPos, angFrom );

	Vector vecToPos;
	QAngle angTo;
	Studio_AnimPosition( panim, flCycleTo, vecToPos, angTo );

	deltaAngle.y = angTo.y - angFrom.y;

	// Express the displacement in the frame of the starting yaw.
	Vector vecWorldDelta = vecToPos - vecFromPos;
	VectorYawRotate( vecWorldDelta, -angFrom.y, deltaPos );

	return true;
}
//-----------------------------------------------------------------------------
// Purpose: finds how much of an animation to play to move given linear distance
// Output:	the cycle at which the distance is reached; 0 for a non-positive
//			distance or when the quadratic has no solution; 1 when the
//			movement blocks together cover less than flDist
//-----------------------------------------------------------------------------
float Studio_FindAnimDistance( mstudioanimdesc_t *panim, float flDist )
{
	if (flDist <= 0)
		return 0.0;

	float flBlockStartFrame = 0;

	for ( int iMove = 0; iMove < panim->nummovements; iMove++ )
	{
		mstudiomovement_t *pBlock = panim->pMovement( iMove );

		// Distance covered by the whole block (average of start and end speed).
		float flBlockDist = (pBlock->v0 + pBlock->v1) * 0.5;

		if ( !( flBlockDist >= flDist ) )
		{
			// Not reached inside this block: consume it and keep going.
			flDist -= flBlockDist;
			flBlockStartFrame = pBlock->endframe;
			continue;
		}

		float root1, root2;

		// d = V0 * t + 1/2 (V1-V0) * t^2
		if ( !SolveQuadratic( 0.5 * (pBlock->v1 - pBlock->v0), pBlock->v0, -flDist, root1, root2 ) )
		{
			return 0.0;
		}

		float flCyclesPerFrame = 1.0 / (panim->numframes - 1);
		return (flBlockStartFrame + root1 * (pBlock->endframe - flBlockStartFrame)) * flCyclesPerFrame;
	}

	return 1.0;
}
//-----------------------------------------------------------------------------
// Purpose: calculate changes in position and angle between two points in a sequences cycle
// Output: updated position and angle, relative to CycleFrom being at the origin
// returns false if sequence is not a movement sequence
//-----------------------------------------------------------------------------
bool Studio_SeqMovement( const CStudioHdr *pStudioHdr, int iSequence, float flCycleFrom, float flCycleTo, const float poseParameter[], Vector &deltaPos, QAngle &deltaAngles ) { mstudioanimdesc_t *panim[4]; float weight[4];
mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight ); deltaPos.Init( ); deltaAngles.Init( );
bool found = false;
for (int i = 0; i < 4; i++) { if (weight[i]) { Vector localPos; QAngle localAngles;
localPos.Init(); localAngles.Init();
if (Studio_AnimMovement( panim[i], flCycleFrom, flCycleTo, localPos, localAngles )) { found = true; deltaPos = deltaPos + localPos * weight[i]; // FIXME: this makes no sense
deltaAngles = deltaAngles + localAngles * weight[i]; } else if (!(panim[i]->flags & STUDIO_DELTA) && panim[i]->nummovements == 0 && seqdesc.weight(0) > 0.0) { found = true; } } }
// FIXME: add support for more than just start 0 and end 0 pose param layers (currently no cycle handling or angular delta)
for (int j = 0; j < seqdesc.numautolayers; j++) { mstudioautolayer_t *pLayer = seqdesc.pAutolayer( j );
if (pLayer->flags & STUDIO_AL_LOCAL) continue;
float layerWeight = 0;
int iSequenceLocal = pStudioHdr->iRelativeSeq( iSequence, pLayer->iSequence );
if ( pLayer->start == 0 && pLayer->end == 0 && (pLayer->flags & STUDIO_AL_POSE) ) { int iPose = pStudioHdr->GetSharedPoseParameter( iSequenceLocal, pLayer->iPose ); if (iPose == -1) continue; const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iPose ); float s = poseParameter[ iPose ] * (Pose.end - Pose.start) + Pose.start;
Assert( (pLayer->tail - pLayer->peak) != 0 );
s = clamp( (s - pLayer->peak) / (pLayer->tail - pLayer->peak), 0, 1 );
if (pLayer->flags & STUDIO_AL_SPLINE) { s = SimpleSpline( s ); }
layerWeight = seqdesc.weight(0) * s; }
if ( layerWeight ) { Vector layerPos; //QAngle layerAngles;
layerPos.Init(); //layerAngles.Init();
bool bLayerFound = false;
mstudioseqdesc_t &seqdescLocal = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequenceLocal ); Studio_SeqAnims( pStudioHdr, seqdescLocal, iSequenceLocal, poseParameter, panim, weight );
for (int i = 0; i < 4; i++) { if (weight[i]) { Vector localPos; QAngle localAngles;
localPos.Init(); //localAngles.Init();
if ( Studio_AnimMovement( panim[i], flCycleFrom, flCycleTo, localPos, localAngles ) ) { bLayerFound = true; layerPos = layerPos + localPos * weight[i]; // FIXME: do angles
//layerAngles = layerAngles + localAngles * weight[i];
} } }
if ( bLayerFound ) { deltaPos = Lerp( layerWeight, deltaPos, layerPos ); } } }
return found; }
//-----------------------------------------------------------------------------
// Purpose: calculate changes in position and angle between two points in a sequences cycle
// Output: updated position and angle, relative to CycleFrom being at the origin
// returns false if sequence is not a movement sequence
//-----------------------------------------------------------------------------
float Studio_SeqMovementAndDuration( const CStudioHdr *pStudioHdr, int iSequence, float flCycleFrom, float flCycleTo, const float poseParameter[], Vector &deltaPos ) { mstudioanimdesc_t *panim[4]; float weight[4];
mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
deltaPos.Init( );
Vector localPos; QAngle localAngles;
float t = 0; for ( int i = 0; i < 4; i++ ) { if ( weight[i] == 0.0f ) continue;
if ( panim[i]->numframes > 1 ) { t += ( panim[i]->fps / ( panim[i]->numframes - 1 ) ) * weight[i]; }
if ( Studio_AnimMovement( panim[i], flCycleFrom, flCycleTo, localPos, localAngles ) ) { VectorMA( deltaPos, weight[i], localPos, deltaPos ); } } return ( t != 0.0f ) ? 1.0f / t : 0.0f; }
//-----------------------------------------------------------------------------
// Purpose: calculate instantaneous velocity in ips at a given point in the sequence's cycle
// Output: velocity vector, relative to identity orientation
// returns false if sequence is not a movement sequence
//-----------------------------------------------------------------------------
bool Studio_SeqVelocity( const CStudioHdr *pStudioHdr, int iSequence, float flCycle, const float poseParameter[], Vector &vecVelocity ) { mstudioanimdesc_t *panim[4]; float weight[4];
mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence ); Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight ); vecVelocity.Init( );
bool found = false;
for (int i = 0; i < 4; i++) { if (weight[i]) { Vector vecLocalVelocity;
if (Studio_AnimVelocity( panim[i], flCycle, vecLocalVelocity )) { vecVelocity = vecVelocity + vecLocalVelocity * weight[i]; found = true; } } } return found; }
//-----------------------------------------------------------------------------
// Purpose: finds how much of a sequence to play to move a given linear distance
// Input  : pStudioHdr, iSequence - sequence to evaluate
//			poseParameter - pose parameter values used to pick and weight the blended animations
//			flDist - distance passed through to Studio_FindAnimDistance
// Output : weighted sum of Studio_FindAnimDistance over the contributing animations
//-----------------------------------------------------------------------------
float Studio_FindSeqDistance( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[], float flDist )
{
	mstudioanimdesc_t *panim[4];
	float weight[4];

	mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
	Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );

	float flBlendedCycle = 0;
	for ( int iAnim = 0; iAnim < 4; iAnim++ )
	{
		// zero-weight slots don't contribute
		if ( !weight[iAnim] )
			continue;

		float flAnimCycle = Studio_FindAnimDistance( panim[iAnim], flDist );
		flBlendedCycle = flBlendedCycle + flAnimCycle * weight[iAnim];
	}

	return flBlendedCycle;
}
//-----------------------------------------------------------------------------
// Purpose: lookup attachment by name
//-----------------------------------------------------------------------------
int Studio_FindAttachment( const CStudioHdr *pStudioHdr, const char *pAttachmentName ) { if ( pStudioHdr && pStudioHdr->SequencesAvailable() ) { // Extract the bone index from the name
for (int i = 0; i < pStudioHdr->GetNumAttachments(); i++) { if (!stricmp(pAttachmentName,((CStudioHdr *)pStudioHdr)->pAttachment(i).pszName( ))) { return i; } } }
return -1; }
//-----------------------------------------------------------------------------
// Purpose: lookup attachments by substring. Randomly return one of the matching attachments.
//-----------------------------------------------------------------------------
int Studio_FindRandomAttachment( const CStudioHdr *pStudioHdr, const char *pAttachmentName ) { if ( pStudioHdr ) { // First move them all matching attachments into a list
CUtlVector<int> matchingAttachments;
// Extract the bone index from the name
for (int i = 0; i < pStudioHdr->GetNumAttachments(); i++) { if ( strstr( ((CStudioHdr *)pStudioHdr)->pAttachment(i).pszName(), pAttachmentName ) ) { matchingAttachments.AddToTail(i); } }
// Then randomly return one of the attachments
if ( matchingAttachments.Count() > 0 ) return matchingAttachments[ RandomInt( 0, matchingAttachments.Count()-1 ) ]; }
return -1; }
//-----------------------------------------------------------------------------
// Purpose: lookup bone by name
//-----------------------------------------------------------------------------
int Studio_BoneIndexByName( const CStudioHdr *pStudioHdr, const char *pName ) { // binary search for the bone matching pName
int start = 0, end = pStudioHdr->numbones()-1; const byte *pBoneTable = pStudioHdr->GetBoneTableSortedByName(); const mstudiobone_t *pbones = pStudioHdr->pBone( 0 ); while (start <= end) { int mid = (start + end) >> 1; int cmp = Q_stricmp( pbones[pBoneTable[mid]].pszName(), pName ); if ( cmp < 0 ) { start = mid + 1; } else if ( cmp > 0 ) { end = mid - 1; } else { return pBoneTable[mid]; } } return -1; }
//-----------------------------------------------------------------------------
// Purpose: free-function accessor for the model's surface property string
// Input  : pstudiohdr - model to query; not null-checked
// Output : whatever pstudiohdr->pszSurfaceProp() returns
//-----------------------------------------------------------------------------
const char *Studio_GetDefaultSurfaceProps( CStudioHdr *pstudiohdr )
{
	return pstudiohdr->pszSurfaceProp();
}
//-----------------------------------------------------------------------------
// Purpose: free-function accessor for the model's mass
// Input  : pstudiohdr - model to query; not null-checked
// Output : whatever pstudiohdr->mass() returns
//-----------------------------------------------------------------------------
float Studio_GetMass( CStudioHdr *pstudiohdr )
{
	return pstudiohdr->mass();
}
//-----------------------------------------------------------------------------
// Purpose: return pointer to sequence key value buffer
// Input  : pStudioHdr - model to query; may be NULL
//			iSequence - sequence index; out-of-range values are rejected
// Output : the sequence's KeyValueText(), or NULL if pStudioHdr is NULL,
//			SequencesAvailable() reports false, or iSequence is out of range
//-----------------------------------------------------------------------------
const char *Studio_GetKeyValueText( const CStudioHdr *pStudioHdr, int iSequence )
{
	if ( !pStudioHdr || !pStudioHdr->SequencesAvailable() )
	{
		return NULL;
	}

	if ( iSequence < 0 || iSequence >= pStudioHdr->GetNumSeq() )
	{
		return NULL;
	}

	return ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence ).KeyValueText();
}
//-----------------------------------------------------------------------------
// Purpose: touch the animation data of every animation in a sequence's blend grid
// Input  : pStudioHdr - model to query; not null-checked
//			iSequence - sequence whose groupsize[0] x groupsize[1] animations are visited
// Output : true if pAnim() returned data for every animation, false if any returned NULL.
//			Every animation is visited even after a miss has been seen.
//-----------------------------------------------------------------------------
bool Studio_PrefetchSequence( const CStudioHdr *pStudioHdr, int iSequence )
{
	bool bAllResident = true;

	mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
	const int nRows = seqdesc.groupsize[ 0 ];
	const int nCols = seqdesc.groupsize[ 1 ];

	for ( int iRow = 0; iRow < nRows; ++iRow )
	{
		for ( int iCol = 0; iCol < nCols; ++iCol )
		{
			mstudioanimdesc_t &animdesc = ((CStudioHdr *)pStudioHdr)->pAnimdesc( seqdesc.anim( iRow, iCol ) );

			int iFrame = 0;
			if ( animdesc.pAnim( &iFrame ) == NULL )
			{
				// no early-out: keep asking for the remaining animations too
				bAllResident = false;
			}
		}
	}

	// Everything for this sequence is resident?
	return bAllResident;
}
//-----------------------------------------------------------------------------
// Purpose: Drive a flex controller from a component of a bone
//-----------------------------------------------------------------------------
void Studio_RunBoneFlexDrivers( float *pflFlexControllerWeights, const CStudioHdr *pStudioHdr, const Vector *pvPositions, const matrix3x4_t *pBoneToWorld, const matrix3x4_t &mRootToWorld ) { bool bRootToWorldInvComputed = false; matrix3x4_t mRootToWorldInv; matrix3x4_t mParentInv; matrix3x4_t mBoneLocal;
const int nBoneFlexDriverCount = pStudioHdr->BoneFlexDriverCount();
for ( int i = 0; i < nBoneFlexDriverCount; ++i ) { const mstudioboneflexdriver_t *pBoneFlexDriver = pStudioHdr->BoneFlexDriver( i ); const mstudiobone_t *pStudioBone = pStudioHdr->pBone( pBoneFlexDriver->m_nBoneIndex );
const int nControllerCount = pBoneFlexDriver->m_nControlCount;
if ( pStudioBone->flags & BONE_USED_BY_BONE_MERGE ) { // The local space version of the bone is not available if this is a bonemerged bone
// so do the slow computation of the local version of the bone from boneToWorld
if ( pStudioBone->parent < 0 ) { if ( !bRootToWorldInvComputed ) { MatrixInvert( mRootToWorld, mRootToWorldInv ); bRootToWorldInvComputed = true; }
MatrixMultiply( mRootToWorldInv, pBoneToWorld[ pBoneFlexDriver->m_nBoneIndex ], mBoneLocal ); } else { MatrixInvert( pBoneToWorld[ pStudioBone->parent ], mParentInv ); MatrixMultiply( mParentInv, pBoneToWorld[ pBoneFlexDriver->m_nBoneIndex ], mBoneLocal ); }
for ( int j = 0; j < nControllerCount; ++j ) { const mstudioboneflexdrivercontrol_t *pController = pBoneFlexDriver->pBoneFlexDriverControl( j ); const mstudioflexcontroller_t *pFlexController = pStudioHdr->pFlexcontroller( static_cast< LocalFlexController_t >( pController->m_nFlexControllerIndex ) );
if ( pFlexController->localToGlobal < 0 ) continue;
Assert( pController->m_nFlexControllerIndex >= 0 && pController->m_nFlexControllerIndex < pStudioHdr->numflexcontrollers() ); Assert( pController->m_nBoneComponent >= 0 && pController->m_nBoneComponent <= 2 ); pflFlexControllerWeights[pFlexController->localToGlobal] = RemapValClamped( mBoneLocal[pController->m_nBoneComponent][3], pController->m_flMin, pController->m_flMax, 0.0f, 1.0f ); } } else { // Use the local space version of the bone directly for non-bonemerged bones
const Vector &position = pvPositions[ pBoneFlexDriver->m_nBoneIndex ];
for ( int j = 0; j < nControllerCount; ++j ) { const mstudioboneflexdrivercontrol_t *pController = pBoneFlexDriver->pBoneFlexDriverControl( j ); const mstudioflexcontroller_t *pFlexController = pStudioHdr->pFlexcontroller( static_cast< LocalFlexController_t >( pController->m_nFlexControllerIndex ) );
if ( pFlexController->localToGlobal < 0 ) continue;
Assert( pController->m_nFlexControllerIndex >= 0 && pController->m_nFlexControllerIndex < pStudioHdr->numflexcontrollers() ); Assert( pController->m_nBoneComponent >= 0 && pController->m_nBoneComponent <= 2 ); pflFlexControllerWeights[pFlexController->localToGlobal] = RemapValClamped( position[pController->m_nBoneComponent], pController->m_flMin, pController->m_flMax, 0.0f, 1.0f ); } } } }
|