Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

3745 lines
113 KiB

  1. //===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //===========================================================================//
  8. #include "tier0/dbg.h"
  9. #include "mathlib/mathlib.h"
  10. #include "bone_setup.h"
  11. #include <string.h>
  12. #ifdef POSIX
  13. #define _rotl(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
  14. #endif
  15. #include "collisionutils.h"
  16. #include "vstdlib/random.h"
  17. #include "tier0/vprof.h"
  18. #include "bone_accessor.h"
  19. #include "mathlib/ssequaternion.h"
  20. #include "bitvec.h"
  21. #include "datamanager.h"
  22. #include "convar.h"
  23. #include "tier0/tslist.h"
  24. #include "vphysics_interface.h"
  25. #include "datacache/idatacache.h"
  26. #include "mathlib/capsule.h"
  27. #include "tier0/miniprofiler.h"
  28. #ifdef CLIENT_DLL
  29. #include "posedebugger.h"
  30. #endif
  31. #include "engine/ivdebugoverlay.h"
  32. #include "bone_utils.h"
  33. // memdbgon must be the last include file in a .cpp file!!!
  34. #include "tier0/memdbgon.h"
  35. // -----------------------------------------------------------------
  36. CBoneSetupMemoryPool<BoneQuaternionAligned> g_QuaternionPool;
  37. CBoneSetupMemoryPool<BoneVector> g_VectorPool;
  38. CBoneSetupMemoryPool<matrix3x4a_t> g_MatrixPool;
  39. // -----------------------------------------------------------------
  40. CBoneCache *CBoneCache::CreateResource( const bonecacheparams_t &params )
  41. {
  42. BONE_PROFILE_FUNC();
  43. short studioToCachedIndex[MAXSTUDIOBONES];
  44. short cachedToStudioIndex[MAXSTUDIOBONES];
  45. int cachedBoneCount = 0;
  46. for ( int i = 0; i < params.pStudioHdr->numbones(); i++ )
  47. {
  48. // skip bones that aren't part of the boneMask (and aren't the root bone)
  49. if (i != 0 && !(params.pStudioHdr->boneFlags(i) & params.boneMask))
  50. {
  51. studioToCachedIndex[i] = -1;
  52. continue;
  53. }
  54. studioToCachedIndex[i] = cachedBoneCount;
  55. cachedToStudioIndex[cachedBoneCount] = i;
  56. cachedBoneCount++;
  57. }
  58. int tableSizeStudio = sizeof(short) * params.pStudioHdr->numbones();
  59. int tableSizeCached = sizeof(short) * cachedBoneCount;
  60. int matrixSize = sizeof(matrix3x4_t) * cachedBoneCount;
  61. size_t size = AlignValue( sizeof(CBoneCache) + tableSizeStudio + tableSizeCached, 16 ) + matrixSize;
  62. CBoneCache *pMem = (CBoneCache *)MemAlloc_AllocAligned( size, 16 );
  63. Construct( pMem );
  64. Assert( size == ( uint )size ); // make sure we're not trimming the int in 64bit
  65. pMem->Init( params, size, studioToCachedIndex, cachedToStudioIndex, cachedBoneCount );
  66. return pMem;
  67. }
  68. unsigned int CBoneCache::EstimatedSize( const bonecacheparams_t &params )
  69. {
  70. // conservative estimate - max size
  71. return ( params.pStudioHdr->numbones() * (sizeof(short) + sizeof(short) + sizeof(matrix3x4_t)) + 3 ) & ~3;
  72. }
  73. void CBoneCache::DestroyResource()
  74. {
  75. MemAlloc_FreeAligned( this );
  76. }
  77. CBoneCache::CBoneCache()
  78. {
  79. m_size = 0;
  80. m_cachedBoneCount = 0;
  81. }
  82. void CBoneCache::Init( const bonecacheparams_t &params, unsigned int size, short *pStudioToCached, short *pCachedToStudio, int cachedBoneCount )
  83. {
  84. BONE_PROFILE_FUNC();
  85. m_cachedBoneCount = cachedBoneCount;
  86. m_size = size;
  87. m_timeValid = params.curtime;
  88. m_boneMask = params.boneMask;
  89. int studioTableSize = params.pStudioHdr->numbones() * sizeof(short);
  90. m_cachedToStudioOffset = studioTableSize;
  91. memcpy( StudioToCached(), pStudioToCached, studioTableSize );
  92. int cachedTableSize = cachedBoneCount * sizeof(short);
  93. memcpy( CachedToStudio(), pCachedToStudio, cachedTableSize );
  94. m_matrixOffset = AlignValue( sizeof(CBoneCache) + m_cachedToStudioOffset + cachedTableSize, 16 );
  95. UpdateBones( params.pBoneToWorld, params.pStudioHdr->numbones(), params.curtime );
  96. }
  97. void CBoneCache::UpdateBones( const matrix3x4a_t *pBoneToWorld, int numbones, float curtime )
  98. {
  99. BONE_PROFILE_FUNC();
  100. matrix3x4a_t *pBones = BoneArray();
  101. const short *pCachedToStudio = CachedToStudio();
  102. for ( int i = 0; i < m_cachedBoneCount; i++ )
  103. {
  104. int index = pCachedToStudio[i];
  105. //MatrixCopy( pBoneToWorld[index], pBones[i] );
  106. const float *pInput = pBoneToWorld[index].Base();
  107. float *pOutput = pBones[i].Base();
  108. fltx4 fl4Tmp0 = LoadAlignedSIMD( pInput );
  109. StoreAlignedSIMD( pOutput, fl4Tmp0 );
  110. fltx4 fl4Tmp1 = LoadAlignedSIMD( pInput + 4 );
  111. StoreAlignedSIMD( pOutput+4, fl4Tmp1 );
  112. fltx4 fl4Tmp2 = LoadAlignedSIMD( pInput + 8 );
  113. StoreAlignedSIMD( pOutput+8, fl4Tmp2 );
  114. }
  115. m_timeValid = curtime;
  116. }
  117. matrix3x4a_t *CBoneCache::GetCachedBone( int studioIndex )
  118. {
  119. BONE_PROFILE_FUNC();
  120. int cachedIndex = StudioToCached()[studioIndex];
  121. if ( cachedIndex >= 0 )
  122. {
  123. return BoneArray() + cachedIndex;
  124. }
  125. return NULL;
  126. }
  127. void CBoneCache::ReadCachedBones( matrix3x4a_t *pBoneToWorld )
  128. {
  129. BONE_PROFILE_FUNC();
  130. matrix3x4a_t *pBones = BoneArray();
  131. const short *pCachedToStudio = CachedToStudio();
  132. for ( int i = 0; i < m_cachedBoneCount; i++ )
  133. {
  134. //MatrixCopy( pBones[i], pBoneToWorld[pCachedToStudio[i]] );
  135. const float *pInput = pBones[i].Base();
  136. float *pOutput = pBoneToWorld[pCachedToStudio[i]].Base();
  137. fltx4 fl4Tmp0 = LoadAlignedSIMD( pInput );
  138. StoreAlignedSIMD( pOutput, fl4Tmp0 );
  139. fltx4 fl4Tmp1 = LoadAlignedSIMD( pInput + 4 );
  140. StoreAlignedSIMD( pOutput+4, fl4Tmp1 );
  141. fltx4 fl4Tmp2 = LoadAlignedSIMD( pInput + 8 );
  142. StoreAlignedSIMD( pOutput+8, fl4Tmp2 );
  143. }
  144. }
  145. void CBoneCache::ReadCachedBonePointers( matrix3x4_t **bones, int numbones )
  146. {
  147. BONE_PROFILE_FUNC();
  148. memset( bones, 0, sizeof(matrix3x4_t *) * numbones );
  149. matrix3x4a_t *pBones = BoneArray();
  150. const short *pCachedToStudio = CachedToStudio();
  151. for ( int i = 0; i < m_cachedBoneCount; i++ )
  152. {
  153. bones[pCachedToStudio[i]] = pBones + i;
  154. }
  155. }
  156. bool CBoneCache::IsValid( float curtime, float dt )
  157. {
  158. if ( curtime - m_timeValid <= dt )
  159. return true;
  160. return false;
  161. }
  162. // private functions
  163. matrix3x4a_t *CBoneCache::BoneArray()
  164. {
  165. return (matrix3x4a_t *)( (byte *)(this) + m_matrixOffset );
  166. }
  167. short *CBoneCache::StudioToCached()
  168. {
  169. return (short *)( (char *)(this+1) );
  170. }
  171. short *CBoneCache::CachedToStudio()
  172. {
  173. return (short *)( (char *)(this+1) + m_cachedToStudioOffset );
  174. }
  175. // Construct a singleton
  176. static CDataManager<CBoneCache, bonecacheparams_t, CBoneCache *, CThreadFastMutex> g_StudioBoneCache( 128 * 1024L );
  177. void Studio_LockBoneCache()
  178. {
  179. g_StudioBoneCache.AccessMutex().Lock();
  180. }
  181. void Studio_UnlockBoneCache()
  182. {
  183. g_StudioBoneCache.AccessMutex().Unlock();
  184. }
  185. CBoneCache *Studio_GetBoneCache( memhandle_t cacheHandle, bool bLock )
  186. {
  187. AUTO_LOCK( g_StudioBoneCache.AccessMutex() );
  188. if ( !bLock )
  189. {
  190. return g_StudioBoneCache.GetResource_NoLock( cacheHandle );
  191. }
  192. else
  193. {
  194. return g_StudioBoneCache.LockResource( cacheHandle );
  195. }
  196. }
  197. void Studio_ReleaseBoneCache( memhandle_t cacheHandle )
  198. {
  199. g_StudioBoneCache.UnlockResource( cacheHandle );
  200. g_StudioBoneCache.FlushToTargetSize();
  201. }
  202. memhandle_t Studio_CreateBoneCache( bonecacheparams_t &params )
  203. {
  204. AUTO_LOCK( g_StudioBoneCache.AccessMutex() );
  205. return g_StudioBoneCache.CreateResource( params );
  206. }
  207. void Studio_DestroyBoneCache( memhandle_t cacheHandle )
  208. {
  209. AUTO_LOCK( g_StudioBoneCache.AccessMutex() );
  210. g_StudioBoneCache.DestroyResource( cacheHandle );
  211. }
  212. void Studio_InvalidateBoneCacheIfNotMatching( memhandle_t cacheHandle, float flTimeValid )
  213. {
  214. AUTO_LOCK( g_StudioBoneCache.AccessMutex() );
  215. CBoneCache *pCache = g_StudioBoneCache.GetResource_NoLock( cacheHandle );
  216. if ( pCache && pCache->m_timeValid != flTimeValid )
  217. {
  218. pCache->m_timeValid = -1.0f;
  219. }
  220. }
  221. //-----------------------------------------------------------------------------
  222. // Purpose:
  223. //-----------------------------------------------------------------------------
  224. void BuildBoneChain(
  225. const CStudioHdr *pStudioHdr,
  226. const matrix3x4a_t &rootxform,
  227. const BoneVector pos[],
  228. const BoneQuaternion q[],
  229. int iBone,
  230. matrix3x4a_t *pBoneToWorld )
  231. {
  232. CBoneBitList boneComputed;
  233. BuildBoneChainPartial( pStudioHdr, rootxform, pos, q, iBone, pBoneToWorld, boneComputed, -1 );
  234. return;
  235. }
  236. //-----------------------------------------------------------------------------
  237. // Purpose: build boneToWorld transforms for a specific bone
  238. //-----------------------------------------------------------------------------
  239. void BuildBoneChain(
  240. const CStudioHdr *pStudioHdr,
  241. const matrix3x4a_t &rootxform,
  242. const BoneVector pos[],
  243. const BoneQuaternion q[],
  244. int iBone,
  245. matrix3x4a_t *pBoneToWorld,
  246. CBoneBitList &boneComputed )
  247. {
  248. BuildBoneChainPartial( pStudioHdr, rootxform, pos, q, iBone, pBoneToWorld, boneComputed, -1 );
  249. }
  250. void BuildBoneChainPartial(
  251. const CStudioHdr *pStudioHdr,
  252. const matrix3x4_t &rootxform,
  253. const BoneVector pos[],
  254. const BoneQuaternion q[],
  255. int iBone,
  256. matrix3x4_t *pBoneToWorld,
  257. CBoneBitList &boneComputed,
  258. int iRoot )
  259. {
  260. if ( boneComputed.IsBoneMarked(iBone) )
  261. return;
  262. matrix3x4_t bonematrix;
  263. QuaternionMatrix( q[iBone], pos[iBone], bonematrix );
  264. int parent = pStudioHdr->boneParent( iBone );
  265. if (parent == -1 || iBone == iRoot)
  266. {
  267. ConcatTransforms( rootxform, bonematrix, pBoneToWorld[iBone] );
  268. }
  269. else
  270. {
  271. // evil recursive!!!
  272. BuildBoneChainPartial( pStudioHdr, rootxform, pos, q, parent, pBoneToWorld, boneComputed, iRoot );
  273. ConcatTransforms( pBoneToWorld[parent], bonematrix, pBoneToWorld[iBone]);
  274. }
  275. boneComputed.MarkBone(iBone);
  276. }
  277. //-----------------------------------------------------------------------------
  278. // Purpose: qt = ( s * p ) * q
  279. //-----------------------------------------------------------------------------
  280. void QuaternionSM( float s, const Quaternion &p, const Quaternion &q, Quaternion &qt )
  281. {
  282. Quaternion p1, q1;
  283. QuaternionScale( p, s, p1 );
  284. QuaternionMult( p1, q, q1 );
  285. QuaternionNormalize( q1 );
  286. qt[0] = q1[0];
  287. qt[1] = q1[1];
  288. qt[2] = q1[2];
  289. qt[3] = q1[3];
  290. }
  #if ALLOW_SIMD_QUATERNION_MATH
  // SIMD counterpart of QuaternionSM: scale p by s, multiply by q, normalize.
  FORCEINLINE fltx4 QuaternionSMSIMD( const fltx4 &s, const fltx4 &p, const fltx4 &q )
  {
      const fltx4 scaledP = QuaternionScaleSIMD( p, s );
      const fltx4 product = QuaternionMultSIMD( scaledP, q );
      return QuaternionNormalizeSIMD( product );
  }

  // Scalar-weight convenience overload; converts s with ReplicateX4() and forwards.
  FORCEINLINE fltx4 QuaternionSMSIMD( float s, const fltx4 &p, const fltx4 &q )
  {
      const fltx4 s4 = ReplicateX4(s);
      return QuaternionSMSIMD( s4, p, q );
  }
  #endif
  305. //-----------------------------------------------------------------------------
  306. // Purpose: qt = p * ( s * q )
  307. //-----------------------------------------------------------------------------
  308. void QuaternionMA( const Quaternion &p, float s, const Quaternion &q, Quaternion &qt )
  309. {
  310. Quaternion p1, q1;
  311. QuaternionScale( q, s, q1 );
  312. QuaternionMult( p, q1, p1 );
  313. QuaternionNormalize( p1 );
  314. qt[0] = p1[0];
  315. qt[1] = p1[1];
  316. qt[2] = p1[2];
  317. qt[3] = p1[3];
  318. }
  #if ALLOW_SIMD_QUATERNION_MATH
  // SIMD counterpart of QuaternionMA: scale q by s, multiply p by it, normalize.
  FORCEINLINE fltx4 QuaternionMASIMD( const fltx4 &p, const fltx4 &s, const fltx4 &q )
  {
      const fltx4 scaledQ = QuaternionScaleSIMD( q, s );
      const fltx4 product = QuaternionMultSIMD( p, scaledQ );
      return QuaternionNormalizeSIMD( product );
  }

  // Scalar-weight convenience overload; converts s with ReplicateX4() and forwards.
  FORCEINLINE fltx4 QuaternionMASIMD( const fltx4 &p, float s, const fltx4 &q )
  {
      const fltx4 s4 = ReplicateX4(s);
      return QuaternionMASIMD( p, s4, q );
  }
  #endif
  333. //-----------------------------------------------------------------------------
  334. // Purpose: qt = p + s * q
  335. //-----------------------------------------------------------------------------
  336. void QuaternionAccumulate( const Quaternion &p, float s, const Quaternion &q, Quaternion &qt )
  337. {
  338. Quaternion q2;
  339. QuaternionAlign( p, q, q2 );
  340. qt[0] = p[0] + s * q2[0];
  341. qt[1] = p[1] + s * q2[1];
  342. qt[2] = p[2] + s * q2[2];
  343. qt[3] = p[3] + s * q2[3];
  344. }
  #if ALLOW_SIMD_QUATERNION_MATH
  // SIMD counterpart of QuaternionAccumulate: align q to p, then combine the
  // replicated weight, the aligned q and p with MaddSIMD().
  FORCEINLINE fltx4 QuaternionAccumulateSIMD( const fltx4 &p, float s, const fltx4 &q )
  {
      const fltx4 alignedQ = QuaternionAlignSIMD( p, q );
      const fltx4 weight4 = ReplicateX4( s );
      return MaddSIMD( weight4, alignedQ, p );
  }
  #endif
  355. //-----------------------------------------------------------------------------
  356. // Purpose: blend together in world space q1,pos1 with q2,pos2. Return result in q1,pos1.
  357. // 0 returns q1, pos1. 1 returns q2, pos2
  358. //-----------------------------------------------------------------------------
  359. void WorldSpaceSlerp(
  360. const CStudioHdr *pStudioHdr,
  361. BoneQuaternion q1[MAXSTUDIOBONES],
  362. BoneVector pos1[MAXSTUDIOBONES],
  363. mstudioseqdesc_t &seqdesc,
  364. int sequence,
  365. const BoneQuaternion q2[MAXSTUDIOBONES],
  366. const BoneVector pos2[MAXSTUDIOBONES],
  367. float s,
  368. int boneMask )
  369. {
  370. BONE_PROFILE_FUNC();
  371. int i, j;
  372. float s1; // weight of parent for q2, pos2
  373. float s2; // weight for q2, pos2
  374. // make fake root transform
  375. matrix3x4a_t rootXform;
  376. SetIdentityMatrix( rootXform );
  377. // matrices for q2, pos2
  378. matrix3x4a_t *srcBoneToWorld = g_MatrixPool.Alloc();
  379. CBoneBitList srcBoneComputed;
  380. matrix3x4a_t *destBoneToWorld = g_MatrixPool.Alloc();
  381. CBoneBitList destBoneComputed;
  382. matrix3x4a_t *targetBoneToWorld = g_MatrixPool.Alloc();
  383. CBoneBitList targetBoneComputed;
  384. virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
  385. const virtualgroup_t *pSeqGroup = NULL;
  386. if (pVModel)
  387. {
  388. pSeqGroup = pVModel->pSeqGroup( sequence );
  389. }
  390. const mstudiobone_t *pbone = pStudioHdr->pBone( 0 );
  391. for (i = 0; i < pStudioHdr->numbones(); i++)
  392. {
  393. // skip unused bones
  394. if (!(pStudioHdr->boneFlags(i) & boneMask))
  395. {
  396. continue;
  397. }
  398. int n = pbone[i].parent;
  399. s1 = 0.0;
  400. if (pSeqGroup)
  401. {
  402. j = pSeqGroup->boneMap[i];
  403. if (j >= 0)
  404. {
  405. s2 = s * seqdesc.weight( j ); // blend in based on this bones weight
  406. if (n != -1)
  407. {
  408. s1 = s * seqdesc.weight( pSeqGroup->boneMap[n] );
  409. }
  410. }
  411. else
  412. {
  413. s2 = 0.0;
  414. }
  415. }
  416. else
  417. {
  418. s2 = s * seqdesc.weight( i ); // blend in based on this bones weight
  419. if (n != -1)
  420. {
  421. s1 = s * seqdesc.weight( n );
  422. }
  423. }
  424. if ( s2 > 0.0 || s1 > 0.0 )
  425. {
  426. Quaternion srcQ, destQ;
  427. Vector srcPos, destPos;
  428. Quaternion targetQ;
  429. Vector targetPos;
  430. Vector tmp;
  431. BuildBoneChain( pStudioHdr, rootXform, pos1, q1, i, destBoneToWorld, destBoneComputed );
  432. BuildBoneChain( pStudioHdr, rootXform, pos2, q2, i, srcBoneToWorld, srcBoneComputed );
  433. MatrixAngles( destBoneToWorld[i], destQ, destPos );
  434. MatrixAngles( srcBoneToWorld[i], srcQ, srcPos );
  435. QuaternionSlerp( destQ, srcQ, s2, targetQ );
  436. AngleMatrix( RadianEuler(targetQ), destPos, targetBoneToWorld[i] );
  437. // back solve
  438. if (n == -1)
  439. {
  440. MatrixAngles( targetBoneToWorld[i], q1[i], tmp );
  441. }
  442. else
  443. {
  444. matrix3x4a_t worldToBone;
  445. MatrixInvert( targetBoneToWorld[n], worldToBone );
  446. matrix3x4a_t local;
  447. ConcatTransforms_Aligned( worldToBone, targetBoneToWorld[i], local );
  448. MatrixAngles( local, q1[i], tmp );
  449. // blend bone lengths (local space)
  450. //pos1[i] = Lerp( s2, pos1[i], pos2[i] );
  451. pos1[i] = pos1[i] + (pos2[i] - pos1[i]) * s2;
  452. }
  453. }
  454. }
  455. g_MatrixPool.Free( srcBoneToWorld );
  456. g_MatrixPool.Free( destBoneToWorld );
  457. g_MatrixPool.Free( targetBoneToWorld );
  458. }
  459. #define PARANOID_SIMD_DOUBLECHECK 0 // set this to one to perform both SIMD and scalar bones every frame,
  460. // then compare the results.
  461. #define PARANOID_SIMD_TIMING_TEST 0 // enable to allow running many iterations of SlerpBones per frame
  462. // for timing purposes
  463. #ifdef _X360
  464. // SIMD bone setup is a perf win on 360
  465. static ConVar cl_simdbones( "cl_simdbones", "1", FCVAR_REPLICATED, "Use SIMD bone setup." );
  466. #else
  467. // SIMD bone setup is a perf loss on the PC
  468. static ConVar cl_simdbones( "cl_simdbones", "0", FCVAR_REPLICATED, "Use SIMD bone setup." );
  469. #endif
  470. void SlerpBonesSpeedy(
  471. const CStudioHdr *pStudioHdr,
  472. BoneQuaternionAligned q1[MAXSTUDIOBONES],
  473. BoneVector pos1[MAXSTUDIOBONES],
  474. mstudioseqdesc_t &seqdesc, // source of q2 and pos2
  475. int sequence,
  476. const BoneQuaternionAligned q2[MAXSTUDIOBONES],
  477. const BoneVector pos2[MAXSTUDIOBONES],
  478. float s,
  479. int boneMask );
  480. volatile int iForBreakpoint;
  481. //-----------------------------------------------------------------------------
  482. // Purpose: blend together q1,pos1 with q2,pos2. Return result in q1,pos1.
  483. // 0 returns q1, pos1. 1 returns q2, pos2
  484. //-----------------------------------------------------------------------------
  485. #if PARANOID_SIMD_TIMING_TEST
  486. static ConVar cl_bones_simd_timing_version( "cl_bones_simd_timing_version", "0", FCVAR_REPLICATED, "0 = scalar version, 1 = simd version." );
  487. void SlerpBonesSlow(
  488. #else
  489. void SlerpBones(
  490. #endif
  491. const CStudioHdr *pStudioHdr,
  492. BoneQuaternion * RESTRICT q1,
  493. BoneVector * RESTRICT pos1,
  494. mstudioseqdesc_t &seqdesc, // source of q2 and pos2
  495. int sequence,
  496. const BoneQuaternionAligned * RESTRICT q2, // [MAXSTUDIOBONES],
  497. const BoneVector * RESTRICT pos2, // [MAXSTUDIOBONES],
  498. float s,
  499. int boneMask )
  500. {
  501. BONE_PROFILE_FUNC();
  502. SNPROF_ANIM("SlerpBones");
  503. #if PARANOID_SIMD_DOUBLECHECK
  504. // copy off the input arrays so we can do them twice
  505. static CThreadFastMutex m_mutex;
  506. AUTO_LOCK( m_mutex );
  507. static BoneQuaternionAligned doublecheckQuat[MAXSTUDIOBONES];
  508. static BoneQuaternionAligned doublecheckOriginalQuat[MAXSTUDIOBONES];
  509. static BoneVector doublecheckPos[MAXSTUDIOBONES];
  510. static BoneVector doublecheckOriginalPos[MAXSTUDIOBONES];
  511. #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
  512. BoneVector *originalPosPointer = pos1;
  513. BoneQuaternion *originalQuatPointer = q1;
  514. #endif
  515. {
  516. memcpy( doublecheckQuat, q1, MAXSTUDIOBONES * sizeof(BoneQuaternionAligned) );
  517. memcpy( doublecheckOriginalQuat, q1, MAXSTUDIOBONES * sizeof(BoneQuaternionAligned) );
  518. memcpy( doublecheckPos, pos1, MAXSTUDIOBONES * sizeof(BoneVector) );
  519. memcpy( doublecheckOriginalPos, pos1, MAXSTUDIOBONES * sizeof(BoneVector) );
  520. }
  521. #endif
  522. // Test for 16-byte alignment, and if present, use the speedy SIMD version.
  523. if ( (reinterpret_cast<uintp>(q1) & 0x0F) == 0 &&
  524. (reinterpret_cast<uintp>(q2) & 0x0F) == 0 )
  525. {
  526. // Msg("Aligned\n");
  527. if ( cl_simdbones.GetBool()
  528. #if PARANOID_SIMD_TIMING_TEST
  529. && (cl_bones_simd_timing_version.GetInt() != 0)
  530. #endif
  531. )
  532. {
  533. #if ( PARANOID_SIMD_DOUBLECHECK == 1 ) // do simd into sep array, scalar into original, then compare
  534. // if double checking, write to static arrays
  535. // then do things the ordinary way
  536. // then check up at the end.
  537. SlerpBonesSpeedy(pStudioHdr,
  538. reinterpret_cast<BoneQuaternionAligned *>(doublecheckQuat),
  539. doublecheckPos,
  540. seqdesc,
  541. sequence,
  542. q2,
  543. pos2,
  544. s,
  545. boneMask
  546. );
  547. #elif ( PARANOID_SIMD_DOUBLECHECK == 2 )
  548. // if double checking, write to static arrays
  549. // then do things the ordinary way
  550. // then check up at the end.
  551. SlerpBonesSpeedy(pStudioHdr,
  552. reinterpret_cast<BoneQuaternionAligned *>(q1),
  553. pos1,
  554. seqdesc,
  555. sequence,
  556. q2,
  557. pos2,
  558. s,
  559. boneMask
  560. );
  561. pos1 = doublecheckPos;
  562. q1 = doublecheckQuat;
  563. #else
  564. return SlerpBonesSpeedy(pStudioHdr,
  565. reinterpret_cast<BoneQuaternionAligned *>(q1),
  566. pos1,
  567. seqdesc,
  568. sequence,
  569. q2,
  570. pos2,
  571. s,
  572. boneMask
  573. );
  574. #endif
  575. }
  576. }
  577. else
  578. {
  579. // Msg("misaligned\n");
  580. }
  581. if (s <= 0.0f)
  582. return;
  583. if (s > 1.0f)
  584. {
  585. s = 1.0f;
  586. }
  587. if ( (seqdesc.flags & STUDIO_WORLD) || (seqdesc.flags & STUDIO_WORLD_AND_RELATIVE) )
  588. {
  589. WorldSpaceSlerp( pStudioHdr, q1, pos1, seqdesc, sequence, q2, pos2, s, boneMask );
  590. if (seqdesc.flags & STUDIO_WORLD)
  591. return;
  592. }
  593. int i, j;
  594. virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
  595. const virtualgroup_t *pSeqGroup = NULL;
  596. if (pVModel)
  597. {
  598. pSeqGroup = pVModel->pSeqGroup( sequence );
  599. }
  600. // Build weightlist for all bones
  601. int nBoneCount = pStudioHdr->numbones();
  602. float *pS2 = (float*)stackalloc( nBoneCount * sizeof(float) );
  603. for (i = 0; i < nBoneCount; i++)
  604. {
  605. // skip unused bones
  606. if (!(pStudioHdr->boneFlags(i) & boneMask))
  607. {
  608. pS2[i] = 0.0f;
  609. continue;
  610. }
  611. if ( !pSeqGroup )
  612. {
  613. pS2[i] = s * seqdesc.weight( i ); // blend in based on this bones weight
  614. continue;
  615. }
  616. j = pSeqGroup->boneMap[i];
  617. if ( j >= 0 )
  618. {
  619. pS2[i] = s * seqdesc.weight( j ); // blend in based on this bones weight
  620. }
  621. else
  622. {
  623. pS2[i] = 0.0;
  624. }
  625. }
  626. float s1, s2;
  627. if ( seqdesc.flags & STUDIO_DELTA )
  628. {
  629. for ( i = 0; i < nBoneCount; i++ )
  630. {
  631. s2 = pS2[i];
  632. if ( s2 <= 0.0f )
  633. continue;
  634. if ( seqdesc.flags & STUDIO_POST )
  635. {
  636. #ifndef _X360
  637. QuaternionMA( q1[i], s2, q2[i], q1[i] );
  638. #else
  639. fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() );
  640. fltx4 q2simd = LoadAlignedSIMD( q2[i] );
  641. fltx4 result = QuaternionMASIMD( q1simd, s2, q2simd );
  642. StoreUnalignedSIMD( q1[i].Base(), result );
  643. #endif
  644. }
  645. else
  646. {
  647. #ifndef _X360
  648. QuaternionSM( s2, q2[i], q1[i], q1[i] );
  649. #else
  650. fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() );
  651. fltx4 q2simd = LoadAlignedSIMD( q2[i] );
  652. fltx4 result = QuaternionSMSIMD( s2, q2simd, q1simd );
  653. StoreUnalignedSIMD( q1[i].Base(), result );
  654. #endif
  655. }
  656. // do this explicitly to make the scheduling better
  657. // (otherwise it might think pos1 and pos2 overlap,
  658. // and thus save one before starting the next)
  659. float x,y,z;
  660. x = pos1[i][0] + pos2[i][0] * s2;
  661. y = pos1[i][1] + pos2[i][1] * s2;
  662. z = pos1[i][2] + pos2[i][2] * s2;
  663. pos1[i][0] = x;
  664. pos1[i][1] = y;
  665. pos1[i][2] = z;
  666. }
  667. return;
  668. }
  669. BoneQuaternionAligned q3;
  670. for (i = 0; i < nBoneCount; i++)
  671. {
  672. s2 = pS2[i];
  673. if ( s2 <= 0.0f )
  674. continue;
  675. s1 = 1.0 - s2;
  676. #ifdef _X360
  677. fltx4 q1simd, q2simd, result;
  678. q1simd = LoadUnalignedSIMD( q1[i].Base() );
  679. q2simd = LoadAlignedSIMD( q2[i] );
  680. #endif
  681. if ( pStudioHdr->boneFlags(i) & BONE_FIXED_ALIGNMENT )
  682. {
  683. #ifndef _X360
  684. QuaternionSlerpNoAlign( q2[i], q1[i], s1, q3 );
  685. #else
  686. result = QuaternionSlerpNoAlignSIMD( q2simd, q1simd, s1 );
  687. #endif
  688. }
  689. else
  690. {
  691. #ifndef _X360
  692. QuaternionSlerp( q2[i], q1[i], s1, q3 );
  693. #else
  694. result = QuaternionSlerpSIMD( q2simd, q1simd, s1 );
  695. #endif
  696. }
  697. #ifndef _X360
  698. q1[i][0] = q3[0];
  699. q1[i][1] = q3[1];
  700. q1[i][2] = q3[2];
  701. q1[i][3] = q3[3];
  702. #else
  703. StoreUnalignedSIMD( q1[i].Base(), result );
  704. #endif
  705. pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * s2;
  706. pos1[i][1] = pos1[i][1] * s1 + pos2[i][1] * s2;
  707. pos1[i][2] = pos1[i][2] * s1 + pos2[i][2] * s2;
  708. }
  709. #if PARANOID_SIMD_DOUBLECHECK
  710. // check everything
  711. if (cl_simdbones.GetBool())
  712. {
  713. #if ( PARANOID_SIMD_DOUBLECHECK == 2)
  714. pos1 = originalPosPointer ;
  715. q1 = originalQuatPointer ;
  716. #endif
  717. for (i = 0 ; i < nBoneCount ; ++i)
  718. {
  719. static volatile int PARANOID_II = i;
  720. if ( pS2[i] <= 0.0f )
  721. {
  722. // these aren't used, but test them to make sure they haven't been overwritten.
  723. // it's important that the garbage there remain garbage, for some reason.
  724. const unsigned int *ORIGINAL_Q1, *SCALAR_Q1, *SIMD_Q1, *Q2;
  725. #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
  726. SCALAR_Q1 = reinterpret_cast<const unsigned int *>(doublecheckQuat[i].Base());
  727. SIMD_Q1 = reinterpret_cast<const unsigned int *>(q1[i].Base());
  728. #else
  729. SIMD_Q1 = reinterpret_cast<const unsigned int *>(doublecheckQuat[i].Base());
  730. SCALAR_Q1 = reinterpret_cast<const unsigned int *>(q1[i].Base());
  731. #endif
  732. ORIGINAL_Q1 = reinterpret_cast<const unsigned int *>(doublecheckOriginalQuat[i].Base());
  733. Q2 = reinterpret_cast<const unsigned int *>(q2[i].Base());
  734. if(!( SIMD_Q1[0] == SCALAR_Q1[0] &&
  735. SIMD_Q1[1] == SCALAR_Q1[1] &&
  736. SIMD_Q1[2] == SCALAR_Q1[2] &&
  737. SIMD_Q1[3] == SCALAR_Q1[3] ))
  738. {
  739. AssertMsg(false,"Wrote invalid quats\n");
  740. ++iForBreakpoint;
  741. }
  742. const unsigned int *ORIGINAL_V1, *SCALAR_V1, *SIMD_V1, *V2;
  743. #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
  744. SCALAR_V1 = reinterpret_cast<const unsigned int *>(doublecheckPos[i].Base());
  745. SIMD_V1 = reinterpret_cast<const unsigned int *>(pos1[i].Base());
  746. #else
  747. SIMD_V1 = reinterpret_cast<const unsigned int *>(doublecheckPos[i].Base());
  748. SCALAR_V1 = reinterpret_cast<const unsigned int *>(pos1[i].Base());
  749. #endif
  750. ORIGINAL_V1 = reinterpret_cast<const unsigned int *>(doublecheckOriginalPos[i].Base());
  751. V2 = reinterpret_cast<const unsigned int *>(pos2[i].Base());
  752. if(!( SIMD_V1[0] == SCALAR_V1[0] &&
  753. SIMD_V1[1] == SCALAR_V1[1] &&
  754. SIMD_V1[2] == SCALAR_V1[2] ))
  755. {
  756. AssertMsg(false,"Wrote invalid pos\n");
  757. ++iForBreakpoint;
  758. }
  759. }
  760. else
  761. {
  762. // test quaternions, unless they were slerped from opposite directions
  763. if ( !(QuaternionDotProduct(doublecheckQuat[i], q1[i]) > 0.99f) &&
  764. !(QuaternionDotProduct(doublecheckQuat[i], q1[i]) < -0.99f) )
  765. {
  766. BoneQuaternionAligned ORIGINAL_Q1, SCALAR_Q1, SIMD_Q1, Q2;
  767. #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
  768. SCALAR_Q1 = doublecheckQuat[i];
  769. SIMD_Q1 = q1[i];
  770. #else
  771. SIMD_Q1 = doublecheckQuat[i];
  772. SCALAR_Q1 = q1[i];
  773. #endif
  774. ORIGINAL_Q1 = doublecheckOriginalQuat[i];
  775. Q2 = q2[i];
  776. AssertMsg( false, "SIMD and scalar SlerpBones quats do not match up.\n" );
  777. }
  778. // test positions, unless they were slerped from opposite directions
  779. BoneVector posDiff;
  780. posDiff = pos1[i] - doublecheckPos[i];
  781. if ( !posDiff.IsZero() )
  782. {
  783. BoneVector ORIGINAL_V1, SCALAR_V1, SIMD_V1, V2;
  784. #if ( PARANOID_SIMD_DOUBLECHECK == 2 )
  785. SCALAR_V1 = doublecheckPos[i];
  786. SIMD_V1 = pos1[i];
  787. #else
  788. SIMD_V1 = doublecheckPos[i];
  789. SCALAR_V1 = pos1[i];
  790. #endif
  791. ORIGINAL_V1 = doublecheckOriginalPos[i];
  792. V2 = pos2[i];
  793. AssertMsg( false, "SIMD and scalar SlerpBones pos do not match up.\n" );
  794. }
  795. }
  796. }
  797. // compare the slack space in the array -- did we overwrite unused bones?
  798. for ( i ; i < MAXSTUDIOBONES ; ++ i)
  799. {
  800. if ( memcmp(pos1+i, doublecheckOriginalPos+i, sizeof(BoneVector)) != 0)
  801. {
  802. AssertMsg(false, "slack positions overwritten\n");
  803. ++iForBreakpoint;
  804. }
  805. if ( memcmp(q1+i, doublecheckOriginalQuat+i, sizeof(BoneVector)) != 0)
  806. {
  807. AssertMsg(false, "slack quats overwritten\n");
  808. ++iForBreakpoint;
  809. }
  810. }
  811. #if ( PARANOID_SIMD_DOUBLECHECK == 1 )
  812. // dupe SIMD version back over, becaus ewe wrote it into this other array
  813. memcpy(q1, doublecheckQuat, nBoneCount * sizeof(BoneQuaternionAligned) );
  814. memcpy(pos1, doublecheckPos, nBoneCount * sizeof(BoneVector) );
  815. #elif ( PARANOID_SIMD_DOUBLECHECK == 2 )
  816. memcpy(pos1, doublecheckPos, nBoneCount * sizeof(BoneVector) );
  817. #endif
  818. }
  819. #endif
  820. }
  821. ConVar cl_use_simd_bones( "cl_use_simd_bones", "1", FCVAR_REPLICATED, "1 use SIMD bones 0 use scalar bones." );
  822. //-----------------------------------------------------------------------------
  823. // Purpose: blend together q1,pos1 with q2,pos2. Return result in q1,pos1.
  824. // Uses four-at-a-time SIMD.
  825. //-----------------------------------------------------------------------------
  826. void SlerpBonesSpeedy(
  827. const CStudioHdr * RESTRICT pStudioHdr,
  828. BoneQuaternionAligned q1[MAXSTUDIOBONES],
  829. BoneVector pos1[MAXSTUDIOBONES],
  830. mstudioseqdesc_t &seqdesc, // source of q2 and pos2
  831. int sequence,
  832. const BoneQuaternionAligned q2[MAXSTUDIOBONES],
  833. const BoneVector pos2[MAXSTUDIOBONES],
  834. float s,
  835. int boneMask )
  836. {
  837. BONE_PROFILE_FUNC(); // ex: x360: 1.2ms
  838. // Assert 16-byte alignment of in and out arrays.
  839. AssertMsg(
  840. ((reinterpret_cast<uintp>(q1) & 0x0F)==0) &&
  841. ((reinterpret_cast<uintp>(q2) & 0x0F)==0) ,
  842. "Input arrays to SlerpBones are not aligned! Catastrophe is inevitable.\n");
  843. // Test for overlapping buffers
  844. #if PARANOID_SIMD_DOUBLECHECK
  845. {
  846. int nBoneCount = pStudioHdr->numbones();
  847. int qbot = reinterpret_cast<int>(q1);
  848. int qtop = reinterpret_cast<int>(q1 + nBoneCount);
  849. int pbot = reinterpret_cast<int>(pos1);
  850. int ptop = reinterpret_cast<int>(pos1 + nBoneCount);
  851. if ( ((pbot >= qbot) && (pbot <= qtop)) ||
  852. ((ptop >= qbot) && (ptop <= qtop)) ||
  853. ((qbot >= pbot) && (qbot <= ptop)) ||
  854. ((qtop >= pbot) && (qtop <= ptop)) )
  855. {
  856. DebuggerBreak();
  857. }
  858. }
  859. #endif
  860. if (s <= 0.0f)
  861. return;
  862. if (s > 1.0f)
  863. {
  864. s = 1.0f;
  865. }
  866. if ( (seqdesc.flags & STUDIO_WORLD) || (seqdesc.flags & STUDIO_WORLD_AND_RELATIVE) )
  867. {
  868. WorldSpaceSlerp( pStudioHdr, q1, pos1, seqdesc, sequence, q2, pos2, s, boneMask );
  869. if (seqdesc.flags & STUDIO_WORLD)
  870. return;
  871. }
  872. // haul the input arrays into cache if they're not there already
  873. PREFETCH360(q1,0);
  874. PREFETCH360(pos1,0);
  875. PREFETCH360(q2,0);
  876. PREFETCH360(pos2,0);
  877. int i;
  878. virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
  879. const virtualgroup_t * RESTRICT pSeqGroup = NULL;
  880. if (pVModel)
  881. {
  882. pSeqGroup = pVModel->pSeqGroup( sequence );
  883. }
  884. // Build weightlist for all bones
  885. int nBoneCount = pStudioHdr->numbones();
  886. float * RESTRICT pS2 = (float*)stackalloc( nBoneCount * sizeof(float) ); // 16-byte aligned
  887. if ( pSeqGroup ) // hoist this branch outside of the inner loop for speed (even correctly predicted branches are an eight cycle latency)
  888. {
  889. for (i = 0; i < nBoneCount; i++)
  890. {
  891. // skip unused bones
  892. if (!(pStudioHdr->boneFlags(i) & boneMask) ||
  893. pSeqGroup->boneMap[i] < 0 )
  894. {
  895. pS2[i] = 0.0f;
  896. }
  897. else
  898. {
  899. // boneMap[i] is not a float, don't be lured by the siren call of fcmp
  900. pS2[i] = s * seqdesc.weight( pSeqGroup->boneMap[i] );
  901. }
  902. }
  903. }
  904. else // !pSeqGroup
  905. {
  906. for (i = 0; i < nBoneCount; i++)
  907. {
  908. // skip unused bones
  909. if (!(pStudioHdr->boneFlags(i) & boneMask))
  910. {
  911. pS2[i] = 0.0f;
  912. }
  913. else
  914. {
  915. pS2[i] = s * seqdesc.weight( i ); // blend in based on this bones weight
  916. }
  917. }
  918. }
  919. float weight;
  920. int nBoneCountRoundedFour = ( nBoneCount ) & (~(3));
  921. if ( seqdesc.flags & STUDIO_DELTA )
  922. {
  923. // do as many as we can four at a time, then take care of stragglers.
  924. for ( i = 0; i < nBoneCountRoundedFour; i+=4 )
  925. {
  926. // drag the next cache line in
  927. PREFETCH360(q1,i*16 + 128);
  928. PREFETCH360(pos1,i*16 + 128);
  929. PREFETCH360(q2,i*16 + 128);
  930. PREFETCH360(pos2,i*16 + 128);
  931. fltx4 weightfour = LoadAlignedSIMD(pS2+i); // four weights
  932. FourQuaternions q1four, q2four;
  933. FourQuaternions result;
  934. q1four.LoadAndSwizzleAligned(q1+i); // four quaternions
  935. q2four.LoadAndSwizzleAligned(q2+i); // four quaternions
  936. if ( seqdesc.flags & STUDIO_POST )
  937. {
  938. // result = q1 * ( weight * q2 )
  939. result = q1four.MulAc(weightfour, q2four);
  940. }
  941. else
  942. {
  943. // result = ( s * q1 ) * q2
  944. result = q2four.ScaleMul(weightfour, q1four);
  945. }
  946. // mask out unused channels, replacing them with original data
  947. {
  948. bi32x4 tinyScales = CmpLeSIMD( weightfour, Four_Zeros );
  949. result.x = MaskedAssign(tinyScales, q1four.x, result.x);
  950. result.y = MaskedAssign(tinyScales, q1four.y, result.y);
  951. result.z = MaskedAssign(tinyScales, q1four.z, result.z);
  952. result.w = MaskedAssign(tinyScales, q1four.w, result.w);
  953. }
  954. result.SwizzleAndStoreAlignedMasked(q1+i, CmpGtSIMD(weightfour,Four_Zeros) );
  955. fltx4 originalpos1simd[4], pos1simd[4], pos2simd[4];
  956. originalpos1simd[0] = pos1simd[0] = LoadUnalignedSIMD(pos1[i+0].Base());
  957. originalpos1simd[1] = pos1simd[1] = LoadUnalignedSIMD(pos1[i+1].Base());
  958. originalpos1simd[2] = pos1simd[2] = LoadUnalignedSIMD(pos1[i+2].Base());
  959. originalpos1simd[3] = pos1simd[3] = LoadUnalignedSIMD(pos1[i+3].Base());
  960. pos2simd[0] = LoadUnalignedSIMD(pos2[i+0].Base());
  961. pos2simd[1] = LoadUnalignedSIMD(pos2[i+1].Base());
  962. pos2simd[2] = LoadUnalignedSIMD(pos2[i+2].Base());
  963. pos2simd[3] = LoadUnalignedSIMD(pos2[i+3].Base());
  964. fltx4 splatweights[4] = { SplatXSIMD(weightfour),
  965. SplatYSIMD(weightfour),
  966. SplatZSIMD(weightfour),
  967. SplatWSIMD(weightfour) };
  968. fltx4 Zero = Four_Zeros;
  969. pos1simd[0] = MaddSIMD(pos2simd[0], splatweights[0], pos1simd[0] );
  970. splatweights[0] = ( fltx4 ) CmpGtSIMD(splatweights[0], Zero);
  971. pos1simd[1] = MaddSIMD(pos2simd[1], splatweights[1], pos1simd[1] );
  972. splatweights[1] = ( fltx4 ) CmpGtSIMD(splatweights[1], Zero);
  973. pos1simd[2] = MaddSIMD(pos2simd[2], splatweights[2], pos1simd[2] );
  974. splatweights[2] = ( fltx4 ) CmpGtSIMD(splatweights[2], Zero);
  975. pos1simd[3] = MaddSIMD(pos2simd[3], splatweights[3], pos1simd[3] );
  976. splatweights[3] = ( fltx4 ) CmpGtSIMD(splatweights[3], Zero);
  977. // mask out unweighted bones
  978. /*
  979. if (pS2[i+0] > 0)
  980. StoreUnaligned3SIMD( pos1[i + 0].Base(), pos1simd[0] );
  981. if (pS2[i+1] > 0)
  982. StoreUnaligned3SIMD( pos1[i + 1].Base(), pos1simd[1] );
  983. if (pS2[i+2] > 0)
  984. StoreUnaligned3SIMD( pos1[i + 2].Base(), pos1simd[2] );
  985. if (pS2[i+3] > 0)
  986. StoreUnaligned3SIMD( pos1[i + 3].Base(), pos1simd[3] );
  987. */
  988. StoreUnaligned3SIMD( pos1[i + 0].Base(), MaskedAssign( ( bi32x4 ) splatweights[0], pos1simd[0], originalpos1simd[0] ) );
  989. StoreUnaligned3SIMD( pos1[i + 1].Base(), MaskedAssign( ( bi32x4 ) splatweights[1], pos1simd[1], originalpos1simd[1] ) );
  990. StoreUnaligned3SIMD( pos1[i + 2].Base(), MaskedAssign( ( bi32x4 ) splatweights[2], pos1simd[2], originalpos1simd[2] ) );
  991. StoreUnaligned3SIMD( pos1[i + 3].Base(), MaskedAssign( ( bi32x4 ) splatweights[3], pos1simd[3], originalpos1simd[3] ) );
  992. }
  993. // take care of stragglers
  994. for ( false ; i < nBoneCount; i++ )
  995. {
  996. weight = pS2[i];
  997. if ( weight <= 0.0f )
  998. continue;
  999. if ( seqdesc.flags & STUDIO_POST )
  1000. {
  1001. #ifndef _X360
  1002. QuaternionMA( q1[i], weight, q2[i], q1[i] );
  1003. #else
  1004. fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() );
  1005. fltx4 q2simd = LoadAlignedSIMD( q2[i] );
  1006. fltx4 result = QuaternionMASIMD( q1simd, weight, q2simd );
  1007. StoreUnalignedSIMD( q1[i].Base(), result );
  1008. #endif
  1009. // FIXME: are these correct?
  1010. pos1[i][0] = pos1[i][0] + pos2[i][0] * weight;
  1011. pos1[i][1] = pos1[i][1] + pos2[i][1] * weight;
  1012. pos1[i][2] = pos1[i][2] + pos2[i][2] * weight;
  1013. }
  1014. else
  1015. {
  1016. #ifndef _X360
  1017. QuaternionSM( weight, q2[i], q1[i], q1[i] );
  1018. #else
  1019. fltx4 q1simd = LoadUnalignedSIMD( q1[i].Base() );
  1020. fltx4 q2simd = LoadAlignedSIMD( q2[i] );
  1021. fltx4 result = QuaternionSMSIMD( weight, q2simd, q1simd );
  1022. StoreUnalignedSIMD( q1[i].Base(), result );
  1023. #endif
  1024. // FIXME: are these correct?
  1025. pos1[i][0] = pos1[i][0] + pos2[i][0] * weight;
  1026. pos1[i][1] = pos1[i][1] + pos2[i][1] * weight;
  1027. pos1[i][2] = pos1[i][2] + pos2[i][2] * weight;
  1028. }
  1029. }
  1030. return;
  1031. }
  1032. //// SLERP PHASE
  1033. // Some bones need to be slerped with alignment.
  1034. // Others do not.
  1035. // Some need to be ignored altogether.
  1036. // Build arrays indicating which are which.
  1037. // This is the corral approach. Another approach
  1038. // would be to compute both the aligned and unaligned
  1039. // slerps of each bone in the first pass through the
  1040. // array, and then do a masked selection of each
  1041. // based on the masks. However there really isn't
  1042. // a convenient way to turn the int flags that
  1043. // specify which approach to take, into fltx4 masks.
  1044. // float * RESTRICT pS2 = (float*)stackalloc( nBoneCount * sizeof(float) );
  1045. int * RESTRICT aBonesSlerpAlign = (int *)stackalloc(nBoneCount * sizeof(int));
  1046. float * RESTRICT aBonesSlerpAlignWeights = (float *)stackalloc(nBoneCount * sizeof(float));
  1047. int * RESTRICT aBonesSlerpNoAlign = (int *)stackalloc(nBoneCount * sizeof(int));
  1048. float * RESTRICT aBonesSlerpNoAlignWeights = (float *)stackalloc(nBoneCount * sizeof(float));
  1049. int numBonesSlerpAlign = 0;
  1050. int numBonesSlerpNoAlign = 0;
  1051. // BoneQuaternionAligned * RESTRICT testOutput = (BoneQuaternionAligned *)stackalloc(nBoneCount * sizeof(BoneQuaternionAligned));
  1052. // sweep forward through the array and determine where to corral each bone.
  1053. for ( i = 0 ; i < nBoneCount ; ++i )
  1054. {
  1055. float weight = pS2[i];
  1056. if (weight == 1.0f)
  1057. {
  1058. q1[i] = q2[i];
  1059. pos1[i] = pos2[i];
  1060. }
  1061. else if (weight > 0.0f) // ignore small bones
  1062. {
  1063. if ( pStudioHdr->boneFlags(i) & BONE_FIXED_ALIGNMENT )
  1064. {
  1065. aBonesSlerpNoAlign[numBonesSlerpNoAlign] = i;
  1066. aBonesSlerpNoAlignWeights[numBonesSlerpNoAlign] = weight;
  1067. ++numBonesSlerpNoAlign;
  1068. }
  1069. else
  1070. {
  1071. aBonesSlerpAlign[numBonesSlerpAlign] = i;
  1072. aBonesSlerpAlignWeights[numBonesSlerpAlign] = weight;
  1073. ++numBonesSlerpAlign;
  1074. }
  1075. }
  1076. }
  1077. // okay, compute all the aligned, and all the unaligned bones, four at
  1078. // a time if possible.
  1079. const fltx4 One = Four_Ones;
  1080. /////////////////
  1081. // // // Aligned!
  1082. nBoneCountRoundedFour = (numBonesSlerpAlign) & ~3;
  1083. for (i = 0 ; i < nBoneCountRoundedFour ; i+=4 )
  1084. {
  1085. // drag the next cache line in
  1086. PREFETCH360(q1, i*16 + 128);
  1087. PREFETCH360(pos1, i*sizeof(*pos1) + 128);
  1088. PREFETCH360(q2, i*16 + 128);
  1089. PREFETCH360(pos2, i*sizeof(*pos2) + 128);
  1090. fltx4 weights = LoadAlignedSIMD( aBonesSlerpAlignWeights+i );
  1091. fltx4 oneMinusWeight = SubSIMD(One, weights);
  1092. // position component:
  1093. // pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * weight;
  1094. fltx4 pos1simd[4];
  1095. fltx4 pos2simd[4];
  1096. pos1simd[0] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+0]].Base());
  1097. pos1simd[1] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+1]].Base());
  1098. pos1simd[2] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+2]].Base());
  1099. pos1simd[3] = LoadUnaligned3SIMD(pos1[aBonesSlerpAlign[i+3]].Base());
  1100. pos2simd[0] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+0]].Base());
  1101. pos2simd[1] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+1]].Base());
  1102. pos2simd[2] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+2]].Base());
  1103. pos2simd[3] = LoadUnaligned3SIMD(pos2[aBonesSlerpAlign[i+3]].Base());
  1104. pos1simd[0] = MulSIMD( SplatXSIMD(oneMinusWeight) , pos1simd[0] );
  1105. pos1simd[1] = MulSIMD( SplatYSIMD(oneMinusWeight) , pos1simd[1] );
  1106. pos1simd[2] = MulSIMD( SplatZSIMD(oneMinusWeight) , pos1simd[2] );
  1107. pos1simd[3] = MulSIMD( SplatWSIMD(oneMinusWeight) , pos1simd[3] );
  1108. fltx4 posWriteMasks[4]; // don't overwrite where there was zero weight
  1109. {
  1110. fltx4 splatweights[4];
  1111. fltx4 Zero = Four_Zeros;
  1112. splatweights[0] = SplatXSIMD(weights);
  1113. splatweights[1] = SplatYSIMD(weights);
  1114. splatweights[2] = SplatZSIMD(weights);
  1115. splatweights[3] = SplatWSIMD(weights);
  1116. pos1simd[0] = MaddSIMD( splatweights[0] , pos2simd[0], pos1simd[0] );
  1117. posWriteMasks[0] = ( fltx4 ) CmpGtSIMD(splatweights[0], Zero);
  1118. pos1simd[1] = MaddSIMD( splatweights[1] , pos2simd[1], pos1simd[1] );
  1119. posWriteMasks[1] = ( fltx4 ) CmpGtSIMD(splatweights[1], Zero);
  1120. pos1simd[2] = MaddSIMD( splatweights[2] , pos2simd[2], pos1simd[2] );
  1121. posWriteMasks[2] = ( fltx4 ) CmpGtSIMD(splatweights[2], Zero);
  1122. pos1simd[3] = MaddSIMD( splatweights[3] , pos2simd[3], pos1simd[3] );
  1123. posWriteMasks[3] = ( fltx4 ) CmpGtSIMD(splatweights[3], Zero);
  1124. }
  1125. FourQuaternions q1four, q2four, result;
  1126. q1four.LoadAndSwizzleAligned( q1 + aBonesSlerpAlign[i+0],
  1127. q1 + aBonesSlerpAlign[i+1],
  1128. q1 + aBonesSlerpAlign[i+2],
  1129. q1 + aBonesSlerpAlign[i+3] );
  1130. #if 0
  1131. // FIXME: the SIMD slerp doesn't handle quaternions that have opposite signs
  1132. q2four.LoadAndSwizzleAligned( q2 + aBonesSlerpAlign[i+0],
  1133. q2 + aBonesSlerpAlign[i+1],
  1134. q2 + aBonesSlerpAlign[i+2],
  1135. q2 + aBonesSlerpAlign[i+3] );
  1136. result = q2four.Slerp(q1four, oneMinusWeight);
  1137. #else
  1138. // force the quaternions to be the same sign (< 180 degree separation)
  1139. BoneQuaternionAligned q20, q21, q22, q23;
  1140. QuaternionAlign( q1[aBonesSlerpAlign[i+0]], q2[aBonesSlerpAlign[i+0]], q20 );
  1141. QuaternionAlign( q1[aBonesSlerpAlign[i+1]], q2[aBonesSlerpAlign[i+1]], q21 );
  1142. QuaternionAlign( q1[aBonesSlerpAlign[i+2]], q2[aBonesSlerpAlign[i+2]], q22 );
  1143. QuaternionAlign( q1[aBonesSlerpAlign[i+3]], q2[aBonesSlerpAlign[i+3]], q23 );
  1144. q2four.LoadAndSwizzleAligned( &q20, &q21, &q22, &q23 );
  1145. result = q2four.SlerpNoAlign(q1four, oneMinusWeight);
  1146. #endif
  1147. result.SwizzleAndStoreAligned( q1 + aBonesSlerpAlign[i+0],
  1148. q1 + aBonesSlerpAlign[i+1],
  1149. q1 + aBonesSlerpAlign[i+2],
  1150. q1 + aBonesSlerpAlign[i+3] );
  1151. StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+0]].Base(), pos1simd[0] );
  1152. StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+1]].Base(), pos1simd[1] );
  1153. StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+2]].Base(), pos1simd[2] );
  1154. StoreUnaligned3SIMD( pos1[aBonesSlerpAlign[i+3]].Base(), pos1simd[3] );
  1155. }
  1156. // handle stragglers
  1157. for ( i ; i < numBonesSlerpAlign ; ++i )
  1158. {
  1159. BoneQuaternionAligned q3;
  1160. weight = aBonesSlerpAlignWeights[i];
  1161. int k = aBonesSlerpAlign[i];
  1162. float s1 = 1.0 - weight;
  1163. #ifdef _X360
  1164. fltx4 q1simd, q2simd, result;
  1165. q1simd = LoadAlignedSIMD( q1[k].Base() );
  1166. q2simd = LoadAlignedSIMD( q2[k] );
  1167. #endif
  1168. #ifndef _X360
  1169. QuaternionSlerp( q2[k], q1[k], s1, q3 );
  1170. #else
  1171. result = QuaternionSlerpSIMD( q2simd, q1simd, s1 );
  1172. #endif
  1173. #ifndef _X360
  1174. q1[k][0] = q3[0];
  1175. q1[k][1] = q3[1];
  1176. q1[k][2] = q3[2];
  1177. q1[k][3] = q3[3];
  1178. #else
  1179. StoreAlignedSIMD( q1[k].Base(), result );
  1180. #endif
  1181. pos1[k][0] = pos1[k][0] * s1 + pos2[k][0] * weight;
  1182. pos1[k][1] = pos1[k][1] * s1 + pos2[k][1] * weight;
  1183. pos1[k][2] = pos1[k][2] * s1 + pos2[k][2] * weight;
  1184. }
  1185. ///////////////////
  1186. // // // Unaligned!
  1187. nBoneCountRoundedFour = (numBonesSlerpNoAlign) & ~3;
  1188. for (i = 0 ; i < nBoneCountRoundedFour ; i+=4 )
  1189. {
  1190. // drag the next cache line in
  1191. PREFETCH360(q1, i*16 + 128);
  1192. PREFETCH360(pos1, i*sizeof(*pos1) + 128);
  1193. PREFETCH360(q2, i*16 + 128);
  1194. PREFETCH360(pos2, i*sizeof(*pos2) + 128);
  1195. fltx4 weights = LoadAlignedSIMD( aBonesSlerpNoAlignWeights+i );
  1196. fltx4 oneMinusWeight = SubSIMD(One, weights);
  1197. // position component:
  1198. // pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * weight;
  1199. fltx4 pos1simd[4];
  1200. fltx4 pos2simd[4];
  1201. pos1simd[0] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+0]].Base());
  1202. pos1simd[1] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+1]].Base());
  1203. pos1simd[2] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+2]].Base());
  1204. pos1simd[3] = LoadUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+3]].Base());
  1205. pos2simd[0] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+0]].Base());
  1206. pos2simd[1] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+1]].Base());
  1207. pos2simd[2] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+2]].Base());
  1208. pos2simd[3] = LoadUnaligned3SIMD(pos2[aBonesSlerpNoAlign[i+3]].Base());
  1209. pos1simd[0] = MulSIMD( SplatXSIMD(oneMinusWeight) , pos1simd[0] );
  1210. pos1simd[1] = MulSIMD( SplatYSIMD(oneMinusWeight) , pos1simd[1] );
  1211. pos1simd[2] = MulSIMD( SplatZSIMD(oneMinusWeight) , pos1simd[2] );
  1212. pos1simd[3] = MulSIMD( SplatWSIMD(oneMinusWeight) , pos1simd[3] );
  1213. pos1simd[0] = MaddSIMD( SplatXSIMD(weights) , pos2simd[0], pos1simd[0] );
  1214. pos1simd[1] = MaddSIMD( SplatYSIMD(weights) , pos2simd[1], pos1simd[1] );
  1215. pos1simd[2] = MaddSIMD( SplatZSIMD(weights) , pos2simd[2], pos1simd[2] );
  1216. pos1simd[3] = MaddSIMD( SplatWSIMD(weights) , pos2simd[3], pos1simd[3] );
  1217. FourQuaternions q1four, q2four, result;
  1218. q1four.LoadAndSwizzleAligned( q1 + aBonesSlerpNoAlign[i+0],
  1219. q1 + aBonesSlerpNoAlign[i+1],
  1220. q1 + aBonesSlerpNoAlign[i+2],
  1221. q1 + aBonesSlerpNoAlign[i+3] );
  1222. q2four.LoadAndSwizzleAligned( q2 + aBonesSlerpNoAlign[i+0],
  1223. q2 + aBonesSlerpNoAlign[i+1],
  1224. q2 + aBonesSlerpNoAlign[i+2],
  1225. q2 + aBonesSlerpNoAlign[i+3] );
  1226. result = q2four.SlerpNoAlign(q1four, oneMinusWeight);
  1227. result.SwizzleAndStoreAligned( q1 + aBonesSlerpNoAlign[i+0],
  1228. q1 + aBonesSlerpNoAlign[i+1],
  1229. q1 + aBonesSlerpNoAlign[i+2],
  1230. q1 + aBonesSlerpNoAlign[i+3] );
  1231. StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+0]].Base(), pos1simd[0]);
  1232. StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+1]].Base(), pos1simd[1]);
  1233. StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+2]].Base(), pos1simd[2]);
  1234. StoreUnaligned3SIMD(pos1[aBonesSlerpNoAlign[i+3]].Base(), pos1simd[3]);
  1235. }
  1236. // handle stragglers
  1237. for ( i ; i < numBonesSlerpNoAlign ; ++i )
  1238. {
  1239. weight = aBonesSlerpNoAlignWeights[i];
  1240. int k = aBonesSlerpNoAlign[i];
  1241. float s1 = 1.0 - weight;
  1242. #ifdef _X360
  1243. fltx4 q1simd, q2simd, result;
  1244. q1simd = LoadAlignedSIMD( q1[k].Base() );
  1245. q2simd = LoadAlignedSIMD( q2[k] );
  1246. #endif
  1247. #ifndef _X360
  1248. BoneQuaternionAligned q3;
  1249. QuaternionSlerpNoAlign( q2[k], q1[k], s1, q3 );
  1250. #else
  1251. result = QuaternionSlerpNoAlignSIMD( q2simd, q1simd, s1 );
  1252. #endif
  1253. #ifndef _X360
  1254. q1[k][0] = q3[0];
  1255. q1[k][1] = q3[1];
  1256. q1[k][2] = q3[2];
  1257. q1[k][3] = q3[3];
  1258. #else
  1259. StoreAlignedSIMD( q1[k].Base(), result );
  1260. #endif
  1261. pos1[k][0] = pos1[k][0] * s1 + pos2[k][0] * weight;
  1262. pos1[k][1] = pos1[k][1] * s1 + pos2[k][1] * weight;
  1263. pos1[k][2] = pos1[k][2] * s1 + pos2[k][2] * weight;
  1264. }
  1265. }
  #if PARANOID_SIMD_TIMING_TEST
  static ConVar cl_bones_simd_timing_iter( "cl_bones_simd_timing_iter", "100", FCVAR_REPLICATED, "number of times to run SlerpBones." );

  //-----------------------------------------------------------------------------
  // Purpose: timing harness compiled only under PARANOID_SIMD_TIMING_TEST.
  //          Runs the selected slerp implementation cl_bones_simd_timing_iter
  //          times on scratch copies of q1/pos1 (results discarded), then once
  //          more on the caller's real arrays.
  //          cl_bones_simd_timing_version picks the implementation:
  //          true = SlerpBonesSpeedy (SIMD), false = SlerpBonesSlow (scalar).
  //-----------------------------------------------------------------------------
  void SlerpBones(
      const CStudioHdr *pStudioHdr,
      Quaternion q1[MAXSTUDIOBONES],
      BoneVector pos1[MAXSTUDIOBONES],
      mstudioseqdesc_t &seqdesc, // source of q2 and pos2
      int sequence,
      const BoneQuaternionAligned q2[MAXSTUDIOBONES],
      const BoneVector pos2[MAXSTUDIOBONES],
      float s,
      int boneMask )
  {
      BONE_PROFILE_FUNC();

      // scratch buffers so the warm-up passes never disturb the real pose
      const int nBones = pStudioHdr->numbones();
      BoneQuaternionAligned scratchQ[MAXSTUDIOBONES];
      BoneVector scratchPos[MAXSTUDIOBONES];

      const bool bUseSimd = cl_bones_simd_timing_version.GetBool(); // 1 = simd 0 = scalar

      // throw-away passes: each one starts from a fresh copy of the input
      int nPassesLeft = cl_bones_simd_timing_iter.GetInt();
      while ( nPassesLeft > 0 )
      {
          memcpy( scratchQ, q1, nBones * sizeof(Quaternion) );
          memcpy( scratchPos, pos1, nBones * sizeof(BoneVector) );

          if ( bUseSimd )
          {
              SlerpBonesSpeedy( pStudioHdr, scratchQ, scratchPos, seqdesc, sequence, q2, pos2, s, boneMask );
          }
          else
          {
              SlerpBonesSlow( pStudioHdr, scratchQ, scratchPos, seqdesc, sequence, q2, pos2, s, boneMask );
          }

          --nPassesLeft;
      }

      // the pass that counts: writes into the caller's arrays
      if ( !bUseSimd )
      {
          SlerpBonesSlow( pStudioHdr, q1, pos1, seqdesc, sequence, q2, pos2, s, boneMask );
          return;
      }

      SlerpBonesSpeedy( pStudioHdr, static_cast<BoneQuaternionAligned *>(q1), pos1, seqdesc, sequence, q2, pos2, s, boneMask );
  }
  #endif
  //-----------------------------------------------------------------------------
  // Compile-time base-2 logarithm of a power-of-two constant.
  // GetLog2_t<N>::kLog2 is the bit index of the single set bit in N, e.g.
  // GetLog2_t<0x00100000>::kLog2 == 20.  Any N that is not a positive power of
  // two is rejected at compile time.
  //-----------------------------------------------------------------------------
  template <int N>
  struct GetLog2_t
  {
      // Compile-time check without static_assert: an array of size -1 is
      // ill-formed, so instantiating with a bad N fails to compile.
      typedef char NMustBeAPositivePowerOfTwo[ ( N > 0 && ( N & ( N - 1 ) ) == 0 ) ? 1 : -1 ];

      // Halve N until the <1> specialization below ends the recursion.  The
      // conditional keeps a non-positive N from recursing without end; such
      // an N is already rejected by the typedef above.
      enum { kLog2 = 1 + GetLog2_t< ( ( N > 1 ) ? ( N >> 1 ) : 1 ) >::kLog2 };
  };

  // Recursion anchor: log2(1) == 0.
  template<>
  struct GetLog2_t<1>
  {
      enum { kLog2 = 0 };
  };
  1354. inline void AlwaysAssert(bool condition)
  1355. {
  1356. Assert(condition);
  1357. }
  // Linear search: reports whether `value` occurs anywhere in the half-open
  // range [pBegin, pEnd).  An empty range (or one where pBegin is already at
  // or past pEnd) yields false.
  bool IsInList(int value, const int *pBegin, const int *pEnd)
  {
      const int *pCursor = pBegin;
      while ( pCursor < pEnd )
      {
          if ( *pCursor == value )
          {
              return true;
          }
          ++pCursor;
      }
      return false;
  }
  1365. //CLinkedMiniProfiler g_lmp_BlendBones1("BlendBones1",&g_pPhysicsMiniProfilers);
  1366. //CLinkedMiniProfiler g_lmp_BlendBones2("BlendBones2",&g_pPhysicsMiniProfilers);
  // Replicated console variable, default "2". BlendBones() reads it with GetInt(); in builds
  // without DEDICATED defined, a nonzero value selects its active-bone-list code path.
  1367. ConVar g_cv_BlendBonesMode("BlendBonesMode", "2", FCVAR_REPLICATED);
  1368. //---------------------------------------------------------------------
  1369. // Make sure quaternions are within 180 degrees of one another, if not, reverse q
  1370. //---------------------------------------------------------------------
  1371. FORCEINLINE fltx4 BoneQuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
  1372. {
  1373. // decide if one of the quaternions is backwards
  1374. bi32x4 cmp = CmpLtSIMD( Dot4SIMD(p,q), Four_Zeros );
  1375. fltx4 result = MaskedAssign( cmp, NegSIMD(q), q );
  1376. return result;
  1377. }
  1378. // SSE + X360 implementation
  1379. FORCEINLINE fltx4 BoneQuaternionNormalizeSIMD( const fltx4 &q )
  1380. {
  1381. fltx4 radius, result;
  1382. bi32x4 mask;
  1383. radius = Dot4SIMD( q, q );
  1384. mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0
  1385. result = ReciprocalSqrtSIMD( radius );
  1386. result = MulSIMD( result, q );
  1387. return MaskedAssign( mask, q, result ); // if radius was 0, just return q
  1388. }
  1389. //-----------------------------------------------------------------------------
  1390. // Purpose: Inter-animation blend. Assumes both types are identical.
  1391. // blend together q1,pos1 with q2,pos2. Return result in q1,pos1.
  1392. // 0 returns q1, pos1. 1 returns q2, pos2
  1393. //-----------------------------------------------------------------------------
  1394. void BlendBones(
  1395. const CStudioHdr *pStudioHdr,
  1396. BoneQuaternionAligned q1[MAXSTUDIOBONES],
  1397. BoneVector pos1[MAXSTUDIOBONES],
  1398. mstudioseqdesc_t &seqdesc,
  1399. int sequence,
  1400. const BoneQuaternionAligned q2[MAXSTUDIOBONES],
  1401. const BoneVector pos2[MAXSTUDIOBONES],
  1402. float s,
  1403. int boneMask )
  1404. {
  1405. AlwaysAssert(0 == ((uintp(q1)|uintp(pos1)|uintp(q2)|uintp(pos2)) & 0xF));
  1406. BONE_PROFILE_FUNC(); // in: x360: up to 1.67 ms
  1407. int i, j;
  1408. Quaternion q3;
  1409. virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
  1410. const virtualgroup_t *pSeqGroup = NULL;
  1411. if (pVModel)
  1412. {
  1413. pSeqGroup = pVModel->pSeqGroup( sequence );
  1414. }
  1415. if (s <= 0)
  1416. {
  1417. Assert(0); // shouldn't have been called
  1418. return;
  1419. }
  1420. else if (s >= 1.0)
  1421. {
  1422. //CMiniProfilerGuard mpguard(&g_lmp_BlendBones1, pStudioHdr->numbones());
  1423. Assert(0); // shouldn't have been called
  1424. for (i = 0; i < pStudioHdr->numbones(); i++)
  1425. {
  1426. // skip unused bones
  1427. if (!(pStudioHdr->boneFlags(i) & boneMask))
  1428. {
  1429. continue;
  1430. }
  1431. if (pSeqGroup)
  1432. {
  1433. j = pSeqGroup->boneMap[i];
  1434. }
  1435. else
  1436. {
  1437. j = i;
  1438. }
  1439. if (j >= 0 && seqdesc.weight( j ) > 0.0)
  1440. {
  1441. q1[i] = q2[i];
  1442. pos1[i] = pos2[i];
  1443. }
  1444. }
  1445. return;
  1446. }
  1447. float s2 = s;
  1448. float s1 = 1.0 - s2;
  1449. //CMiniProfilerGuard mpguard(&g_lmp_BlendBones2,pStudioHdr->numbones()); // 130-180 ticks without profilers; 167-190 ticks with all profilers on
  1450. int nMode = g_cv_BlendBonesMode.GetInt();
  1451. #ifndef DEDICATED
  1452. if(nMode)
  1453. {
  1454. const int numBones = pStudioHdr->numbones();
  1455. const int *RESTRICT pBonePseudoWeight = (int*)seqdesc.pBoneweight(0); // we'll treat floats as ints to check for > 0.0
  1456. int *RESTRICT pActiveBones = (int*)stackalloc(numBones * sizeof(int) * 2), *RESTRICT pActiveBonesEnd = pActiveBones;
  1457. {
  1458. BONE_PROFILE_LOOP(BlendBoneLoop2a,numBones); // 20 ticks straight; 12-14 ticks 4 at a time; 14-19 ticks 8 at a time (compiler generated code)
  1459. i = 0;
  1460. #ifdef _X360 // on PC, this is slower
  1461. for(; i+3 < numBones; i+=4)
  1462. {
  1463. int isBoneActiveA = pStudioHdr->boneFlags(i ) & boneMask;
  1464. int isBoneActiveB = pStudioHdr->boneFlags(i+1) & boneMask;
  1465. int isBoneActiveC = pStudioHdr->boneFlags(i+2) & boneMask;
  1466. int isBoneActiveD = pStudioHdr->boneFlags(i+3) & boneMask;
  1467. isBoneActiveA = isBoneActiveA | -isBoneActiveA; // the high bit is now 1 iff the flags check
  1468. isBoneActiveB = isBoneActiveB | -isBoneActiveB; // the high bit is now 1 iff the flags check
  1469. isBoneActiveC = isBoneActiveC | -isBoneActiveC; // the high bit is now 1 iff the flags check
  1470. isBoneActiveD = isBoneActiveD | -isBoneActiveD; // the high bit is now 1 iff the flags check
  1471. isBoneActiveA = _rotl(isBoneActiveA,1) & 1; // now it's either 0 or 1
  1472. isBoneActiveB = _rotl(isBoneActiveB,1) & 1; // now it's either 0 or 1
  1473. isBoneActiveC = _rotl(isBoneActiveC,1) & 1; // now it's either 0 or 1
  1474. isBoneActiveD = _rotl(isBoneActiveD,1) & 1; // now it's either 0 or 1
  1475. *pActiveBonesEnd = i+0;
  1476. pActiveBonesEnd += isBoneActiveA;
  1477. *pActiveBonesEnd = i+1;
  1478. pActiveBonesEnd += isBoneActiveB;
  1479. *pActiveBonesEnd = i+2;
  1480. pActiveBonesEnd += isBoneActiveC;
  1481. *pActiveBonesEnd = i+3;
  1482. pActiveBonesEnd += isBoneActiveD;
  1483. }
  1484. #endif
  1485. for(; i < numBones; ++i)
  1486. {
  1487. *pActiveBonesEnd = i;
  1488. int isBoneActive = pStudioHdr->boneFlags(i) & boneMask;
  1489. isBoneActive = isBoneActive | -isBoneActive; // the high bit is now 1 iff the flags check
  1490. isBoneActive = _rotl(isBoneActive,1) & 1; // now it's either 0 or 1
  1491. pActiveBonesEnd += isBoneActive;
  1492. }
  1493. }
  1494. // now we have a list of bones whose flags & mask != 0
  1495. // we need to create bone pay
  1496. if(pSeqGroup)
  1497. {
  1498. int *pEnd = pActiveBones;
  1499. {
  1500. BONE_PROFILE_LOOP(BlendBoneLoop2b,pActiveBonesEnd - pActiveBones);//21-25 straight; 16-18 4 at a time;
  1501. int *RESTRICT pActiveBone = pActiveBones;
  1502. #ifdef _X360 // on PC, this is slower
  1503. for(; pActiveBone + 3 < pActiveBonesEnd; pActiveBone += 4)
  1504. {
  1505. int nActiveBoneA = pActiveBone[0];
  1506. int nActiveBoneB = pActiveBone[1];
  1507. int nActiveBoneC = pActiveBone[2];
  1508. int nActiveBoneD = pActiveBone[3];
  1509. int nMappedBoneA = pSeqGroup->boneMap[nActiveBoneA];
  1510. int nMappedBoneB = pSeqGroup->boneMap[nActiveBoneB];
  1511. int nMappedBoneC = pSeqGroup->boneMap[nActiveBoneC];
  1512. int nMappedBoneD = pSeqGroup->boneMap[nActiveBoneD];
  1513. pEnd[numBones] = nMappedBoneA;
  1514. *pEnd = nActiveBoneA;
  1515. pEnd += _rotl(~nMappedBoneA,1) & 1; // if nMappedBone < 0, don't advance the end
  1516. pEnd[numBones] = nMappedBoneB;
  1517. *pEnd = nActiveBoneB;
  1518. pEnd += _rotl(~nMappedBoneB,1) & 1; // if nMappedBone < 0, don't advance the end
  1519. pEnd[numBones] = nMappedBoneC;
  1520. *pEnd = nActiveBoneC;
  1521. pEnd += _rotl(~nMappedBoneC,1) & 1; // if nMappedBone < 0, don't advance the end
  1522. pEnd[numBones] = nMappedBoneD;
  1523. *pEnd = nActiveBoneD;
  1524. pEnd += _rotl(~nMappedBoneD,1) & 1; // if nMappedBone < 0, don't advance the end
  1525. }
  1526. #endif
  1527. for(; pActiveBone < pActiveBonesEnd; ++pActiveBone)
  1528. {
  1529. int nActiveBone = *pActiveBone;
  1530. int nMappedBone = pSeqGroup->boneMap[nActiveBone];
  1531. pEnd[numBones] = nMappedBone;
  1532. *pEnd = nActiveBone;
  1533. pEnd += _rotl(~nMappedBone,1) & 1; // if nMappedBone < 0, don't advance the end
  1534. }
  1535. }
  1536. pActiveBonesEnd = pEnd; // the new end of the array of active bones, with negatively-mapped bones taken out
  1537. // now get rid of non-positively-weighted bones
  1538. pEnd = pActiveBones;
  1539. {
  1540. BONE_PROFILE_LOOP(BlendBoneLoop2c,pActiveBonesEnd - pActiveBones);//18-23 straight; 14-17 ticks 4 at a time
  1541. int *RESTRICT pActiveBone = pActiveBones;
  1542. #ifdef _X360 // on PC, this is slower
  1543. int *RESTRICT pMappedBone = pActiveBones+numBones;
  1544. for(; pActiveBone+3 < pActiveBonesEnd; pActiveBone += 4, pMappedBone += 4)
  1545. {
  1546. int nActiveBoneA = pActiveBone[0];
  1547. int nActiveBoneB = pActiveBone[1];
  1548. int nActiveBoneC = pActiveBone[2];
  1549. int nActiveBoneD = pActiveBone[3];
  1550. int nMappedBoneA = pMappedBone[0];
  1551. int nMappedBoneB = pMappedBone[1];
  1552. int nMappedBoneC = pMappedBone[2];
  1553. int nMappedBoneD = pMappedBone[3];
  1554. int pseudoWeightA = pBonePseudoWeight[nMappedBoneA];
  1555. int pseudoWeightB = pBonePseudoWeight[nMappedBoneB];
  1556. int pseudoWeightC = pBonePseudoWeight[nMappedBoneC];
  1557. int pseudoWeightD = pBonePseudoWeight[nMappedBoneD];
  1558. *pEnd = nActiveBoneA;
  1559. pEnd += _rotl(-pseudoWeightA, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
  1560. *pEnd = nActiveBoneB;
  1561. pEnd += _rotl(-pseudoWeightB, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
  1562. *pEnd = nActiveBoneC;
  1563. pEnd += _rotl(-pseudoWeightC, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
  1564. *pEnd = nActiveBoneD;
  1565. pEnd += _rotl(-pseudoWeightD, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
  1566. }
  1567. #endif
  1568. for(; pActiveBone < pActiveBonesEnd; ++pActiveBone)
  1569. {
  1570. int nActiveBone = *pActiveBone;
  1571. int nMappedBone = pActiveBone[numBones];
  1572. int pseudoWeight = pBonePseudoWeight[nMappedBone];
  1573. *pEnd = nActiveBone;
  1574. pEnd += _rotl(-pseudoWeight, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
  1575. }
  1576. }
  1577. pActiveBonesEnd = pEnd;
  1578. }
  1579. else
  1580. {
  1581. // one mapping stage off
  1582. // now get rid of non-positively-weighted bones
  1583. int *pEnd = pActiveBones;
  1584. {BONE_PROFILE_LOOP(BlendBoneLoop2d,pActiveBonesEnd-pActiveBones);//20-50
  1585. for(int *RESTRICT pActiveBone = pActiveBones; pActiveBone < pActiveBonesEnd; ++pActiveBone)
  1586. {
  1587. int nActiveBone = *pActiveBone;
  1588. int pseudoWeight = pBonePseudoWeight[nActiveBone];
  1589. *pEnd = nActiveBone;
  1590. pEnd += _rotl(-pseudoWeight, 1) & 1; // pseudoWeight must be strictly positive to advance and let this bone stay
  1591. }}
  1592. pActiveBonesEnd = pEnd;
  1593. }
  1594. enum
  1595. {
  1596. nBoneFixedAlignmentShift = GetLog2_t<BONE_FIXED_ALIGNMENT>::kLog2
  1597. };
  1598. // NOTE: When merging back to main, enable this code because Fixed-Alignment is not used in L4D, but may be used in main
  1599. fltx4 scale1 = ReplicateX4( s1 );
  1600. fltx4 scale2 = SubSIMD( Four_Ones, scale1 );
  1601. //fltx4 maskW = LoadAlignedSIMD( (const float *)(g_SIMD_ComponentMask[3]) );
  1602. // pass through all active bones to blend them; those that need it are already aligned
  1603. {
  1604. // 120-155 ticks 4 horizontal at a time; 130 ticks with 1 dot quaternion alignment
  1605. //
  1606. BONE_PROFILE_LOOP(BlendBoneLoop2g,pActiveBonesEnd-pActiveBones);
  1607. const int *RESTRICT p = pActiveBones, *RESTRICT pNext;
  1608. #if 0//ndef _X360
  1609. // swizzled (vertical) 4 at a time processing
  1610. for(; (pNext = p+4) < pActiveBonesEnd; p = pNext)
  1611. {
  1612. int nBoneA = p[0], nBoneB = p[1], nBoneC = p[2], nBoneD = p[3];
  1613. BoneQuaternionAligned *RESTRICT pq1A = &q1[nBoneA];
  1614. BoneQuaternionAligned *RESTRICT pq1B = &q1[nBoneB];
  1615. BoneQuaternionAligned *RESTRICT pq1C = &q1[nBoneC];
  1616. BoneQuaternionAligned *RESTRICT pq1D = &q1[nBoneD];
  1617. const BoneQuaternionAligned *RESTRICT pq2A = &q2[nBoneA];
  1618. const BoneQuaternionAligned *RESTRICT pq2B = &q2[nBoneB];
  1619. const BoneQuaternionAligned *RESTRICT pq2C = &q2[nBoneC];
  1620. const BoneQuaternionAligned *RESTRICT pq2D = &q2[nBoneD];
  1621. float *pp1A = pos1[nBoneA].Base();
  1622. float *pp1B = pos1[nBoneB].Base();
  1623. float *pp1C = pos1[nBoneC].Base();
  1624. float *pp1D = pos1[nBoneD].Base();
  1625. const float *pp2A = pos2[nBoneA].Base();
  1626. const float *pp2B = pos2[nBoneB].Base();
  1627. const float *pp2C = pos2[nBoneC].Base();
  1628. const float *pp2D = pos2[nBoneD].Base();
  1629. FourQuaternions four4q1, four4q2;
  1630. four4q1.LoadAndSwizzleAligned(pq1A,pq1B,pq1C,pq1D);
  1631. four4q2.LoadAndSwizzleAligned(pq2A,pq2B,pq2C,pq2D);
  1632. FourVectors four4Pos1, four4Pos2;
  1633. four4Pos1.LoadAndSwizzleUnaligned(pp1A,pp1B,pp1C,pp1D);
  1634. four4Pos2.LoadAndSwizzleUnaligned(pp2A,pp2B,pp2C,pp2D);
  1635. four4q1 = QuaternionAlign(four4q2, four4q1);
  1636. FourQuaternions four4Blended = QuaternionNormalize(Madd( four4q1, scale1, Mul( four4q2 , scale2 )));
  1637. // now blend the linear parts
  1638. FourVectors f4PosBlended = Madd(four4Pos1, scale1, Mul(four4Pos2, scale2));
  1639. f4PosBlended.TransposeOntoUnaligned3(*(fltx4*)pp1A, *(fltx4*)pp1B, *(fltx4*)pp1C, *(fltx4*)pp1D);
  1640. four4Blended.SwizzleAndStoreAligned(pq1A,pq1B,pq1C,pq1D);
  1641. }
  1642. #else
  1643. // horizontal 4 at a time processing
  1644. for(; (pNext = p+4) < pActiveBonesEnd; p = pNext)
  1645. {
  1646. int nBoneA = p[0], nBoneB = p[1], nBoneC = p[2], nBoneD = p[3];
  1647. //PREFETCH_CACHE_LINE(&q1[nBoneD+2],0);
  1648. //PREFETCH_CACHE_LINE(&q2[nBoneD+2],0);
  1649. //PREFETCH_CACHE_LINE(&pos1[nBoneD+2],0);
  1650. //PREFETCH_CACHE_LINE(&pos2[nBoneD+2],0);
  1651. float *RESTRICT pq1A = q1[nBoneA].Base(), *pp1A = pos1[nBoneA].Base();
  1652. float *RESTRICT pq1B = q1[nBoneB].Base(), *pp1B = pos1[nBoneB].Base();
  1653. float *RESTRICT pq1C = q1[nBoneC].Base(), *pp1C = pos1[nBoneC].Base();
  1654. float *RESTRICT pq1D = q1[nBoneD].Base(), *pp1D = pos1[nBoneD].Base();
  1655. const float *RESTRICT pq2A = q2[nBoneA].Base(), *pp2A = pos2[nBoneA].Base();
  1656. const float *RESTRICT pq2B = q2[nBoneB].Base(), *pp2B = pos2[nBoneB].Base();
  1657. const float *RESTRICT pq2C = q2[nBoneC].Base(), *pp2C = pos2[nBoneC].Base();
  1658. const float *RESTRICT pq2D = q2[nBoneD].Base(), *pp2D = pos2[nBoneD].Base();
  1659. fltx4 f4q1A = LoadAlignedSIMD(pq1A), f4q2A = LoadAlignedSIMD(pq2A);
  1660. fltx4 f4q1B = LoadAlignedSIMD(pq1B), f4q2B = LoadAlignedSIMD(pq2B);
  1661. fltx4 f4q1C = LoadAlignedSIMD(pq1C), f4q2C = LoadAlignedSIMD(pq2C);
  1662. fltx4 f4q1D = LoadAlignedSIMD(pq1D), f4q2D = LoadAlignedSIMD(pq2D);
  1663. fltx4 f4Pos1A = LoadUnaligned3SIMD(pp1A), f4Pos2A = LoadUnaligned3SIMD(pp2A);
  1664. fltx4 f4Pos1B = LoadUnaligned3SIMD(pp1B), f4Pos2B = LoadUnaligned3SIMD(pp2B);
  1665. fltx4 f4Pos1C = LoadUnaligned3SIMD(pp1C), f4Pos2C = LoadUnaligned3SIMD(pp2C);
  1666. fltx4 f4Pos1D = LoadUnaligned3SIMD(pp1D), f4Pos2D = LoadUnaligned3SIMD(pp2D);
  1667. f4q1A = BoneQuaternionAlignSIMD(f4q2A, f4q1A);
  1668. f4q1B = BoneQuaternionAlignSIMD(f4q2B, f4q1B);
  1669. f4q1C = BoneQuaternionAlignSIMD(f4q2C, f4q1C);
  1670. f4q1D = BoneQuaternionAlignSIMD(f4q2D, f4q1D);
  1671. fltx4 f4BlendedA = MulSIMD( scale2, f4q2A );
  1672. fltx4 f4BlendedB = MulSIMD( scale2, f4q2B );
  1673. fltx4 f4BlendedC = MulSIMD( scale2, f4q2C );
  1674. fltx4 f4BlendedD = MulSIMD( scale2, f4q2D );
  1675. f4BlendedA = MaddSIMD( scale1, f4q1A, f4BlendedA );
  1676. f4BlendedB = MaddSIMD( scale1, f4q1B, f4BlendedB );
  1677. f4BlendedC = MaddSIMD( scale1, f4q1C, f4BlendedC );
  1678. f4BlendedD = MaddSIMD( scale1, f4q1D, f4BlendedD );
  1679. f4BlendedA = BoneQuaternionNormalizeSIMD(f4BlendedA);
  1680. f4BlendedB = BoneQuaternionNormalizeSIMD(f4BlendedB);
  1681. f4BlendedC = BoneQuaternionNormalizeSIMD(f4BlendedC);
  1682. f4BlendedD = BoneQuaternionNormalizeSIMD(f4BlendedD);
  1683. // now blend the linear parts
  1684. fltx4 f4PosBlendedA = MaddSIMD(scale1, f4Pos1A, MulSIMD(scale2,f4Pos2A));
  1685. fltx4 f4PosBlendedB = MaddSIMD(scale1, f4Pos1B, MulSIMD(scale2,f4Pos2B));
  1686. fltx4 f4PosBlendedC = MaddSIMD(scale1, f4Pos1C, MulSIMD(scale2,f4Pos2C));
  1687. fltx4 f4PosBlendedD = MaddSIMD(scale1, f4Pos1D, MulSIMD(scale2,f4Pos2D));
  1688. //f4PosBlended = MaskedAssign(maskW, f4Pos1, f4PosBlended);
  1689. StoreAlignedSIMD(pq1A,f4BlendedA);
  1690. StoreUnaligned3SIMD(pp1A, f4PosBlendedA);
  1691. StoreAlignedSIMD(pq1B,f4BlendedB);
  1692. StoreUnaligned3SIMD(pp1B, f4PosBlendedB);
  1693. StoreAlignedSIMD(pq1C,f4BlendedC);
  1694. StoreUnaligned3SIMD(pp1C, f4PosBlendedC);
  1695. StoreAlignedSIMD(pq1D,f4BlendedD);
  1696. StoreUnaligned3SIMD(pp1D, f4PosBlendedD);
  1697. }
  1698. #endif
  1699. for(; p < pActiveBonesEnd; ++p)
  1700. {
  1701. int nBone = *p;
  1702. float *RESTRICT pq1 = q1[nBone].Base(), *RESTRICT pp1 = pos1[nBone].Base();
  1703. const float *RESTRICT pq2 = q2[nBone].Base(), *RESTRICT pp2 = pos2[nBone].Base();
  1704. fltx4 f4q1 = LoadAlignedSIMD(pq1), f4q2 = LoadAlignedSIMD(pq2);
  1705. fltx4 f4Pos1 = LoadUnaligned3SIMD(pp1), f4Pos2 = LoadUnaligned3SIMD(pp2);
  1706. f4q1 = BoneQuaternionAlignSIMD(f4q2, f4q1);
  1707. fltx4 f4Blended = MulSIMD( scale2, f4q2 );
  1708. f4Blended = MaddSIMD( scale1, f4q1, f4Blended );
  1709. f4Blended = BoneQuaternionNormalizeSIMD(f4Blended);
  1710. // now blend the linear parts
  1711. fltx4 f4PosBlended = MaddSIMD(scale1, f4Pos1, MulSIMD(scale2,f4Pos2));
  1712. //f4PosBlended = MaskedAssign(maskW, f4Pos1, f4PosBlended);
  1713. StoreAlignedSIMD(pq1,f4Blended);
  1714. StoreUnaligned3SIMD(pp1, f4PosBlended);
  1715. }
  1716. }
  1717. }
  1718. else
  1719. #endif // POSIX
  1720. {
  1721. // 360-400 ticks per loop pass
  1722. // there are usually 40-100 bones on average in a frame
  1723. for (i = 0; i < pStudioHdr->numbones(); i++)
  1724. {
  1725. // skip unused bones
  1726. if (!(pStudioHdr->boneFlags(i) & boneMask))
  1727. {
  1728. continue;
  1729. }
  1730. if (pSeqGroup)
  1731. {
  1732. j = pSeqGroup->boneMap[i];
  1733. }
  1734. else
  1735. {
  1736. j = i;
  1737. }
  1738. if (j >= 0 && seqdesc.weight( j ) > 0.0)
  1739. {
  1740. if (pStudioHdr->boneFlags(i) & BONE_FIXED_ALIGNMENT)
  1741. {
  1742. QuaternionBlendNoAlign( q2[i], q1[i], s1, q3 );
  1743. }
  1744. else
  1745. {
  1746. QuaternionBlend( q2[i], q1[i], s1, q3 );
  1747. }
  1748. q1[i][0] = q3[0];
  1749. q1[i][1] = q3[1];
  1750. q1[i][2] = q3[2];
  1751. q1[i][3] = q3[3];
  1752. pos1[i][0] = pos1[i][0] * s1 + pos2[i][0] * s2;
  1753. pos1[i][1] = pos1[i][1] * s1 + pos2[i][1] * s2;
  1754. pos1[i][2] = pos1[i][2] * s1 + pos2[i][2] * s2;
  1755. }
  1756. }
  1757. }
  1758. }
  1759. //-----------------------------------------------------------------------------
  1760. // Purpose: Scale a set of bones. Must be of type delta
  1761. //-----------------------------------------------------------------------------
  1762. void ScaleBones(
  1763. const CStudioHdr *pStudioHdr,
  1764. BoneQuaternion q1[MAXSTUDIOBONES],
  1765. BoneVector pos1[MAXSTUDIOBONES],
  1766. int sequence,
  1767. float s,
  1768. int boneMask )
  1769. {
  1770. BONE_PROFILE_FUNC();
  1771. int i, j;
  1772. Quaternion q3;
  1773. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( sequence );
  1774. virtualmodel_t *pVModel = pStudioHdr->GetVirtualModel();
  1775. const virtualgroup_t *pSeqGroup = NULL;
  1776. if (pVModel)
  1777. {
  1778. pSeqGroup = pVModel->pSeqGroup( sequence );
  1779. }
  1780. float s2 = s;
  1781. float s1 = 1.0 - s2;
  1782. for (i = 0; i < pStudioHdr->numbones(); i++)
  1783. {
  1784. // skip unused bones
  1785. if (!(pStudioHdr->boneFlags(i) & boneMask))
  1786. {
  1787. continue;
  1788. }
  1789. if (pSeqGroup)
  1790. {
  1791. j = pSeqGroup->boneMap[i];
  1792. }
  1793. else
  1794. {
  1795. j = i;
  1796. }
  1797. if (j >= 0 && seqdesc.weight( j ) > 0.0)
  1798. {
  1799. QuaternionIdentityBlend( q1[i], s1, q1[i] );
  1800. VectorScale( pos1[i], s2, pos1[i] );
  1801. }
  1802. }
  1803. }
  1804. //-----------------------------------------------------------------------------
  1805. // Purpose: resolve a global pose parameter to the specific setting for this sequence
  1806. //-----------------------------------------------------------------------------
  1807. int Studio_LocalPoseParameter( const CStudioHdr *pStudioHdr, const float poseParameter[], mstudioseqdesc_t &seqdesc, int iSequence, int iLocalIndex, float &flSetting )
  1808. {
  1809. BONE_PROFILE_FUNC();
  1810. int iPose = pStudioHdr->GetSharedPoseParameter( iSequence, seqdesc.paramindex[iLocalIndex] );
  1811. if (iPose == -1)
  1812. {
  1813. flSetting = 0;
  1814. return 0;
  1815. }
  1816. const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iPose );
  1817. float flValue = poseParameter[iPose];
  1818. if (Pose.loop)
  1819. {
  1820. float wrap = (Pose.start + Pose.end) / 2.0 + Pose.loop / 2.0;
  1821. float shift = Pose.loop - wrap;
  1822. flValue = flValue - Pose.loop * floor((flValue + shift) / Pose.loop);
  1823. }
  1824. int nIndex = 0;
  1825. if (seqdesc.posekeyindex == 0)
  1826. {
  1827. float flLocalStart = ((float)seqdesc.paramstart[iLocalIndex] - Pose.start) / (Pose.end - Pose.start);
  1828. float flLocalEnd = ((float)seqdesc.paramend[iLocalIndex] - Pose.start) / (Pose.end - Pose.start);
  1829. // convert into local range
  1830. flSetting = (flValue - flLocalStart) / (flLocalEnd - flLocalStart);
  1831. // clamp. This shouldn't ever need to happen if it's looping.
  1832. if (flSetting < 0)
  1833. flSetting = 0;
  1834. if (flSetting > 1)
  1835. flSetting = 1;
  1836. nIndex = 0;
  1837. if (seqdesc.groupsize[iLocalIndex] > 2 )
  1838. {
  1839. // estimate index
  1840. nIndex = (int)(flSetting * (seqdesc.groupsize[iLocalIndex] - 1));
  1841. if (nIndex == seqdesc.groupsize[iLocalIndex] - 1)
  1842. {
  1843. nIndex = seqdesc.groupsize[iLocalIndex] - 2;
  1844. }
  1845. flSetting = flSetting * (seqdesc.groupsize[iLocalIndex] - 1) - nIndex;
  1846. }
  1847. }
  1848. else
  1849. {
  1850. flValue = flValue * (Pose.end - Pose.start) + Pose.start;
  1851. nIndex = 0;
  1852. // FIXME: this needs to be 2D
  1853. // FIXME: this shouldn't be a linear search
  1854. while (1)
  1855. {
  1856. flSetting = (flValue - seqdesc.poseKey( iLocalIndex, nIndex )) / (seqdesc.poseKey( iLocalIndex, nIndex + 1 ) - seqdesc.poseKey( iLocalIndex, nIndex ));
  1857. /*
  1858. if (index > 0 && flSetting < 0.0)
  1859. {
  1860. index--;
  1861. continue;
  1862. }
  1863. else
  1864. */
  1865. if (nIndex < seqdesc.groupsize[iLocalIndex] - 2 && flSetting > 1.0)
  1866. {
  1867. nIndex++;
  1868. continue;
  1869. }
  1870. break;
  1871. }
  1872. // clamp.
  1873. if (flSetting < 0.0f)
  1874. flSetting = 0.0f;
  1875. if (flSetting > 1.0f)
  1876. flSetting = 1.0f;
  1877. }
  1878. return nIndex;
  1879. }
  1880. void Studio_CalcBoneToBoneTransform( const CStudioHdr *pStudioHdr, int inputBoneIndex, int outputBoneIndex, matrix3x4_t& matrixOut )
  1881. {
  1882. const mstudiobone_t *pbone = pStudioHdr->pBone( inputBoneIndex );
  1883. matrix3x4a_t inputToPose;
  1884. MatrixInvert( pbone->poseToBone, inputToPose );
  1885. ConcatTransforms( pStudioHdr->pBone( outputBoneIndex )->poseToBone, inputToPose, matrixOut );
  1886. }
  1887. //-----------------------------------------------------------------------------
  1888. // Purpose: Lookup a bone controller
  1889. //-----------------------------------------------------------------------------
  1890. static mstudiobonecontroller_t* FindController( const CStudioHdr *pStudioHdr, int iController)
  1891. {
  1892. // find first controller that matches the index
  1893. for (int i = 0; i < pStudioHdr->numbonecontrollers(); i++)
  1894. {
  1895. if (pStudioHdr->pBonecontroller( i )->inputfield == iController)
  1896. return pStudioHdr->pBonecontroller( i );
  1897. }
  1898. return NULL;
  1899. }
  1900. //-----------------------------------------------------------------------------
  1901. // Purpose: converts a ranged bone controller value into a 0..1 encoded value
  1902. // Output: ctlValue contains 0..1 encoding.
  1903. // returns clamped ranged value
  1904. //-----------------------------------------------------------------------------
  1905. float Studio_SetController( const CStudioHdr *pStudioHdr, int iController, float flValue, float &ctlValue )
  1906. {
  1907. BONE_PROFILE_FUNC();
  1908. if (! pStudioHdr)
  1909. return flValue;
  1910. mstudiobonecontroller_t *pbonecontroller = FindController(pStudioHdr, iController);
  1911. if(!pbonecontroller)
  1912. {
  1913. ctlValue = 0;
  1914. return flValue;
  1915. }
  1916. // wrap 0..360 if it's a rotational controller
  1917. if (pbonecontroller->type & (STUDIO_XR | STUDIO_YR | STUDIO_ZR))
  1918. {
  1919. // ugly hack, invert value if end < start
  1920. if (pbonecontroller->end < pbonecontroller->start)
  1921. flValue = -flValue;
  1922. // does the controller not wrap?
  1923. if (pbonecontroller->start + 359.0 >= pbonecontroller->end)
  1924. {
  1925. if (flValue > ((pbonecontroller->start + pbonecontroller->end) / 2.0) + 180)
  1926. flValue = flValue - 360;
  1927. if (flValue < ((pbonecontroller->start + pbonecontroller->end) / 2.0) - 180)
  1928. flValue = flValue + 360;
  1929. }
  1930. else
  1931. {
  1932. if (flValue > 360)
  1933. flValue = flValue - (int)(flValue / 360.0) * 360.0;
  1934. else if (flValue < 0)
  1935. flValue = flValue + (int)((flValue / -360.0) + 1) * 360.0;
  1936. }
  1937. }
  1938. ctlValue = (flValue - pbonecontroller->start) / (pbonecontroller->end - pbonecontroller->start);
  1939. if (ctlValue < 0) ctlValue = 0;
  1940. if (ctlValue > 1) ctlValue = 1;
  1941. float flReturnVal = ((1.0 - ctlValue)*pbonecontroller->start + ctlValue *pbonecontroller->end);
  1942. // ugly hack, invert value if a rotational controller and end < start
  1943. if (pbonecontroller->type & (STUDIO_XR | STUDIO_YR | STUDIO_ZR) &&
  1944. pbonecontroller->end < pbonecontroller->start )
  1945. {
  1946. flReturnVal *= -1;
  1947. }
  1948. return flReturnVal;
  1949. }
  1950. //-----------------------------------------------------------------------------
  1951. // Purpose: converts a 0..1 encoded bone controller value into a ranged value
  1952. // Output: returns ranged value
  1953. //-----------------------------------------------------------------------------
  1954. float Studio_GetController( const CStudioHdr *pStudioHdr, int iController, float ctlValue )
  1955. {
  1956. if (!pStudioHdr)
  1957. return 0.0;
  1958. mstudiobonecontroller_t *pbonecontroller = FindController(pStudioHdr, iController);
  1959. if(!pbonecontroller)
  1960. return 0;
  1961. return ctlValue * (pbonecontroller->end - pbonecontroller->start) + pbonecontroller->start;
  1962. }
  1963. //-----------------------------------------------------------------------------
  1964. // Purpose: Calculates default values for the pose parameters
  1965. // Output: fills in an array
  1966. //-----------------------------------------------------------------------------
  1967. void Studio_CalcDefaultPoseParameters( const CStudioHdr *pStudioHdr, float flPoseParameter[], int nCount )
  1968. {
  1969. int nPoseCount = pStudioHdr->GetNumPoseParameters();
  1970. int nNumParams = MIN( nCount, MAXSTUDIOPOSEPARAM );
  1971. for ( int i = 0; i < nNumParams; ++i )
  1972. {
  1973. // Default to middle of the pose parameter range
  1974. flPoseParameter[ i ] = 0.5f;
  1975. if ( i < nPoseCount )
  1976. {
  1977. const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( i );
  1978. // Want to try for a zero state. If one doesn't exist set it to .5 by default.
  1979. if ( Pose.start < 0.0f && Pose.end > 0.0f )
  1980. {
  1981. float flPoseDelta = Pose.end - Pose.start;
  1982. flPoseParameter[i] = -Pose.start / flPoseDelta;
  1983. }
  1984. }
  1985. }
  1986. }
  1987. //-----------------------------------------------------------------------------
  1988. // Purpose: converts a ranged pose parameter value into a 0..1 encoded value
  1989. // Output: ctlValue contains 0..1 encoding.
  1990. // returns clamped ranged value
  1991. //-----------------------------------------------------------------------------
  1992. float Studio_SetPoseParameter( const CStudioHdr *pStudioHdr, int iParameter, float flValue, float &ctlValue )
  1993. {
  1994. if (iParameter < 0 || iParameter >= pStudioHdr->GetNumPoseParameters())
  1995. {
  1996. ctlValue = 0;
  1997. return 0;
  1998. }
  1999. const mstudioposeparamdesc_t &PoseParam = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iParameter );
  2000. Assert( IsFinite( flValue ) );
  2001. if (PoseParam.loop)
  2002. {
  2003. float wrap = (PoseParam.start + PoseParam.end) / 2.0 + PoseParam.loop / 2.0;
  2004. float shift = PoseParam.loop - wrap;
  2005. flValue = flValue - PoseParam.loop * floor((flValue + shift) / PoseParam.loop);
  2006. }
  2007. ctlValue = (flValue - PoseParam.start) / (PoseParam.end - PoseParam.start);
  2008. if (ctlValue < 0) ctlValue = 0;
  2009. if (ctlValue > 1) ctlValue = 1;
  2010. Assert( IsFinite( ctlValue ) );
  2011. return ctlValue * (PoseParam.end - PoseParam.start) + PoseParam.start;
  2012. }
  2013. //-----------------------------------------------------------------------------
  2014. // Purpose: converts a 0..1 encoded pose parameter value into a ranged value
  2015. // Output: returns ranged value
  2016. //-----------------------------------------------------------------------------
  2017. float Studio_GetPoseParameter( const CStudioHdr *pStudioHdr, int iParameter, float ctlValue )
  2018. {
  2019. if (iParameter < 0 || iParameter >= pStudioHdr->GetNumPoseParameters())
  2020. {
  2021. return 0;
  2022. }
  2023. const mstudioposeparamdesc_t &PoseParam = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iParameter );
  2024. return ctlValue * (PoseParam.end - PoseParam.start) + PoseParam.start;
  2025. }
  2026. #pragma warning (disable : 4701)
  2027. static int ClipRayToCapsule( const Ray_t &ray, mstudiobbox_t *pbox, matrix3x4_t& matrix, trace_t &tr )
  2028. {
  2029. BONE_PROFILE_FUNC();
  2030. Vector vecCapsuleCenters[ 2 ];
  2031. VectorTransform( pbox->bbmin, matrix, vecCapsuleCenters[0] );
  2032. VectorTransform( pbox->bbmax, matrix, vecCapsuleCenters[1] );
  2033. CShapeCastResult cast;
  2034. Assert( tr.fraction >= 0 && tr.fraction <= 1.0f );
  2035. CastCapsuleRay( cast, ray.m_Start /*+start offset?*/, ray.m_Delta * tr.fraction, vecCapsuleCenters, pbox->flCapsuleRadius );
  2036. if ( cast.DidHit() )
  2037. {
  2038. tr.fraction *= cast.m_flHitTime;
  2039. if ( cast.m_bStartInSolid )
  2040. {
  2041. tr.startsolid = true;
  2042. // tr.allsolid - not computed yet
  2043. }
  2044. // tr.contents, dispFlags - not computed yet
  2045. tr.endpos = cast.m_vHitPoint;
  2046. tr.plane.normal = cast.m_vHitNormal;
  2047. //extern IVDebugOverlay *debugoverlay;
  2048. //debugoverlay->AddCapsuleOverlay( vecCapsuleCenters[ 0 ], vecCapsuleCenters[ 1 ], pbox->flCapsuleRadius, 0, 255, 0, 255, 10 );
  2049. //debugoverlay->AddLineOverlay( ray.m_Start /*+offset?*/, cast.m_vHitPoint, 0, 0, 255, 200, 0.25f, 10 );
  2050. //debugoverlay->AddLineOverlay( cast.m_vHitPoint, cast.m_vHitPoint + 4 * cast.m_vHitNormal, 0, 255, 0, 200, 0.25f, 10 );
  2051. // plane.dist and others are not computed yet
  2052. return 0; // hitside is not computed (yet?)
  2053. }
  2054. return -1;
  2055. }
  2056. //-----------------------------------------------------------------------------
  2057. // Purpose:
  2058. //-----------------------------------------------------------------------------
  2059. static int ClipRayToHitbox( const Ray_t &ray, mstudiobbox_t *pbox, matrix3x4_t& matrix, trace_t &tr )
  2060. {
  2061. const float flProjEpsilon = 0.01f;
  2062. BONE_PROFILE_FUNC();
  2063. if ( pbox->flCapsuleRadius > 0 )
  2064. {
  2065. return ClipRayToCapsule( ray, pbox, matrix, tr );
  2066. }
  2067. // scale by current t so hits shorten the ray and increase the likelihood of early outs
  2068. Vector delta2;
  2069. VectorScale( ray.m_Delta, (0.5f * tr.fraction), delta2 );
  2070. // OPTIMIZE: Store this in the box instead of computing it here
  2071. // compute center in local space
  2072. Vector boxextents;
  2073. boxextents.x = (pbox->bbmin.x + pbox->bbmax.x) * 0.5;
  2074. boxextents.y = (pbox->bbmin.y + pbox->bbmax.y) * 0.5;
  2075. boxextents.z = (pbox->bbmin.z + pbox->bbmax.z) * 0.5;
  2076. // transform to world space
  2077. Vector boxCenter;
  2078. VectorTransform( boxextents, matrix, boxCenter );
  2079. // calc extents from local center
  2080. boxextents.x = pbox->bbmax.x - boxextents.x;
  2081. boxextents.y = pbox->bbmax.y - boxextents.y;
  2082. boxextents.z = pbox->bbmax.z - boxextents.z;
  2083. // OPTIMIZE: This is optimized for world space. If the transform is fast enough, it may make more
  2084. // sense to just xform and call UTIL_ClipToBox() instead. MEASURE THIS.
  2085. // save the extents of the ray along
  2086. Vector extent, uextent;
  2087. Vector segmentCenter;
  2088. segmentCenter.x = ray.m_Start.x + delta2.x - boxCenter.x;
  2089. segmentCenter.y = ray.m_Start.y + delta2.y - boxCenter.y;
  2090. segmentCenter.z = ray.m_Start.z + delta2.z - boxCenter.z;
  2091. extent.Init();
  2092. // check box axes for separation
  2093. for ( int j = 0; j < 3; j++ )
  2094. {
  2095. extent[j] = delta2.x * matrix[0][j] + delta2.y * matrix[1][j] + delta2.z * matrix[2][j];
  2096. uextent[j] = fabsf(extent[j]);
  2097. float coord = segmentCenter.x * matrix[0][j] + segmentCenter.y * matrix[1][j] + segmentCenter.z * matrix[2][j];
  2098. coord = fabsf(coord);
  2099. if ( coord > (boxextents[j] + uextent[j]) )
  2100. return -1;
  2101. }
  2102. // now check cross axes for separation
  2103. float tmp, cextent;
  2104. Vector cross;
  2105. CrossProduct( delta2, segmentCenter, cross );
  2106. cextent = cross.x * matrix[0][0] + cross.y * matrix[1][0] + cross.z * matrix[2][0];
  2107. cextent = fabsf(cextent);
  2108. tmp = boxextents[1]*uextent[2] + boxextents[2]*uextent[1];
  2109. tmp = MAX(tmp, flProjEpsilon);
  2110. if ( cextent > tmp )
  2111. return -1;
  2112. cextent = cross.x * matrix[0][1] + cross.y * matrix[1][1] + cross.z * matrix[2][1];
  2113. cextent = fabsf(cextent);
  2114. tmp = boxextents[0]*uextent[2] + boxextents[2]*uextent[0];
  2115. tmp = MAX(tmp, flProjEpsilon);
  2116. if ( cextent > tmp )
  2117. return -1;
  2118. cextent = cross.x * matrix[0][2] + cross.y * matrix[1][2] + cross.z * matrix[2][2];
  2119. cextent = fabsf(cextent);
  2120. tmp = boxextents[0]*uextent[1] + boxextents[1]*uextent[0];
  2121. tmp = MAX(tmp, flProjEpsilon);
  2122. if ( cextent > tmp )
  2123. return -1;
  2124. Vector start;
  2125. // Compute ray start in bone space
  2126. VectorITransform( ray.m_Start, matrix, start );
  2127. // extent is delta2 in bone space, recompute delta in bone space
  2128. VectorScale( extent, 2, extent );
  2129. // delta was prescaled by the current t, so no need to see if this intersection
  2130. // is closer
  2131. trace_t boxTrace;
  2132. if ( !IntersectRayWithBox( start, extent, pbox->bbmin, pbox->bbmax, 0.0f, &boxTrace ) )
  2133. return -1;
  2134. Assert( IsFinite(boxTrace.fraction) );
  2135. tr.fraction *= boxTrace.fraction;
  2136. tr.startsolid = boxTrace.startsolid;
  2137. int hitside = boxTrace.plane.type;
  2138. if ( boxTrace.plane.normal[hitside] >= 0 )
  2139. {
  2140. hitside += 3;
  2141. }
  2142. return hitside;
  2143. }
  2144. #pragma warning (default : 4701)
  2145. //-----------------------------------------------------------------------------
  2146. // Purpose:
  2147. //-----------------------------------------------------------------------------
  2148. bool SweepBoxToStudio( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set,
  2149. matrix3x4_t **hitboxbones, int fContentsMask, trace_t &tr )
  2150. {
  2151. BONE_PROFILE_FUNC();
  2152. tr.fraction = 1.0;
  2153. tr.startsolid = false;
  2154. // OPTIMIZE: Partition these?
  2155. Ray_t clippedRay = ray;
  2156. int hitbox = -1;
  2157. for ( int i = 0; i < set->numhitboxes; i++ )
  2158. {
  2159. mstudiobbox_t *pbox = set->pHitbox(i);
  2160. // Filter based on contents mask
  2161. int fBoneContents = pStudioHdr->pBone( pbox->bone )->contents;
  2162. if ( ( fBoneContents & fContentsMask ) == 0 )
  2163. continue;
  2164. //FIXME: Won't work with scaling!
  2165. trace_t obbTrace;
  2166. if ( IntersectRayWithOBB( clippedRay, *hitboxbones[pbox->bone], pbox->bbmin, pbox->bbmax, 0.0f, &obbTrace ) )
  2167. {
  2168. tr.startpos = obbTrace.startpos;
  2169. tr.endpos = obbTrace.endpos;
  2170. tr.plane = obbTrace.plane;
  2171. tr.startsolid = obbTrace.startsolid;
  2172. tr.allsolid = obbTrace.allsolid;
  2173. // This logic here is to shorten the ray each time to get more early outs
  2174. tr.fraction *= obbTrace.fraction;
  2175. clippedRay.m_Delta *= obbTrace.fraction;
  2176. hitbox = i;
  2177. if (tr.startsolid)
  2178. break;
  2179. }
  2180. }
  2181. if ( hitbox >= 0 )
  2182. {
  2183. tr.hitgroup = set->pHitbox(hitbox)->group;
  2184. tr.hitbox = hitbox;
  2185. const mstudiobone_t *pBone = pStudioHdr->pBone( set->pHitbox(hitbox)->bone );
  2186. tr.contents = pBone->contents | CONTENTS_HITBOX;
  2187. tr.physicsbone = pBone->physicsbone;
  2188. tr.surface.name = "**studio**";
  2189. tr.surface.flags = SURF_HITBOX;
  2190. tr.surface.surfaceProps = pBone->GetSurfaceProp();
  2191. Assert( tr.physicsbone >= 0 );
  2192. return true;
  2193. }
  2194. return false;
  2195. }
  2196. //-----------------------------------------------------------------------------
  2197. // Purpose:
  2198. //-----------------------------------------------------------------------------
  2199. bool TraceToStudio( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set,
  2200. matrix3x4_t **hitboxbones, int fContentsMask, const Vector &vecOrigin, float flScale, trace_t &tr )
  2201. {
  2202. BONE_PROFILE_FUNC();
  2203. if ( !ray.m_IsRay )
  2204. {
  2205. return SweepBoxToStudio( pProps, ray, pStudioHdr, set, hitboxbones, fContentsMask, tr );
  2206. }
  2207. tr.fraction = 1.0;
  2208. tr.startsolid = false;
  2209. // no hit yet
  2210. int hitbox = -1;
  2211. int hitside = -1;
  2212. // OPTIMIZE: Partition these?
  2213. for ( int i = 0; i < set->numhitboxes; i++ )
  2214. {
  2215. mstudiobbox_t *pbox = set->pHitbox(i);
  2216. // Filter based on contents mask
  2217. int fBoneContents = pStudioHdr->pBone( pbox->bone )->contents;
  2218. if ( ( fBoneContents & fContentsMask ) == 0 )
  2219. continue;
  2220. // columns are axes of the bones in world space, translation is in world space
  2221. matrix3x4_t& matrix = *hitboxbones[pbox->bone];
  2222. // Because we're sending in a matrix with scale data, and because the matrix inversion in the hitbox
  2223. // code does not handle that case, we pre-scale the bones and ray down here and do our collision checks
  2224. // in unscaled space. We can then rescale the results afterwards.
  2225. int side = -1;
  2226. if ( flScale < 1.0f-FLT_EPSILON || flScale > 1.0f+FLT_EPSILON )
  2227. {
  2228. matrix3x4_t matScaled;
  2229. MatrixCopy( matrix, matScaled );
  2230. float invScale = 1.0f / flScale;
  2231. Vector vecBoneOrigin;
  2232. MatrixGetColumn( matScaled, 3, vecBoneOrigin );
  2233. // Pre-scale the origin down
  2234. Vector vecNewOrigin = vecBoneOrigin - vecOrigin;
  2235. vecNewOrigin *= invScale;
  2236. vecNewOrigin += vecOrigin;
  2237. MatrixSetColumn( vecNewOrigin, 3, matScaled );
  2238. // Scale it uniformly
  2239. VectorScale( matScaled[0], invScale, matScaled[0] );
  2240. VectorScale( matScaled[1], invScale, matScaled[1] );
  2241. VectorScale( matScaled[2], invScale, matScaled[2] );
  2242. // Pre-scale our ray as well
  2243. Vector vecRayStart = ray.m_Start - vecOrigin;
  2244. vecRayStart *= invScale;
  2245. vecRayStart += vecOrigin;
  2246. Vector vecRayDelta = ray.m_Delta * invScale;
  2247. Ray_t newRay;
  2248. newRay.Init( vecRayStart, vecRayStart + vecRayDelta );
  2249. side = ClipRayToHitbox( newRay, pbox, matScaled, tr );
  2250. }
  2251. else
  2252. {
  2253. side = ClipRayToHitbox( ray, pbox, matrix, tr );
  2254. }
  2255. if ( side >= 0 )
  2256. {
  2257. hitbox = i;
  2258. hitside = side;
  2259. }
  2260. }
  2261. if ( hitbox >= 0 )
  2262. {
  2263. mstudiobbox_t *pbox = set->pHitbox(hitbox);
  2264. VectorMA( ray.m_Start, tr.fraction, ray.m_Delta, tr.endpos );
  2265. tr.hitgroup = set->pHitbox(hitbox)->group;
  2266. tr.hitbox = hitbox;
  2267. const mstudiobone_t *pBone = pStudioHdr->pBone( pbox->bone );
  2268. tr.contents = pBone->contents | CONTENTS_HITBOX;
  2269. tr.physicsbone = pBone->physicsbone;
  2270. tr.surface.name = "**studio**";
  2271. tr.surface.flags = SURF_HITBOX;
  2272. tr.surface.surfaceProps = pBone->GetSurfaceProp();
  2273. Assert( tr.physicsbone >= 0 );
  2274. matrix3x4_t& matrix = *hitboxbones[pbox->bone];
  2275. if ( hitside >= 3 )
  2276. {
  2277. hitside -= 3;
  2278. tr.plane.normal[0] = matrix[0][hitside];
  2279. tr.plane.normal[1] = matrix[1][hitside];
  2280. tr.plane.normal[2] = matrix[2][hitside];
  2281. //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) + pbox->bbmax[hitside];
  2282. }
  2283. else
  2284. {
  2285. tr.plane.normal[0] = -matrix[0][hitside];
  2286. tr.plane.normal[1] = -matrix[1][hitside];
  2287. tr.plane.normal[2] = -matrix[2][hitside];
  2288. //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) - pbox->bbmin[hitside];
  2289. }
  2290. // simpler plane constant equation
  2291. tr.plane.dist = DotProduct( tr.endpos, tr.plane.normal );
  2292. tr.plane.type = 3;
  2293. return true;
  2294. }
  2295. return false;
  2296. }
  2297. //-----------------------------------------------------------------------------
  2298. // Purpose:
  2299. //-----------------------------------------------------------------------------
  2300. bool TraceToStudioCsgoHitgroupsPriority( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set,
  2301. matrix3x4_t **hitboxbones, int fContentsMask, const Vector &vecOrigin, float flScale, trace_t &tr )
  2302. {
  2303. BONE_PROFILE_FUNC();
  2304. if ( !ray.m_IsRay )
  2305. {
  2306. return SweepBoxToStudio( pProps, ray, pStudioHdr, set, hitboxbones, fContentsMask, tr );
  2307. }
  2308. tr.fraction = 1.0;
  2309. tr.startsolid = false;
  2310. //
  2311. // We will collect trace results depending on hit group type of hitboxes
  2312. // and prefer to hit the hitboxes in order of damage.
  2313. //
  2314. enum EHitGroupType_t
  2315. {
  2316. k_EHitGroupType_Head,
  2317. k_EHitGroupType_Stomach,
  2318. k_EHitGroupType_Chest,
  2319. k_EHitGroupType_Arms,
  2320. k_EHitGroupType_General,
  2321. k_EHitGroupType_Legs,
  2322. k_EHitGroupType_Count
  2323. };
  2324. struct HitGroupResult_t
  2325. {
  2326. trace_t m_trHitGroup;
  2327. int m_nHitbox; // index of the hitbox hit, -1 if no it
  2328. int m_nHitSide; // hit side
  2329. };
  2330. // We'll collect results here, initialize to nothing hit
  2331. HitGroupResult_t arrHitGroupResults[ k_EHitGroupType_Count ];
  2332. for ( int j = 0; j < Q_ARRAYSIZE( arrHitGroupResults ); ++ j )
  2333. {
  2334. Q_memcpy( &arrHitGroupResults[j].m_trHitGroup, &tr, sizeof( arrHitGroupResults[j].m_trHitGroup ) );
  2335. arrHitGroupResults[j].m_nHitbox = -1;
  2336. arrHitGroupResults[j].m_nHitSide = -1;
  2337. }
  2338. // OPTIMIZE: Partition these?
  2339. for ( int i = 0; i < set->numhitboxes; i++ )
  2340. {
  2341. mstudiobbox_t *pbox = set->pHitbox(i);
  2342. // Filter based on contents mask
  2343. int fBoneContents = pStudioHdr->pBone( pbox->bone )->contents;
  2344. if ( ( fBoneContents & fContentsMask ) == 0 )
  2345. continue;
  2346. // Collect the results into appropriate hitgroup bucket
  2347. HitGroupResult_t *pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_General ];
  2348. switch ( pbox->group )
  2349. {
  2350. case 1:
  2351. pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Head ];
  2352. break;
  2353. case 3:
  2354. pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Stomach ];
  2355. break;
  2356. case 2:
  2357. pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Chest ];
  2358. break;
  2359. case 4:
  2360. case 5:
  2361. pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Arms ];
  2362. break;
  2363. case 6:
  2364. case 7:
  2365. pHitGroupResult = &arrHitGroupResults[ k_EHitGroupType_Legs ];
  2366. break;
  2367. }
  2368. Assert( IsFinite( pHitGroupResult->m_trHitGroup.fraction ) );
  2369. // columns are axes of the bones in world space, translation is in world space
  2370. matrix3x4_t& matrix = *hitboxbones[pbox->bone];
  2371. // Because we're sending in a matrix with scale data, and because the matrix inversion in the hitbox
  2372. // code does not handle that case, we pre-scale the bones and ray down here and do our collision checks
  2373. // in unscaled space. We can then rescale the results afterwards.
  2374. int side = -1;
  2375. if ( flScale < 1.0f-FLT_EPSILON || flScale > 1.0f+FLT_EPSILON )
  2376. {
  2377. matrix3x4_t matScaled;
  2378. MatrixCopy( matrix, matScaled );
  2379. matrix3x4_t matOrientation;
  2380. AngleMatrix(pbox->angOffsetOrientation, matOrientation);
  2381. MatrixMultiply(matScaled, matOrientation, matScaled);
  2382. float invScale = 1.0f / flScale;
  2383. Vector vecBoneOrigin;
  2384. MatrixGetColumn( matScaled, 3, vecBoneOrigin );
  2385. // Pre-scale the origin down
  2386. Vector vecNewOrigin = vecBoneOrigin - vecOrigin;
  2387. vecNewOrigin *= invScale;
  2388. vecNewOrigin += vecOrigin;
  2389. MatrixSetColumn( vecNewOrigin, 3, matScaled );
  2390. // Scale it uniformly
  2391. VectorScale( matScaled[0], invScale, matScaled[0] );
  2392. VectorScale( matScaled[1], invScale, matScaled[1] );
  2393. VectorScale( matScaled[2], invScale, matScaled[2] );
  2394. // Pre-scale our ray as well
  2395. Vector vecRayStart = ray.m_Start - vecOrigin;
  2396. vecRayStart *= invScale;
  2397. vecRayStart += vecOrigin;
  2398. Vector vecRayDelta = ray.m_Delta * invScale;
  2399. Ray_t newRay;
  2400. newRay.Init( vecRayStart, vecRayStart + vecRayDelta );
  2401. side = ClipRayToHitbox( newRay, pbox, matScaled, pHitGroupResult->m_trHitGroup );
  2402. }
  2403. else
  2404. {
  2405. matrix3x4_t matCopy;
  2406. MatrixCopy( matrix, matCopy );
  2407. matrix3x4_t matOrientation;
  2408. AngleMatrix(pbox->angOffsetOrientation, matOrientation);
  2409. MatrixMultiply(matCopy, matOrientation, matCopy);
  2410. side = ClipRayToHitbox( ray, pbox, matCopy, pHitGroupResult->m_trHitGroup );
  2411. }
  2412. Assert( IsFinite( pHitGroupResult->m_trHitGroup.fraction ) );
  2413. if ( side >= 0 )
  2414. {
  2415. pHitGroupResult->m_nHitbox = i;
  2416. pHitGroupResult->m_nHitSide = side;
  2417. }
  2418. }
  2419. //
  2420. // Now based on bucketing hitbox group results determine which hitbox we will return
  2421. // and copy the trace results to the output parameter.
  2422. //
  2423. int hitbox = -1;
  2424. int hitside = -1;
  2425. // CSGO specific hitbox computation - characters' neck hitbox is classified as a headshot, but
  2426. // it deeply interpenetrates the chest. We don't want players shooting at the middle of the chest
  2427. // to register a headshot by penetrating into neck through chest or stomach, so if we have a
  2428. // headshot trace make sure that it doesn't occur by penetrating chest or stomach.
  2429. if ( arrHitGroupResults[k_EHitGroupType_Head].m_nHitbox >= 0 )
  2430. {
  2431. // We have a potential headshot, check if it's penetrating via stomach or chest
  2432. for ( int j = k_EHitGroupType_Stomach; j <= k_EHitGroupType_Chest; ++ j )
  2433. {
  2434. if ( arrHitGroupResults[j].m_trHitGroup.fraction < arrHitGroupResults[k_EHitGroupType_Head].m_trHitGroup.fraction )
  2435. {
  2436. // The bullet first hit the stomach/chest hitbox, so ignore the headshot
  2437. arrHitGroupResults[k_EHitGroupType_Head].m_nHitbox = -1;
  2438. break;
  2439. }
  2440. }
  2441. }
  2442. // Now pick the hitbox hit with the highest priority for damage
  2443. for ( int j = 0; j < Q_ARRAYSIZE( arrHitGroupResults ); ++ j )
  2444. {
  2445. if ( arrHitGroupResults[j].m_nHitbox >= 0 )
  2446. {
  2447. hitbox = arrHitGroupResults[j].m_nHitbox;
  2448. hitside = arrHitGroupResults[j].m_nHitSide;
  2449. Q_memcpy( &tr, &arrHitGroupResults[j].m_trHitGroup, sizeof( arrHitGroupResults[j].m_trHitGroup ) );
  2450. break;
  2451. }
  2452. }
  2453. if ( hitbox >= 0 )
  2454. {
  2455. mstudiobbox_t *pbox = set->pHitbox(hitbox);
  2456. VectorMA( ray.m_Start, tr.fraction, ray.m_Delta, tr.endpos );
  2457. tr.hitgroup = set->pHitbox(hitbox)->group;
  2458. tr.hitbox = hitbox;
  2459. const mstudiobone_t *pBone = pStudioHdr->pBone( pbox->bone );
  2460. tr.contents = pBone->contents | CONTENTS_HITBOX;
  2461. tr.physicsbone = pBone->physicsbone;
  2462. tr.surface.name = "**studio**";
  2463. tr.surface.flags = SURF_HITBOX;
  2464. tr.surface.surfaceProps = pBone->GetSurfaceProp();
  2465. Assert( tr.physicsbone >= 0 );
  2466. matrix3x4_t& matrix = *hitboxbones[pbox->bone];
  2467. if ( hitside >= 3 )
  2468. {
  2469. hitside -= 3;
  2470. tr.plane.normal[0] = matrix[0][hitside];
  2471. tr.plane.normal[1] = matrix[1][hitside];
  2472. tr.plane.normal[2] = matrix[2][hitside];
  2473. //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) + pbox->bbmax[hitside];
  2474. }
  2475. else
  2476. {
  2477. tr.plane.normal[0] = -matrix[0][hitside];
  2478. tr.plane.normal[1] = -matrix[1][hitside];
  2479. tr.plane.normal[2] = -matrix[2][hitside];
  2480. //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) - pbox->bbmin[hitside];
  2481. }
  2482. // simpler plane constant equation
  2483. tr.plane.dist = DotProduct( tr.endpos, tr.plane.normal );
  2484. tr.plane.type = 3;
  2485. return true;
  2486. }
  2487. return false;
  2488. }
  2489. //-----------------------------------------------------------------------------
  2490. /**
  2491. * TERROR: Version of TraceToStudio that favors certain high-damage hitgroups such as the head
  2492. */
  2493. bool TraceToStudioGrouped( IPhysicsSurfaceProps *pProps, const Ray_t& ray, CStudioHdr *pStudioHdr, mstudiohitboxset_t *set,
  2494. matrix3x4_t **hitboxbones, int fContentsMask, trace_t &tr, const CUtlVector< int > &sortedHitgroups )
  2495. {
  2496. BONE_PROFILE_FUNC();
  2497. if ( !ray.m_IsRay )
  2498. {
  2499. return SweepBoxToStudio( pProps, ray, pStudioHdr, set, hitboxbones, fContentsMask, tr );
  2500. }
  2501. tr.fraction = 1.0;
  2502. tr.startsolid = false;
  2503. // no hit yet
  2504. int hitbox = -1;
  2505. int hitside = -1;
  2506. for ( int n=0; n<sortedHitgroups.Count(); ++n )
  2507. {
  2508. // OPTIMIZE: Partition these?
  2509. for ( int i = 0; i < set->numhitboxes; i++ )
  2510. {
  2511. mstudiobbox_t *pbox = set->pHitbox(i);
  2512. if ( pbox->group != sortedHitgroups[n] )
  2513. continue;
  2514. // Filter based on contents mask
  2515. int fBoneContents = pStudioHdr->pBone( pbox->bone )->contents;
  2516. if ( ( fBoneContents & fContentsMask ) == 0 )
  2517. continue;
  2518. // columns are axes of the bones in world space, translation is in world space
  2519. matrix3x4_t& matrix = *hitboxbones[pbox->bone];
  2520. int side = ClipRayToHitbox( ray, pbox, matrix, tr );
  2521. if ( side >= 0 )
  2522. {
  2523. hitbox = i;
  2524. hitside = side;
  2525. }
  2526. }
  2527. // If a high damage hitgroup was traced, stop here (ignore closer, lower-damage hitgroups)
  2528. if ( hitbox >= 0 )
  2529. {
  2530. break;
  2531. }
  2532. }
  2533. if ( hitbox >= 0 )
  2534. {
  2535. mstudiobbox_t *pbox = set->pHitbox(hitbox);
  2536. VectorMA( ray.m_Start, tr.fraction, ray.m_Delta, tr.endpos );
  2537. tr.hitgroup = set->pHitbox(hitbox)->group;
  2538. tr.hitbox = hitbox;
  2539. const mstudiobone_t *pBone = pStudioHdr->pBone( pbox->bone );
  2540. tr.contents = pBone->contents | CONTENTS_HITBOX;
  2541. tr.physicsbone = pBone->physicsbone;
  2542. tr.surface.surfaceProps = pBone->GetSurfaceProp();
  2543. tr.surface.name = "**studio**";
  2544. tr.surface.flags = SURF_HITBOX;
  2545. Assert( tr.physicsbone >= 0 );
  2546. matrix3x4_t& matrix = *hitboxbones[pbox->bone];
  2547. if ( hitside >= 3 )
  2548. {
  2549. hitside -= 3;
  2550. tr.plane.normal[0] = matrix[0][hitside];
  2551. tr.plane.normal[1] = matrix[1][hitside];
  2552. tr.plane.normal[2] = matrix[2][hitside];
  2553. //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) + pbox->bbmax[hitside];
  2554. }
  2555. else
  2556. {
  2557. tr.plane.normal[0] = -matrix[0][hitside];
  2558. tr.plane.normal[1] = -matrix[1][hitside];
  2559. tr.plane.normal[2] = -matrix[2][hitside];
  2560. //tr.plane.dist = DotProduct( tr.plane.normal, Vector(matrix[0][3], matrix[1][3], matrix[2][3] ) ) - pbox->bbmin[hitside];
  2561. }
  2562. // simpler plane constant equation
  2563. tr.plane.dist = DotProduct( tr.endpos, tr.plane.normal );
  2564. tr.plane.type = 3;
  2565. return true;
  2566. }
  2567. return false;
  2568. }
  2569. //-----------------------------------------------------------------------------
  2570. // Purpose: returns array of animations and weightings for a sequence based on current pose parameters
  2571. //-----------------------------------------------------------------------------
  2572. void Studio_SeqAnims( const CStudioHdr *pStudioHdr, mstudioseqdesc_t &seqdesc, int iSequence, const float poseParameter[], mstudioanimdesc_t *panim[4], float *weight )
  2573. {
  2574. BONE_PROFILE_FUNC();
  2575. #if _DEBUG
  2576. VPROF_INCREMENT_COUNTER("SEQ_ANIMS",1);
  2577. #endif
  2578. if (!pStudioHdr || iSequence >= pStudioHdr->GetNumSeq())
  2579. {
  2580. weight[0] = weight[1] = weight[2] = weight[3] = 0.0;
  2581. return;
  2582. }
  2583. float s0 = 0, s1 = 0;
  2584. int i0 = Studio_LocalPoseParameter( pStudioHdr, poseParameter, seqdesc, iSequence, 0, s0 );
  2585. int i1 = Studio_LocalPoseParameter( pStudioHdr, poseParameter, seqdesc, iSequence, 1, s1 );
  2586. panim[0] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0 , i1 ) ) );
  2587. weight[0] = (1 - s0) * (1 - s1);
  2588. panim[1] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0+1, i1 ) ) );
  2589. weight[1] = (s0) * (1 - s1);
  2590. panim[2] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0 , i1+1 ) ) );
  2591. weight[2] = (1 - s0) * (s1);
  2592. panim[3] = &((CStudioHdr *)pStudioHdr)->pAnimdesc( ((CStudioHdr *)pStudioHdr)->iRelativeAnim( iSequence, seqdesc.anim( i0+1, i1+1 ) ) );
  2593. weight[3] = (s0) * (s1);
  2594. Assert( weight[0] >= 0.0f && weight[1] >= 0.0f && weight[2] >= 0.0f && weight[3] >= 0.0f );
  2595. }
  2596. //-----------------------------------------------------------------------------
  2597. // Purpose: returns max frame number for a sequence
  2598. //-----------------------------------------------------------------------------
  2599. int Studio_MaxFrame( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[] )
  2600. {
  2601. mstudioanimdesc_t *panim[4];
  2602. float weight[4];
  2603. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2604. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2605. float maxFrame = 0;
  2606. for (int i = 0; i < 4; i++)
  2607. {
  2608. if (weight[i] > 0)
  2609. {
  2610. maxFrame += panim[i]->numframes * weight[i];
  2611. }
  2612. }
  2613. if ( maxFrame > 1 )
  2614. maxFrame -= 1;
  2615. // FIXME: why does the weights sometimes not exactly add it 1.0 and this sometimes rounds down?
  2616. return (maxFrame + 0.01);
  2617. }
  2618. //-----------------------------------------------------------------------------
  2619. // Purpose: returns frames per second of a sequence
  2620. //-----------------------------------------------------------------------------
  2621. float Studio_FPS( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[] )
  2622. {
  2623. mstudioanimdesc_t *panim[4];
  2624. float weight[4];
  2625. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2626. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2627. float t = 0;
  2628. for (int i = 0; i < 4; i++)
  2629. {
  2630. if (weight[i] > 0)
  2631. {
  2632. t += panim[i]->fps * weight[i];
  2633. }
  2634. }
  2635. return t;
  2636. }
  2637. //-----------------------------------------------------------------------------
  2638. // Purpose: returns cycles per second of a sequence (cycles/second)
  2639. //-----------------------------------------------------------------------------
  2640. float Studio_CPS( const CStudioHdr *pStudioHdr, mstudioseqdesc_t &seqdesc, int iSequence, const float poseParameter[] )
  2641. {
  2642. mstudioanimdesc_t *panim[4];
  2643. float weight[4];
  2644. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2645. float t = 0;
  2646. for (int i = 0; i < 4; i++)
  2647. {
  2648. if (weight[i] > 0 && panim[i]->numframes > 1)
  2649. {
  2650. t += (panim[i]->fps / (panim[i]->numframes - 1)) * weight[i];
  2651. }
  2652. }
  2653. // FIXME: add support for more than just start 0 and end 0 pose param layers
  2654. for (int j = 0; j < seqdesc.numautolayers; j++)
  2655. {
  2656. mstudioautolayer_t *pLayer = seqdesc.pAutolayer( j );
  2657. if (pLayer->flags & STUDIO_AL_LOCAL)
  2658. continue;
  2659. float layerWeight = 0;
  2660. int iSequenceLocal = pStudioHdr->iRelativeSeq( iSequence, pLayer->iSequence );
  2661. if ( pLayer->start == 0 && pLayer->end == 0 && (pLayer->flags & STUDIO_AL_POSE) )
  2662. {
  2663. int iPose = pStudioHdr->GetSharedPoseParameter( iSequenceLocal, pLayer->iPose );
  2664. if (iPose == -1)
  2665. continue;
  2666. const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iPose );
  2667. float s = poseParameter[ iPose ] * (Pose.end - Pose.start) + Pose.start;
  2668. Assert( (pLayer->tail - pLayer->peak) != 0 );
  2669. s = clamp( (s - pLayer->peak) / (pLayer->tail - pLayer->peak), 0, 1 );
  2670. if (pLayer->flags & STUDIO_AL_SPLINE)
  2671. {
  2672. s = SimpleSpline( s );
  2673. }
  2674. layerWeight = seqdesc.weight(0) * s;
  2675. }
  2676. if ( layerWeight )
  2677. {
  2678. mstudioseqdesc_t &seqdescLocal = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequenceLocal );
  2679. Studio_SeqAnims( pStudioHdr, seqdescLocal, iSequenceLocal, poseParameter, panim, weight );
  2680. float flLocalT = 0;
  2681. for (int i = 0; i < 4; i++)
  2682. {
  2683. if (weight[i] > 0 && panim[i]->numframes > 1)
  2684. {
  2685. flLocalT += (panim[i]->fps / (panim[i]->numframes - 1)) * weight[i];
  2686. }
  2687. }
  2688. if ( flLocalT )
  2689. {
  2690. t = Lerp( layerWeight, t, flLocalT );
  2691. }
  2692. }
  2693. }
  2694. return t;
  2695. }
  2696. //-----------------------------------------------------------------------------
  2697. // Purpose: returns length (in seconds) of a sequence (seconds/cycle)
  2698. //-----------------------------------------------------------------------------
  2699. float Studio_Duration( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[] )
  2700. {
  2701. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2702. float cps = Studio_CPS( pStudioHdr, seqdesc, iSequence, poseParameter );
  2703. if( cps == 0 )
  2704. return 0.0f;
  2705. return 1.0f/cps;
  2706. }
  2707. //-----------------------------------------------------------------------------
  2708. // Purpose: calculate changes in position and angle relative to the start of an animations cycle
  2709. // Output: updated position and angle, relative to the origin
  2710. // returns false if animation is not a movement animation
  2711. //-----------------------------------------------------------------------------
  2712. bool Studio_AnimPosition( mstudioanimdesc_t *panim, float flCycle, Vector &vecPos, QAngle &vecAngle )
  2713. {
  2714. BONE_PROFILE_FUNC();
  2715. float prevframe = 0;
  2716. vecPos.Init( );
  2717. vecAngle.Init( );
  2718. if (panim->nummovements == 0)
  2719. return false;
  2720. int iLoops = 0;
  2721. if (flCycle > 1.0)
  2722. {
  2723. iLoops = (int)flCycle;
  2724. }
  2725. else if (flCycle < 0.0)
  2726. {
  2727. iLoops = (int)flCycle - 1;
  2728. }
  2729. flCycle = flCycle - iLoops;
  2730. float flFrame = flCycle * (panim->numframes - 1);
  2731. for (int i = 0; i < panim->nummovements; i++)
  2732. {
  2733. mstudiomovement_t *pmove = panim->pMovement( i );
  2734. if (pmove->endframe >= flFrame)
  2735. {
  2736. float f = (flFrame - prevframe) / (pmove->endframe - prevframe);
  2737. float d = pmove->v0 * f + 0.5 * (pmove->v1 - pmove->v0) * f * f;
  2738. vecPos = vecPos + d * pmove->vector;
  2739. vecAngle.y = vecAngle.y * (1 - f) + pmove->angle * f;
  2740. if (iLoops != 0)
  2741. {
  2742. mstudiomovement_t *pmove = panim->pMovement( panim->nummovements - 1 );
  2743. vecPos = vecPos + iLoops * pmove->position;
  2744. vecAngle.y = vecAngle.y + iLoops * pmove->angle;
  2745. }
  2746. return true;
  2747. }
  2748. else
  2749. {
  2750. prevframe = pmove->endframe;
  2751. vecPos = pmove->position;
  2752. vecAngle.y = pmove->angle;
  2753. }
  2754. }
  2755. return false;
  2756. }
  2757. //-----------------------------------------------------------------------------
  2758. // Purpose: calculate instantaneous velocity in ips at a given point
  2759. // in the animations cycle
  2760. // Output: velocity vector, relative to identity orientation
  2761. // returns false if animation is not a movement animation
  2762. //-----------------------------------------------------------------------------
  2763. bool Studio_AnimVelocity( mstudioanimdesc_t *panim, float flCycle, Vector &vecVelocity )
  2764. {
  2765. float prevframe = 0;
  2766. float flFrame = flCycle * (panim->numframes - 1);
  2767. flFrame = flFrame - (int)(flFrame / (panim->numframes - 1));
  2768. for (int i = 0; i < panim->nummovements; i++)
  2769. {
  2770. mstudiomovement_t *pmove = panim->pMovement( i );
  2771. if (pmove->endframe >= flFrame)
  2772. {
  2773. float f = (flFrame - prevframe) / (pmove->endframe - prevframe);
  2774. float vel = pmove->v0 * (1 - f) + pmove->v1 * f;
  2775. // scale from per block to per sec velocity
  2776. vel = vel * panim->fps / (pmove->endframe - prevframe);
  2777. vecVelocity = pmove->vector * vel;
  2778. return true;
  2779. }
  2780. else
  2781. {
  2782. prevframe = pmove->endframe;
  2783. }
  2784. }
  2785. return false;
  2786. }
  2787. //-----------------------------------------------------------------------------
  2788. // Purpose: calculate changes in position and angle between two points in an animation cycle
  2789. // Output: updated position and angle, relative to CycleFrom being at the origin
  2790. // returns false if animation is not a movement animation
  2791. //-----------------------------------------------------------------------------
  2792. bool Studio_AnimMovement( mstudioanimdesc_t *panim, float flCycleFrom, float flCycleTo, Vector &deltaPos, QAngle &deltaAngle )
  2793. {
  2794. if (panim->nummovements == 0)
  2795. return false;
  2796. Vector startPos;
  2797. QAngle startA;
  2798. Studio_AnimPosition( panim, flCycleFrom, startPos, startA );
  2799. Vector endPos;
  2800. QAngle endA;
  2801. Studio_AnimPosition( panim, flCycleTo, endPos, endA );
  2802. Vector tmp = endPos - startPos;
  2803. deltaAngle.y = endA.y - startA.y;
  2804. VectorYawRotate( tmp, -startA.y, deltaPos );
  2805. return true;
  2806. }
  2807. //-----------------------------------------------------------------------------
  2808. // Purpose: finds how much of an animation to play to move given linear distance
  2809. //-----------------------------------------------------------------------------
  2810. float Studio_FindAnimDistance( mstudioanimdesc_t *panim, float flDist )
  2811. {
  2812. float prevframe = 0;
  2813. if (flDist <= 0)
  2814. return 0.0;
  2815. for (int i = 0; i < panim->nummovements; i++)
  2816. {
  2817. mstudiomovement_t *pmove = panim->pMovement( i );
  2818. float flMove = (pmove->v0 + pmove->v1) * 0.5;
  2819. if (flMove >= flDist)
  2820. {
  2821. float root1, root2;
  2822. // d = V0 * t + 1/2 (V1-V0) * t^2
  2823. if (SolveQuadratic( 0.5 * (pmove->v1 - pmove->v0), pmove->v0, -flDist, root1, root2 ))
  2824. {
  2825. float cpf = 1.0 / (panim->numframes - 1); // cycles per frame
  2826. return (prevframe + root1 * (pmove->endframe - prevframe)) * cpf;
  2827. }
  2828. return 0.0;
  2829. }
  2830. else
  2831. {
  2832. flDist -= flMove;
  2833. prevframe = pmove->endframe;
  2834. }
  2835. }
  2836. return 1.0;
  2837. }
  2838. //-----------------------------------------------------------------------------
  2839. // Purpose: calculate changes in position and angle between two points in a sequences cycle
  2840. // Output: updated position and angle, relative to CycleFrom being at the origin
  2841. // returns false if sequence is not a movement sequence
  2842. //-----------------------------------------------------------------------------
  2843. bool Studio_SeqMovement( const CStudioHdr *pStudioHdr, int iSequence, float flCycleFrom, float flCycleTo, const float poseParameter[], Vector &deltaPos, QAngle &deltaAngles )
  2844. {
  2845. mstudioanimdesc_t *panim[4];
  2846. float weight[4];
  2847. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2848. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2849. deltaPos.Init( );
  2850. deltaAngles.Init( );
  2851. bool found = false;
  2852. for (int i = 0; i < 4; i++)
  2853. {
  2854. if (weight[i])
  2855. {
  2856. Vector localPos;
  2857. QAngle localAngles;
  2858. localPos.Init();
  2859. localAngles.Init();
  2860. if (Studio_AnimMovement( panim[i], flCycleFrom, flCycleTo, localPos, localAngles ))
  2861. {
  2862. found = true;
  2863. deltaPos = deltaPos + localPos * weight[i];
  2864. // FIXME: this makes no sense
  2865. deltaAngles = deltaAngles + localAngles * weight[i];
  2866. }
  2867. else if (!(panim[i]->flags & STUDIO_DELTA) && panim[i]->nummovements == 0 && seqdesc.weight(0) > 0.0)
  2868. {
  2869. found = true;
  2870. }
  2871. }
  2872. }
  2873. // FIXME: add support for more than just start 0 and end 0 pose param layers (currently no cycle handling or angular delta)
  2874. for (int j = 0; j < seqdesc.numautolayers; j++)
  2875. {
  2876. mstudioautolayer_t *pLayer = seqdesc.pAutolayer( j );
  2877. if (pLayer->flags & STUDIO_AL_LOCAL)
  2878. continue;
  2879. float layerWeight = 0;
  2880. int iSequenceLocal = pStudioHdr->iRelativeSeq( iSequence, pLayer->iSequence );
  2881. if ( pLayer->start == 0 && pLayer->end == 0 && (pLayer->flags & STUDIO_AL_POSE) )
  2882. {
  2883. int iPose = pStudioHdr->GetSharedPoseParameter( iSequenceLocal, pLayer->iPose );
  2884. if (iPose == -1)
  2885. continue;
  2886. const mstudioposeparamdesc_t &Pose = ((CStudioHdr *)pStudioHdr)->pPoseParameter( iPose );
  2887. float s = poseParameter[ iPose ] * (Pose.end - Pose.start) + Pose.start;
  2888. Assert( (pLayer->tail - pLayer->peak) != 0 );
  2889. s = clamp( (s - pLayer->peak) / (pLayer->tail - pLayer->peak), 0, 1 );
  2890. if (pLayer->flags & STUDIO_AL_SPLINE)
  2891. {
  2892. s = SimpleSpline( s );
  2893. }
  2894. layerWeight = seqdesc.weight(0) * s;
  2895. }
  2896. if ( layerWeight )
  2897. {
  2898. Vector layerPos;
  2899. //QAngle layerAngles;
  2900. layerPos.Init();
  2901. //layerAngles.Init();
  2902. bool bLayerFound = false;
  2903. mstudioseqdesc_t &seqdescLocal = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequenceLocal );
  2904. Studio_SeqAnims( pStudioHdr, seqdescLocal, iSequenceLocal, poseParameter, panim, weight );
  2905. for (int i = 0; i < 4; i++)
  2906. {
  2907. if (weight[i])
  2908. {
  2909. Vector localPos;
  2910. QAngle localAngles;
  2911. localPos.Init();
  2912. //localAngles.Init();
  2913. if ( Studio_AnimMovement( panim[i], flCycleFrom, flCycleTo, localPos, localAngles ) )
  2914. {
  2915. bLayerFound = true;
  2916. layerPos = layerPos + localPos * weight[i];
  2917. // FIXME: do angles
  2918. //layerAngles = layerAngles + localAngles * weight[i];
  2919. }
  2920. }
  2921. }
  2922. if ( bLayerFound )
  2923. {
  2924. deltaPos = Lerp( layerWeight, deltaPos, layerPos );
  2925. }
  2926. }
  2927. }
  2928. return found;
  2929. }
  2930. //-----------------------------------------------------------------------------
  2931. // Purpose: calculate changes in position and angle between two points in a sequences cycle
  2932. // Output: updated position and angle, relative to CycleFrom being at the origin
  2933. // returns false if sequence is not a movement sequence
  2934. //-----------------------------------------------------------------------------
  2935. float Studio_SeqMovementAndDuration( const CStudioHdr *pStudioHdr, int iSequence, float flCycleFrom, float flCycleTo, const float poseParameter[], Vector &deltaPos )
  2936. {
  2937. mstudioanimdesc_t *panim[4];
  2938. float weight[4];
  2939. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2940. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2941. deltaPos.Init( );
  2942. Vector localPos;
  2943. QAngle localAngles;
  2944. float t = 0;
  2945. for ( int i = 0; i < 4; i++ )
  2946. {
  2947. if ( weight[i] == 0.0f )
  2948. continue;
  2949. if ( panim[i]->numframes > 1 )
  2950. {
  2951. t += ( panim[i]->fps / ( panim[i]->numframes - 1 ) ) * weight[i];
  2952. }
  2953. if ( Studio_AnimMovement( panim[i], flCycleFrom, flCycleTo, localPos, localAngles ) )
  2954. {
  2955. VectorMA( deltaPos, weight[i], localPos, deltaPos );
  2956. }
  2957. }
  2958. return ( t != 0.0f ) ? 1.0f / t : 0.0f;
  2959. }
  2960. //-----------------------------------------------------------------------------
  2961. // Purpose: calculate instantaneous velocity in ips at a given point in the sequence's cycle
  2962. // Output: velocity vector, relative to identity orientation
  2963. // returns false if sequence is not a movement sequence
  2964. //-----------------------------------------------------------------------------
  2965. bool Studio_SeqVelocity( const CStudioHdr *pStudioHdr, int iSequence, float flCycle, const float poseParameter[], Vector &vecVelocity )
  2966. {
  2967. mstudioanimdesc_t *panim[4];
  2968. float weight[4];
  2969. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2970. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2971. vecVelocity.Init( );
  2972. bool found = false;
  2973. for (int i = 0; i < 4; i++)
  2974. {
  2975. if (weight[i])
  2976. {
  2977. Vector vecLocalVelocity;
  2978. if (Studio_AnimVelocity( panim[i], flCycle, vecLocalVelocity ))
  2979. {
  2980. vecVelocity = vecVelocity + vecLocalVelocity * weight[i];
  2981. found = true;
  2982. }
  2983. }
  2984. }
  2985. return found;
  2986. }
  2987. //-----------------------------------------------------------------------------
  2988. // Purpose: finds how much of an sequence to play to move given linear distance
  2989. //-----------------------------------------------------------------------------
  2990. float Studio_FindSeqDistance( const CStudioHdr *pStudioHdr, int iSequence, const float poseParameter[], float flDist )
  2991. {
  2992. mstudioanimdesc_t *panim[4];
  2993. float weight[4];
  2994. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  2995. Studio_SeqAnims( pStudioHdr, seqdesc, iSequence, poseParameter, panim, weight );
  2996. float flCycle = 0;
  2997. for (int i = 0; i < 4; i++)
  2998. {
  2999. if (weight[i])
  3000. {
  3001. float flLocalCycle = Studio_FindAnimDistance( panim[i], flDist );
  3002. flCycle = flCycle + flLocalCycle * weight[i];
  3003. }
  3004. }
  3005. return flCycle;
  3006. }
  3007. //-----------------------------------------------------------------------------
  3008. // Purpose: lookup attachment by name
  3009. //-----------------------------------------------------------------------------
  3010. int Studio_FindAttachment( const CStudioHdr *pStudioHdr, const char *pAttachmentName )
  3011. {
  3012. if ( pStudioHdr && pStudioHdr->SequencesAvailable() )
  3013. {
  3014. // Extract the bone index from the name
  3015. for (int i = 0; i < pStudioHdr->GetNumAttachments(); i++)
  3016. {
  3017. if (!stricmp(pAttachmentName,((CStudioHdr *)pStudioHdr)->pAttachment(i).pszName( )))
  3018. {
  3019. return i;
  3020. }
  3021. }
  3022. }
  3023. return -1;
  3024. }
  3025. //-----------------------------------------------------------------------------
  3026. // Purpose: lookup attachments by substring. Randomly return one of the matching attachments.
  3027. //-----------------------------------------------------------------------------
  3028. int Studio_FindRandomAttachment( const CStudioHdr *pStudioHdr, const char *pAttachmentName )
  3029. {
  3030. if ( pStudioHdr )
  3031. {
  3032. // First move them all matching attachments into a list
  3033. CUtlVector<int> matchingAttachments;
  3034. // Extract the bone index from the name
  3035. for (int i = 0; i < pStudioHdr->GetNumAttachments(); i++)
  3036. {
  3037. if ( strstr( ((CStudioHdr *)pStudioHdr)->pAttachment(i).pszName(), pAttachmentName ) )
  3038. {
  3039. matchingAttachments.AddToTail(i);
  3040. }
  3041. }
  3042. // Then randomly return one of the attachments
  3043. if ( matchingAttachments.Count() > 0 )
  3044. return matchingAttachments[ RandomInt( 0, matchingAttachments.Count()-1 ) ];
  3045. }
  3046. return -1;
  3047. }
  3048. //-----------------------------------------------------------------------------
  3049. // Purpose: lookup bone by name
  3050. //-----------------------------------------------------------------------------
  3051. int Studio_BoneIndexByName( const CStudioHdr *pStudioHdr, const char *pName )
  3052. {
  3053. // binary search for the bone matching pName
  3054. int start = 0, end = pStudioHdr->numbones()-1;
  3055. const byte *pBoneTable = pStudioHdr->GetBoneTableSortedByName();
  3056. const mstudiobone_t *pbones = pStudioHdr->pBone( 0 );
  3057. while (start <= end)
  3058. {
  3059. int mid = (start + end) >> 1;
  3060. int cmp = Q_stricmp( pbones[pBoneTable[mid]].pszName(), pName );
  3061. if ( cmp < 0 )
  3062. {
  3063. start = mid + 1;
  3064. }
  3065. else if ( cmp > 0 )
  3066. {
  3067. end = mid - 1;
  3068. }
  3069. else
  3070. {
  3071. return pBoneTable[mid];
  3072. }
  3073. }
  3074. return -1;
  3075. }
  3076. const char *Studio_GetDefaultSurfaceProps( CStudioHdr *pstudiohdr )
  3077. {
  3078. return pstudiohdr->pszSurfaceProp();
  3079. }
  3080. float Studio_GetMass( CStudioHdr *pstudiohdr )
  3081. {
  3082. return pstudiohdr->mass();
  3083. }
  3084. //-----------------------------------------------------------------------------
  3085. // Purpose: return pointer to sequence key value buffer
  3086. //-----------------------------------------------------------------------------
  3087. const char *Studio_GetKeyValueText( const CStudioHdr *pStudioHdr, int iSequence )
  3088. {
  3089. if (pStudioHdr && pStudioHdr->SequencesAvailable())
  3090. {
  3091. if (iSequence >= 0 && iSequence < pStudioHdr->GetNumSeq())
  3092. {
  3093. return ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence ).KeyValueText();
  3094. }
  3095. }
  3096. return NULL;
  3097. }
  3098. bool Studio_PrefetchSequence( const CStudioHdr *pStudioHdr, int iSequence )
  3099. {
  3100. bool pendingload = false;
  3101. mstudioseqdesc_t &seqdesc = ((CStudioHdr *)pStudioHdr)->pSeqdesc( iSequence );
  3102. int size0 = seqdesc.groupsize[ 0 ];
  3103. int size1 = seqdesc.groupsize[ 1 ];
  3104. for ( int i = 0; i < size0; ++i )
  3105. {
  3106. for ( int j = 0; j < size1; ++j )
  3107. {
  3108. mstudioanimdesc_t &animdesc = ((CStudioHdr *)pStudioHdr)->pAnimdesc( seqdesc.anim( i, j ) );
  3109. int iFrame = 0;
  3110. byte *panim = animdesc.pAnim( &iFrame );
  3111. if ( !panim )
  3112. {
  3113. pendingload = true;
  3114. }
  3115. }
  3116. }
  3117. // Everything for this sequence is resident?
  3118. return !pendingload;
  3119. }
  3120. //-----------------------------------------------------------------------------
  3121. // Purpose: Drive a flex controller from a component of a bone
  3122. //-----------------------------------------------------------------------------
  3123. void Studio_RunBoneFlexDrivers( float *pflFlexControllerWeights, const CStudioHdr *pStudioHdr, const Vector *pvPositions, const matrix3x4_t *pBoneToWorld, const matrix3x4_t &mRootToWorld )
  3124. {
  3125. bool bRootToWorldInvComputed = false;
  3126. matrix3x4_t mRootToWorldInv;
  3127. matrix3x4_t mParentInv;
  3128. matrix3x4_t mBoneLocal;
  3129. const int nBoneFlexDriverCount = pStudioHdr->BoneFlexDriverCount();
  3130. for ( int i = 0; i < nBoneFlexDriverCount; ++i )
  3131. {
  3132. const mstudioboneflexdriver_t *pBoneFlexDriver = pStudioHdr->BoneFlexDriver( i );
  3133. const mstudiobone_t *pStudioBone = pStudioHdr->pBone( pBoneFlexDriver->m_nBoneIndex );
  3134. const int nControllerCount = pBoneFlexDriver->m_nControlCount;
  3135. if ( pStudioBone->flags & BONE_USED_BY_BONE_MERGE )
  3136. {
  3137. // The local space version of the bone is not available if this is a bonemerged bone
  3138. // so do the slow computation of the local version of the bone from boneToWorld
  3139. if ( pStudioBone->parent < 0 )
  3140. {
  3141. if ( !bRootToWorldInvComputed )
  3142. {
  3143. MatrixInvert( mRootToWorld, mRootToWorldInv );
  3144. bRootToWorldInvComputed = true;
  3145. }
  3146. MatrixMultiply( mRootToWorldInv, pBoneToWorld[ pBoneFlexDriver->m_nBoneIndex ], mBoneLocal );
  3147. }
  3148. else
  3149. {
  3150. MatrixInvert( pBoneToWorld[ pStudioBone->parent ], mParentInv );
  3151. MatrixMultiply( mParentInv, pBoneToWorld[ pBoneFlexDriver->m_nBoneIndex ], mBoneLocal );
  3152. }
  3153. for ( int j = 0; j < nControllerCount; ++j )
  3154. {
  3155. const mstudioboneflexdrivercontrol_t *pController = pBoneFlexDriver->pBoneFlexDriverControl( j );
  3156. const mstudioflexcontroller_t *pFlexController = pStudioHdr->pFlexcontroller( static_cast< LocalFlexController_t >( pController->m_nFlexControllerIndex ) );
  3157. if ( pFlexController->localToGlobal < 0 )
  3158. continue;
  3159. Assert( pController->m_nFlexControllerIndex >= 0 && pController->m_nFlexControllerIndex < pStudioHdr->numflexcontrollers() );
  3160. Assert( pController->m_nBoneComponent >= 0 && pController->m_nBoneComponent <= 2 );
  3161. pflFlexControllerWeights[pFlexController->localToGlobal] =
  3162. RemapValClamped( mBoneLocal[pController->m_nBoneComponent][3], pController->m_flMin, pController->m_flMax, 0.0f, 1.0f );
  3163. }
  3164. }
  3165. else
  3166. {
  3167. // Use the local space version of the bone directly for non-bonemerged bones
  3168. const Vector &position = pvPositions[ pBoneFlexDriver->m_nBoneIndex ];
  3169. for ( int j = 0; j < nControllerCount; ++j )
  3170. {
  3171. const mstudioboneflexdrivercontrol_t *pController = pBoneFlexDriver->pBoneFlexDriverControl( j );
  3172. const mstudioflexcontroller_t *pFlexController = pStudioHdr->pFlexcontroller( static_cast< LocalFlexController_t >( pController->m_nFlexControllerIndex ) );
  3173. if ( pFlexController->localToGlobal < 0 )
  3174. continue;
  3175. Assert( pController->m_nFlexControllerIndex >= 0 && pController->m_nFlexControllerIndex < pStudioHdr->numflexcontrollers() );
  3176. Assert( pController->m_nBoneComponent >= 0 && pController->m_nBoneComponent <= 2 );
  3177. pflFlexControllerWeights[pFlexController->localToGlobal] =
  3178. RemapValClamped( position[pController->m_nBoneComponent], pController->m_flMin, pController->m_flMax, 0.0f, 1.0f );
  3179. }
  3180. }
  3181. }
  3182. }