Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2567 lines
73 KiB

  1. /*++
  2. Copyright (c) Microsoft Corporation. All rights reserved.
  3. Module Name:
  4. xnamathmisc.inl
  5. Abstract:
  6. XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions.
  7. --*/
  8. #if defined(_MSC_VER) && (_MSC_VER > 1000)
  9. #pragma once
  10. #endif
  11. #ifndef __XNAMATHMISC_INL__
  12. #define __XNAMATHMISC_INL__
  13. /****************************************************************************
  14. *
  15. * Quaternion
  16. *
  17. ****************************************************************************/
  18. //------------------------------------------------------------------------------
  19. // Comparison operations
  20. //------------------------------------------------------------------------------
  21. //------------------------------------------------------------------------------
  22. XMFINLINE BOOL XMQuaternionEqual
  23. (
  24. FXMVECTOR Q1,
  25. FXMVECTOR Q2
  26. )
  27. {
  28. return XMVector4Equal(Q1, Q2);
  29. }
  30. //------------------------------------------------------------------------------
  31. XMFINLINE BOOL XMQuaternionNotEqual
  32. (
  33. FXMVECTOR Q1,
  34. FXMVECTOR Q2
  35. )
  36. {
  37. return XMVector4NotEqual(Q1, Q2);
  38. }
  39. //------------------------------------------------------------------------------
  40. XMFINLINE BOOL XMQuaternionIsNaN
  41. (
  42. FXMVECTOR Q
  43. )
  44. {
  45. return XMVector4IsNaN(Q);
  46. }
  47. //------------------------------------------------------------------------------
  48. XMFINLINE BOOL XMQuaternionIsInfinite
  49. (
  50. FXMVECTOR Q
  51. )
  52. {
  53. return XMVector4IsInfinite(Q);
  54. }
  55. //------------------------------------------------------------------------------
// Tests whether Q is exactly the identity quaternion <0, 0, 0, 1>
// (g_XMIdentityR3). Exact bitwise comparison; no tolerance is applied.
XMFINLINE BOOL XMQuaternionIsIdentity
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVector4Equal(Q, g_XMIdentityR3.v);
#elif defined(_XM_SSE_INTRINSICS_)
    // Compare all four lanes at once; _mm_movemask_ps packs the per-lane
    // results into four bits, so 0x0f means every component matched.
    XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
    return (_mm_movemask_ps(vTemp)==0x0f) ? true : false;
#else // _XM_VMX128_INTRINSICS_
    // VMX128 (Xbox 360) implementation not present in this source drop.
#endif // _XM_VMX128_INTRINSICS_
}
  69. //------------------------------------------------------------------------------
  70. // Computation operations
  71. //------------------------------------------------------------------------------
  72. //------------------------------------------------------------------------------
  73. XMFINLINE XMVECTOR XMQuaternionDot
  74. (
  75. FXMVECTOR Q1,
  76. FXMVECTOR Q2
  77. )
  78. {
  79. return XMVector4Dot(Q1, Q2);
  80. }
  81. //------------------------------------------------------------------------------
// Computes the product of two quaternions. Per the XNA Math / DirectXMath
// documentation this returns Q2*Q1 (i.e. the rotation Q1 followed by the
// rotation Q2) -- note the reversed order relative to the argument list.
XMFINLINE XMVECTOR XMQuaternionMultiply
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR NegativeQ1;
    XMVECTOR Q2X;
    XMVECTOR Q2Y;
    XMVECTOR Q2Z;
    XMVECTOR Q2W;
    XMVECTOR Q1WZYX;
    XMVECTOR Q1ZWXY;
    XMVECTOR Q1YXWZ;
    XMVECTOR Result;
    // Permute controls select components from Q1 (0*) or -Q1 (1*) to build
    // the sign-flipped shuffles needed by the Hamilton product expansion.
    CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
    CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
    CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
    NegativeQ1 = XMVectorNegate(Q1);
    // Broadcast each component of Q2 so the product can be expressed as
    // four fused multiply-adds against shuffled copies of Q1.
    Q2W = XMVectorSplatW(Q2);
    Q2X = XMVectorSplatX(Q2);
    Q2Y = XMVectorSplatY(Q2);
    Q2Z = XMVectorSplatZ(Q2);
    Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
    Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
    Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);
    // Result = Q1*Q2.w + Q1wzyx*Q2.x + Q1zwxy*Q2.y + Q1yxwz*Q2.z
    Result = XMVectorMultiply(Q1, Q2W);
    Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
    Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
    Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Sign masks applied by multiplication instead of permuting against -Q1.
    static CONST XMVECTORF32 g_ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
    static CONST XMVECTORF32 g_ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
    static CONST XMVECTORF32 g_ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
    // Copy to SSE registers and use as few as possible for x86
    XMVECTOR Q2X = Q2;
    XMVECTOR Q2Y = Q2;
    XMVECTOR Q2Z = Q2;
    XMVECTOR vResult = Q2;
    // Splat with one instruction
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
    Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
    Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
    // Retire Q1 and perform Q1*Q2W
    vResult = _mm_mul_ps(vResult,Q1);
    XMVECTOR Q1Shuffle = Q1;
    // Shuffle the copies of Q1 (successive shuffles walk through the
    // WZYX -> ZWXY -> YXWZ component orders).
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Mul by Q1WZYX
    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
    // Flip the signs on y and z
    Q2X = _mm_mul_ps(Q2X,g_ControlWZYX);
    // Mul by Q1ZWXY
    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
    Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Flip the signs on z and w
    Q2Y = _mm_mul_ps(Q2Y,g_ControlZWXY);
    // Mul by Q1YXWZ
    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
    vResult = _mm_add_ps(vResult,Q2X);
    // Flip the signs on x and w
    Q2Z = _mm_mul_ps(Q2Z,g_ControlYXWZ);
    Q2Y = _mm_add_ps(Q2Y,Q2Z);
    vResult = _mm_add_ps(vResult,Q2Y);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  154. //------------------------------------------------------------------------------
  155. XMFINLINE XMVECTOR XMQuaternionLengthSq
  156. (
  157. FXMVECTOR Q
  158. )
  159. {
  160. return XMVector4LengthSq(Q);
  161. }
  162. //------------------------------------------------------------------------------
  163. XMFINLINE XMVECTOR XMQuaternionReciprocalLength
  164. (
  165. FXMVECTOR Q
  166. )
  167. {
  168. return XMVector4ReciprocalLength(Q);
  169. }
  170. //------------------------------------------------------------------------------
  171. XMFINLINE XMVECTOR XMQuaternionLength
  172. (
  173. FXMVECTOR Q
  174. )
  175. {
  176. return XMVector4Length(Q);
  177. }
  178. //------------------------------------------------------------------------------
  179. XMFINLINE XMVECTOR XMQuaternionNormalizeEst
  180. (
  181. FXMVECTOR Q
  182. )
  183. {
  184. return XMVector4NormalizeEst(Q);
  185. }
  186. //------------------------------------------------------------------------------
  187. XMFINLINE XMVECTOR XMQuaternionNormalize
  188. (
  189. FXMVECTOR Q
  190. )
  191. {
  192. return XMVector4Normalize(Q);
  193. }
  194. //------------------------------------------------------------------------------
// Returns the conjugate of Q: the vector part (x, y, z) is negated and the
// scalar part w is preserved. For a unit quaternion the conjugate equals
// the inverse.
XMFINLINE XMVECTOR XMQuaternionConjugate
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result = {
        -Q.x,
        -Q.y,
        -Q.z,
        Q.w
    };
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Multiply by {-1,-1,-1,1} to flip x/y/z signs in a single instruction.
    static const XMVECTORF32 g_XMNegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
    XMVECTOR Result = _mm_mul_ps(Q,g_XMNegativeOne3);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  215. //------------------------------------------------------------------------------
// Returns the inverse of Q: conjugate(Q) / |Q|^2. If the squared length is
// at or below g_XMEpsilon the quaternion is treated as degenerate and the
// zero vector is returned instead.
XMFINLINE XMVECTOR XMQuaternionInverse
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Conjugate;
    XMVECTOR L;
    XMVECTOR Control;
    XMVECTOR Result;
    CONST XMVECTOR Zero = XMVectorZero();
    L = XMVector4LengthSq(Q);
    Conjugate = XMQuaternionConjugate(Q);
    // Control marks lanes where |Q|^2 <= epsilon (degenerate input).
    Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
    L = XMVectorReciprocal(L);
    Result = XMVectorMultiply(Conjugate, L);
    // Replace the (possibly inf/NaN) result with zero for degenerate input.
    Result = XMVectorSelect(Result, Zero, Control);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Conjugate;
    XMVECTOR L;
    XMVECTOR Control;
    XMVECTOR Result;
    XMVECTOR Zero = XMVectorZero();
    L = XMVector4LengthSq(Q);
    Conjugate = XMQuaternionConjugate(Q);
    Control = XMVectorLessOrEqual(L, g_XMEpsilon);
    // Direct divide instead of reciprocal-multiply; degenerate lanes are
    // overwritten by the select below.
    Result = _mm_div_ps(Conjugate,L);
    Result = XMVectorSelect(Result, Zero, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  249. //------------------------------------------------------------------------------
// Natural logarithm of a unit quaternion: the result has a zero w component
// and a vector part scaled by theta/sin(theta) where theta = acos(w).
// If |w| is at or beyond 1 - 1e-5 (theta ~ 0, sin(theta) ~ 0) the scaling
// is skipped and the vector part is returned unscaled to avoid a blow-up.
// NOTE(review): input is presumably expected to be a unit quaternion --
// the code does not normalize or assert this.
XMFINLINE XMVECTOR XMQuaternionLn
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Q0;
    XMVECTOR QW;
    XMVECTOR Theta;
    XMVECTOR SinTheta;
    XMVECTOR S;
    XMVECTOR ControlW;
    XMVECTOR Result;
    static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    QW = XMVectorSplatW(Q);
    // Q0 = <Q.x, Q.y, Q.z, 0>: zero out the scalar part.
    Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
    // ControlW is set where |w| < 1 - epsilon, i.e. where the scaled
    // result is numerically safe to use.
    ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
    Theta = XMVectorACos(QW);
    SinTheta = XMVectorSin(Theta);
    S = XMVectorReciprocal(SinTheta);
    S = XMVectorMultiply(Theta, S);
    Result = XMVectorMultiply(Q0, S);
    Result = XMVectorSelect(Q0, Result, ControlW);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
    // Get W only
    XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
    // W = 0
    XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
    // Use W if within bounds (-(1-eps) <= w <= 1-eps)
    XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
    XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
    ControlW = _mm_and_ps(ControlW,vTemp2);
    // Get theta
    XMVECTOR vTheta = XMVectorACos(QW);
    // Get Sine of theta
    vTemp2 = XMVectorSin(vTheta);
    // theta/sine of theta
    vTheta = _mm_div_ps(vTheta,vTemp2);
    // Here's the answer
    vTheta = _mm_mul_ps(vTheta,Q0);
    // Was W in bounds? If not, return input as is
    vTheta = XMVectorSelect(Q0,vTheta,ControlW);
    return vTheta;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  299. //------------------------------------------------------------------------------
// Exponential of a pure quaternion (w is ignored as input): with
// theta = |<x,y,z>|, returns <xyz * sin(theta)/theta, cos(theta)>.
// When theta is within g_XMEpsilon of zero, sin(theta)/theta is
// ill-conditioned, so the vector part of Q is passed through unscaled.
XMFINLINE XMVECTOR XMQuaternionExp
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Theta;
    XMVECTOR SinTheta;
    XMVECTOR CosTheta;
    XMVECTOR S;
    XMVECTOR Control;
    XMVECTOR Zero;
    XMVECTOR Result;
    Theta = XMVector3Length(Q);
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);
    // S = sin(theta) / theta
    S = XMVectorReciprocal(Theta);
    S = XMVectorMultiply(SinTheta, S);
    Result = XMVectorMultiply(Q, S);
    Zero = XMVectorZero();
    // Where theta ~ 0, use Q's components directly instead of the scaled
    // (possibly NaN) result.
    Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
    Result = XMVectorSelect(Result, Q, Control);
    // Force w = cos(theta).
    Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Theta;
    XMVECTOR SinTheta;
    XMVECTOR CosTheta;
    XMVECTOR S;
    XMVECTOR Control;
    XMVECTOR Zero;
    XMVECTOR Result;
    Theta = XMVector3Length(Q);
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);
    S = _mm_div_ps(SinTheta,Theta);
    Result = _mm_mul_ps(Q, S);
    Zero = XMVectorZero();
    Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
    Result = XMVectorSelect(Result,Q,Control);
    // Merge the xyz of Result with the w of CosTheta via masked or.
    Result = _mm_and_ps(Result,g_XMMask3);
    CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
    Result = _mm_or_ps(Result,CosTheta);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  345. //------------------------------------------------------------------------------
  346. XMINLINE XMVECTOR XMQuaternionSlerp
  347. (
  348. FXMVECTOR Q0,
  349. FXMVECTOR Q1,
  350. FLOAT t
  351. )
  352. {
  353. XMVECTOR T = XMVectorReplicate(t);
  354. return XMQuaternionSlerpV(Q0, Q1, T);
  355. }
  356. //------------------------------------------------------------------------------
// Spherical linear interpolation between Q0 and Q1 with the interpolation
// factor replicated in all components of T (asserted below).
// Result = Q0 * sin((1-t)*Omega)/sin(Omega) + Q1 * sin(t*Omega)/sin(Omega),
// where cos(Omega) = dot(Q0, Q1). The dot's sign is folded into Q1 so the
// interpolation always takes the shorter arc; when cos(Omega) is within
// 1e-5 of 1 the quaternions are nearly parallel and plain lerp weights
// (1-t, t) are used instead to avoid dividing by sin(Omega) ~ 0.
XMINLINE XMVECTOR XMQuaternionSlerpV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
    XMVECTOR Omega;
    XMVECTOR CosOmega;
    XMVECTOR SinOmega;
    XMVECTOR InvSinOmega;
    XMVECTOR V01;
    XMVECTOR C1000;
    XMVECTOR SignMask;
    XMVECTOR S0;
    XMVECTOR S1;
    XMVECTOR Sign;
    XMVECTOR Control;
    XMVECTOR Result;
    XMVECTOR Zero;
    CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    XMASSERT((T.v[1] == T.v[0]) && (T.v[2] == T.v[0]) && (T.v[3] == T.v[0]));
    CosOmega = XMQuaternionDot(Q0, Q1);
    Zero = XMVectorZero();
    // Take the shorter arc: if the dot is negative, interpolate toward -Q1.
    Control = XMVectorLess(CosOmega, Zero);
    Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
    CosOmega = XMVectorMultiply(CosOmega, Sign);
    // Control now flags "quaternions far enough apart for true slerp".
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);
    SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
    SinOmega = XMVectorSqrt(SinOmega);
    Omega = XMVectorATan2(SinOmega, CosOmega);
    // Build V01 = <1-t, t, 0, 0> with bit tricks: shift T left, negate the
    // first lane via the shifted sign mask, then add <1,0,0,0>.
    SignMask = XMVectorSplatSignMask();
    C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
    V01 = XMVectorShiftLeft(T, Zero, 2);
    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
    V01 = XMVectorXorInt(V01, SignMask);
    V01 = XMVectorAdd(C1000, V01);
    InvSinOmega = XMVectorReciprocal(SinOmega);
    // S0 = <sin((1-t)*Omega), sin(t*Omega), ...> / sin(Omega); fall back to
    // the raw lerp weights in V01 when the angle is tiny.
    S0 = XMVectorMultiply(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = XMVectorMultiply(S0, InvSinOmega);
    S0 = XMVectorSelect(V01, S0, Control);
    S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);
    // Re-apply the hemisphere sign to Q1's weight.
    S1 = XMVectorMultiply(S1, Sign);
    Result = XMVectorMultiply(Q0, S0);
    Result = XMVectorMultiplyAdd(Q1, S1, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
    XMVECTOR Omega;
    XMVECTOR CosOmega;
    XMVECTOR SinOmega;
    XMVECTOR V01;
    XMVECTOR S0;
    XMVECTOR S1;
    XMVECTOR Sign;
    XMVECTOR Control;
    XMVECTOR Result;
    XMVECTOR Zero;
    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    static const XMVECTORI32 g_XMSignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
    static const XMVECTORI32 g_XMMaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
    XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
    CosOmega = XMQuaternionDot(Q0, Q1);
    Zero = XMVectorZero();
    // Shorter-arc handling, as in the scalar path.
    Control = XMVectorLess(CosOmega, Zero);
    Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
    CosOmega = _mm_mul_ps(CosOmega, Sign);
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);
    // sin(Omega) = sqrt(1 - cos^2(Omega))
    SinOmega = _mm_mul_ps(CosOmega,CosOmega);
    SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
    SinOmega = _mm_sqrt_ps(SinOmega);
    Omega = XMVectorATan2(SinOmega, CosOmega);
    // Build V01 = <1-t, t, 0, 0>: shuffle t into x, mask to xy, flip x's
    // sign, then add <1, 0, 0, 0>.
    V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
    V01 = _mm_and_ps(V01,g_XMMaskXY);
    V01 = _mm_xor_ps(V01,g_XMSignMask2);
    V01 = _mm_add_ps(g_XMIdentityR0, V01);
    S0 = _mm_mul_ps(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = _mm_div_ps(S0, SinOmega);
    // Nearly-parallel quaternions fall back to the lerp weights in V01.
    S0 = XMVectorSelect(V01, S0, Control);
    S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);
    S1 = _mm_mul_ps(S1, Sign);
    Result = _mm_mul_ps(Q0, S0);
    S1 = _mm_mul_ps(S1, Q1);
    Result = _mm_add_ps(Result,S1);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  451. //------------------------------------------------------------------------------
  452. XMFINLINE XMVECTOR XMQuaternionSquad
  453. (
  454. FXMVECTOR Q0,
  455. FXMVECTOR Q1,
  456. FXMVECTOR Q2,
  457. CXMVECTOR Q3,
  458. FLOAT t
  459. )
  460. {
  461. XMVECTOR T = XMVectorReplicate(t);
  462. return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
  463. }
  464. //------------------------------------------------------------------------------
// Spherical quadrangle (squad) interpolation:
//   Result = slerp(slerp(Q0, Q3, t), slerp(Q1, Q2, t), 2t(1-t))
// The interpolation factor must be replicated in all components of T
// (asserted below). Q0..Q3 are typically the control points produced by
// XMQuaternionSquadSetup.
XMFINLINE XMVECTOR XMQuaternionSquadV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    CXMVECTOR Q3,
    CXMVECTOR T
)
{
    XMVECTOR Q03;
    XMVECTOR Q12;
    XMVECTOR TP;
    XMVECTOR Two;
    XMVECTOR Result;
    XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
    TP = T;
    Two = XMVectorSplatConstant(2, 0);
    Q03 = XMQuaternionSlerpV(Q0, Q3, T);
    Q12 = XMQuaternionSlerpV(Q1, Q2, T);
    // TP = 2 * (t - t*t) = 2t(1-t): the blend factor for the outer slerp.
    TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
    TP = XMVectorMultiply(TP, Two);
    Result = XMQuaternionSlerpV(Q03, Q12, TP);
    return Result;
}
  489. //------------------------------------------------------------------------------
// Computes the control points (*pA, *pB, *pC) needed by XMQuaternionSquad
// to interpolate smoothly across the key quaternions Q0..Q3.
// Adjacent keys are sign-flipped onto the same hemisphere first: for each
// pair, if |Qi + Qj|^2 < |Qi - Qj|^2 the second key is negated so the
// interpolation takes the shorter arc. The inner control points are then
// A = Q1 * exp(-(ln(InvQ1*SQ0) + ln(InvQ1*SQ2)) / 4)
// B = SQ2 * exp(-(ln(InvQ2*Q1) + ln(InvQ2*SQ3)) / 4), and C = SQ2.
XMINLINE VOID XMQuaternionSquadSetup
(
    XMVECTOR* pA,
    XMVECTOR* pB,
    XMVECTOR* pC,
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    CXMVECTOR Q3
)
{
    XMVECTOR SQ0, SQ2, SQ3;
    XMVECTOR InvQ1, InvQ2;
    XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
    XMVECTOR ExpQ02, ExpQ13;
    XMVECTOR LS01, LS12, LS23;
    XMVECTOR LD01, LD12, LD23;
    XMVECTOR Control0, Control1, Control2;
    XMVECTOR NegativeOneQuarter;
    XMASSERT(pA);
    XMASSERT(pB);
    XMASSERT(pC);
    // Hemisphere test for Q1/Q2: negate Q2 if the sum is shorter than the
    // difference (i.e. the pair straddles hemispheres).
    LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
    LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
    SQ2 = XMVectorNegate(Q2);
    Control1 = XMVectorLess(LS12, LD12);
    SQ2 = XMVectorSelect(Q2, SQ2, Control1);
    // Same test for Q0/Q1 and for (sign-adjusted) SQ2/Q3.
    LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
    LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
    SQ0 = XMVectorNegate(Q0);
    LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
    LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
    SQ3 = XMVectorNegate(Q3);
    Control0 = XMVectorLess(LS01, LD01);
    Control2 = XMVectorLess(LS23, LD23);
    SQ0 = XMVectorSelect(Q0, SQ0, Control0);
    SQ3 = XMVectorSelect(Q3, SQ3, Control2);
    // Relative rotations expressed in Q1's / SQ2's local frames.
    InvQ1 = XMQuaternionInverse(Q1);
    InvQ2 = XMQuaternionInverse(SQ2);
    LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
    LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
    LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
    LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
    // NegativeOneQuarter = -1 * 2^-2 = -0.25
    NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
    ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
    ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
    ExpQ02 = XMQuaternionExp(ExpQ02);
    ExpQ13 = XMQuaternionExp(ExpQ13);
    *pA = XMQuaternionMultiply(Q1, ExpQ02);
    *pB = XMQuaternionMultiply(SQ2, ExpQ13);
    *pC = SQ2;
}
  542. //------------------------------------------------------------------------------
  543. XMFINLINE XMVECTOR XMQuaternionBaryCentric
  544. (
  545. FXMVECTOR Q0,
  546. FXMVECTOR Q1,
  547. FXMVECTOR Q2,
  548. FLOAT f,
  549. FLOAT g
  550. )
  551. {
  552. XMVECTOR Q01;
  553. XMVECTOR Q02;
  554. FLOAT s;
  555. XMVECTOR Result;
  556. s = f + g;
  557. if (s < 0.00001f && s > -0.00001f)
  558. {
  559. Result = Q0;
  560. }
  561. else
  562. {
  563. Q01 = XMQuaternionSlerp(Q0, Q1, s);
  564. Q02 = XMQuaternionSlerp(Q0, Q2, s);
  565. Result = XMQuaternionSlerp(Q01, Q02, g / s);
  566. }
  567. return Result;
  568. }
  569. //------------------------------------------------------------------------------
// Vectorized barycentric interpolation over the spherical triangle
// (Q0, Q1, Q2). F and G must each have their factor replicated in all
// components (asserted below). When |f + g| is within 2^-16 of zero the
// result degenerates to Q0; otherwise two edge slerps by s = f + g are
// followed by a cross slerp by g/s.
XMFINLINE XMVECTOR XMQuaternionBaryCentricV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    CXMVECTOR F,
    CXMVECTOR G
)
{
    XMVECTOR Q01;
    XMVECTOR Q02;
    XMVECTOR S, GS;
    XMVECTOR Epsilon;
    XMVECTOR Result;
    XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
    XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
    // Epsilon = 1 * 2^-16
    Epsilon = XMVectorSplatConstant(1, 16);
    S = XMVectorAdd(F, G);
    if (XMVector4InBounds(S, Epsilon))
    {
        Result = Q0;
    }
    else
    {
        Q01 = XMQuaternionSlerpV(Q0, Q1, S);
        Q02 = XMQuaternionSlerpV(Q0, Q2, S);
        // GS = g / s, computed as g * (1/s).
        GS = XMVectorReciprocal(S);
        GS = XMVectorMultiply(G, GS);
        Result = XMQuaternionSlerpV(Q01, Q02, GS);
    }
    return Result;
}
  602. //------------------------------------------------------------------------------
  603. // Transformation operations
  604. //------------------------------------------------------------------------------
  605. //------------------------------------------------------------------------------
// Returns the identity quaternion <0, 0, 0, 1> (g_XMIdentityR3).
XMFINLINE XMVECTOR XMQuaternionIdentity()
{
#if defined(_XM_NO_INTRINSICS_)
    return g_XMIdentityR3.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMIdentityR3;
#else // _XM_VMX128_INTRINSICS_
    // VMX128 (Xbox 360) implementation not present in this source drop.
#endif // _XM_VMX128_INTRINSICS_
}
  615. //------------------------------------------------------------------------------
  616. XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
  617. (
  618. FLOAT Pitch,
  619. FLOAT Yaw,
  620. FLOAT Roll
  621. )
  622. {
  623. XMVECTOR Angles;
  624. XMVECTOR Q;
  625. Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
  626. Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
  627. return Q;
  628. }
  629. //------------------------------------------------------------------------------
// Builds a rotation quaternion from Euler angles packed as
// <Pitch, Yaw, Roll, 0> (radians). Half-angle sines and cosines are
// permuted into per-axis vectors and combined with a sign vector so the
// whole product is evaluated with vector multiplies and one multiply-add.
XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
(
    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Q, Q0, Q1;
    XMVECTOR P0, P1, Y0, Y1, R0, R1;
    XMVECTOR HalfAngles;
    XMVECTOR SinAngles, CosAngles;
    // Each control selects one axis's sin (0*) lane and fills the rest
    // from the cos (1*) vector, or vice versa when arguments are swapped.
    static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
    static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
    static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
    static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f};
    HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
    P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
    Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
    R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
    P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
    Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
    R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
    // Q = P0*Y0*R0 + Sign*P1*Y1*R1
    Q1 = XMVectorMultiply(P1, Sign);
    Q0 = XMVectorMultiply(P0, Y0);
    Q1 = XMVectorMultiply(Q1, Y1);
    Q0 = XMVectorMultiply(Q0, R0);
    Q = XMVectorMultiplyAdd(Q1, R1, Q0);
    return Q;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Q, Q0, Q1;
    XMVECTOR P0, P1, Y0, Y1, R0, R1;
    XMVECTOR HalfAngles;
    XMVECTOR SinAngles, CosAngles;
    static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
    static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
    static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
    static CONST XMVECTORF32 g_XMSign = {1.0f, -1.0f, -1.0f, 1.0f};
    HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
    P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
    Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
    R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
    P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
    Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
    R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
    // Q = P0*Y0*R0 + g_XMSign*P1*Y1*R1 (multiply-add spelled out as
    // separate mul and add because SSE2 has no fused form here).
    Q1 = _mm_mul_ps(P1, g_XMSign);
    Q0 = _mm_mul_ps(P0, Y0);
    Q1 = _mm_mul_ps(Q1, Y1);
    Q0 = _mm_mul_ps(Q0, R0);
    Q = _mm_mul_ps(Q1, R1);
    Q = _mm_add_ps(Q,Q0);
    return Q;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  685. //------------------------------------------------------------------------------
// Builds a rotation quaternion from a rotation axis and an angle (radians).
// The axis is assumed to be already normalized (no normalization or assert
// is performed here -- see XMQuaternionRotationAxis for the checked form).
// Result = <axis * sin(Angle/2), cos(Angle/2)>.
XMFINLINE XMVECTOR XMQuaternionRotationNormal
(
    FXMVECTOR NormalAxis,
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Q;
    XMVECTOR N;
    XMVECTOR Scale;
    // N = <axis.x, axis.y, axis.z, 1>
    N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
    // Scale = <sin, sin, sin, cos> of the half angle.
    XMScalarSinCos(&Scale.v[2], &Scale.v[3], 0.5f * Angle);
    Scale.v[0] = Scale.v[1] = Scale.v[2];
    Q = XMVectorMultiply(N, Scale);
    return Q;
#elif defined(_XM_SSE_INTRINSICS_)
    // N = <axis.x, axis.y, axis.z, 1>: mask off w, then or in identity's w.
    XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
    N = _mm_or_ps(N,g_XMIdentityR3);
    XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
    XMVECTOR vSine;
    XMVECTOR vCosine;
    XMVectorSinCos(&vSine,&vCosine,Scale);
    // Scale = <sin, sin, sin, cos> assembled with masked or.
    Scale = _mm_and_ps(vSine,g_XMMask3);
    vCosine = _mm_and_ps(vCosine,g_XMMaskW);
    Scale = _mm_or_ps(Scale,vCosine);
    N = _mm_mul_ps(N,Scale);
    return N;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  716. //------------------------------------------------------------------------------
// Builds a rotation quaternion from an arbitrary (non-zero, finite) axis
// and an angle (radians). Normalizes the axis, then defers to
// XMQuaternionRotationNormal. Both compile-time paths are intentionally
// identical; only the VMX128 branch (stripped here) differed.
XMFINLINE XMVECTOR XMQuaternionRotationAxis
(
    FXMVECTOR Axis,
    FLOAT Angle
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Normal;
    XMVECTOR Q;
    // A zero or infinite axis cannot be normalized meaningfully.
    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
    XMASSERT(!XMVector3IsInfinite(Axis));
    Normal = XMVector3Normalize(Axis);
    Q = XMQuaternionRotationNormal(Normal, Angle);
    return Q;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Normal;
    XMVECTOR Q;
    XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
    XMASSERT(!XMVector3IsInfinite(Axis));
    Normal = XMVector3Normalize(Axis);
    Q = XMQuaternionRotationNormal(Normal, Angle);
    return Q;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  742. //------------------------------------------------------------------------------
  743. XMINLINE XMVECTOR XMQuaternionRotationMatrix
  744. (
  745. CXMMATRIX M
  746. )
  747. {
  748. #if defined(_XM_NO_INTRINSICS_)
  749. XMVECTOR Q0, Q1, Q2;
  750. XMVECTOR M00, M11, M22;
  751. XMVECTOR CQ0, CQ1, C;
  752. XMVECTOR CX, CY, CZ, CW;
  753. XMVECTOR SQ1, Scale;
  754. XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select;
  755. XMVECTOR A, B, P;
  756. XMVECTOR PermuteSplat, PermuteSplatT;
  757. XMVECTOR SignB, SignBT;
  758. XMVECTOR PermuteControl, PermuteControlT;
  759. XMVECTOR Zero;
  760. XMVECTOR Result;
  761. static CONST XMVECTOR OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
  762. static CONST XMVECTOR SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
  763. static CONST XMVECTOR SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
  764. static CONST XMVECTOR SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
  765. static CONST XMVECTOR SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
  766. static CONST XMVECTOR SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
  767. static CONST XMVECTOR SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
  768. static CONST XMVECTOR SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f};
  769. static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
  770. static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
  771. static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
  772. static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
  773. static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
  774. static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
  775. static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
  776. static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
  777. static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
  778. static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
  779. static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
  780. static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
  781. static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
  782. M00 = XMVectorSplatX(M.r[0]);
  783. M11 = XMVectorSplatY(M.r[1]);
  784. M22 = XMVectorSplatZ(M.r[2]);
  785. Q0 = XMVectorMultiply(SignPNNP, M00);
  786. Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0);
  787. Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0);
  788. Q1 = XMVectorAdd(Q0, g_XMOne.v);
  789. Rsq = XMVectorReciprocalSqrt(Q1);
  790. Zero = XMVectorZero();
  791. VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity.v);
  792. VEqualsZero = XMVectorEqual(Q1, Zero);
  793. Sqrt = XMVectorMultiply(Q1, Rsq);
  794. Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
  795. Q1 = XMVectorSelect(Q1, Sqrt, Select);
  796. Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);
  797. SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);
  798. CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
  799. CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W.v);
  800. C = XMVectorGreaterOrEqual(CQ0, CQ1);
  801. CX = XMVectorSplatX(C);
  802. CY = XMVectorSplatY(C);
  803. CZ = XMVectorSplatZ(C);
  804. CW = XMVectorSplatW(C);
  805. PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
  806. SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ);
  807. PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);
  808. PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
  809. SignB = XMVectorSelect(SignB, SignNPPP, CX);
  810. PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);
  811. PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
  812. SignBT = XMVectorSelect(SignB, SignPNPP, CY);
  813. PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);
  814. PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
  815. SignB = XMVectorSelect(SignB, SignBT, CX);
  816. PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
  817. PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
  818. SignB = XMVectorSelect(SignB, SignNNNX, CW);
  819. PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);
  820. Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
  821. P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21}
  822. A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03}
  823. B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03}
  824. Q2 = XMVectorMultiplyAdd(SignB, B, A);
  825. Q2 = XMVectorMultiply(Q2, Scale);
  826. Result = XMVectorPermute(Q1, Q2, PermuteControl);
  827. return Result;
  828. #elif defined(_XM_SSE_INTRINSICS_)
  829. XMVECTOR Q0, Q1, Q2;
  830. XMVECTOR M00, M11, M22;
  831. XMVECTOR CQ0, CQ1, C;
  832. XMVECTOR CX, CY, CZ, CW;
  833. XMVECTOR SQ1, Scale;
  834. XMVECTOR Rsq, Sqrt, VEqualsInfinity, VEqualsZero, Select;
  835. XMVECTOR A, B, P;
  836. XMVECTOR PermuteSplat, PermuteSplatT;
  837. XMVECTOR SignB, SignBT;
  838. XMVECTOR PermuteControl, PermuteControlT;
  839. XMVECTOR Zero;
  840. XMVECTOR Result;
  841. static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
  842. static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
  843. static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
  844. static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
  845. static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
  846. static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
  847. static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
  848. static CONST XMVECTORF32 SignNNNX = {-1.0f, -1.0f, -1.0f, 2.0e-126f};
  849. static CONST XMVECTORI32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
  850. static CONST XMVECTORI32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
  851. static CONST XMVECTORI32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
  852. static CONST XMVECTORI32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
  853. static CONST XMVECTORI32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
  854. static CONST XMVECTORI32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
  855. static CONST XMVECTORI32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
  856. static CONST XMVECTORI32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
  857. static CONST XMVECTORI32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
  858. static CONST XMVECTORI32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
  859. static CONST XMVECTORI32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
  860. static CONST XMVECTORI32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
  861. static CONST XMVECTORI32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
  862. M00 = XMVectorSplatX(M.r[0]);
  863. M11 = XMVectorSplatY(M.r[1]);
  864. M22 = XMVectorSplatZ(M.r[2]);
  865. Q0 = XMVectorMultiply(SignPNNP, M00);
  866. Q0 = XMVectorMultiplyAdd(SignNPNP, M11, Q0);
  867. Q0 = XMVectorMultiplyAdd(SignNNPP, M22, Q0);
  868. Q1 = XMVectorAdd(Q0, g_XMOne);
  869. Rsq = XMVectorReciprocalSqrt(Q1);
  870. Zero = XMVectorZero();
  871. VEqualsInfinity = XMVectorEqualInt(Q1, g_XMInfinity);
  872. VEqualsZero = XMVectorEqual(Q1, Zero);
  873. Sqrt = XMVectorMultiply(Q1, Rsq);
  874. Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
  875. Q1 = XMVectorSelect(Q1, Sqrt, Select);
  876. Q1 = XMVectorMultiply(Q1, g_XMOneHalf);
  877. SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf);
  878. CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W);
  879. CQ1 = XMVectorPermute(Q0, SignNNNX, Permute0Y0Z0Z1W);
  880. C = XMVectorGreaterOrEqual(CQ0, CQ1);
  881. CX = XMVectorSplatX(C);
  882. CY = XMVectorSplatY(C);
  883. CZ = XMVectorSplatZ(C);
  884. CW = XMVectorSplatW(C);
  885. PermuteSplat = XMVectorSelect(SplatZ, SplatY, CZ);
  886. SignB = XMVectorSelect(SignNPPP, SignPPNP, CZ);
  887. PermuteControl = XMVectorSelect(Permute2, Permute1, CZ);
  888. PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ, CX);
  889. SignB = XMVectorSelect(SignB, SignNPPP, CX);
  890. PermuteControl = XMVectorSelect(PermuteControl, Permute2, CX);
  891. PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX, CY);
  892. SignBT = XMVectorSelect(SignB, SignPNPP, CY);
  893. PermuteControlT = XMVectorSelect(PermuteControl,Permute0, CY);
  894. PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
  895. SignB = XMVectorSelect(SignB, SignBT, CX);
  896. PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
  897. PermuteSplat = XMVectorSelect(PermuteSplat,SplatW, CW);
  898. SignB = XMVectorSelect(SignB, SignNNNX, CW);
  899. PermuteControl = XMVectorSelect(PermuteControl,Permute3, CW);
  900. Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
  901. P = XMVectorPermute(M.r[1], M.r[2],PermuteC); // {M10, M12, M20, M21}
  902. A = XMVectorPermute(M.r[0], P, PermuteA); // {M01, M12, M20, M03}
  903. B = XMVectorPermute(M.r[0], P, PermuteB); // {M10, M21, M02, M03}
  904. Q2 = XMVectorMultiplyAdd(SignB, B, A);
  905. Q2 = XMVectorMultiply(Q2, Scale);
  906. Result = XMVectorPermute(Q1, Q2, PermuteControl);
  907. return Result;
  908. #else // _XM_VMX128_INTRINSICS_
  909. #endif // _XM_VMX128_INTRINSICS_
  910. }
  911. //------------------------------------------------------------------------------
  912. // Conversion operations
  913. //------------------------------------------------------------------------------
  914. //------------------------------------------------------------------------------
// Decomposes a rotation quaternion Q = (x, y, z, w) into an axis and an
// angle in radians. The returned *pAxis is Q itself (its xyz part is
// parallel to the rotation axis; it is NOT normalized here), and the angle
// is recovered from the scalar part via angle = 2 * acos(w).
XMFINLINE VOID XMQuaternionToAxisAngle
(
    XMVECTOR* pAxis,
    FLOAT* pAngle,
    FXMVECTOR Q
)
{
    XMASSERT(pAxis);
    XMASSERT(pAngle);

    // The quaternion's vector part lies along the rotation axis.
    *pAxis = Q;

#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // SSE build uses the CRT acosf directly on the extracted w component.
    *pAngle = 2.0f * acosf(XMVectorGetW(Q));
#else
    // Other builds use the library's scalar arc-cosine approximation.
    *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
#endif
}
  931. /****************************************************************************
  932. *
  933. * Plane
  934. *
  935. ****************************************************************************/
  936. //------------------------------------------------------------------------------
  937. // Comparison operations
  938. //------------------------------------------------------------------------------
  939. //------------------------------------------------------------------------------
  940. XMFINLINE BOOL XMPlaneEqual
  941. (
  942. FXMVECTOR P1,
  943. FXMVECTOR P2
  944. )
  945. {
  946. return XMVector4Equal(P1, P2);
  947. }
  948. //------------------------------------------------------------------------------
  949. XMFINLINE BOOL XMPlaneNearEqual
  950. (
  951. FXMVECTOR P1,
  952. FXMVECTOR P2,
  953. FXMVECTOR Epsilon
  954. )
  955. {
  956. XMVECTOR NP1 = XMPlaneNormalize(P1);
  957. XMVECTOR NP2 = XMPlaneNormalize(P2);
  958. return XMVector4NearEqual(NP1, NP2, Epsilon);
  959. }
  960. //------------------------------------------------------------------------------
  961. XMFINLINE BOOL XMPlaneNotEqual
  962. (
  963. FXMVECTOR P1,
  964. FXMVECTOR P2
  965. )
  966. {
  967. return XMVector4NotEqual(P1, P2);
  968. }
  969. //------------------------------------------------------------------------------
  970. XMFINLINE BOOL XMPlaneIsNaN
  971. (
  972. FXMVECTOR P
  973. )
  974. {
  975. return XMVector4IsNaN(P);
  976. }
  977. //------------------------------------------------------------------------------
  978. XMFINLINE BOOL XMPlaneIsInfinite
  979. (
  980. FXMVECTOR P
  981. )
  982. {
  983. return XMVector4IsInfinite(P);
  984. }
  985. //------------------------------------------------------------------------------
  986. // Computation operations
  987. //------------------------------------------------------------------------------
  988. //------------------------------------------------------------------------------
// Computes the 4D dot product of a plane (a, b, c, d) with a homogeneous
// 4-vector V: a*Vx + b*Vy + c*Vz + d*Vw. The scalar result is replicated
// into all four lanes of the returned vector.
XMFINLINE XMVECTOR XMPlaneDot
(
    FXMVECTOR P,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVector4Dot(P, V);
#elif defined(_XM_SSE_INTRINSICS_)
    // Horizontal add of the four products using two shuffle/add pairs.
    __m128 vTemp2 = V;
    __m128 vTemp = _mm_mul_ps(P,vTemp2);
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1008. //------------------------------------------------------------------------------
// Computes the dot product of a plane with a 3D point (coordinate):
// a*Vx + b*Vy + c*Vz + d, i.e. the point's w is treated as 1. This is the
// signed distance of the point from the plane when the plane is normalized.
// The scalar result is replicated into all four lanes.
XMFINLINE XMVECTOR XMPlaneDotCoord
(
    FXMVECTOR P,
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V3;
    XMVECTOR Result;
    // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
    // Force V.w to 1 by selecting xyz from V and w from the constant one.
    V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
    Result = XMVector4Dot(P, V3);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Build (Vx, Vy, Vz, 1): mask off w, then OR in the w=1 identity row.
    XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
    vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
    XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
    // Horizontal add of the four products using two shuffle/add pairs.
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
    return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1034. //------------------------------------------------------------------------------
  1035. XMFINLINE XMVECTOR XMPlaneDotNormal
  1036. (
  1037. FXMVECTOR P,
  1038. FXMVECTOR V
  1039. )
  1040. {
  1041. return XMVector3Dot(P, V);
  1042. }
  1043. //------------------------------------------------------------------------------
  1044. // XMPlaneNormalizeEst uses a reciprocal estimate and
  1045. // returns QNaN on zero and infinite vectors.
// Estimated plane normalization: divides all four components by an
// approximation of the length of the normal (a, b, c). Faster but less
// precise than XMPlaneNormalize; see the note above about QNaN results
// for zero and infinite inputs.
XMFINLINE XMVECTOR XMPlaneNormalizeEst
(
    FXMVECTOR P
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    // Scale by the (estimated) reciprocal length of the 3D normal.
    Result = XMVector3ReciprocalLength(P);
    Result = XMVectorMultiply(P, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(P,P);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal square root estimate
    vDot = _mm_rsqrt_ps(vDot);
    // Scale the plane by the reciprocal length to normalize
    vDot = _mm_mul_ps(vDot,P);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1077. //------------------------------------------------------------------------------
// Normalizes a plane (a, b, c, d) so that its normal (a, b, c) has unit
// length; all four components are divided by the length of the normal.
XMFINLINE XMVECTOR XMPlaneNormalize
(
    FXMVECTOR P
)
{
#if defined(_XM_NO_INTRINSICS_)
    // NOTE(review): despite its name, fLengthSq holds the LENGTH of the
    // normal (sqrtf of the squared length), then its reciprocal.
    FLOAT fLengthSq = sqrtf((P.x*P.x)+(P.y*P.y)+(P.z*P.z));
    // Prevent divide by zero
    if (fLengthSq) {
        fLengthSq = 1.0f/fLengthSq;
    }
    {
        XMVECTOR vResult = {
            P.x*fLengthSq,
            P.y*fLengthSq,
            P.z*fLengthSq,
            P.w*fLengthSq
        };
        return vResult;
    }
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(P,P);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the 3D squared length into all four lanes.
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Reciprocal mul to perform the normalization
    vResult = _mm_div_ps(P,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1119. //------------------------------------------------------------------------------
// Computes the intersection point of plane P with the infinite line through
// LinePoint1 and LinePoint2. Returns QNaN in all components when the line
// is (nearly) parallel to the plane, i.e. when the difference of the two
// endpoint projections onto the normal is within g_XMEpsilon of zero.
XMFINLINE XMVECTOR XMPlaneIntersectLine
(
    FXMVECTOR P,
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR D;
    XMVECTOR ReciprocalD;
    XMVECTOR VT;
    XMVECTOR Point;
    XMVECTOR Zero;
    XMVECTOR Control;
    XMVECTOR Result;
    // Project both endpoints onto the plane normal; their difference D
    // measures how fast the line approaches the plane.
    V1 = XMVector3Dot(P, LinePoint1);
    V2 = XMVector3Dot(P, LinePoint2);
    D = XMVectorSubtract(V1, V2);
    ReciprocalD = XMVectorReciprocal(D);
    // Line parameter t of the intersection: dist(LinePoint1) / D.
    VT = XMPlaneDotCoord(P, LinePoint1);
    VT = XMVectorMultiply(VT, ReciprocalD);
    // Point = LinePoint1 + t * (LinePoint2 - LinePoint1)
    Point = XMVectorSubtract(LinePoint2, LinePoint1);
    Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
    Zero = XMVectorZero();
    // Parallel (D ~ 0): replace the result with QNaN.
    Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
    Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR D;
    XMVECTOR VT;
    XMVECTOR Point;
    XMVECTOR Zero;
    XMVECTOR Control;
    XMVECTOR Result;
    V1 = XMVector3Dot(P, LinePoint1);
    V2 = XMVector3Dot(P, LinePoint2);
    D = _mm_sub_ps(V1, V2);
    VT = XMPlaneDotCoord(P, LinePoint1);
    // Direct divide (may produce inf/NaN when D ~ 0; the final select
    // overwrites those lanes with QNaN anyway).
    VT = _mm_div_ps(VT, D);
    Point = _mm_sub_ps(LinePoint2, LinePoint1);
    Point = _mm_mul_ps(Point,VT);
    Point = _mm_add_ps(Point,LinePoint1);
    Zero = XMVectorZero();
    Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
    Result = XMVectorSelect(Point, g_XMQNaN, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1173. //------------------------------------------------------------------------------
// Computes the line of intersection of two planes, returned as two points
// on that line (*pLinePoint1 and *pLinePoint2, one cross-product vector
// apart). When the planes are (nearly) parallel — squared length of the
// cross product of the normals <= g_XMEpsilon — both outputs are QNaN.
XMINLINE VOID XMPlaneIntersectPlane
(
    XMVECTOR* pLinePoint1,
    XMVECTOR* pLinePoint2,
    FXMVECTOR P1,
    FXMVECTOR P2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR V3;
    XMVECTOR LengthSq;
    XMVECTOR RcpLengthSq;
    XMVECTOR Point;
    XMVECTOR P1W;
    XMVECTOR P2W;
    XMVECTOR Control;
    XMVECTOR LinePoint1;
    XMVECTOR LinePoint2;
    XMASSERT(pLinePoint1);
    XMASSERT(pLinePoint2);
    // V1 is the direction of the intersection line.
    V1 = XMVector3Cross(P2, P1);
    LengthSq = XMVector3LengthSq(V1);
    // Build a point on the line as a combination weighted by the planes'
    // d components: (cross(P2,V1)*P1.w + cross(V1,P1)*P2.w) / |V1|^2.
    V2 = XMVector3Cross(P2, V1);
    P1W = XMVectorSplatW(P1);
    Point = XMVectorMultiply(V2, P1W);
    V3 = XMVector3Cross(V1, P1);
    P2W = XMVectorSplatW(P2);
    Point = XMVectorMultiplyAdd(V3, P2W, Point);
    RcpLengthSq = XMVectorReciprocal(LengthSq);
    LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
    // Second point: step along the line direction.
    LinePoint2 = XMVectorAdd(LinePoint1, V1);
    // Parallel planes: emit QNaN in both outputs.
    Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pLinePoint1);
    XMASSERT(pLinePoint2);
    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR V3;
    XMVECTOR LengthSq;
    XMVECTOR Point;
    XMVECTOR P1W;
    XMVECTOR P2W;
    XMVECTOR Control;
    XMVECTOR LinePoint1;
    XMVECTOR LinePoint2;
    // Same math as above, with w-splats done via shuffles and the
    // reciprocal multiply replaced by a direct divide.
    V1 = XMVector3Cross(P2, P1);
    LengthSq = XMVector3LengthSq(V1);
    V2 = XMVector3Cross(P2, V1);
    P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
    Point = _mm_mul_ps(V2, P1W);
    V3 = XMVector3Cross(V1, P1);
    P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
    V3 = _mm_mul_ps(V3,P2W);
    Point = _mm_add_ps(Point,V3);
    LinePoint1 = _mm_div_ps(Point,LengthSq);
    LinePoint2 = _mm_add_ps(LinePoint1, V1);
    Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1240. //------------------------------------------------------------------------------
// Transforms the plane P by the matrix M, treating the plane as a row
// 4-vector: Result = Px*M.r[0] + Py*M.r[1] + Pz*M.r[2] + Pw*M.r[3].
// NOTE(review): for a geometric plane transform the caller is expected to
// pass the inverse-transpose of the point transformation — confirm at the
// call sites.
XMFINLINE XMVECTOR XMPlaneTransform
(
    FXMVECTOR P,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR W;
    XMVECTOR Result;
    // Broadcast each plane component and accumulate the weighted rows.
    W = XMVectorSplatW(P);
    Z = XMVectorSplatZ(P);
    Y = XMVectorSplatY(P);
    X = XMVectorSplatX(P);
    Result = XMVectorMultiply(W, M.r[3]);
    Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat each component via shuffles, multiply by the matrix rows, and
    // sum pairwise to shorten the dependency chain.
    XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
    XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
    XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
    XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
    X = _mm_mul_ps(X, M.r[0]);
    Y = _mm_mul_ps(Y, M.r[1]);
    Z = _mm_mul_ps(Z, M.r[2]);
    W = _mm_mul_ps(W, M.r[3]);
    X = _mm_add_ps(X,Z);
    Y = _mm_add_ps(Y,W);
    X = _mm_add_ps(X,Y);
    return X;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1278. //------------------------------------------------------------------------------
  1279. XMFINLINE XMFLOAT4* XMPlaneTransformStream
  1280. (
  1281. XMFLOAT4* pOutputStream,
  1282. UINT OutputStride,
  1283. CONST XMFLOAT4* pInputStream,
  1284. UINT InputStride,
  1285. UINT PlaneCount,
  1286. CXMMATRIX M
  1287. )
  1288. {
  1289. return XMVector4TransformStream(pOutputStream,
  1290. OutputStride,
  1291. pInputStream,
  1292. InputStride,
  1293. PlaneCount,
  1294. M);
  1295. }
  1296. //------------------------------------------------------------------------------
  1297. // Conversion operations
  1298. //------------------------------------------------------------------------------
  1299. //------------------------------------------------------------------------------
// Builds a plane from a point on the plane and the plane's normal:
// (a, b, c) = Normal, d = -dot3(Point, Normal).
XMFINLINE XMVECTOR XMPlaneFromPointNormal
(
    FXMVECTOR Point,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR W;
    XMVECTOR Result;
    W = XMVector3Dot(Point, Normal);
    W = XMVectorNegate(W);
    // Select xyz from Normal, w from the negated dot product.
    Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR W;
    XMVECTOR Result;
    W = XMVector3Dot(Point,Normal);
    // Negate via multiply by -1, then combine (Normal.xyz, W.w) with masks.
    W = _mm_mul_ps(W,g_XMNegativeOne);
    Result = _mm_and_ps(Normal,g_XMMask3);
    W = _mm_and_ps(W,g_XMMaskW);
    Result = _mm_or_ps(Result,W);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1325. //------------------------------------------------------------------------------
// Builds a plane passing through three points. The normal is the
// normalized cross product of (Point1 - Point2) and (Point1 - Point3),
// and d = -dot3(Normal, Point1).
XMFINLINE XMVECTOR XMPlaneFromPoints
(
    FXMVECTOR Point1,
    FXMVECTOR Point2,
    FXMVECTOR Point3
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR N;
    XMVECTOR D;
    XMVECTOR V21;
    XMVECTOR V31;
    XMVECTOR Result;
    V21 = XMVectorSubtract(Point1, Point2);
    V31 = XMVectorSubtract(Point1, Point3);
    N = XMVector3Cross(V21, V31);
    N = XMVector3Normalize(N);
    D = XMPlaneDotNormal(N, Point1);
    D = XMVectorNegate(D);
    // Select xyz from the normal, w from the negated distance.
    Result = XMVectorSelect(D, N, g_XMSelect1110.v);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR N;
    XMVECTOR D;
    XMVECTOR V21;
    XMVECTOR V31;
    XMVECTOR Result;
    V21 = _mm_sub_ps(Point1, Point2);
    V31 = _mm_sub_ps(Point1, Point3);
    N = XMVector3Cross(V21, V31);
    N = XMVector3Normalize(N);
    D = XMPlaneDotNormal(N, Point1);
    // Negate via multiply by -1, then combine (N.xyz, D.w) with masks.
    D = _mm_mul_ps(D,g_XMNegativeOne);
    N = _mm_and_ps(N,g_XMMask3);
    D = _mm_and_ps(D,g_XMMaskW);
    Result = _mm_or_ps(D,N);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1366. /****************************************************************************
  1367. *
  1368. * Color
  1369. *
  1370. ****************************************************************************/
  1371. //------------------------------------------------------------------------------
  1372. // Comparison operations
  1373. //------------------------------------------------------------------------------
  1374. //------------------------------------------------------------------------------
  1375. XMFINLINE BOOL XMColorEqual
  1376. (
  1377. FXMVECTOR C1,
  1378. FXMVECTOR C2
  1379. )
  1380. {
  1381. return XMVector4Equal(C1, C2);
  1382. }
  1383. //------------------------------------------------------------------------------
  1384. XMFINLINE BOOL XMColorNotEqual
  1385. (
  1386. FXMVECTOR C1,
  1387. FXMVECTOR C2
  1388. )
  1389. {
  1390. return XMVector4NotEqual(C1, C2);
  1391. }
  1392. //------------------------------------------------------------------------------
  1393. XMFINLINE BOOL XMColorGreater
  1394. (
  1395. FXMVECTOR C1,
  1396. FXMVECTOR C2
  1397. )
  1398. {
  1399. return XMVector4Greater(C1, C2);
  1400. }
  1401. //------------------------------------------------------------------------------
  1402. XMFINLINE BOOL XMColorGreaterOrEqual
  1403. (
  1404. FXMVECTOR C1,
  1405. FXMVECTOR C2
  1406. )
  1407. {
  1408. return XMVector4GreaterOrEqual(C1, C2);
  1409. }
  1410. //------------------------------------------------------------------------------
  1411. XMFINLINE BOOL XMColorLess
  1412. (
  1413. FXMVECTOR C1,
  1414. FXMVECTOR C2
  1415. )
  1416. {
  1417. return XMVector4Less(C1, C2);
  1418. }
  1419. //------------------------------------------------------------------------------
  1420. XMFINLINE BOOL XMColorLessOrEqual
  1421. (
  1422. FXMVECTOR C1,
  1423. FXMVECTOR C2
  1424. )
  1425. {
  1426. return XMVector4LessOrEqual(C1, C2);
  1427. }
  1428. //------------------------------------------------------------------------------
  1429. XMFINLINE BOOL XMColorIsNaN
  1430. (
  1431. FXMVECTOR C
  1432. )
  1433. {
  1434. return XMVector4IsNaN(C);
  1435. }
  1436. //------------------------------------------------------------------------------
  1437. XMFINLINE BOOL XMColorIsInfinite
  1438. (
  1439. FXMVECTOR C
  1440. )
  1441. {
  1442. return XMVector4IsInfinite(C);
  1443. }
  1444. //------------------------------------------------------------------------------
  1445. // Computation operations
  1446. //------------------------------------------------------------------------------
  1447. //------------------------------------------------------------------------------
// Computes the complement (negative) of a color: rgb -> 1 - rgb, with the
// alpha (w) channel passed through unchanged. Inputs are expected to be
// in the [0, 1] range (see the disabled asserts below).
XMFINLINE XMVECTOR XMColorNegative
(
    FXMVECTOR vColor
)
{
#if defined(_XM_NO_INTRINSICS_)
    // XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f)));
    // XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f)));
    XMVECTOR vResult = {
        1.0f - vColor.x,
        1.0f - vColor.y,
        1.0f - vColor.z,
        vColor.w
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Negate only x,y and z.
    XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
    // Add 1,1,1,0 to -x,-y,-z,w
    return _mm_add_ps(vTemp,g_XMOne3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1471. //------------------------------------------------------------------------------
  1472. XMFINLINE XMVECTOR XMColorModulate
  1473. (
  1474. FXMVECTOR C1,
  1475. FXMVECTOR C2
  1476. )
  1477. {
  1478. return XMVectorMultiply(C1, C2);
  1479. }
  1480. //------------------------------------------------------------------------------
  1481. XMFINLINE XMVECTOR XMColorAdjustSaturation
  1482. (
  1483. FXMVECTOR vColor,
  1484. FLOAT fSaturation
  1485. )
  1486. {
  1487. #if defined(_XM_NO_INTRINSICS_)
  1488. CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
  1489. // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
  1490. // Result = (C - Luminance) * Saturation + Luminance;
  1491. FLOAT fLuminance = (vColor.x*gvLuminance.x)+(vColor.y*gvLuminance.y)+(vColor.z*gvLuminance.z);
  1492. XMVECTOR vResult = {
  1493. ((vColor.x - fLuminance)*fSaturation)+fLuminance,
  1494. ((vColor.y - fLuminance)*fSaturation)+fLuminance,
  1495. ((vColor.z - fLuminance)*fSaturation)+fLuminance,
  1496. vColor.w};
  1497. return vResult;
  1498. #elif defined(_XM_SSE_INTRINSICS_)
  1499. static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
  1500. // Mul RGB by intensity constants
  1501. XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
  1502. // vResult.x = vLuminance.y, vResult.y = vLuminance.y,
  1503. // vResult.z = vLuminance.z, vResult.w = vLuminance.z
  1504. XMVECTOR vResult = vLuminance;
  1505. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
  1506. // vLuminance.x += vLuminance.y
  1507. vLuminance = _mm_add_ss(vLuminance,vResult);
  1508. // Splat vLuminance.z
  1509. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
  1510. // vLuminance.x += vLuminance.z (Dot product)
  1511. vLuminance = _mm_add_ss(vLuminance,vResult);
  1512. // Splat vLuminance
  1513. vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
  1514. // Splat fSaturation
  1515. XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
  1516. // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
  1517. vResult = _mm_sub_ps(vColor,vLuminance);
  1518. vResult = _mm_mul_ps(vResult,vSaturation);
  1519. vResult = _mm_add_ps(vResult,vLuminance);
  1520. // Retain w from the source color
  1521. vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
  1522. vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
  1523. return vResult;
  1524. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  1525. #endif // _XM_VMX128_INTRINSICS_
  1526. }
  1527. //------------------------------------------------------------------------------
  1528. XMFINLINE XMVECTOR XMColorAdjustContrast
  1529. (
  1530. FXMVECTOR vColor,
  1531. FLOAT fContrast
  1532. )
  1533. {
  1534. #if defined(_XM_NO_INTRINSICS_)
  1535. // Result = (vColor - 0.5f) * fContrast + 0.5f;
  1536. XMVECTOR vResult = {
  1537. ((vColor.x-0.5f) * fContrast) + 0.5f,
  1538. ((vColor.y-0.5f) * fContrast) + 0.5f,
  1539. ((vColor.z-0.5f) * fContrast) + 0.5f,
  1540. vColor.w // Leave W untouched
  1541. };
  1542. return vResult;
  1543. #elif defined(_XM_SSE_INTRINSICS_)
  1544. XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
  1545. XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
  1546. vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
  1547. vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
  1548. // Retain w from the source color
  1549. vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
  1550. vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
  1551. return vResult;
  1552. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  1553. #endif // _XM_VMX128_INTRINSICS_
  1554. }
  1555. /****************************************************************************
  1556. *
  1557. * Miscellaneous
  1558. *
  1559. ****************************************************************************/
  1560. //------------------------------------------------------------------------------
// Verifies at runtime that the CPU supports the instruction set this
// library was compiled for. The no-intrinsics and non-SSE builds require
// nothing special and return TRUE; the SSE build checks for SSE and SSE2
// via IsProcessorFeaturePresent.
XMINLINE BOOL XMVerifyCPUSupport()
{
#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
    return TRUE;
#else // _XM_SSE_INTRINSICS_
    // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
    // Detecting SSE2 on older versions of Windows would require using cpuid directly
    return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
#endif
}
  1571. //------------------------------------------------------------------------------
// Buffer size (including the terminating NUL) used to format the failing
// line number as decimal text.
#define XMASSERT_LINE_STRING_SIZE 16

// Reports a failed assertion (expression text, source file, line number)
// to the debugger output stream and then breaks into the debugger.
XMINLINE VOID XMAssert
(
    CONST CHAR* pExpression,
    CONST CHAR* pFileName,
    UINT LineNumber
)
{
    CHAR aLineString[XMASSERT_LINE_STRING_SIZE];
    CHAR* pLineString;
    UINT Line;
    // Seed a '0' and NUL-terminate; digits are then written right-to-left
    // starting at index SIZE-2.
    // NOTE(review): if LineNumber == 0 the loop never runs and the string
    // printed below (pLineString + 1) is empty; __LINE__ is always >= 1,
    // so this does not occur in practice.
    aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
    aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
    // Convert LineNumber to decimal text, least significant digit first.
    // The pLineString >= aLineString guard prevents buffer underrun for
    // absurdly large line numbers.
    for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
         Line != 0 && pLineString >= aLineString;
         Line /= 10, pLineString--)
    {
        *pLineString = (CHAR)('0' + (Line % 10));
    }
#ifndef NO_OUTPUT_DEBUG_STRING
    OutputDebugStringA("Assertion failed: ");
    OutputDebugStringA(pExpression);
    OutputDebugStringA(", file ");
    OutputDebugStringA(pFileName);
    OutputDebugStringA(", line ");
    // pLineString was decremented once past the first digit written, so
    // pLineString + 1 points at the start of the formatted number.
    OutputDebugStringA(pLineString + 1);
    OutputDebugStringA("\r\n");
#else
    DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
#endif
    __debugbreak();
}
  1604. //------------------------------------------------------------------------------
// Computes the Fresnel reflectance term per component from the cosine of the
// incident angle and the index of refraction, clamped to [0, 1].
XMFINLINE XMVECTOR XMFresnelTerm
(
    FXMVECTOR CosIncidentAngle, // cos of the incident angle, per component
    FXMVECTOR RefractionIndex   // refraction index, per component
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR G;
    XMVECTOR D, S;
    XMVECTOR V0, V1, V2, V3;
    XMVECTOR Result;

    // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
    // c = CosIncidentAngle
    // g = sqrt(c^2 + RefractionIndex^2 - 1)

    XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));

    // g = sqrt(|c^2 + RefractionIndex^2 - 1|); the abs guards against tiny
    // negative values when both inputs are near zero.
    G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
    G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
    G = XMVectorAbs(G);
    G = XMVectorSqrt(G);

    // First factor: 0.5f * (g - c)^2 / (g + c)^2
    S = XMVectorAdd(G, CosIncidentAngle);
    D = XMVectorSubtract(G, CosIncidentAngle);
    V0 = XMVectorMultiply(D, D);
    V1 = XMVectorMultiply(S, S);
    V1 = XMVectorReciprocal(V1);
    V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
    V0 = XMVectorMultiply(V0, V1);

    // Second factor: ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
    V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
    V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
    V2 = XMVectorMultiply(V2, V2);
    V3 = XMVectorMultiply(V3, V3);
    V3 = XMVectorReciprocal(V3);
    V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);

    // Combine and clamp the reflectance to [0, 1].
    Result = XMVectorMultiply(V0, V2);
    Result = XMVectorSaturate(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
    // c = CosIncidentAngle
    // g = sqrt(c^2 + RefractionIndex^2 - 1)

    XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));

    // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
    XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
    XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
    G = _mm_sub_ps(G,g_XMOne);
    vTemp = _mm_add_ps(vTemp,G);
    // max((0-vTemp),vTemp) == abs(vTemp)
    // The abs is needed to deal with refraction and cosine being zero
    G = _mm_setzero_ps();
    G = _mm_sub_ps(G,vTemp);
    G = _mm_max_ps(G,vTemp);
    // Last operation, the sqrt()
    G = _mm_sqrt_ps(G);

    // Calc G-C and G+C
    XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
    XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
    // Perform the term (0.5f *(g - c)^2) / (g + c)^2
    XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
    vTemp = _mm_mul_ps(GAddC,GAddC);
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    vResult = _mm_div_ps(vResult,vTemp);
    // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
    GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
    GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
    GAddC = _mm_sub_ps(GAddC,g_XMOne);
    GSubC = _mm_add_ps(GSubC,g_XMOne);
    GAddC = _mm_mul_ps(GAddC,GAddC);
    GSubC = _mm_mul_ps(GSubC,GSubC);
    GAddC = _mm_div_ps(GAddC,GSubC);
    GAddC = _mm_add_ps(GAddC,g_XMOne);
    // Multiply the two term parts
    vResult = _mm_mul_ps(vResult,GAddC);
    // Clamp to 0.0 - 1.0f
    vResult = _mm_max_ps(vResult,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1683. //------------------------------------------------------------------------------
  1684. XMFINLINE BOOL XMScalarNearEqual
  1685. (
  1686. FLOAT S1,
  1687. FLOAT S2,
  1688. FLOAT Epsilon
  1689. )
  1690. {
  1691. FLOAT Delta = S1 - S2;
  1692. #if defined(_XM_NO_INTRINSICS_)
  1693. UINT AbsDelta = *(UINT*)&Delta & 0x7FFFFFFF;
  1694. return (*(FLOAT*)&AbsDelta <= Epsilon);
  1695. #elif defined(_XM_SSE_INTRINSICS_)
  1696. return (fabsf(Delta) <= Epsilon);
  1697. #else
  1698. return (__fabs(Delta) <= Epsilon);
  1699. #endif
  1700. }
  1701. //------------------------------------------------------------------------------
// Wraps the given angle (radians) into the range -XM_PI <= Angle < XM_PI.
XMFINLINE FLOAT XMScalarModAngle
(
    FLOAT Angle
)
{
    // Note: The modulo is performed with unsigned math only to work
    // around a precision error on numbers that are close to PI
    float fTemp;
#if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
    // Normalize the range from 0.0f to XM_2PI
    Angle = Angle + XM_PI;
    // Perform the modulo, unsigned (fabsf makes the truncation toward zero
    // behave like a floor for the magnitude)
    fTemp = fabsf(Angle);
    fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
    // Restore the number to the range of -XM_PI to XM_PI-epsilon
    fTemp = fTemp - XM_PI;
    // If the modulo'd value was negative, restore negation
    if (Angle<0.0f) {
        fTemp = -fTemp;
    }
    return fTemp;
#else
#endif
}
  1727. //------------------------------------------------------------------------------
// Computes sin(Value) for an arbitrary angle in radians. The no-intrinsics
// path wraps the angle into [-PI, PI) and evaluates a 23rd-degree odd-power
// Taylor-style polynomial as three 4-term dot products against precomputed
// coefficient tables.
XMINLINE FLOAT XMScalarSin
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT ValueMod;
    FLOAT ValueSq;
    XMVECTOR V0123, V0246, V1357, V9111315, V17192123;
    XMVECTOR V1, V7, V8;
    XMVECTOR R0, R1, R2;

    // Wrap into the polynomial's valid range.
    ValueMod = XMScalarModAngle(Value);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
    //           V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)

    // Build vectors of successive odd powers of ValueMod:
    // V1357 = (V, V^3, V^5, V^7), V9111315 = (V^9..V^15), V17192123 = (V^17..V^23)
    ValueSq = ValueMod * ValueMod;
    V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
    V1 = XMVectorSplatY(V0123);
    V0246 = XMVectorMultiply(V0123, V0123);
    V1357 = XMVectorMultiply(V0246, V1);
    V7 = XMVectorSplatW(V1357);
    V8 = XMVectorMultiply(V7, V1);
    V9111315 = XMVectorMultiply(V1357, V8);
    V17192123 = XMVectorMultiply(V9111315, V8);

    // Dot each power group with its coefficient table and sum the partials.
    R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
    R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
    R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);

    return R0.v[0] + R1.v[0] + R2.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return sinf( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1760. //------------------------------------------------------------------------------
// Computes cos(Value) for an arbitrary angle in radians. The no-intrinsics
// path wraps the angle into [-PI, PI) and evaluates a 22nd-degree even-power
// polynomial as three 4-term dot products against precomputed coefficient
// tables (mirrors XMScalarSin).
XMINLINE FLOAT XMScalarCos
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT ValueMod;
    FLOAT ValueSq;
    XMVECTOR V0123, V0246, V8101214, V16182022;
    XMVECTOR V2, V6, V8;
    XMVECTOR R0, R1, R2;

    // Wrap into the polynomial's valid range.
    ValueMod = XMScalarModAngle(Value);

    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
    //           V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    // Build vectors of successive even powers of ValueMod:
    // V0246 = (1, V^2, V^4, V^6), V8101214 = (V^8..V^14), V16182022 = (V^16..V^22)
    ValueSq = ValueMod * ValueMod;
    V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
    V0246 = XMVectorMultiply(V0123, V0123);
    V2 = XMVectorSplatZ(V0123);
    V6 = XMVectorSplatW(V0246);
    V8 = XMVectorMultiply(V6, V2);
    V8101214 = XMVectorMultiply(V0246, V8);
    V16182022 = XMVectorMultiply(V8101214, V8);

    // Dot each power group with its coefficient table and sum the partials.
    R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
    R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
    R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);

    return R0.v[0] + R1.v[0] + R2.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return cosf(Value);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1792. //------------------------------------------------------------------------------
// Computes sin(Value) and cos(Value) simultaneously, sharing the power-series
// setup between the two polynomials (see XMScalarSin / XMScalarCos). Results
// are written through pSin and pCos; both pointers must be non-NULL.
XMINLINE VOID XMScalarSinCos
(
    FLOAT* pSin,    // [out] receives sin(Value)
    FLOAT* pCos,    // [out] receives cos(Value)
    FLOAT Value     // angle in radians
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT ValueMod;
    FLOAT ValueSq;
    XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
    XMVECTOR V1, V2, V6, V8;
    XMVECTOR S0, S1, S2, C0, C1, C2;

    XMASSERT(pSin);
    XMASSERT(pCos);

    // Wrap into the polynomials' valid range.
    ValueMod = XMScalarModAngle(Value);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
    //           V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
    //           V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)

    // Build the even-power groups (for cos) and odd-power groups (for sin)
    // from a single shared set of multiplies.
    ValueSq = ValueMod * ValueMod;
    V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
    V1 = XMVectorSplatY(V0123);
    V2 = XMVectorSplatZ(V0123);
    V0246 = XMVectorMultiply(V0123, V0123);
    V1357 = XMVectorMultiply(V0246, V1);
    V6 = XMVectorSplatW(V0246);
    V8 = XMVectorMultiply(V6, V2);
    V8101214 = XMVectorMultiply(V0246, V8);
    V9111315 = XMVectorMultiply(V1357, V8);
    V16182022 = XMVectorMultiply(V8101214, V8);
    V17192123 = XMVectorMultiply(V9111315, V8);

    // Dot each power group with its coefficient table and sum the partials.
    C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
    S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
    C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
    S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
    C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
    S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);

    *pCos = C0.v[0] + C1.v[0] + C2.v[0];
    *pSin = S0.v[0] + S1.v[0] + S2.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);

    *pSin = sinf(Value);
    *pCos = cosf(Value);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1841. //------------------------------------------------------------------------------
// Computes an approximation of asin(Value) using the g_XMASinCoefficients
// tables (no-intrinsics path) or the CRT asinf (SSE path).
XMINLINE FLOAT XMScalarASin
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT AbsValue, Value2, Value3, D;
    XMVECTOR AbsV, R0, R1, Result;
    XMVECTOR V3;

    // |Value| via clearing the IEEE-754 sign bit.
    *(UINT*)&AbsValue = *(UINT*)&Value & 0x7FFFFFFF;

    Value2 = Value * AbsValue;  // Value^2 with the sign of Value retained
    Value3 = Value * Value2;
    // 1.00000011921f is 1.0f plus one ulp, keeping the sqrt argument positive
    // at |Value| == 1.  NOTE(review): exact approximation form is driven by
    // the g_XMASinCoefficients tables, which are defined elsewhere.
    D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);

    AbsV = XMVectorReplicate(AbsValue);

    V3.v[0] = Value3;
    V3.v[1] = 1.0f;
    V3.v[2] = Value3;
    V3.v[3] = 1.0f;

    R1 = XMVectorSet(D, D, Value, Value);
    R1 = XMVectorMultiply(R1, V3);

    // Evaluate the coefficient polynomial in |Value| (Horner form).
    R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
    R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);

    // Combine the terms with a single dot product.
    Result = XMVector4Dot(R0, R1);

    return Result.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return asinf(Value);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1871. //------------------------------------------------------------------------------
  1872. XMINLINE FLOAT XMScalarACos
  1873. (
  1874. FLOAT Value
  1875. )
  1876. {
  1877. #if defined(_XM_NO_INTRINSICS_)
  1878. return XM_PIDIV2 - XMScalarASin(Value);
  1879. #elif defined(_XM_SSE_INTRINSICS_)
  1880. return acosf(Value);
  1881. #else // _XM_VMX128_INTRINSICS_
  1882. #endif // _XM_VMX128_INTRINSICS_
  1883. }
  1884. //------------------------------------------------------------------------------
// Fast estimate of sin(Value) using a 7th-degree odd-power polynomial.
// Unlike XMScalarSin, the angle is NOT wrapped: Value must already be in
// [-PI, PI) (asserted).
XMFINLINE FLOAT XMScalarSinEst
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT ValueSq;
    XMVECTOR V;
    XMVECTOR Y;
    XMVECTOR Result;

    XMASSERT(Value >= -XM_PI);
    XMASSERT(Value < XM_PI);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)

    // Build (V, V^3, V^5, V^7) and dot with the estimate coefficients.
    ValueSq = Value * Value;
    V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
    Y = XMVectorSplatY(V);
    V = XMVectorMultiply(V, V);
    V = XMVectorMultiply(V, Y);

    Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);

    return Result.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(Value >= -XM_PI);
    XMASSERT(Value < XM_PI);
    float ValueSq = Value*Value;
    XMVECTOR vValue = _mm_set_ps1(Value);
    XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
    vTemp = _mm_mul_ps(vTemp,vTemp);
    vTemp = _mm_mul_ps(vTemp,vValue);
    // vTemp = Value,Value^3,Value^5,Value^7
    vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
    // Horizontal add of the four products via shuffles.
    vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vValue = _mm_add_ps(vValue,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vValue);           // Add Z and W together
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));   // Splat Z and return
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(vTemp);
#else
    return vTemp.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1928. //------------------------------------------------------------------------------
// Fast estimate of cos(Value) using a 6th-degree even-power polynomial.
// Unlike XMScalarCos, the angle is NOT wrapped: Value must already be in
// [-PI, PI) (asserted).
XMFINLINE FLOAT XMScalarCosEst
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT ValueSq;
    XMVECTOR V;
    XMVECTOR Result;

    XMASSERT(Value >= -XM_PI);
    XMASSERT(Value < XM_PI);

    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)

    // Build (1, V^2, V^4, V^6) and dot with the estimate coefficients.
    ValueSq = Value * Value;
    V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
    V = XMVectorMultiply(V, V);

    Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);

    return Result.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(Value >= -XM_PI);
    XMASSERT(Value < XM_PI);
    float ValueSq = Value*Value;
    XMVECTOR vValue = _mm_setzero_ps();
    XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
    vTemp = _mm_mul_ps(vTemp,vTemp);
    // vTemp = 1.0f,Value^2,Value^4,Value^6
    vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
    // Horizontal add of the four products via shuffles.
    vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
    vValue = _mm_add_ps(vValue,vTemp);          // Add Z = X+Z; W = Y+W;
    vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0));  // Copy W to the Z position
    vTemp = _mm_add_ps(vTemp,vValue);           // Add Z and W together
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2));   // Splat Z and return
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(vTemp);
#else
    return vTemp.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  1968. //------------------------------------------------------------------------------
// Fast simultaneous estimate of sin(Value) and cos(Value), sharing the
// power-series setup (see XMScalarSinEst / XMScalarCosEst). Value must be in
// [-PI, PI) (asserted); results are written through pSin and pCos.
XMFINLINE VOID XMScalarSinCosEst
(
    FLOAT* pSin,    // [out] receives the sin estimate
    FLOAT* pCos,    // [out] receives the cos estimate
    FLOAT Value     // angle in radians, -PI <= Value < PI
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT ValueSq;
    XMVECTOR V, Sin, Cos;
    XMVECTOR Y;

    XMASSERT(pSin);
    XMASSERT(pCos);
    XMASSERT(Value >= -XM_PI);
    XMASSERT(Value < XM_PI);

    // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
    // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)

    // Cos gets the even powers (1, V^2, V^4, V^6); multiplying by V yields
    // the odd powers (V, V^3, V^5, V^7) for Sin.
    ValueSq = Value * Value;
    V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
    Y = XMVectorSplatY(V);
    Cos = XMVectorMultiply(V, V);
    Sin = XMVectorMultiply(Cos, Y);

    Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
    Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);

    *pCos = Cos.v[0];
    *pSin = Sin.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pSin);
    XMASSERT(pCos);
    XMASSERT(Value >= -XM_PI);
    XMASSERT(Value < XM_PI);
    float ValueSq = Value * Value;
    XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
    XMVECTOR Sin = _mm_set_ps1(Value);
    Cos = _mm_mul_ps(Cos,Cos);
    Sin = _mm_mul_ps(Sin,Cos);
    // Cos = 1.0f,Value^2,Value^4,Value^6
    Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
    _mm_store_ss(pCos,Cos);
    // Sin = Value,Value^3,Value^5,Value^7
    Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
    _mm_store_ss(pSin,Sin);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  2014. //------------------------------------------------------------------------------
// Fast estimate of asin(Value) using the g_XMASinEstCoefficients table.
XMFINLINE FLOAT XMScalarASinEst
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR VR, CR, CS;
    XMVECTOR Result;
    FLOAT AbsV, V2, D;
    // 1.0f plus one ulp: keeps the sqrt argument positive at |Value| == 1.
    CONST FLOAT OnePlusEps = 1.00000011921f;

    // |Value| via clearing the IEEE-754 sign bit.
    *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;

    V2 = Value * AbsV;      // Value^2 with the sign of Value retained
    D = OnePlusEps - AbsV;

    // Combine the terms with a single dot product against the coefficients.
    // NOTE(review): exact approximation form is driven by the
    // g_XMASinEstCoefficients table, which is defined elsewhere.
    CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
    VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
    CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);

    Result = XMVector4Dot(VR, CR);

    return Result.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    CONST FLOAT OnePlusEps = 1.00000011921f;
    FLOAT AbsV = fabsf(Value);
    FLOAT V2 = Value * AbsV;    // Square with sign retained
    FLOAT D = OnePlusEps - AbsV;

    XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
    XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
    Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
    Result = XMVector4Dot(VR,Result);
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(Result);
#else
    return Result.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  2050. //------------------------------------------------------------------------------
// Fast estimate of acos(Value): evaluates the same approximation as
// XMScalarASinEst inline, then applies acos(x) == pi/2 - asin(x).
XMFINLINE FLOAT XMScalarACosEst
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR VR, CR, CS;
    XMVECTOR Result;
    FLOAT AbsV, V2, D;
    // 1.0f plus one ulp: keeps the sqrt argument positive at |Value| == 1.
    CONST FLOAT OnePlusEps = 1.00000011921f;

    // return XM_PIDIV2 - XMScalarASin(Value);

    // |Value| via clearing the IEEE-754 sign bit.
    *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;

    V2 = Value * AbsV;      // Value^2 with the sign of Value retained
    D = OnePlusEps - AbsV;

    // Inline copy of the XMScalarASinEst evaluation.
    CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
    VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
    CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);

    Result = XMVector4Dot(VR, CR);

    return XM_PIDIV2 - Result.v[0];
#elif defined(_XM_SSE_INTRINSICS_)
    CONST FLOAT OnePlusEps = 1.00000011921f;
    FLOAT AbsV = fabsf(Value);
    FLOAT V2 = Value * AbsV;    // Value^2 retaining sign
    FLOAT D = OnePlusEps - AbsV;

    XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
    XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
    Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
    Result = XMVector4Dot(VR,Result);
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return XM_PIDIV2 - _mm_cvtss_f32(Result);
#else
    return XM_PIDIV2 - Result.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  2087. #endif // __XNAMATHMISC_INL__