Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2464 lines
69 KiB

  1. /*++
  2. Copyright (c) Microsoft Corporation. All rights reserved.
  3. Module Name:
  4. xnamathmisc.inl
  5. Abstract:
  6. XNA math library for Windows and Xbox 360: Quaternion, plane, and color functions.
  7. --*/
  8. #if defined(_MSC_VER) && (_MSC_VER > 1000)
  9. #pragma once
  10. #endif
  11. #ifndef __XNAMATHMISC_INL__
  12. #define __XNAMATHMISC_INL__
  13. /****************************************************************************
  14. *
  15. * Quaternion
  16. *
  17. ****************************************************************************/
  18. //------------------------------------------------------------------------------
  19. // Comparison operations
  20. //------------------------------------------------------------------------------
  21. //------------------------------------------------------------------------------
  22. XMFINLINE BOOL XMQuaternionEqual
  23. (
  24. FXMVECTOR Q1,
  25. FXMVECTOR Q2
  26. )
  27. {
  28. return XMVector4Equal(Q1, Q2);
  29. }
  30. //------------------------------------------------------------------------------
  31. XMFINLINE BOOL XMQuaternionNotEqual
  32. (
  33. FXMVECTOR Q1,
  34. FXMVECTOR Q2
  35. )
  36. {
  37. return XMVector4NotEqual(Q1, Q2);
  38. }
  39. //------------------------------------------------------------------------------
  40. XMFINLINE BOOL XMQuaternionIsNaN
  41. (
  42. FXMVECTOR Q
  43. )
  44. {
  45. return XMVector4IsNaN(Q);
  46. }
  47. //------------------------------------------------------------------------------
  48. XMFINLINE BOOL XMQuaternionIsInfinite
  49. (
  50. FXMVECTOR Q
  51. )
  52. {
  53. return XMVector4IsInfinite(Q);
  54. }
  55. //------------------------------------------------------------------------------
  56. XMFINLINE BOOL XMQuaternionIsIdentity
  57. (
  58. FXMVECTOR Q
  59. )
  60. {
  61. #if defined(_XM_NO_INTRINSICS_)
  62. return XMVector4Equal(Q, g_XMIdentityR3.v);
  63. #elif defined(_XM_SSE_INTRINSICS_)
  64. XMVECTOR vTemp = _mm_cmpeq_ps(Q,g_XMIdentityR3);
  65. return (_mm_movemask_ps(vTemp)==0x0f) ? true : false;
  66. #else // _XM_VMX128_INTRINSICS_
  67. #endif // _XM_VMX128_INTRINSICS_
  68. }
  69. //------------------------------------------------------------------------------
  70. // Computation operations
  71. //------------------------------------------------------------------------------
  72. //------------------------------------------------------------------------------
  73. XMFINLINE XMVECTOR XMQuaternionDot
  74. (
  75. FXMVECTOR Q1,
  76. FXMVECTOR Q2
  77. )
  78. {
  79. return XMVector4Dot(Q1, Q2);
  80. }
  81. //------------------------------------------------------------------------------
  82. XMFINLINE XMVECTOR XMQuaternionMultiply
  83. (
  84. FXMVECTOR Q1,
  85. FXMVECTOR Q2
  86. )
  87. {
  88. #if defined(_XM_NO_INTRINSICS_)
  89. XMVECTOR NegativeQ1;
  90. XMVECTOR Q2X;
  91. XMVECTOR Q2Y;
  92. XMVECTOR Q2Z;
  93. XMVECTOR Q2W;
  94. XMVECTOR Q1WZYX;
  95. XMVECTOR Q1ZWXY;
  96. XMVECTOR Q1YXWZ;
  97. XMVECTOR Result;
  98. CONST XMVECTORU32 ControlWZYX = {XM_PERMUTE_0W, XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1X};
  99. CONST XMVECTORU32 ControlZWXY = {XM_PERMUTE_0Z, XM_PERMUTE_0W, XM_PERMUTE_1X, XM_PERMUTE_1Y};
  100. CONST XMVECTORU32 ControlYXWZ = {XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z};
  101. NegativeQ1 = XMVectorNegate(Q1);
  102. Q2W = XMVectorSplatW(Q2);
  103. Q2X = XMVectorSplatX(Q2);
  104. Q2Y = XMVectorSplatY(Q2);
  105. Q2Z = XMVectorSplatZ(Q2);
  106. Q1WZYX = XMVectorPermute(Q1, NegativeQ1, ControlWZYX.v);
  107. Q1ZWXY = XMVectorPermute(Q1, NegativeQ1, ControlZWXY.v);
  108. Q1YXWZ = XMVectorPermute(Q1, NegativeQ1, ControlYXWZ.v);
  109. Result = XMVectorMultiply(Q1, Q2W);
  110. Result = XMVectorMultiplyAdd(Q1WZYX, Q2X, Result);
  111. Result = XMVectorMultiplyAdd(Q1ZWXY, Q2Y, Result);
  112. Result = XMVectorMultiplyAdd(Q1YXWZ, Q2Z, Result);
  113. return Result;
  114. #elif defined(_XM_SSE_INTRINSICS_)
  115. static CONST XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
  116. static CONST XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
  117. static CONST XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
  118. // Copy to SSE registers and use as few as possible for x86
  119. XMVECTOR Q2X = Q2;
  120. XMVECTOR Q2Y = Q2;
  121. XMVECTOR Q2Z = Q2;
  122. XMVECTOR vResult = Q2;
  123. // Splat with one instruction
  124. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
  125. Q2X = _mm_shuffle_ps(Q2X,Q2X,_MM_SHUFFLE(0,0,0,0));
  126. Q2Y = _mm_shuffle_ps(Q2Y,Q2Y,_MM_SHUFFLE(1,1,1,1));
  127. Q2Z = _mm_shuffle_ps(Q2Z,Q2Z,_MM_SHUFFLE(2,2,2,2));
  128. // Retire Q1 and perform Q1*Q2W
  129. vResult = _mm_mul_ps(vResult,Q1);
  130. XMVECTOR Q1Shuffle = Q1;
  131. // Shuffle the copies of Q1
  132. Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
  133. // Mul by Q1WZYX
  134. Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
  135. Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
  136. // Flip the signs on y and z
  137. Q2X = _mm_mul_ps(Q2X,ControlWZYX);
  138. // Mul by Q1ZWXY
  139. Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
  140. Q1Shuffle = _mm_shuffle_ps(Q1Shuffle,Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
  141. // Flip the signs on z and w
  142. Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
  143. // Mul by Q1YXWZ
  144. Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
  145. vResult = _mm_add_ps(vResult,Q2X);
  146. // Flip the signs on x and w
  147. Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
  148. Q2Y = _mm_add_ps(Q2Y,Q2Z);
  149. vResult = _mm_add_ps(vResult,Q2Y);
  150. return vResult;
  151. #else // _XM_VMX128_INTRINSICS_
  152. #endif // _XM_VMX128_INTRINSICS_
  153. }
  154. //------------------------------------------------------------------------------
  155. XMFINLINE XMVECTOR XMQuaternionLengthSq
  156. (
  157. FXMVECTOR Q
  158. )
  159. {
  160. return XMVector4LengthSq(Q);
  161. }
  162. //------------------------------------------------------------------------------
  163. XMFINLINE XMVECTOR XMQuaternionReciprocalLength
  164. (
  165. FXMVECTOR Q
  166. )
  167. {
  168. return XMVector4ReciprocalLength(Q);
  169. }
  170. //------------------------------------------------------------------------------
  171. XMFINLINE XMVECTOR XMQuaternionLength
  172. (
  173. FXMVECTOR Q
  174. )
  175. {
  176. return XMVector4Length(Q);
  177. }
  178. //------------------------------------------------------------------------------
  179. XMFINLINE XMVECTOR XMQuaternionNormalizeEst
  180. (
  181. FXMVECTOR Q
  182. )
  183. {
  184. return XMVector4NormalizeEst(Q);
  185. }
  186. //------------------------------------------------------------------------------
  187. XMFINLINE XMVECTOR XMQuaternionNormalize
  188. (
  189. FXMVECTOR Q
  190. )
  191. {
  192. return XMVector4Normalize(Q);
  193. }
  194. //------------------------------------------------------------------------------
  195. XMFINLINE XMVECTOR XMQuaternionConjugate
  196. (
  197. FXMVECTOR Q
  198. )
  199. {
  200. #if defined(_XM_NO_INTRINSICS_)
  201. XMVECTOR Result = {
  202. -Q.vector4_f32[0],
  203. -Q.vector4_f32[1],
  204. -Q.vector4_f32[2],
  205. Q.vector4_f32[3]
  206. };
  207. return Result;
  208. #elif defined(_XM_SSE_INTRINSICS_)
  209. static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
  210. XMVECTOR Result = _mm_mul_ps(Q,NegativeOne3);
  211. return Result;
  212. #else // _XM_VMX128_INTRINSICS_
  213. #endif // _XM_VMX128_INTRINSICS_
  214. }
  215. //------------------------------------------------------------------------------
  216. XMFINLINE XMVECTOR XMQuaternionInverse
  217. (
  218. FXMVECTOR Q
  219. )
  220. {
  221. #if defined(_XM_NO_INTRINSICS_)
  222. XMVECTOR Conjugate;
  223. XMVECTOR L;
  224. XMVECTOR Control;
  225. XMVECTOR Result;
  226. CONST XMVECTOR Zero = XMVectorZero();
  227. L = XMVector4LengthSq(Q);
  228. Conjugate = XMQuaternionConjugate(Q);
  229. Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
  230. L = XMVectorReciprocal(L);
  231. Result = XMVectorMultiply(Conjugate, L);
  232. Result = XMVectorSelect(Result, Zero, Control);
  233. return Result;
  234. #elif defined(_XM_SSE_INTRINSICS_)
  235. XMVECTOR Conjugate;
  236. XMVECTOR L;
  237. XMVECTOR Control;
  238. XMVECTOR Result;
  239. XMVECTOR Zero = XMVectorZero();
  240. L = XMVector4LengthSq(Q);
  241. Conjugate = XMQuaternionConjugate(Q);
  242. Control = XMVectorLessOrEqual(L, g_XMEpsilon);
  243. Result = _mm_div_ps(Conjugate,L);
  244. Result = XMVectorSelect(Result, Zero, Control);
  245. return Result;
  246. #else // _XM_VMX128_INTRINSICS_
  247. #endif // _XM_VMX128_INTRINSICS_
  248. }
  249. //------------------------------------------------------------------------------
  250. XMFINLINE XMVECTOR XMQuaternionLn
  251. (
  252. FXMVECTOR Q
  253. )
  254. {
  255. #if defined(_XM_NO_INTRINSICS_)
  256. XMVECTOR Q0;
  257. XMVECTOR QW;
  258. XMVECTOR Theta;
  259. XMVECTOR SinTheta;
  260. XMVECTOR S;
  261. XMVECTOR ControlW;
  262. XMVECTOR Result;
  263. static CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
  264. QW = XMVectorSplatW(Q);
  265. Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
  266. ControlW = XMVectorInBounds(QW, OneMinusEpsilon);
  267. Theta = XMVectorACos(QW);
  268. SinTheta = XMVectorSin(Theta);
  269. S = XMVectorReciprocal(SinTheta);
  270. S = XMVectorMultiply(Theta, S);
  271. Result = XMVectorMultiply(Q0, S);
  272. Result = XMVectorSelect(Q0, Result, ControlW);
  273. return Result;
  274. #elif defined(_XM_SSE_INTRINSICS_)
  275. static CONST XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
  276. static CONST XMVECTORF32 NegOneMinusEpsilon = {-(1.0f - 0.00001f), -(1.0f - 0.00001f),-(1.0f - 0.00001f),-(1.0f - 0.00001f)};
  277. // Get W only
  278. XMVECTOR QW = _mm_shuffle_ps(Q,Q,_MM_SHUFFLE(3,3,3,3));
  279. // W = 0
  280. XMVECTOR Q0 = _mm_and_ps(Q,g_XMMask3);
  281. // Use W if within bounds
  282. XMVECTOR ControlW = _mm_cmple_ps(QW,OneMinusEpsilon);
  283. XMVECTOR vTemp2 = _mm_cmpge_ps(QW,NegOneMinusEpsilon);
  284. ControlW = _mm_and_ps(ControlW,vTemp2);
  285. // Get theta
  286. XMVECTOR vTheta = XMVectorACos(QW);
  287. // Get Sine of theta
  288. vTemp2 = XMVectorSin(vTheta);
  289. // theta/sine of theta
  290. vTheta = _mm_div_ps(vTheta,vTemp2);
  291. // Here's the answer
  292. vTheta = _mm_mul_ps(vTheta,Q0);
  293. // Was W in bounds? If not, return input as is
  294. vTheta = XMVectorSelect(Q0,vTheta,ControlW);
  295. return vTheta;
  296. #else // _XM_VMX128_INTRINSICS_
  297. #endif // _XM_VMX128_INTRINSICS_
  298. }
  299. //------------------------------------------------------------------------------
  300. XMFINLINE XMVECTOR XMQuaternionExp
  301. (
  302. FXMVECTOR Q
  303. )
  304. {
  305. #if defined(_XM_NO_INTRINSICS_)
  306. XMVECTOR Theta;
  307. XMVECTOR SinTheta;
  308. XMVECTOR CosTheta;
  309. XMVECTOR S;
  310. XMVECTOR Control;
  311. XMVECTOR Zero;
  312. XMVECTOR Result;
  313. Theta = XMVector3Length(Q);
  314. XMVectorSinCos(&SinTheta, &CosTheta, Theta);
  315. S = XMVectorReciprocal(Theta);
  316. S = XMVectorMultiply(SinTheta, S);
  317. Result = XMVectorMultiply(Q, S);
  318. Zero = XMVectorZero();
  319. Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
  320. Result = XMVectorSelect(Result, Q, Control);
  321. Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
  322. return Result;
  323. #elif defined(_XM_SSE_INTRINSICS_)
  324. XMVECTOR Theta;
  325. XMVECTOR SinTheta;
  326. XMVECTOR CosTheta;
  327. XMVECTOR S;
  328. XMVECTOR Control;
  329. XMVECTOR Zero;
  330. XMVECTOR Result;
  331. Theta = XMVector3Length(Q);
  332. XMVectorSinCos(&SinTheta, &CosTheta, Theta);
  333. S = _mm_div_ps(SinTheta,Theta);
  334. Result = _mm_mul_ps(Q, S);
  335. Zero = XMVectorZero();
  336. Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon);
  337. Result = XMVectorSelect(Result,Q,Control);
  338. Result = _mm_and_ps(Result,g_XMMask3);
  339. CosTheta = _mm_and_ps(CosTheta,g_XMMaskW);
  340. Result = _mm_or_ps(Result,CosTheta);
  341. return Result;
  342. #else // _XM_VMX128_INTRINSICS_
  343. #endif // _XM_VMX128_INTRINSICS_
  344. }
  345. //------------------------------------------------------------------------------
  346. XMINLINE XMVECTOR XMQuaternionSlerp
  347. (
  348. FXMVECTOR Q0,
  349. FXMVECTOR Q1,
  350. FLOAT t
  351. )
  352. {
  353. XMVECTOR T = XMVectorReplicate(t);
  354. return XMQuaternionSlerpV(Q0, Q1, T);
  355. }
  356. //------------------------------------------------------------------------------
  357. XMINLINE XMVECTOR XMQuaternionSlerpV
  358. (
  359. FXMVECTOR Q0,
  360. FXMVECTOR Q1,
  361. FXMVECTOR T
  362. )
  363. {
  364. #if defined(_XM_NO_INTRINSICS_)
  365. // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
  366. XMVECTOR Omega;
  367. XMVECTOR CosOmega;
  368. XMVECTOR SinOmega;
  369. XMVECTOR InvSinOmega;
  370. XMVECTOR V01;
  371. XMVECTOR C1000;
  372. XMVECTOR SignMask;
  373. XMVECTOR S0;
  374. XMVECTOR S1;
  375. XMVECTOR Sign;
  376. XMVECTOR Control;
  377. XMVECTOR Result;
  378. XMVECTOR Zero;
  379. CONST XMVECTOR OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
  380. XMASSERT((T.vector4_f32[1] == T.vector4_f32[0]) && (T.vector4_f32[2] == T.vector4_f32[0]) && (T.vector4_f32[3] == T.vector4_f32[0]));
  381. CosOmega = XMQuaternionDot(Q0, Q1);
  382. Zero = XMVectorZero();
  383. Control = XMVectorLess(CosOmega, Zero);
  384. Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
  385. CosOmega = XMVectorMultiply(CosOmega, Sign);
  386. Control = XMVectorLess(CosOmega, OneMinusEpsilon);
  387. SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
  388. SinOmega = XMVectorSqrt(SinOmega);
  389. Omega = XMVectorATan2(SinOmega, CosOmega);
  390. SignMask = XMVectorSplatSignMask();
  391. C1000 = XMVectorSetBinaryConstant(1, 0, 0, 0);
  392. V01 = XMVectorShiftLeft(T, Zero, 2);
  393. SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
  394. V01 = XMVectorXorInt(V01, SignMask);
  395. V01 = XMVectorAdd(C1000, V01);
  396. InvSinOmega = XMVectorReciprocal(SinOmega);
  397. S0 = XMVectorMultiply(V01, Omega);
  398. S0 = XMVectorSin(S0);
  399. S0 = XMVectorMultiply(S0, InvSinOmega);
  400. S0 = XMVectorSelect(V01, S0, Control);
  401. S1 = XMVectorSplatY(S0);
  402. S0 = XMVectorSplatX(S0);
  403. S1 = XMVectorMultiply(S1, Sign);
  404. Result = XMVectorMultiply(Q0, S0);
  405. Result = XMVectorMultiplyAdd(Q1, S1, Result);
  406. return Result;
  407. #elif defined(_XM_SSE_INTRINSICS_)
  408. // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
  409. XMVECTOR Omega;
  410. XMVECTOR CosOmega;
  411. XMVECTOR SinOmega;
  412. XMVECTOR V01;
  413. XMVECTOR S0;
  414. XMVECTOR S1;
  415. XMVECTOR Sign;
  416. XMVECTOR Control;
  417. XMVECTOR Result;
  418. XMVECTOR Zero;
  419. static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
  420. static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
  421. static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
  422. XMASSERT((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
  423. CosOmega = XMQuaternionDot(Q0, Q1);
  424. Zero = XMVectorZero();
  425. Control = XMVectorLess(CosOmega, Zero);
  426. Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
  427. CosOmega = _mm_mul_ps(CosOmega, Sign);
  428. Control = XMVectorLess(CosOmega, OneMinusEpsilon);
  429. SinOmega = _mm_mul_ps(CosOmega,CosOmega);
  430. SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
  431. SinOmega = _mm_sqrt_ps(SinOmega);
  432. Omega = XMVectorATan2(SinOmega, CosOmega);
  433. V01 = _mm_shuffle_ps(T,T,_MM_SHUFFLE(2,3,0,1));
  434. V01 = _mm_and_ps(V01,MaskXY);
  435. V01 = _mm_xor_ps(V01,SignMask2);
  436. V01 = _mm_add_ps(g_XMIdentityR0, V01);
  437. S0 = _mm_mul_ps(V01, Omega);
  438. S0 = XMVectorSin(S0);
  439. S0 = _mm_div_ps(S0, SinOmega);
  440. S0 = XMVectorSelect(V01, S0, Control);
  441. S1 = XMVectorSplatY(S0);
  442. S0 = XMVectorSplatX(S0);
  443. S1 = _mm_mul_ps(S1, Sign);
  444. Result = _mm_mul_ps(Q0, S0);
  445. S1 = _mm_mul_ps(S1, Q1);
  446. Result = _mm_add_ps(Result,S1);
  447. return Result;
  448. #else // _XM_VMX128_INTRINSICS_
  449. #endif // _XM_VMX128_INTRINSICS_
  450. }
  451. //------------------------------------------------------------------------------
  452. XMFINLINE XMVECTOR XMQuaternionSquad
  453. (
  454. FXMVECTOR Q0,
  455. FXMVECTOR Q1,
  456. FXMVECTOR Q2,
  457. CXMVECTOR Q3,
  458. FLOAT t
  459. )
  460. {
  461. XMVECTOR T = XMVectorReplicate(t);
  462. return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
  463. }
  464. //------------------------------------------------------------------------------
  465. XMFINLINE XMVECTOR XMQuaternionSquadV
  466. (
  467. FXMVECTOR Q0,
  468. FXMVECTOR Q1,
  469. FXMVECTOR Q2,
  470. CXMVECTOR Q3,
  471. CXMVECTOR T
  472. )
  473. {
  474. XMVECTOR Q03;
  475. XMVECTOR Q12;
  476. XMVECTOR TP;
  477. XMVECTOR Two;
  478. XMVECTOR Result;
  479. XMASSERT( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
  480. TP = T;
  481. Two = XMVectorSplatConstant(2, 0);
  482. Q03 = XMQuaternionSlerpV(Q0, Q3, T);
  483. Q12 = XMQuaternionSlerpV(Q1, Q2, T);
  484. TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
  485. TP = XMVectorMultiply(TP, Two);
  486. Result = XMQuaternionSlerpV(Q03, Q12, TP);
  487. return Result;
  488. }
  489. //------------------------------------------------------------------------------
  490. XMINLINE VOID XMQuaternionSquadSetup
  491. (
  492. XMVECTOR* pA,
  493. XMVECTOR* pB,
  494. XMVECTOR* pC,
  495. FXMVECTOR Q0,
  496. FXMVECTOR Q1,
  497. FXMVECTOR Q2,
  498. CXMVECTOR Q3
  499. )
  500. {
  501. XMVECTOR SQ0, SQ2, SQ3;
  502. XMVECTOR InvQ1, InvQ2;
  503. XMVECTOR LnQ0, LnQ1, LnQ2, LnQ3;
  504. XMVECTOR ExpQ02, ExpQ13;
  505. XMVECTOR LS01, LS12, LS23;
  506. XMVECTOR LD01, LD12, LD23;
  507. XMVECTOR Control0, Control1, Control2;
  508. XMVECTOR NegativeOneQuarter;
  509. XMASSERT(pA);
  510. XMASSERT(pB);
  511. XMASSERT(pC);
  512. LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
  513. LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
  514. SQ2 = XMVectorNegate(Q2);
  515. Control1 = XMVectorLess(LS12, LD12);
  516. SQ2 = XMVectorSelect(Q2, SQ2, Control1);
  517. LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
  518. LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
  519. SQ0 = XMVectorNegate(Q0);
  520. LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
  521. LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
  522. SQ3 = XMVectorNegate(Q3);
  523. Control0 = XMVectorLess(LS01, LD01);
  524. Control2 = XMVectorLess(LS23, LD23);
  525. SQ0 = XMVectorSelect(Q0, SQ0, Control0);
  526. SQ3 = XMVectorSelect(Q3, SQ3, Control2);
  527. InvQ1 = XMQuaternionInverse(Q1);
  528. InvQ2 = XMQuaternionInverse(SQ2);
  529. LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
  530. LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
  531. LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
  532. LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
  533. NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
  534. ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
  535. ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
  536. ExpQ02 = XMQuaternionExp(ExpQ02);
  537. ExpQ13 = XMQuaternionExp(ExpQ13);
  538. *pA = XMQuaternionMultiply(Q1, ExpQ02);
  539. *pB = XMQuaternionMultiply(SQ2, ExpQ13);
  540. *pC = SQ2;
  541. }
  542. //------------------------------------------------------------------------------
  543. XMFINLINE XMVECTOR XMQuaternionBaryCentric
  544. (
  545. FXMVECTOR Q0,
  546. FXMVECTOR Q1,
  547. FXMVECTOR Q2,
  548. FLOAT f,
  549. FLOAT g
  550. )
  551. {
  552. XMVECTOR Q01;
  553. XMVECTOR Q02;
  554. FLOAT s;
  555. XMVECTOR Result;
  556. s = f + g;
  557. if ((s < 0.00001f) && (s > -0.00001f))
  558. {
  559. Result = Q0;
  560. }
  561. else
  562. {
  563. Q01 = XMQuaternionSlerp(Q0, Q1, s);
  564. Q02 = XMQuaternionSlerp(Q0, Q2, s);
  565. Result = XMQuaternionSlerp(Q01, Q02, g / s);
  566. }
  567. return Result;
  568. }
  569. //------------------------------------------------------------------------------
  570. XMFINLINE XMVECTOR XMQuaternionBaryCentricV
  571. (
  572. FXMVECTOR Q0,
  573. FXMVECTOR Q1,
  574. FXMVECTOR Q2,
  575. CXMVECTOR F,
  576. CXMVECTOR G
  577. )
  578. {
  579. XMVECTOR Q01;
  580. XMVECTOR Q02;
  581. XMVECTOR S, GS;
  582. XMVECTOR Epsilon;
  583. XMVECTOR Result;
  584. XMASSERT( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
  585. XMASSERT( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
  586. Epsilon = XMVectorSplatConstant(1, 16);
  587. S = XMVectorAdd(F, G);
  588. if (XMVector4InBounds(S, Epsilon))
  589. {
  590. Result = Q0;
  591. }
  592. else
  593. {
  594. Q01 = XMQuaternionSlerpV(Q0, Q1, S);
  595. Q02 = XMQuaternionSlerpV(Q0, Q2, S);
  596. GS = XMVectorReciprocal(S);
  597. GS = XMVectorMultiply(G, GS);
  598. Result = XMQuaternionSlerpV(Q01, Q02, GS);
  599. }
  600. return Result;
  601. }
  602. //------------------------------------------------------------------------------
  603. // Transformation operations
  604. //------------------------------------------------------------------------------
  605. //------------------------------------------------------------------------------
  606. XMFINLINE XMVECTOR XMQuaternionIdentity()
  607. {
  608. #if defined(_XM_NO_INTRINSICS_)
  609. return g_XMIdentityR3.v;
  610. #elif defined(_XM_SSE_INTRINSICS_)
  611. return g_XMIdentityR3;
  612. #else // _XM_VMX128_INTRINSICS_
  613. #endif // _XM_VMX128_INTRINSICS_
  614. }
  615. //------------------------------------------------------------------------------
  616. XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYaw
  617. (
  618. FLOAT Pitch,
  619. FLOAT Yaw,
  620. FLOAT Roll
  621. )
  622. {
  623. XMVECTOR Angles;
  624. XMVECTOR Q;
  625. Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
  626. Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
  627. return Q;
  628. }
  629. //------------------------------------------------------------------------------
  630. XMFINLINE XMVECTOR XMQuaternionRotationRollPitchYawFromVector
  631. (
  632. FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
  633. )
  634. {
  635. #if defined(_XM_NO_INTRINSICS_)
  636. XMVECTOR Q, Q0, Q1;
  637. XMVECTOR P0, P1, Y0, Y1, R0, R1;
  638. XMVECTOR HalfAngles;
  639. XMVECTOR SinAngles, CosAngles;
  640. static CONST XMVECTORU32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
  641. static CONST XMVECTORU32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
  642. static CONST XMVECTORU32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
  643. static CONST XMVECTOR Sign = {1.0f, -1.0f, -1.0f, 1.0f};
  644. HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
  645. XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
  646. P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch.v);
  647. Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw.v);
  648. R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll.v);
  649. P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch.v);
  650. Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw.v);
  651. R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll.v);
  652. Q1 = XMVectorMultiply(P1, Sign);
  653. Q0 = XMVectorMultiply(P0, Y0);
  654. Q1 = XMVectorMultiply(Q1, Y1);
  655. Q0 = XMVectorMultiply(Q0, R0);
  656. Q = XMVectorMultiplyAdd(Q1, R1, Q0);
  657. return Q;
  658. #elif defined(_XM_SSE_INTRINSICS_)
  659. XMVECTOR Q, Q0, Q1;
  660. XMVECTOR P0, P1, Y0, Y1, R0, R1;
  661. XMVECTOR HalfAngles;
  662. XMVECTOR SinAngles, CosAngles;
  663. static CONST XMVECTORI32 ControlPitch = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X};
  664. static CONST XMVECTORI32 ControlYaw = {XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y};
  665. static CONST XMVECTORI32 ControlRoll = {XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z};
  666. static CONST XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
  667. HalfAngles = _mm_mul_ps(Angles, g_XMOneHalf);
  668. XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
  669. P0 = XMVectorPermute(SinAngles, CosAngles, ControlPitch);
  670. Y0 = XMVectorPermute(SinAngles, CosAngles, ControlYaw);
  671. R0 = XMVectorPermute(SinAngles, CosAngles, ControlRoll);
  672. P1 = XMVectorPermute(CosAngles, SinAngles, ControlPitch);
  673. Y1 = XMVectorPermute(CosAngles, SinAngles, ControlYaw);
  674. R1 = XMVectorPermute(CosAngles, SinAngles, ControlRoll);
  675. Q1 = _mm_mul_ps(P1, Sign);
  676. Q0 = _mm_mul_ps(P0, Y0);
  677. Q1 = _mm_mul_ps(Q1, Y1);
  678. Q0 = _mm_mul_ps(Q0, R0);
  679. Q = _mm_mul_ps(Q1, R1);
  680. Q = _mm_add_ps(Q,Q0);
  681. return Q;
  682. #else // _XM_VMX128_INTRINSICS_
  683. #endif // _XM_VMX128_INTRINSICS_
  684. }
  685. //------------------------------------------------------------------------------
  686. XMFINLINE XMVECTOR XMQuaternionRotationNormal
  687. (
  688. FXMVECTOR NormalAxis,
  689. FLOAT Angle
  690. )
  691. {
  692. #if defined(_XM_NO_INTRINSICS_)
  693. XMVECTOR Q;
  694. XMVECTOR N;
  695. XMVECTOR Scale;
  696. N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
  697. XMScalarSinCos(&Scale.vector4_f32[2], &Scale.vector4_f32[3], 0.5f * Angle);
  698. Scale.vector4_f32[0] = Scale.vector4_f32[1] = Scale.vector4_f32[2];
  699. Q = XMVectorMultiply(N, Scale);
  700. return Q;
  701. #elif defined(_XM_SSE_INTRINSICS_)
  702. XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
  703. N = _mm_or_ps(N,g_XMIdentityR3);
  704. XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
  705. XMVECTOR vSine;
  706. XMVECTOR vCosine;
  707. XMVectorSinCos(&vSine,&vCosine,Scale);
  708. Scale = _mm_and_ps(vSine,g_XMMask3);
  709. vCosine = _mm_and_ps(vCosine,g_XMMaskW);
  710. Scale = _mm_or_ps(Scale,vCosine);
  711. N = _mm_mul_ps(N,Scale);
  712. return N;
  713. #else // _XM_VMX128_INTRINSICS_
  714. #endif // _XM_VMX128_INTRINSICS_
  715. }
  716. //------------------------------------------------------------------------------
  717. XMFINLINE XMVECTOR XMQuaternionRotationAxis
  718. (
  719. FXMVECTOR Axis,
  720. FLOAT Angle
  721. )
  722. {
  723. #if defined(_XM_NO_INTRINSICS_)
  724. XMVECTOR Normal;
  725. XMVECTOR Q;
  726. XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
  727. XMASSERT(!XMVector3IsInfinite(Axis));
  728. Normal = XMVector3Normalize(Axis);
  729. Q = XMQuaternionRotationNormal(Normal, Angle);
  730. return Q;
  731. #elif defined(_XM_SSE_INTRINSICS_)
  732. XMVECTOR Normal;
  733. XMVECTOR Q;
  734. XMASSERT(!XMVector3Equal(Axis, XMVectorZero()));
  735. XMASSERT(!XMVector3IsInfinite(Axis));
  736. Normal = XMVector3Normalize(Axis);
  737. Q = XMQuaternionRotationNormal(Normal, Angle);
  738. return Q;
  739. #else // _XM_VMX128_INTRINSICS_
  740. #endif // _XM_VMX128_INTRINSICS_
  741. }
  742. //------------------------------------------------------------------------------
  743. XMINLINE XMVECTOR XMQuaternionRotationMatrix
  744. (
  745. CXMMATRIX M
  746. )
  747. {
  748. #if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
  749. XMVECTOR Q0, Q1, Q2;
  750. XMVECTOR M00, M11, M22;
  751. XMVECTOR CQ0, CQ1, C;
  752. XMVECTOR CX, CY, CZ, CW;
  753. XMVECTOR SQ1, Scale;
  754. XMVECTOR Rsq, Sqrt, VEqualsNaN;
  755. XMVECTOR A, B, P;
  756. XMVECTOR PermuteSplat, PermuteSplatT;
  757. XMVECTOR SignB, SignBT;
  758. XMVECTOR PermuteControl, PermuteControlT;
  759. XMVECTOR Result;
  760. static CONST XMVECTORF32 OneQuarter = {0.25f, 0.25f, 0.25f, 0.25f};
  761. static CONST XMVECTORF32 SignPNNP = {1.0f, -1.0f, -1.0f, 1.0f};
  762. static CONST XMVECTORF32 SignNPNP = {-1.0f, 1.0f, -1.0f, 1.0f};
  763. static CONST XMVECTORF32 SignNNPP = {-1.0f, -1.0f, 1.0f, 1.0f};
  764. static CONST XMVECTORF32 SignPNPP = {1.0f, -1.0f, 1.0f, 1.0f};
  765. static CONST XMVECTORF32 SignPPNP = {1.0f, 1.0f, -1.0f, 1.0f};
  766. static CONST XMVECTORF32 SignNPPP = {-1.0f, 1.0f, 1.0f, 1.0f};
  767. static CONST XMVECTORU32 Permute0X0X0Y0W = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_0W};
  768. static CONST XMVECTORU32 Permute0Y0Z0Z1W = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_1W};
  769. static CONST XMVECTORU32 SplatX = {XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
  770. static CONST XMVECTORU32 SplatY = {XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
  771. static CONST XMVECTORU32 SplatZ = {XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Z};
  772. static CONST XMVECTORU32 SplatW = {XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W, XM_PERMUTE_0W};
  773. static CONST XMVECTORU32 PermuteC = {XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Y};
  774. static CONST XMVECTORU32 PermuteA = {XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0W};
  775. static CONST XMVECTORU32 PermuteB = {XM_PERMUTE_1X, XM_PERMUTE_1W, XM_PERMUTE_0Z, XM_PERMUTE_0W};
  776. static CONST XMVECTORU32 Permute0 = {XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Z, XM_PERMUTE_1Y};
  777. static CONST XMVECTORU32 Permute1 = {XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Z};
  778. static CONST XMVECTORU32 Permute2 = {XM_PERMUTE_1Z, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1X};
  779. static CONST XMVECTORU32 Permute3 = {XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_1X, XM_PERMUTE_0W};
  780. M00 = XMVectorSplatX(M.r[0]);
  781. M11 = XMVectorSplatY(M.r[1]);
  782. M22 = XMVectorSplatZ(M.r[2]);
  783. Q0 = XMVectorMultiply(SignPNNP.v, M00);
  784. Q0 = XMVectorMultiplyAdd(SignNPNP.v, M11, Q0);
  785. Q0 = XMVectorMultiplyAdd(SignNNPP.v, M22, Q0);
  786. Q1 = XMVectorAdd(Q0, g_XMOne.v);
  787. Rsq = XMVectorReciprocalSqrt(Q1);
  788. VEqualsNaN = XMVectorIsNaN(Rsq);
  789. Sqrt = XMVectorMultiply(Q1, Rsq);
  790. Q1 = XMVectorSelect(Sqrt, Q1, VEqualsNaN);
  791. Q1 = XMVectorMultiply(Q1, g_XMOneHalf.v);
  792. SQ1 = XMVectorMultiply(Rsq, g_XMOneHalf.v);
  793. CQ0 = XMVectorPermute(Q0, Q0, Permute0X0X0Y0W.v);
  794. CQ1 = XMVectorPermute(Q0, g_XMEpsilon.v, Permute0Y0Z0Z1W.v);
  795. C = XMVectorGreaterOrEqual(CQ0, CQ1);
  796. CX = XMVectorSplatX(C);
  797. CY = XMVectorSplatY(C);
  798. CZ = XMVectorSplatZ(C);
  799. CW = XMVectorSplatW(C);
  800. PermuteSplat = XMVectorSelect(SplatZ.v, SplatY.v, CZ);
  801. SignB = XMVectorSelect(SignNPPP.v, SignPPNP.v, CZ);
  802. PermuteControl = XMVectorSelect(Permute2.v, Permute1.v, CZ);
  803. PermuteSplat = XMVectorSelect(PermuteSplat, SplatZ.v, CX);
  804. SignB = XMVectorSelect(SignB, SignNPPP.v, CX);
  805. PermuteControl = XMVectorSelect(PermuteControl, Permute2.v, CX);
  806. PermuteSplatT = XMVectorSelect(PermuteSplat,SplatX.v, CY);
  807. SignBT = XMVectorSelect(SignB, SignPNPP.v, CY);
  808. PermuteControlT = XMVectorSelect(PermuteControl,Permute0.v, CY);
  809. PermuteSplat = XMVectorSelect(PermuteSplat, PermuteSplatT, CX);
  810. SignB = XMVectorSelect(SignB, SignBT, CX);
  811. PermuteControl = XMVectorSelect(PermuteControl, PermuteControlT, CX);
  812. PermuteSplat = XMVectorSelect(PermuteSplat,SplatW.v, CW);
  813. SignB = XMVectorSelect(SignB, g_XMNegativeOne.v, CW);
  814. PermuteControl = XMVectorSelect(PermuteControl,Permute3.v, CW);
  815. Scale = XMVectorPermute(SQ1, SQ1, PermuteSplat);
  816. P = XMVectorPermute(M.r[1], M.r[2],PermuteC.v); // {M10, M12, M20, M21}
  817. A = XMVectorPermute(M.r[0], P, PermuteA.v); // {M01, M12, M20, M03}
  818. B = XMVectorPermute(M.r[0], P, PermuteB.v); // {M10, M21, M02, M03}
  819. Q2 = XMVectorMultiplyAdd(SignB, B, A);
  820. Q2 = XMVectorMultiply(Q2, Scale);
  821. Result = XMVectorPermute(Q1, Q2, PermuteControl);
  822. return Result;
  823. #else // _XM_VMX128_INTRINSICS_
  824. #endif // _XM_VMX128_INTRINSICS_
  825. }
  826. //------------------------------------------------------------------------------
  827. // Conversion operations
  828. //------------------------------------------------------------------------------
  829. //------------------------------------------------------------------------------
  830. XMFINLINE VOID XMQuaternionToAxisAngle
  831. (
  832. XMVECTOR* pAxis,
  833. FLOAT* pAngle,
  834. FXMVECTOR Q
  835. )
  836. {
  837. XMASSERT(pAxis);
  838. XMASSERT(pAngle);
  839. *pAxis = Q;
  840. #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
  841. *pAngle = 2.0f * acosf(XMVectorGetW(Q));
  842. #else
  843. *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
  844. #endif
  845. }
  846. /****************************************************************************
  847. *
  848. * Plane
  849. *
  850. ****************************************************************************/
  851. //------------------------------------------------------------------------------
  852. // Comparison operations
  853. //------------------------------------------------------------------------------
  854. //------------------------------------------------------------------------------
  855. XMFINLINE BOOL XMPlaneEqual
  856. (
  857. FXMVECTOR P1,
  858. FXMVECTOR P2
  859. )
  860. {
  861. return XMVector4Equal(P1, P2);
  862. }
  863. //------------------------------------------------------------------------------
  864. XMFINLINE BOOL XMPlaneNearEqual
  865. (
  866. FXMVECTOR P1,
  867. FXMVECTOR P2,
  868. FXMVECTOR Epsilon
  869. )
  870. {
  871. XMVECTOR NP1 = XMPlaneNormalize(P1);
  872. XMVECTOR NP2 = XMPlaneNormalize(P2);
  873. return XMVector4NearEqual(NP1, NP2, Epsilon);
  874. }
  875. //------------------------------------------------------------------------------
  876. XMFINLINE BOOL XMPlaneNotEqual
  877. (
  878. FXMVECTOR P1,
  879. FXMVECTOR P2
  880. )
  881. {
  882. return XMVector4NotEqual(P1, P2);
  883. }
  884. //------------------------------------------------------------------------------
  885. XMFINLINE BOOL XMPlaneIsNaN
  886. (
  887. FXMVECTOR P
  888. )
  889. {
  890. return XMVector4IsNaN(P);
  891. }
  892. //------------------------------------------------------------------------------
  893. XMFINLINE BOOL XMPlaneIsInfinite
  894. (
  895. FXMVECTOR P
  896. )
  897. {
  898. return XMVector4IsInfinite(P);
  899. }
  900. //------------------------------------------------------------------------------
  901. // Computation operations
  902. //------------------------------------------------------------------------------
  903. //------------------------------------------------------------------------------
  904. XMFINLINE XMVECTOR XMPlaneDot
  905. (
  906. FXMVECTOR P,
  907. FXMVECTOR V
  908. )
  909. {
  910. #if defined(_XM_NO_INTRINSICS_)
  911. return XMVector4Dot(P, V);
  912. #elif defined(_XM_SSE_INTRINSICS_)
  913. __m128 vTemp2 = V;
  914. __m128 vTemp = _mm_mul_ps(P,vTemp2);
  915. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
  916. vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
  917. vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
  918. vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
  919. return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
  920. #else // _XM_VMX128_INTRINSICS_
  921. #endif // _XM_VMX128_INTRINSICS_
  922. }
  923. //------------------------------------------------------------------------------
  924. XMFINLINE XMVECTOR XMPlaneDotCoord
  925. (
  926. FXMVECTOR P,
  927. FXMVECTOR V
  928. )
  929. {
  930. #if defined(_XM_NO_INTRINSICS_)
  931. XMVECTOR V3;
  932. XMVECTOR Result;
  933. // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
  934. V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
  935. Result = XMVector4Dot(P, V3);
  936. return Result;
  937. #elif defined(_XM_SSE_INTRINSICS_)
  938. XMVECTOR vTemp2 = _mm_and_ps(V,g_XMMask3);
  939. vTemp2 = _mm_or_ps(vTemp2,g_XMIdentityR3);
  940. XMVECTOR vTemp = _mm_mul_ps(P,vTemp2);
  941. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
  942. vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
  943. vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
  944. vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
  945. return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
  946. #else // _XM_VMX128_INTRINSICS_
  947. #endif // _XM_VMX128_INTRINSICS_
  948. }
  949. //------------------------------------------------------------------------------
  950. XMFINLINE XMVECTOR XMPlaneDotNormal
  951. (
  952. FXMVECTOR P,
  953. FXMVECTOR V
  954. )
  955. {
  956. return XMVector3Dot(P, V);
  957. }
  958. //------------------------------------------------------------------------------
  959. // XMPlaneNormalizeEst uses a reciprocal estimate and
  960. // returns QNaN on zero and infinite vectors.
  961. XMFINLINE XMVECTOR XMPlaneNormalizeEst
  962. (
  963. FXMVECTOR P
  964. )
  965. {
  966. #if defined(_XM_NO_INTRINSICS_)
  967. XMVECTOR Result;
  968. Result = XMVector3ReciprocalLength(P);
  969. Result = XMVectorMultiply(P, Result);
  970. return Result;
  971. #elif defined(_XM_SSE_INTRINSICS_)
  972. // Perform the dot product
  973. XMVECTOR vDot = _mm_mul_ps(P,P);
  974. // x=Dot.y, y=Dot.z
  975. XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
  976. // Result.x = x+y
  977. vDot = _mm_add_ss(vDot,vTemp);
  978. // x=Dot.z
  979. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  980. // Result.x = (x+y)+z
  981. vDot = _mm_add_ss(vDot,vTemp);
  982. // Splat x
  983. vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
  984. // Get the reciprocal
  985. vDot = _mm_rsqrt_ps(vDot);
  986. // Get the reciprocal
  987. vDot = _mm_mul_ps(vDot,P);
  988. return vDot;
  989. #else // _XM_VMX128_INTRINSICS_
  990. #endif // _XM_VMX128_INTRINSICS_
  991. }
  992. //------------------------------------------------------------------------------
  993. XMFINLINE XMVECTOR XMPlaneNormalize
  994. (
  995. FXMVECTOR P
  996. )
  997. {
  998. #if defined(_XM_NO_INTRINSICS_)
  999. FLOAT fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
  1000. // Prevent divide by zero
  1001. if (fLengthSq) {
  1002. fLengthSq = 1.0f/fLengthSq;
  1003. }
  1004. {
  1005. XMVECTOR vResult = {
  1006. P.vector4_f32[0]*fLengthSq,
  1007. P.vector4_f32[1]*fLengthSq,
  1008. P.vector4_f32[2]*fLengthSq,
  1009. P.vector4_f32[3]*fLengthSq
  1010. };
  1011. return vResult;
  1012. }
  1013. #elif defined(_XM_SSE_INTRINSICS_)
  1014. // Perform the dot product on x,y and z only
  1015. XMVECTOR vLengthSq = _mm_mul_ps(P,P);
  1016. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
  1017. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  1018. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  1019. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  1020. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  1021. // Prepare for the division
  1022. XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
  1023. // Failsafe on zero (Or epsilon) length planes
  1024. // If the length is infinity, set the elements to zero
  1025. vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
  1026. // Reciprocal mul to perform the normalization
  1027. vResult = _mm_div_ps(P,vResult);
  1028. // Any that are infinity, set to zero
  1029. vResult = _mm_and_ps(vResult,vLengthSq);
  1030. return vResult;
  1031. #else // _XM_VMX128_INTRINSICS_
  1032. #endif // _XM_VMX128_INTRINSICS_
  1033. }
  1034. //------------------------------------------------------------------------------
  1035. XMFINLINE XMVECTOR XMPlaneIntersectLine
  1036. (
  1037. FXMVECTOR P,
  1038. FXMVECTOR LinePoint1,
  1039. FXMVECTOR LinePoint2
  1040. )
  1041. {
  1042. #if defined(_XM_NO_INTRINSICS_)
  1043. XMVECTOR V1;
  1044. XMVECTOR V2;
  1045. XMVECTOR D;
  1046. XMVECTOR ReciprocalD;
  1047. XMVECTOR VT;
  1048. XMVECTOR Point;
  1049. XMVECTOR Zero;
  1050. XMVECTOR Control;
  1051. XMVECTOR Result;
  1052. V1 = XMVector3Dot(P, LinePoint1);
  1053. V2 = XMVector3Dot(P, LinePoint2);
  1054. D = XMVectorSubtract(V1, V2);
  1055. ReciprocalD = XMVectorReciprocal(D);
  1056. VT = XMPlaneDotCoord(P, LinePoint1);
  1057. VT = XMVectorMultiply(VT, ReciprocalD);
  1058. Point = XMVectorSubtract(LinePoint2, LinePoint1);
  1059. Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
  1060. Zero = XMVectorZero();
  1061. Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
  1062. Result = XMVectorSelect(Point, g_XMQNaN.v, Control);
  1063. return Result;
  1064. #elif defined(_XM_SSE_INTRINSICS_)
  1065. XMVECTOR V1;
  1066. XMVECTOR V2;
  1067. XMVECTOR D;
  1068. XMVECTOR VT;
  1069. XMVECTOR Point;
  1070. XMVECTOR Zero;
  1071. XMVECTOR Control;
  1072. XMVECTOR Result;
  1073. V1 = XMVector3Dot(P, LinePoint1);
  1074. V2 = XMVector3Dot(P, LinePoint2);
  1075. D = _mm_sub_ps(V1, V2);
  1076. VT = XMPlaneDotCoord(P, LinePoint1);
  1077. VT = _mm_div_ps(VT, D);
  1078. Point = _mm_sub_ps(LinePoint2, LinePoint1);
  1079. Point = _mm_mul_ps(Point,VT);
  1080. Point = _mm_add_ps(Point,LinePoint1);
  1081. Zero = XMVectorZero();
  1082. Control = XMVectorNearEqual(D, Zero, g_XMEpsilon);
  1083. Result = XMVectorSelect(Point, g_XMQNaN, Control);
  1084. return Result;
  1085. #else // _XM_VMX128_INTRINSICS_
  1086. #endif // _XM_VMX128_INTRINSICS_
  1087. }
  1088. //------------------------------------------------------------------------------
  1089. XMINLINE VOID XMPlaneIntersectPlane
  1090. (
  1091. XMVECTOR* pLinePoint1,
  1092. XMVECTOR* pLinePoint2,
  1093. FXMVECTOR P1,
  1094. FXMVECTOR P2
  1095. )
  1096. {
  1097. #if defined(_XM_NO_INTRINSICS_)
  1098. XMVECTOR V1;
  1099. XMVECTOR V2;
  1100. XMVECTOR V3;
  1101. XMVECTOR LengthSq;
  1102. XMVECTOR RcpLengthSq;
  1103. XMVECTOR Point;
  1104. XMVECTOR P1W;
  1105. XMVECTOR P2W;
  1106. XMVECTOR Control;
  1107. XMVECTOR LinePoint1;
  1108. XMVECTOR LinePoint2;
  1109. XMASSERT(pLinePoint1);
  1110. XMASSERT(pLinePoint2);
  1111. V1 = XMVector3Cross(P2, P1);
  1112. LengthSq = XMVector3LengthSq(V1);
  1113. V2 = XMVector3Cross(P2, V1);
  1114. P1W = XMVectorSplatW(P1);
  1115. Point = XMVectorMultiply(V2, P1W);
  1116. V3 = XMVector3Cross(V1, P1);
  1117. P2W = XMVectorSplatW(P2);
  1118. Point = XMVectorMultiplyAdd(V3, P2W, Point);
  1119. RcpLengthSq = XMVectorReciprocal(LengthSq);
  1120. LinePoint1 = XMVectorMultiply(Point, RcpLengthSq);
  1121. LinePoint2 = XMVectorAdd(LinePoint1, V1);
  1122. Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
  1123. *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
  1124. *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
  1125. #elif defined(_XM_SSE_INTRINSICS_)
  1126. XMASSERT(pLinePoint1);
  1127. XMASSERT(pLinePoint2);
  1128. XMVECTOR V1;
  1129. XMVECTOR V2;
  1130. XMVECTOR V3;
  1131. XMVECTOR LengthSq;
  1132. XMVECTOR Point;
  1133. XMVECTOR P1W;
  1134. XMVECTOR P2W;
  1135. XMVECTOR Control;
  1136. XMVECTOR LinePoint1;
  1137. XMVECTOR LinePoint2;
  1138. V1 = XMVector3Cross(P2, P1);
  1139. LengthSq = XMVector3LengthSq(V1);
  1140. V2 = XMVector3Cross(P2, V1);
  1141. P1W = _mm_shuffle_ps(P1,P1,_MM_SHUFFLE(3,3,3,3));
  1142. Point = _mm_mul_ps(V2, P1W);
  1143. V3 = XMVector3Cross(V1, P1);
  1144. P2W = _mm_shuffle_ps(P2,P2,_MM_SHUFFLE(3,3,3,3));
  1145. V3 = _mm_mul_ps(V3,P2W);
  1146. Point = _mm_add_ps(Point,V3);
  1147. LinePoint1 = _mm_div_ps(Point,LengthSq);
  1148. LinePoint2 = _mm_add_ps(LinePoint1, V1);
  1149. Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon);
  1150. *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN, Control);
  1151. *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN, Control);
  1152. #else // _XM_VMX128_INTRINSICS_
  1153. #endif // _XM_VMX128_INTRINSICS_
  1154. }
  1155. //------------------------------------------------------------------------------
  1156. XMFINLINE XMVECTOR XMPlaneTransform
  1157. (
  1158. FXMVECTOR P,
  1159. CXMMATRIX M
  1160. )
  1161. {
  1162. #if defined(_XM_NO_INTRINSICS_)
  1163. XMVECTOR X;
  1164. XMVECTOR Y;
  1165. XMVECTOR Z;
  1166. XMVECTOR W;
  1167. XMVECTOR Result;
  1168. W = XMVectorSplatW(P);
  1169. Z = XMVectorSplatZ(P);
  1170. Y = XMVectorSplatY(P);
  1171. X = XMVectorSplatX(P);
  1172. Result = XMVectorMultiply(W, M.r[3]);
  1173. Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
  1174. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  1175. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  1176. return Result;
  1177. #elif defined(_XM_SSE_INTRINSICS_)
  1178. XMVECTOR X = _mm_shuffle_ps(P,P,_MM_SHUFFLE(0,0,0,0));
  1179. XMVECTOR Y = _mm_shuffle_ps(P,P,_MM_SHUFFLE(1,1,1,1));
  1180. XMVECTOR Z = _mm_shuffle_ps(P,P,_MM_SHUFFLE(2,2,2,2));
  1181. XMVECTOR W = _mm_shuffle_ps(P,P,_MM_SHUFFLE(3,3,3,3));
  1182. X = _mm_mul_ps(X, M.r[0]);
  1183. Y = _mm_mul_ps(Y, M.r[1]);
  1184. Z = _mm_mul_ps(Z, M.r[2]);
  1185. W = _mm_mul_ps(W, M.r[3]);
  1186. X = _mm_add_ps(X,Z);
  1187. Y = _mm_add_ps(Y,W);
  1188. X = _mm_add_ps(X,Y);
  1189. return X;
  1190. #else // _XM_VMX128_INTRINSICS_
  1191. #endif // _XM_VMX128_INTRINSICS_
  1192. }
  1193. //------------------------------------------------------------------------------
  1194. XMFINLINE XMFLOAT4* XMPlaneTransformStream
  1195. (
  1196. XMFLOAT4* pOutputStream,
  1197. UINT OutputStride,
  1198. CONST XMFLOAT4* pInputStream,
  1199. UINT InputStride,
  1200. UINT PlaneCount,
  1201. CXMMATRIX M
  1202. )
  1203. {
  1204. return XMVector4TransformStream(pOutputStream,
  1205. OutputStride,
  1206. pInputStream,
  1207. InputStride,
  1208. PlaneCount,
  1209. M);
  1210. }
  1211. //------------------------------------------------------------------------------
  1212. // Conversion operations
  1213. //------------------------------------------------------------------------------
  1214. //------------------------------------------------------------------------------
  1215. XMFINLINE XMVECTOR XMPlaneFromPointNormal
  1216. (
  1217. FXMVECTOR Point,
  1218. FXMVECTOR Normal
  1219. )
  1220. {
  1221. #if defined(_XM_NO_INTRINSICS_)
  1222. XMVECTOR W;
  1223. XMVECTOR Result;
  1224. W = XMVector3Dot(Point, Normal);
  1225. W = XMVectorNegate(W);
  1226. Result = XMVectorSelect(W, Normal, g_XMSelect1110.v);
  1227. return Result;
  1228. #elif defined(_XM_SSE_INTRINSICS_)
  1229. XMVECTOR W;
  1230. XMVECTOR Result;
  1231. W = XMVector3Dot(Point,Normal);
  1232. W = _mm_mul_ps(W,g_XMNegativeOne);
  1233. Result = _mm_and_ps(Normal,g_XMMask3);
  1234. W = _mm_and_ps(W,g_XMMaskW);
  1235. Result = _mm_or_ps(Result,W);
  1236. return Result;
  1237. #else // _XM_VMX128_INTRINSICS_
  1238. #endif // _XM_VMX128_INTRINSICS_
  1239. }
  1240. //------------------------------------------------------------------------------
  1241. XMFINLINE XMVECTOR XMPlaneFromPoints
  1242. (
  1243. FXMVECTOR Point1,
  1244. FXMVECTOR Point2,
  1245. FXMVECTOR Point3
  1246. )
  1247. {
  1248. #if defined(_XM_NO_INTRINSICS_)
  1249. XMVECTOR N;
  1250. XMVECTOR D;
  1251. XMVECTOR V21;
  1252. XMVECTOR V31;
  1253. XMVECTOR Result;
  1254. V21 = XMVectorSubtract(Point1, Point2);
  1255. V31 = XMVectorSubtract(Point1, Point3);
  1256. N = XMVector3Cross(V21, V31);
  1257. N = XMVector3Normalize(N);
  1258. D = XMPlaneDotNormal(N, Point1);
  1259. D = XMVectorNegate(D);
  1260. Result = XMVectorSelect(D, N, g_XMSelect1110.v);
  1261. return Result;
  1262. #elif defined(_XM_SSE_INTRINSICS_)
  1263. XMVECTOR N;
  1264. XMVECTOR D;
  1265. XMVECTOR V21;
  1266. XMVECTOR V31;
  1267. XMVECTOR Result;
  1268. V21 = _mm_sub_ps(Point1, Point2);
  1269. V31 = _mm_sub_ps(Point1, Point3);
  1270. N = XMVector3Cross(V21, V31);
  1271. N = XMVector3Normalize(N);
  1272. D = XMPlaneDotNormal(N, Point1);
  1273. D = _mm_mul_ps(D,g_XMNegativeOne);
  1274. N = _mm_and_ps(N,g_XMMask3);
  1275. D = _mm_and_ps(D,g_XMMaskW);
  1276. Result = _mm_or_ps(D,N);
  1277. return Result;
  1278. #else // _XM_VMX128_INTRINSICS_
  1279. #endif // _XM_VMX128_INTRINSICS_
  1280. }
  1281. /****************************************************************************
  1282. *
  1283. * Color
  1284. *
  1285. ****************************************************************************/
  1286. //------------------------------------------------------------------------------
  1287. // Comparison operations
  1288. //------------------------------------------------------------------------------
  1289. //------------------------------------------------------------------------------
  1290. XMFINLINE BOOL XMColorEqual
  1291. (
  1292. FXMVECTOR C1,
  1293. FXMVECTOR C2
  1294. )
  1295. {
  1296. return XMVector4Equal(C1, C2);
  1297. }
  1298. //------------------------------------------------------------------------------
  1299. XMFINLINE BOOL XMColorNotEqual
  1300. (
  1301. FXMVECTOR C1,
  1302. FXMVECTOR C2
  1303. )
  1304. {
  1305. return XMVector4NotEqual(C1, C2);
  1306. }
  1307. //------------------------------------------------------------------------------
  1308. XMFINLINE BOOL XMColorGreater
  1309. (
  1310. FXMVECTOR C1,
  1311. FXMVECTOR C2
  1312. )
  1313. {
  1314. return XMVector4Greater(C1, C2);
  1315. }
  1316. //------------------------------------------------------------------------------
  1317. XMFINLINE BOOL XMColorGreaterOrEqual
  1318. (
  1319. FXMVECTOR C1,
  1320. FXMVECTOR C2
  1321. )
  1322. {
  1323. return XMVector4GreaterOrEqual(C1, C2);
  1324. }
  1325. //------------------------------------------------------------------------------
  1326. XMFINLINE BOOL XMColorLess
  1327. (
  1328. FXMVECTOR C1,
  1329. FXMVECTOR C2
  1330. )
  1331. {
  1332. return XMVector4Less(C1, C2);
  1333. }
  1334. //------------------------------------------------------------------------------
  1335. XMFINLINE BOOL XMColorLessOrEqual
  1336. (
  1337. FXMVECTOR C1,
  1338. FXMVECTOR C2
  1339. )
  1340. {
  1341. return XMVector4LessOrEqual(C1, C2);
  1342. }
  1343. //------------------------------------------------------------------------------
  1344. XMFINLINE BOOL XMColorIsNaN
  1345. (
  1346. FXMVECTOR C
  1347. )
  1348. {
  1349. return XMVector4IsNaN(C);
  1350. }
  1351. //------------------------------------------------------------------------------
  1352. XMFINLINE BOOL XMColorIsInfinite
  1353. (
  1354. FXMVECTOR C
  1355. )
  1356. {
  1357. return XMVector4IsInfinite(C);
  1358. }
  1359. //------------------------------------------------------------------------------
  1360. // Computation operations
  1361. //------------------------------------------------------------------------------
  1362. //------------------------------------------------------------------------------
  1363. XMFINLINE XMVECTOR XMColorNegative
  1364. (
  1365. FXMVECTOR vColor
  1366. )
  1367. {
  1368. #if defined(_XM_NO_INTRINSICS_)
  1369. // XMASSERT(XMVector4GreaterOrEqual(C, XMVectorReplicate(0.0f)));
  1370. // XMASSERT(XMVector4LessOrEqual(C, XMVectorReplicate(1.0f)));
  1371. XMVECTOR vResult = {
  1372. 1.0f - vColor.vector4_f32[0],
  1373. 1.0f - vColor.vector4_f32[1],
  1374. 1.0f - vColor.vector4_f32[2],
  1375. vColor.vector4_f32[3]
  1376. };
  1377. return vResult;
  1378. #elif defined(_XM_SSE_INTRINSICS_)
  1379. // Negate only x,y and z.
  1380. XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
  1381. // Add 1,1,1,0 to -x,-y,-z,w
  1382. return _mm_add_ps(vTemp,g_XMOne3);
  1383. #else // _XM_VMX128_INTRINSICS_
  1384. #endif // _XM_VMX128_INTRINSICS_
  1385. }
  1386. //------------------------------------------------------------------------------
  1387. XMFINLINE XMVECTOR XMColorModulate
  1388. (
  1389. FXMVECTOR C1,
  1390. FXMVECTOR C2
  1391. )
  1392. {
  1393. return XMVectorMultiply(C1, C2);
  1394. }
  1395. //------------------------------------------------------------------------------
  1396. XMFINLINE XMVECTOR XMColorAdjustSaturation
  1397. (
  1398. FXMVECTOR vColor,
  1399. FLOAT fSaturation
  1400. )
  1401. {
  1402. #if defined(_XM_NO_INTRINSICS_)
  1403. CONST XMVECTOR gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
  1404. // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
  1405. // Result = (C - Luminance) * Saturation + Luminance;
  1406. FLOAT fLuminance = (vColor.vector4_f32[0]*gvLuminance.vector4_f32[0])+(vColor.vector4_f32[1]*gvLuminance.vector4_f32[1])+(vColor.vector4_f32[2]*gvLuminance.vector4_f32[2]);
  1407. XMVECTOR vResult = {
  1408. ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
  1409. ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
  1410. ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
  1411. vColor.vector4_f32[3]};
  1412. return vResult;
  1413. #elif defined(_XM_SSE_INTRINSICS_)
  1414. static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
  1415. // Mul RGB by intensity constants
  1416. XMVECTOR vLuminance = _mm_mul_ps(vColor,gvLuminance);
  1417. // vResult.x = vLuminance.y, vResult.y = vLuminance.y,
  1418. // vResult.z = vLuminance.z, vResult.w = vLuminance.z
  1419. XMVECTOR vResult = vLuminance;
  1420. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,1,1));
  1421. // vLuminance.x += vLuminance.y
  1422. vLuminance = _mm_add_ss(vLuminance,vResult);
  1423. // Splat vLuminance.z
  1424. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(2,2,2,2));
  1425. // vLuminance.x += vLuminance.z (Dot product)
  1426. vLuminance = _mm_add_ss(vLuminance,vResult);
  1427. // Splat vLuminance
  1428. vLuminance = _mm_shuffle_ps(vLuminance,vLuminance,_MM_SHUFFLE(0,0,0,0));
  1429. // Splat fSaturation
  1430. XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
  1431. // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
  1432. vResult = _mm_sub_ps(vColor,vLuminance);
  1433. vResult = _mm_mul_ps(vResult,vSaturation);
  1434. vResult = _mm_add_ps(vResult,vLuminance);
  1435. // Retain w from the source color
  1436. vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
  1437. vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
  1438. return vResult;
  1439. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  1440. #endif // _XM_VMX128_INTRINSICS_
  1441. }
  1442. //------------------------------------------------------------------------------
  1443. XMFINLINE XMVECTOR XMColorAdjustContrast
  1444. (
  1445. FXMVECTOR vColor,
  1446. FLOAT fContrast
  1447. )
  1448. {
  1449. #if defined(_XM_NO_INTRINSICS_)
  1450. // Result = (vColor - 0.5f) * fContrast + 0.5f;
  1451. XMVECTOR vResult = {
  1452. ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
  1453. ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
  1454. ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
  1455. vColor.vector4_f32[3] // Leave W untouched
  1456. };
  1457. return vResult;
  1458. #elif defined(_XM_SSE_INTRINSICS_)
  1459. XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
  1460. XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
  1461. vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
  1462. vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
  1463. // Retain w from the source color
  1464. vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
  1465. vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
  1466. return vResult;
  1467. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  1468. #endif // _XM_VMX128_INTRINSICS_
  1469. }
  1470. /****************************************************************************
  1471. *
  1472. * Miscellaneous
  1473. *
  1474. ****************************************************************************/
  1475. //------------------------------------------------------------------------------
  1476. XMINLINE BOOL XMVerifyCPUSupport()
  1477. {
  1478. #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
  1479. return TRUE;
  1480. #else // _XM_SSE_INTRINSICS_
  1481. // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
  1482. // Detecting SSE2 on older versions of Windows would require using cpuid directly
  1483. return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) );
  1484. #endif
  1485. }
  1486. //------------------------------------------------------------------------------
  1487. #define XMASSERT_LINE_STRING_SIZE 16
  1488. XMINLINE VOID XMAssert
  1489. (
  1490. CONST CHAR* pExpression,
  1491. CONST CHAR* pFileName,
  1492. UINT LineNumber
  1493. )
  1494. {
  1495. CHAR aLineString[XMASSERT_LINE_STRING_SIZE];
  1496. CHAR* pLineString;
  1497. UINT Line;
  1498. aLineString[XMASSERT_LINE_STRING_SIZE - 2] = '0';
  1499. aLineString[XMASSERT_LINE_STRING_SIZE - 1] = '\0';
  1500. for (Line = LineNumber, pLineString = aLineString + XMASSERT_LINE_STRING_SIZE - 2;
  1501. Line != 0 && pLineString >= aLineString;
  1502. Line /= 10, pLineString--)
  1503. {
  1504. *pLineString = (CHAR)('0' + (Line % 10));
  1505. }
  1506. #ifndef NO_OUTPUT_DEBUG_STRING
  1507. OutputDebugStringA("Assertion failed: ");
  1508. OutputDebugStringA(pExpression);
  1509. OutputDebugStringA(", file ");
  1510. OutputDebugStringA(pFileName);
  1511. OutputDebugStringA(", line ");
  1512. OutputDebugStringA(pLineString + 1);
  1513. OutputDebugStringA("\r\n");
  1514. #else
  1515. DbgPrint("Assertion failed: %s, file %s, line %d\r\n", pExpression, pFileName, LineNumber);
  1516. #endif
  1517. __debugbreak();
  1518. }
  1519. //------------------------------------------------------------------------------
  1520. XMFINLINE XMVECTOR XMFresnelTerm
  1521. (
  1522. FXMVECTOR CosIncidentAngle,
  1523. FXMVECTOR RefractionIndex
  1524. )
  1525. {
  1526. #if defined(_XM_NO_INTRINSICS_)
  1527. XMVECTOR G;
  1528. XMVECTOR D, S;
  1529. XMVECTOR V0, V1, V2, V3;
  1530. XMVECTOR Result;
  1531. // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
  1532. // c = CosIncidentAngle
  1533. // g = sqrt(c^2 + RefractionIndex^2 - 1)
  1534. XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
  1535. G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
  1536. G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
  1537. G = XMVectorAbs(G);
  1538. G = XMVectorSqrt(G);
  1539. S = XMVectorAdd(G, CosIncidentAngle);
  1540. D = XMVectorSubtract(G, CosIncidentAngle);
  1541. V0 = XMVectorMultiply(D, D);
  1542. V1 = XMVectorMultiply(S, S);
  1543. V1 = XMVectorReciprocal(V1);
  1544. V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
  1545. V0 = XMVectorMultiply(V0, V1);
  1546. V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
  1547. V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
  1548. V2 = XMVectorMultiply(V2, V2);
  1549. V3 = XMVectorMultiply(V3, V3);
  1550. V3 = XMVectorReciprocal(V3);
  1551. V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
  1552. Result = XMVectorMultiply(V0, V2);
  1553. Result = XMVectorSaturate(Result);
  1554. return Result;
  1555. #elif defined(_XM_SSE_INTRINSICS_)
  1556. // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
  1557. // c = CosIncidentAngle
  1558. // g = sqrt(c^2 + RefractionIndex^2 - 1)
  1559. XMASSERT(!XMVector4IsInfinite(CosIncidentAngle));
  1560. // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
  1561. XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
  1562. XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
  1563. G = _mm_sub_ps(G,g_XMOne);
  1564. vTemp = _mm_add_ps(vTemp,G);
  1565. // max((0-vTemp),vTemp) == abs(vTemp)
  1566. // The abs is needed to deal with refraction and cosine being zero
  1567. G = _mm_setzero_ps();
  1568. G = _mm_sub_ps(G,vTemp);
  1569. G = _mm_max_ps(G,vTemp);
  1570. // Last operation, the sqrt()
  1571. G = _mm_sqrt_ps(G);
  1572. // Calc G-C and G+C
  1573. XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
  1574. XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
  1575. // Perform the term (0.5f *(g - c)^2) / (g + c)^2
  1576. XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
  1577. vTemp = _mm_mul_ps(GAddC,GAddC);
  1578. vResult = _mm_mul_ps(vResult,g_XMOneHalf);
  1579. vResult = _mm_div_ps(vResult,vTemp);
  1580. // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
  1581. GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
  1582. GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
  1583. GAddC = _mm_sub_ps(GAddC,g_XMOne);
  1584. GSubC = _mm_add_ps(GSubC,g_XMOne);
  1585. GAddC = _mm_mul_ps(GAddC,GAddC);
  1586. GSubC = _mm_mul_ps(GSubC,GSubC);
  1587. GAddC = _mm_div_ps(GAddC,GSubC);
  1588. GAddC = _mm_add_ps(GAddC,g_XMOne);
  1589. // Multiply the two term parts
  1590. vResult = _mm_mul_ps(vResult,GAddC);
  1591. // Clamp to 0.0 - 1.0f
  1592. vResult = _mm_max_ps(vResult,g_XMZero);
  1593. vResult = _mm_min_ps(vResult,g_XMOne);
  1594. return vResult;
  1595. #else // _XM_VMX128_INTRINSICS_
  1596. #endif // _XM_VMX128_INTRINSICS_
  1597. }
  1598. //------------------------------------------------------------------------------
  1599. XMFINLINE BOOL XMScalarNearEqual
  1600. (
  1601. FLOAT S1,
  1602. FLOAT S2,
  1603. FLOAT Epsilon
  1604. )
  1605. {
  1606. FLOAT Delta = S1 - S2;
  1607. #if defined(_XM_NO_INTRINSICS_)
  1608. UINT AbsDelta = *(UINT*)&Delta & 0x7FFFFFFF;
  1609. return (*(FLOAT*)&AbsDelta <= Epsilon);
  1610. #elif defined(_XM_SSE_INTRINSICS_)
  1611. return (fabsf(Delta) <= Epsilon);
  1612. #else
  1613. return (__fabs(Delta) <= Epsilon);
  1614. #endif
  1615. }
  1616. //------------------------------------------------------------------------------
  1617. // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
  1618. XMFINLINE FLOAT XMScalarModAngle
  1619. (
  1620. FLOAT Angle
  1621. )
  1622. {
  1623. // Note: The modulo is performed with unsigned math only to work
  1624. // around a precision error on numbers that are close to PI
  1625. float fTemp;
  1626. #if defined(_XM_NO_INTRINSICS_) || !defined(_XM_VMX128_INTRINSICS_)
  1627. // Normalize the range from 0.0f to XM_2PI
  1628. Angle = Angle + XM_PI;
  1629. // Perform the modulo, unsigned
  1630. fTemp = fabsf(Angle);
  1631. fTemp = fTemp - (XM_2PI * (FLOAT)((INT)(fTemp/XM_2PI)));
  1632. // Restore the number to the range of -XM_PI to XM_PI-epsilon
  1633. fTemp = fTemp - XM_PI;
  1634. // If the modulo'd value was negative, restore negation
  1635. if (Angle<0.0f) {
  1636. fTemp = -fTemp;
  1637. }
  1638. return fTemp;
  1639. #else
  1640. #endif
  1641. }
  1642. //------------------------------------------------------------------------------
  1643. XMINLINE FLOAT XMScalarSin
  1644. (
  1645. FLOAT Value
  1646. )
  1647. {
  1648. #if defined(_XM_NO_INTRINSICS_)
  1649. FLOAT ValueMod;
  1650. FLOAT ValueSq;
  1651. XMVECTOR V0123, V0246, V1357, V9111315, V17192123;
  1652. XMVECTOR V1, V7, V8;
  1653. XMVECTOR R0, R1, R2;
  1654. ValueMod = XMScalarModAngle(Value);
  1655. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
  1656. // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
  1657. ValueSq = ValueMod * ValueMod;
  1658. V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
  1659. V1 = XMVectorSplatY(V0123);
  1660. V0246 = XMVectorMultiply(V0123, V0123);
  1661. V1357 = XMVectorMultiply(V0246, V1);
  1662. V7 = XMVectorSplatW(V1357);
  1663. V8 = XMVectorMultiply(V7, V1);
  1664. V9111315 = XMVectorMultiply(V1357, V8);
  1665. V17192123 = XMVectorMultiply(V9111315, V8);
  1666. R0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
  1667. R1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
  1668. R2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
  1669. return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
  1670. #elif defined(_XM_SSE_INTRINSICS_)
  1671. return sinf( Value );
  1672. #else // _XM_VMX128_INTRINSICS_
  1673. #endif // _XM_VMX128_INTRINSICS_
  1674. }
  1675. //------------------------------------------------------------------------------
  1676. XMINLINE FLOAT XMScalarCos
  1677. (
  1678. FLOAT Value
  1679. )
  1680. {
  1681. #if defined(_XM_NO_INTRINSICS_)
  1682. FLOAT ValueMod;
  1683. FLOAT ValueSq;
  1684. XMVECTOR V0123, V0246, V8101214, V16182022;
  1685. XMVECTOR V2, V6, V8;
  1686. XMVECTOR R0, R1, R2;
  1687. ValueMod = XMScalarModAngle(Value);
  1688. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
  1689. // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
  1690. ValueSq = ValueMod * ValueMod;
  1691. V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
  1692. V0246 = XMVectorMultiply(V0123, V0123);
  1693. V2 = XMVectorSplatZ(V0123);
  1694. V6 = XMVectorSplatW(V0246);
  1695. V8 = XMVectorMultiply(V6, V2);
  1696. V8101214 = XMVectorMultiply(V0246, V8);
  1697. V16182022 = XMVectorMultiply(V8101214, V8);
  1698. R0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
  1699. R1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
  1700. R2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
  1701. return R0.vector4_f32[0] + R1.vector4_f32[0] + R2.vector4_f32[0];
  1702. #elif defined(_XM_SSE_INTRINSICS_)
  1703. return cosf(Value);
  1704. #else // _XM_VMX128_INTRINSICS_
  1705. #endif // _XM_VMX128_INTRINSICS_
  1706. }
  1707. //------------------------------------------------------------------------------
  1708. XMINLINE VOID XMScalarSinCos
  1709. (
  1710. FLOAT* pSin,
  1711. FLOAT* pCos,
  1712. FLOAT Value
  1713. )
  1714. {
  1715. #if defined(_XM_NO_INTRINSICS_)
  1716. FLOAT ValueMod;
  1717. FLOAT ValueSq;
  1718. XMVECTOR V0123, V0246, V1357, V8101214, V9111315, V16182022, V17192123;
  1719. XMVECTOR V1, V2, V6, V8;
  1720. XMVECTOR S0, S1, S2, C0, C1, C2;
  1721. XMASSERT(pSin);
  1722. XMASSERT(pCos);
  1723. ValueMod = XMScalarModAngle(Value);
  1724. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! - V^15 / 15! +
  1725. // V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
  1726. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! +
  1727. // V^12 / 12! - V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
  1728. ValueSq = ValueMod * ValueMod;
  1729. V0123 = XMVectorSet(1.0f, ValueMod, ValueSq, ValueSq * ValueMod);
  1730. V1 = XMVectorSplatY(V0123);
  1731. V2 = XMVectorSplatZ(V0123);
  1732. V0246 = XMVectorMultiply(V0123, V0123);
  1733. V1357 = XMVectorMultiply(V0246, V1);
  1734. V6 = XMVectorSplatW(V0246);
  1735. V8 = XMVectorMultiply(V6, V2);
  1736. V8101214 = XMVectorMultiply(V0246, V8);
  1737. V9111315 = XMVectorMultiply(V1357, V8);
  1738. V16182022 = XMVectorMultiply(V8101214, V8);
  1739. V17192123 = XMVectorMultiply(V9111315, V8);
  1740. C0 = XMVector4Dot(V0246, g_XMCosCoefficients0.v);
  1741. S0 = XMVector4Dot(V1357, g_XMSinCoefficients0.v);
  1742. C1 = XMVector4Dot(V8101214, g_XMCosCoefficients1.v);
  1743. S1 = XMVector4Dot(V9111315, g_XMSinCoefficients1.v);
  1744. C2 = XMVector4Dot(V16182022, g_XMCosCoefficients2.v);
  1745. S2 = XMVector4Dot(V17192123, g_XMSinCoefficients2.v);
  1746. *pCos = C0.vector4_f32[0] + C1.vector4_f32[0] + C2.vector4_f32[0];
  1747. *pSin = S0.vector4_f32[0] + S1.vector4_f32[0] + S2.vector4_f32[0];
  1748. #elif defined(_XM_SSE_INTRINSICS_)
  1749. XMASSERT(pSin);
  1750. XMASSERT(pCos);
  1751. *pSin = sinf(Value);
  1752. *pCos = cosf(Value);
  1753. #else // _XM_VMX128_INTRINSICS_
  1754. #endif // _XM_VMX128_INTRINSICS_
  1755. }
  1756. //------------------------------------------------------------------------------
  1757. XMINLINE FLOAT XMScalarASin
  1758. (
  1759. FLOAT Value
  1760. )
  1761. {
  1762. #if defined(_XM_NO_INTRINSICS_)
  1763. FLOAT AbsValue, Value2, Value3, D;
  1764. XMVECTOR AbsV, R0, R1, Result;
  1765. XMVECTOR V3;
  1766. *(UINT*)&AbsValue = *(UINT*)&Value & 0x7FFFFFFF;
  1767. Value2 = Value * AbsValue;
  1768. Value3 = Value * Value2;
  1769. D = (Value - Value2) / sqrtf(1.00000011921f - AbsValue);
  1770. AbsV = XMVectorReplicate(AbsValue);
  1771. V3.vector4_f32[0] = Value3;
  1772. V3.vector4_f32[1] = 1.0f;
  1773. V3.vector4_f32[2] = Value3;
  1774. V3.vector4_f32[3] = 1.0f;
  1775. R1 = XMVectorSet(D, D, Value, Value);
  1776. R1 = XMVectorMultiply(R1, V3);
  1777. R0 = XMVectorMultiplyAdd(AbsV, g_XMASinCoefficients0.v, g_XMASinCoefficients1.v);
  1778. R0 = XMVectorMultiplyAdd(AbsV, R0, g_XMASinCoefficients2.v);
  1779. Result = XMVector4Dot(R0, R1);
  1780. return Result.vector4_f32[0];
  1781. #elif defined(_XM_SSE_INTRINSICS_)
  1782. return asinf(Value);
  1783. #else // _XM_VMX128_INTRINSICS_
  1784. #endif // _XM_VMX128_INTRINSICS_
  1785. }
  1786. //------------------------------------------------------------------------------
  1787. XMINLINE FLOAT XMScalarACos
  1788. (
  1789. FLOAT Value
  1790. )
  1791. {
  1792. #if defined(_XM_NO_INTRINSICS_)
  1793. return XM_PIDIV2 - XMScalarASin(Value);
  1794. #elif defined(_XM_SSE_INTRINSICS_)
  1795. return acosf(Value);
  1796. #else // _XM_VMX128_INTRINSICS_
  1797. #endif // _XM_VMX128_INTRINSICS_
  1798. }
  1799. //------------------------------------------------------------------------------
  1800. XMFINLINE FLOAT XMScalarSinEst
  1801. (
  1802. FLOAT Value
  1803. )
  1804. {
  1805. #if defined(_XM_NO_INTRINSICS_)
  1806. FLOAT ValueSq;
  1807. XMVECTOR V;
  1808. XMVECTOR Y;
  1809. XMVECTOR Result;
  1810. XMASSERT(Value >= -XM_PI);
  1811. XMASSERT(Value < XM_PI);
  1812. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
  1813. ValueSq = Value * Value;
  1814. V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
  1815. Y = XMVectorSplatY(V);
  1816. V = XMVectorMultiply(V, V);
  1817. V = XMVectorMultiply(V, Y);
  1818. Result = XMVector4Dot(V, g_XMSinEstCoefficients.v);
  1819. return Result.vector4_f32[0];
  1820. #elif defined(_XM_SSE_INTRINSICS_)
  1821. XMASSERT(Value >= -XM_PI);
  1822. XMASSERT(Value < XM_PI);
  1823. float ValueSq = Value*Value;
  1824. XMVECTOR vValue = _mm_set_ps1(Value);
  1825. XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
  1826. vTemp = _mm_mul_ps(vTemp,vTemp);
  1827. vTemp = _mm_mul_ps(vTemp,vValue);
  1828. // vTemp = Value,Value^3,Value^5,Value^7
  1829. vTemp = _mm_mul_ps(vTemp,g_XMSinEstCoefficients);
  1830. vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
  1831. vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
  1832. vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
  1833. vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
  1834. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
  1835. #if defined(_MSC_VER) && (_MSC_VER>=1500)
  1836. return _mm_cvtss_f32(vTemp);
  1837. #else
  1838. return vTemp.m128_f32[0];
  1839. #endif
  1840. #else // _XM_VMX128_INTRINSICS_
  1841. #endif // _XM_VMX128_INTRINSICS_
  1842. }
  1843. //------------------------------------------------------------------------------
  1844. XMFINLINE FLOAT XMScalarCosEst
  1845. (
  1846. FLOAT Value
  1847. )
  1848. {
  1849. #if defined(_XM_NO_INTRINSICS_)
  1850. FLOAT ValueSq;
  1851. XMVECTOR V;
  1852. XMVECTOR Result;
  1853. XMASSERT(Value >= -XM_PI);
  1854. XMASSERT(Value < XM_PI);
  1855. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
  1856. ValueSq = Value * Value;
  1857. V = XMVectorSet(1.0f, Value, ValueSq, ValueSq * Value);
  1858. V = XMVectorMultiply(V, V);
  1859. Result = XMVector4Dot(V, g_XMCosEstCoefficients.v);
  1860. return Result.vector4_f32[0];
  1861. #elif defined(_XM_SSE_INTRINSICS_)
  1862. XMASSERT(Value >= -XM_PI);
  1863. XMASSERT(Value < XM_PI);
  1864. float ValueSq = Value*Value;
  1865. XMVECTOR vValue = _mm_setzero_ps();
  1866. XMVECTOR vTemp = _mm_set_ps(ValueSq * Value,ValueSq,Value,1.0f);
  1867. vTemp = _mm_mul_ps(vTemp,vTemp);
  1868. // vTemp = 1.0f,Value^2,Value^4,Value^6
  1869. vTemp = _mm_mul_ps(vTemp,g_XMCosEstCoefficients);
  1870. vValue = _mm_shuffle_ps(vValue,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
  1871. vValue = _mm_add_ps(vValue,vTemp); // Add Z = X+Z; W = Y+W;
  1872. vTemp = _mm_shuffle_ps(vTemp,vValue,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
  1873. vTemp = _mm_add_ps(vTemp,vValue); // Add Z and W together
  1874. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
  1875. #if defined(_MSC_VER) && (_MSC_VER>=1500)
  1876. return _mm_cvtss_f32(vTemp);
  1877. #else
  1878. return vTemp.m128_f32[0];
  1879. #endif
  1880. #else // _XM_VMX128_INTRINSICS_
  1881. #endif // _XM_VMX128_INTRINSICS_
  1882. }
  1883. //------------------------------------------------------------------------------
  1884. XMFINLINE VOID XMScalarSinCosEst
  1885. (
  1886. FLOAT* pSin,
  1887. FLOAT* pCos,
  1888. FLOAT Value
  1889. )
  1890. {
  1891. #if defined(_XM_NO_INTRINSICS_)
  1892. FLOAT ValueSq;
  1893. XMVECTOR V, Sin, Cos;
  1894. XMVECTOR Y;
  1895. XMASSERT(pSin);
  1896. XMASSERT(pCos);
  1897. XMASSERT(Value >= -XM_PI);
  1898. XMASSERT(Value < XM_PI);
  1899. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
  1900. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
  1901. ValueSq = Value * Value;
  1902. V = XMVectorSet(1.0f, Value, ValueSq, Value * ValueSq);
  1903. Y = XMVectorSplatY(V);
  1904. Cos = XMVectorMultiply(V, V);
  1905. Sin = XMVectorMultiply(Cos, Y);
  1906. Cos = XMVector4Dot(Cos, g_XMCosEstCoefficients.v);
  1907. Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients.v);
  1908. *pCos = Cos.vector4_f32[0];
  1909. *pSin = Sin.vector4_f32[0];
  1910. #elif defined(_XM_SSE_INTRINSICS_)
  1911. XMASSERT(pSin);
  1912. XMASSERT(pCos);
  1913. XMASSERT(Value >= -XM_PI);
  1914. XMASSERT(Value < XM_PI);
  1915. float ValueSq = Value * Value;
  1916. XMVECTOR Cos = _mm_set_ps(Value * ValueSq,ValueSq,Value,1.0f);
  1917. XMVECTOR Sin = _mm_set_ps1(Value);
  1918. Cos = _mm_mul_ps(Cos,Cos);
  1919. Sin = _mm_mul_ps(Sin,Cos);
  1920. // Cos = 1.0f,Value^2,Value^4,Value^6
  1921. Cos = XMVector4Dot(Cos,g_XMCosEstCoefficients);
  1922. _mm_store_ss(pCos,Cos);
  1923. // Sin = Value,Value^3,Value^5,Value^7
  1924. Sin = XMVector4Dot(Sin, g_XMSinEstCoefficients);
  1925. _mm_store_ss(pSin,Sin);
  1926. #else // _XM_VMX128_INTRINSICS_
  1927. #endif // _XM_VMX128_INTRINSICS_
  1928. }
  1929. //------------------------------------------------------------------------------
  1930. XMFINLINE FLOAT XMScalarASinEst
  1931. (
  1932. FLOAT Value
  1933. )
  1934. {
  1935. #if defined(_XM_NO_INTRINSICS_)
  1936. XMVECTOR VR, CR, CS;
  1937. XMVECTOR Result;
  1938. FLOAT AbsV, V2, D;
  1939. CONST FLOAT OnePlusEps = 1.00000011921f;
  1940. *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;
  1941. V2 = Value * AbsV;
  1942. D = OnePlusEps - AbsV;
  1943. CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
  1944. VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
  1945. CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
  1946. Result = XMVector4Dot(VR, CR);
  1947. return Result.vector4_f32[0];
  1948. #elif defined(_XM_SSE_INTRINSICS_)
  1949. CONST FLOAT OnePlusEps = 1.00000011921f;
  1950. FLOAT AbsV = fabsf(Value);
  1951. FLOAT V2 = Value * AbsV; // Square with sign retained
  1952. FLOAT D = OnePlusEps - AbsV;
  1953. XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
  1954. XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
  1955. Result = _mm_mul_ps(Result, g_XMASinEstCoefficients);
  1956. Result = XMVector4Dot(VR,Result);
  1957. #if defined(_MSC_VER) && (_MSC_VER>=1500)
  1958. return _mm_cvtss_f32(Result);
  1959. #else
  1960. return Result.m128_f32[0];
  1961. #endif
  1962. #else // _XM_VMX128_INTRINSICS_
  1963. #endif // _XM_VMX128_INTRINSICS_
  1964. }
  1965. //------------------------------------------------------------------------------
  1966. XMFINLINE FLOAT XMScalarACosEst
  1967. (
  1968. FLOAT Value
  1969. )
  1970. {
  1971. #if defined(_XM_NO_INTRINSICS_)
  1972. XMVECTOR VR, CR, CS;
  1973. XMVECTOR Result;
  1974. FLOAT AbsV, V2, D;
  1975. CONST FLOAT OnePlusEps = 1.00000011921f;
  1976. // return XM_PIDIV2 - XMScalarASin(Value);
  1977. *(UINT*)&AbsV = *(UINT*)&Value & 0x7FFFFFFF;
  1978. V2 = Value * AbsV;
  1979. D = OnePlusEps - AbsV;
  1980. CS = XMVectorSet(Value, 1.0f, 1.0f, V2);
  1981. VR = XMVectorSet(sqrtf(D), Value, V2, D * AbsV);
  1982. CR = XMVectorMultiply(CS, g_XMASinEstCoefficients.v);
  1983. Result = XMVector4Dot(VR, CR);
  1984. return XM_PIDIV2 - Result.vector4_f32[0];
  1985. #elif defined(_XM_SSE_INTRINSICS_)
  1986. CONST FLOAT OnePlusEps = 1.00000011921f;
  1987. FLOAT AbsV = fabsf(Value);
  1988. FLOAT V2 = Value * AbsV; // Value^2 retaining sign
  1989. FLOAT D = OnePlusEps - AbsV;
  1990. XMVECTOR Result = _mm_set_ps(V2,1.0f,1.0f,Value);
  1991. XMVECTOR VR = _mm_set_ps(D * AbsV,V2,Value,sqrtf(D));
  1992. Result = _mm_mul_ps(Result,g_XMASinEstCoefficients);
  1993. Result = XMVector4Dot(VR,Result);
  1994. #if defined(_MSC_VER) && (_MSC_VER>=1500)
  1995. return XM_PIDIV2 - _mm_cvtss_f32(Result);
  1996. #else
  1997. return XM_PIDIV2 - Result.m128_f32[0];
  1998. #endif
  1999. #else // _XM_VMX128_INTRINSICS_
  2000. #endif // _XM_VMX128_INTRINSICS_
  2001. }
  2002. #endif // __XNAMATHMISC_INL__