Counter-Strike: Global Offensive Source Code


/*++
Copyright (c) Microsoft Corporation. All rights reserved.
Module Name:
    xnamathvector.inl
Abstract:
    XNA math library for Windows and Xbox 360: Vector functions
--*/
#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif
#ifndef __XNAMATHVECTOR_INL__
#define __XNAMATHVECTOR_INL__
#if defined(_XM_NO_INTRINSICS_)
#define XMISNAN(x) ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x) ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif
/****************************************************************************
*
* General Vector
*
****************************************************************************/
//------------------------------------------------------------------------------
// Assignment operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Return a vector with all elements equaling zero
XMFINLINE XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with four floating point values
XMFINLINE XMVECTOR XMVectorSet
(
    FLOAT x,
    FLOAT y,
    FLOAT z,
    FLOAT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
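//------------------------------------------------------------------------------
// [Editor's note] Illustrative usage sketch; not part of the original header.
// Shows XMVectorSet in use; the helper name XMExampleMakePoint is hypothetical
// and assumes this file is included through xnamath.h as usual.
XMFINLINE XMVECTOR XMExampleMakePoint(FLOAT x, FLOAT y, FLOAT z)
{
    // w = 1.0f marks a homogeneous point rather than a direction.
    return XMVectorSet(x, y, z, 1.0f);
}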
//------------------------------------------------------------------------------
// Initialize a vector with four integer values
XMFINLINE XMVECTOR XMVectorSetInt
(
    UINT x,
    UINT y,
    UINT z,
    UINT w
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {x,y,z,w};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set_epi32( w, z, y, x );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value
XMFINLINE XMVECTOR XMVectorReplicate
(
    FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicatePtr
(
    CONST FLOAT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    FLOAT Value = pValue[0];
    XMVECTORF32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value
XMFINLINE XMVECTOR XMVectorReplicateInt
(
    UINT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_set1_epi32( Value );
    return reinterpret_cast<const __m128 *>(&vTemp)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicateIntPtr
(
    CONST UINT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
    UINT Value = pValue[0];
    XMVECTORU32 vResult = {Value,Value,Value,Value};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with all bits set (true mask)
XMFINLINE XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32(-1);
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with all bits clear (false mask)
XMFINLINE XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the x component of the vector
XMFINLINE XMVECTOR XMVectorSplatX
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.x = V.x;
    vResult.y = V.x;
    vResult.z = V.x;
    vResult.w = V.x;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the y component of the vector
XMFINLINE XMVECTOR XMVectorSplatY
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.x = V.y;
    vResult.y = V.y;
    vResult.z = V.y;
    vResult.w = V.y;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the z component of the vector
XMFINLINE XMVECTOR XMVectorSplatZ
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.x = V.z;
    vResult.y = V.z;
    vResult.z = V.z;
    vResult.w = V.z;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the w component of the vector
XMFINLINE XMVECTOR XMVectorSplatW
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.x = V.w;
    vResult.y = V.w;
    vResult.z = V.w;
    vResult.w = V.w;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of 1.0f,1.0f,1.0f,1.0f
XMFINLINE XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.x = 1.0f;
    vResult.y = 1.0f;
    vResult.z = 1.0f;
    vResult.w = 1.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
XMFINLINE XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.u[0] = 0x7F800000;
    vResult.u[1] = 0x7F800000;
    vResult.u[2] = 0x7F800000;
    vResult.u[3] = 0x7F800000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
XMFINLINE XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.u[0] = 0x7FC00000;
    vResult.u[1] = 0x7FC00000;
    vResult.u[2] = 0x7FC00000;
    vResult.u[3] = 0x7FC00000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
XMFINLINE XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.u[0] = 0x34000000;
    vResult.u[1] = 0x34000000;
    vResult.u[2] = 0x34000000;
    vResult.u[3] = 0x34000000;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
XMFINLINE XMVECTOR XMVectorSplatSignMask()
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult;
    vResult.u[0] = 0x80000000U;
    vResult.u[1] = 0x80000000U;
    vResult.u[2] = 0x80000000U;
    vResult.u[3] = 0x80000000U;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_set1_epi32( 0x80000000 );
    return reinterpret_cast<__m128*>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a floating point value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
{
    XMASSERT( i <= 3 );
#if defined(_XM_NO_INTRINSICS_)
    return V.v[i];
#elif defined(_XM_SSE_INTRINSICS_)
    return V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
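//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header. When
// the component is known at compile time, prefer the fixed accessors defined
// below (XMVectorGetX etc., declared in xnamath.h) over the indexed form,
// which forces the vector through memory. The helper name is hypothetical.
XMFINLINE FLOAT XMExampleSumXY(FXMVECTOR V)
{
    return XMVectorGetX(V) + XMVectorGetY(V);
}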
//------------------------------------------------------------------------------
// Return the X component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.x;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    return _mm_cvtss_f32(V);
#else
    return V.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Y component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.y;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[1];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Z component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.z;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[2];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the W component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.w;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    return _mm_cvtss_f32(vTemp);
#else
    return V.m128_f32[3];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store a component indexed by i into a 32 bit float location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
{
    XMASSERT( f != 0 );
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *f = V.v[i];
#elif defined(_XM_SSE_INTRINSICS_)
    *f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store the X component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.x;
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Y component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.y;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Z component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.z;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the W component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.w;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
{
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    return V.u[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    return tmp.u[i];
#else
    return V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return the X component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.u[0];
#elif defined(_XM_SSE_INTRINSICS_)
    return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Y component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.u[1];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Z component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.u[2];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the W component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
    return V.u[3];
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
    return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store a component indexed by i into a 32 bit integer location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
{
    XMASSERT( x != 0 );
    XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.u[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
    XMVECTORU32 tmp;
    tmp.v = V;
    *x = tmp.u[i];
#else
    *x = V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store the X component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
{
    XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *x = V.u[0];
#elif defined(_XM_SSE_INTRINSICS_)
    _mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Y component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
{
    XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *y = V.u[1];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    _mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Z component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
{
    XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *z = V.u[2];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    _mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the W component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
{
    XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
    *w = V.u[3];
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
    _mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Set a single indexed floating point component
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.v[i] = f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.v[0] = x;
    U.v[1] = V.v[1];
    U.v[2] = V.v[2];
    U.v[3] = V.v[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[0] = x;
    return vResult;
#else
    XMVECTOR vResult = _mm_set_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.v[0] = V.v[0];
    U.v[1] = y;
    U.v[2] = V.v[2];
    U.v[3] = V.v[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.v[0] = V.v[0];
    U.v[1] = V.v[1];
    U.v[2] = z;
    U.v[3] = V.v[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.v[0] = V.v[0];
    U.v[1] = V.v[1];
    U.v[2] = V.v[2];
    U.v[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_f32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_set_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
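//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// Overwrites one known component with the setters above; x, z and w pass
// through unchanged. The helper name is hypothetical.
XMFINLINE XMVECTOR XMExampleFlattenToXZPlane(FXMVECTOR V)
{
    return XMVectorSetY(V, 0.0f);
}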
//------------------------------------------------------------------------------
// Sets a component of a vector to a floating point value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.v[i] = *f;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( f != 0 );
    XMASSERT( i <= 3 );
    XMVECTOR U = V;
    U.m128_f32[i] = *f;
    return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.v[0] = *x;
    U.v[1] = V.v[1];
    U.v[2] = V.v[2];
    U.v[3] = V.v[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vResult = _mm_load_ss(x);
    vResult = _mm_move_ss(V,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.v[0] = V.v[0];
    U.v[1] = *y;
    U.v[2] = V.v[2];
    U.v[3] = V.v[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.v[0] = V.v[0];
    U.v[1] = V.v[1];
    U.v[2] = *z;
    U.v[3] = V.v[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.v[0] = V.v[0];
    U.v[1] = V.v[1];
    U.v[2] = V.v[2];
    U.v[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets a component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( i <= 3 );
    U = V;
    U.u[i] = x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.u[0] = x;
    U.u[1] = V.u[1];
    U.u[2] = V.u[2];
    U.u[3] = V.u[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[0] = x;
    return vResult;
#else
    __m128i vTemp = _mm_cvtsi32_si128(x);
    XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.u[0] = V.u[0];
    U.u[1] = y;
    U.u[2] = V.u[2];
    U.u[3] = V.u[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[1] = y;
    return vResult;
#else
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(y);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.u[0] = V.u[0];
    U.u[1] = V.u[1];
    U.u[2] = z;
    U.u[3] = V.u[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[2] = z;
    return vResult;
#else
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(z);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    U.u[0] = V.u[0];
    U.u[1] = V.u[1];
    U.u[2] = V.u[2];
    U.u[3] = w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
    XMVECTOR vResult = V;
    vResult.m128_i32[3] = w;
    return vResult;
#else
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    __m128i vTemp = _mm_cvtsi32_si128(w);
    // Replace the x component
    vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets a component of a vector to an integer value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    U = V;
    U.u[i] = *x;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMASSERT( i <= 3 );
    XMVECTORU32 tmp;
    tmp.v = V;
    tmp.u[i] = *x;
    return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( x != 0 );
    U.u[0] = *x;
    U.u[1] = V.u[1];
    U.u[2] = V.u[2];
    U.u[3] = V.u[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( x != 0 );
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
    XMVECTOR vResult = _mm_move_ss(V,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( y != 0 );
    U.u[0] = V.u[0];
    U.u[1] = *y;
    U.u[2] = V.u[2];
    U.u[3] = V.u[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( y != 0 );
    // Swap y and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap y and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( z != 0 );
    U.u[0] = V.u[0];
    U.u[1] = V.u[1];
    U.u[2] = *z;
    U.u[3] = V.u[3];
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( z != 0 );
    // Swap z and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap z and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR U;
    XMASSERT( w != 0 );
    U.u[0] = V.u[0];
    U.u[1] = V.u[1];
    U.u[2] = V.u[2];
    U.u[3] = *w;
    return U;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( w != 0 );
    // Swap w and x
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
    // Convert input to vector
    XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
    // Replace the x component
    vResult = _mm_move_ss(vResult,vTemp);
    // Swap w and x again
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorPermute
// operations. Visualize the two vectors V1 and V2 given
// in a permute as arranged back to back in a linear fashion,
// such that they form an array of 8 floating point values.
// The four integers specified in XMVectorPermuteControl
// will serve as indices into the array to select components
// from the two vectors. ElementIndex0 is used to select
// an element from the vectors to be placed in the first
// component of the resulting vector, ElementIndex1 is used
// to select an element for the second component, etc.
XMFINLINE XMVECTOR XMVectorPermuteControl
(
    UINT ElementIndex0,
    UINT ElementIndex1,
    UINT ElementIndex2,
    UINT ElementIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
    XMVECTORU32 vControl;
    static CONST UINT ControlElement[] = {
        XM_PERMUTE_0X,
        XM_PERMUTE_0Y,
        XM_PERMUTE_0Z,
        XM_PERMUTE_0W,
        XM_PERMUTE_1X,
        XM_PERMUTE_1Y,
        XM_PERMUTE_1Z,
        XM_PERMUTE_1W
    };
    XMASSERT(ElementIndex0 < 8);
    XMASSERT(ElementIndex1 < 8);
    XMASSERT(ElementIndex2 < 8);
    XMASSERT(ElementIndex3 < 8);
    vControl.u[0] = ControlElement[ElementIndex0];
    vControl.u[1] = ControlElement[ElementIndex1];
    vControl.u[2] = ControlElement[ElementIndex2];
    vControl.u[3] = ControlElement[ElementIndex3];
    return vControl.v;
#else
#endif
}
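//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// Indices 0-3 select from the first vector, 4-7 from the second, so this
// control picks V1.x, V1.z, V2.x, V2.z. The helper name is hypothetical.
XMFINLINE XMVECTOR XMExampleEvenComponentControl()
{
    return XMVectorPermuteControl(0, 2, 4, 6);
}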
//------------------------------------------------------------------------------
// Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte
// entries into a single 16 byte vector and return it. Index 0-15 = V1,
// 16-31 = V2
XMFINLINE XMVECTOR XMVectorPermute
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)
    const BYTE *aByte[2];
    XMVECTOR Result;
    UINT i, uIndex, VectorIndex;
    const BYTE *pControl;
    BYTE *pWork;
    // Indices must be in range from 0 to 31
    XMASSERT((Control.u[0] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.u[1] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.u[2] & 0xE0E0E0E0) == 0);
    XMASSERT((Control.u[3] & 0xE0E0E0E0) == 0);
    // 0-15 = V1, 16-31 = V2
    aByte[0] = (const BYTE*)(&V1);
    aByte[1] = (const BYTE*)(&V2);
    i = 16;
    pControl = (const BYTE *)(&Control);
    pWork = (BYTE *)(&Result);
    do {
        // Get the byte to map from
        uIndex = pControl[0];
        ++pControl;
        VectorIndex = (uIndex>>4)&1;
        uIndex &= 0x0F;
#if defined(_XM_X86_) || defined(_XM_X64_)
        uIndex ^= 3; // Swap byte ordering on little endian machines
#endif
        pWork[0] = aByte[VectorIndex][uIndex];
        ++pWork;
    } while (--i);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_PREFAST_) || defined(XMDEBUG)
    // Indices must be in range from 0 to 31
    static const XMVECTORI32 g_PermuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0};
    XMVECTOR vAssert = _mm_and_ps(Control,g_PermuteTest);
    __m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero);
    XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf);
#endif
    // Store the vectors onto local memory on the stack
    XMVECTOR Array[2];
    Array[0] = V1;
    Array[1] = V2;
    // Output vector, on the stack
    XMVECTORU8 vResult;
    // Get pointer to the two vectors on the stack
    const BYTE *pInput = reinterpret_cast<const BYTE *>(Array);
    // Store the Control vector on the stack to access the bytes
    // don't use Control, it can cause a register variable to spill on the stack.
    XMVECTORU8 vControl;
    vControl.v = Control; // Write to memory
    UINT i = 0;
    do {
        UINT ComponentIndex = vControl.u[i] & 0x1FU;
        ComponentIndex ^= 3; // Swap byte ordering
        vResult.u[i] = pInput[ComponentIndex];
    } while (++i<16);
    return vResult;
#else // _XM_SSE_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
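//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// Gathers the x and z components of two vectors into a single result with the
// permute machinery above. The helper name is hypothetical.
XMFINLINE XMVECTOR XMExampleGatherXZ(FXMVECTOR V1, FXMVECTOR V2)
{
    // Result = { V1.x, V1.z, V2.x, V2.z }
    XMVECTOR Control = XMVectorPermuteControl(0, 2, 4, 6);
    return XMVectorPermute(V1, V2, Control);
}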
//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorSelect
// operations. The four integers specified in XMVectorSelectControl
// serve as indices to select between components in two vectors.
// The first index controls selection for the first component of
// the vectors involved in a select operation, the second index
// controls selection for the second component etc. A value of
// zero for an index causes the corresponding component from the first
// vector to be selected whereas a one causes the component from the
// second vector to be selected instead.
XMFINLINE XMVECTOR XMVectorSelectControl
(
    UINT VectorIndex0,
    UINT VectorIndex1,
    UINT VectorIndex2,
    UINT VectorIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // x=Index0,y=Index1,z=Index2,w=Index3
    __m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
    // Any non-zero entries become 0xFFFFFFFF else 0
    vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
    return reinterpret_cast<__m128 *>(&vTemp)[0];
#else
    XMVECTOR ControlVector;
    CONST UINT ControlElement[] =
    {
        XM_SELECT_0,
        XM_SELECT_1
    };
    XMASSERT(VectorIndex0 < 2);
    XMASSERT(VectorIndex1 < 2);
    XMASSERT(VectorIndex2 < 2);
    XMASSERT(VectorIndex3 < 2);
    ControlVector.u[0] = ControlElement[VectorIndex0];
    ControlVector.u[1] = ControlElement[VectorIndex1];
    ControlVector.u[2] = ControlElement[VectorIndex2];
    ControlVector.u[3] = ControlElement[VectorIndex3];
    return ControlVector;
#endif
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorSelect
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.u[0] = (V1.u[0] & ~Control.u[0]) | (V2.u[0] & Control.u[0]);
    Result.u[1] = (V1.u[1] & ~Control.u[1]) | (V2.u[1] & Control.u[1]);
    Result.u[2] = (V1.u[2] & ~Control.u[2]) | (V2.u[2] & Control.u[2]);
    Result.u[3] = (V1.u[3] & ~Control.u[3]) | (V2.u[3] & Control.u[3]);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
    XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
    return _mm_or_ps(vTemp1,vTemp2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
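//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// XMVectorSelect is a branchless per-component "if": where a control bit is
// set the component comes from the second vector, elsewhere from the first.
// XMVectorLess is declared in xnamath.h; the helper name is hypothetical.
XMFINLINE XMVECTOR XMExampleClampNegativeToZero(FXMVECTOR V)
{
    // Mask is all-ones in components where V < 0; select zero there.
    XMVECTOR Mask = XMVectorLess(V, XMVectorZero());
    return XMVectorSelect(V, XMVectorZero(), Mask);
}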
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorMergeXY
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.u[0] = V1.u[0];
    Result.u[1] = V2.u[0];
    Result.u[2] = V1.u[1];
    Result.u[3] = V2.u[1];
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpacklo_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorMergeZW
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.u[0] = V1.u[2];
    Result.u[1] = V2.u[2];
    Result.u[2] = V1.u[3];
    Result.u[3] = V2.u[3];
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_unpackhi_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
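//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// The merge pair interleaves two vectors, matching SSE unpacklo/unpackhi:
// MergeXY(A,B) = { A.x, B.x, A.y, B.y }, MergeZW(A,B) = { A.z, B.z, A.w, B.w }.
// The helper name is hypothetical.
XMFINLINE XMVECTOR XMExampleInterleaveLow(FXMVECTOR A, FXMVECTOR B)
{
    return XMVectorMergeXY(A, B);
}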
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Control;
    Control.u[0] = (V1.v[0] == V2.v[0]) ? 0xFFFFFFFF : 0;
    Control.u[1] = (V1.v[1] == V2.v[1]) ? 0xFFFFFFFF : 0;
    Control.u[2] = (V1.v[2] == V2.v[2]) ? 0xFFFFFFFF : 0;
    Control.u[3] = (V1.v[3] == V2.v[3]) ? 0xFFFFFFFF : 0;
    return Control;
#elif defined(_XM_SSE_INTRINSICS_)
    return _mm_cmpeq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorEqualR
(
    UINT* pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT ux, uy, uz, uw, CR;
    XMVECTOR Control;
    XMASSERT( pCR );
    ux = (V1.x == V2.x) ? 0xFFFFFFFFU : 0;
    uy = (V1.y == V2.y) ? 0xFFFFFFFFU : 0;
    uz = (V1.z == V2.z) ? 0xFFFFFFFFU : 0;
    uw = (V1.w == V2.w) ? 0xFFFFFFFFU : 0;
    CR = 0;
    if (ux&uy&uz&uw)
    {
        // All elements are equal
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!(ux|uy|uz|uw))
    {
        // All elements are not equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    Control.u[0] = ux;
    Control.u[1] = uy;
    Control.u[2] = uz;
    Control.u[3] = uw;
    return Control;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT( pCR );
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        // All elements are not equal
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
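//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// The "R" variants also return a CR6-style result through pCR, which can be
// tested with the XMComparison* macros from xnamath.h. The helper name is
// hypothetical.
XMFINLINE BOOL XMExampleAllEqual(FXMVECTOR V1, FXMVECTOR V2)
{
    UINT CR;
    XMVectorEqualR(&CR, V1, V2);
    return XMComparisonAllTrue(CR); // TRUE only if every component matched
}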
//------------------------------------------------------------------------------
// Treat the components of the vectors as unsigned integers and
// compare individual bits between the two. This is useful for
// comparing control vectors and result vectors returned from
// other comparison operations.
XMFINLINE XMVECTOR XMVectorEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Control;
    Control.u[0] = (V1.u[0] == V2.u[0]) ? 0xFFFFFFFF : 0;
    Control.u[1] = (V1.u[1] == V2.u[1]) ? 0xFFFFFFFF : 0;
    Control.u[2] = (V1.u[2] == V2.u[2]) ? 0xFFFFFFFF : 0;
    Control.u[3] = (V1.u[3] == V2.u[3]) ? 0xFFFFFFFF : 0;
    return Control;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorEqualIntR
(
    UINT* pCR,
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Control;
    XMASSERT(pCR);
    Control = XMVectorEqualInt(V1, V2);
    *pCR = 0;
    if (XMVector4EqualInt(Control, XMVectorTrueInt()))
    {
        // All elements are equal
        *pCR |= XM_CRMASK_CR6TRUE;
    }
    else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
    {
        // All elements are not equal
        *pCR |= XM_CRMASK_CR6FALSE;
    }
    return Control;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pCR);
    __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
    UINT CR = 0;
    if (iTemp==0x0F)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    *pCR = CR;
    return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorNearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
    XMVECTOR Control;
    fDeltax = V1.x-V2.x;
    fDeltay = V1.y-V2.y;
    fDeltaz = V1.z-V2.z;
    fDeltaw = V1.w-V2.w;
    fDeltax = fabsf(fDeltax);
    fDeltay = fabsf(fDeltay);
    fDeltaz = fabsf(fDeltaz);
    fDeltaw = fabsf(fDeltaw);
    Control.u[0] = (fDeltax <= Epsilon.x) ? 0xFFFFFFFFU : 0;
    Control.u[1] = (fDeltay <= Epsilon.y) ? 0xFFFFFFFFU : 0;
    Control.u[2] = (fDeltaz <= Epsilon.z) ? 0xFFFFFFFFU : 0;
    Control.u[3] = (fDeltaw <= Epsilon.w) ? 0xFFFFFFFFU : 0;
    return Control;
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
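//------------------------------------------------------------------------------
// [Editor's note] Illustrative sketch; not part of the original header.
// Typical approximate-equality test with a replicated tolerance: each
// component must differ by no more than epsilon. The constant matches
// XMVectorSplatEpsilon (FLT_EPSILON); the helper name is hypothetical.
XMFINLINE XMVECTOR XMExampleNearEqualDefault(FXMVECTOR V1, FXMVECTOR V2)
{
    return XMVectorNearEqual(V1, V2, XMVectorReplicate(1.192092896e-7f));
}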
  1486. //------------------------------------------------------------------------------
  1487. XMFINLINE XMVECTOR XMVectorNotEqual
  1488. (
  1489. FXMVECTOR V1,
  1490. FXMVECTOR V2
  1491. )
  1492. {
  1493. #if defined(_XM_NO_INTRINSICS_)
  1494. XMVECTOR Control;
  1495. Control.u[0] = (V1.v[0] != V2.v[0]) ? 0xFFFFFFFF : 0;
  1496. Control.u[1] = (V1.v[1] != V2.v[1]) ? 0xFFFFFFFF : 0;
  1497. Control.u[2] = (V1.v[2] != V2.v[2]) ? 0xFFFFFFFF : 0;
  1498. Control.u[3] = (V1.v[3] != V2.v[3]) ? 0xFFFFFFFF : 0;
  1499. return Control;
  1500. #elif defined(_XM_SSE_INTRINSICS_)
  1501. return _mm_cmpneq_ps( V1, V2 );
  1502. #else // _XM_VMX128_INTRINSICS_
  1503. #endif // _XM_VMX128_INTRINSICS_
  1504. }
  1505. //------------------------------------------------------------------------------
  1506. XMFINLINE XMVECTOR XMVectorNotEqualInt
  1507. (
  1508. FXMVECTOR V1,
  1509. FXMVECTOR V2
  1510. )
  1511. {
  1512. #if defined(_XM_NO_INTRINSICS_)
  1513. XMVECTOR Control;
  1514. Control.u[0] = (V1.u[0] != V2.u[0]) ? 0xFFFFFFFFU : 0;
  1515. Control.u[1] = (V1.u[1] != V2.u[1]) ? 0xFFFFFFFFU : 0;
  1516. Control.u[2] = (V1.u[2] != V2.u[2]) ? 0xFFFFFFFFU : 0;
  1517. Control.u[3] = (V1.u[3] != V2.u[3]) ? 0xFFFFFFFFU : 0;
  1518. return Control;
  1519. #elif defined(_XM_SSE_INTRINSICS_)
  1520. __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
  1521. return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
  1522. #else // _XM_VMX128_INTRINSICS_
  1523. #endif // _XM_VMX128_INTRINSICS_
  1524. }
  1525. //------------------------------------------------------------------------------
  1526. XMFINLINE XMVECTOR XMVectorGreater
  1527. (
  1528. FXMVECTOR V1,
  1529. FXMVECTOR V2
  1530. )
  1531. {
  1532. #if defined(_XM_NO_INTRINSICS_)
  1533. XMVECTOR Control;
  1534. Control.u[0] = (V1.v[0] > V2.v[0]) ? 0xFFFFFFFF : 0;
  1535. Control.u[1] = (V1.v[1] > V2.v[1]) ? 0xFFFFFFFF : 0;
  1536. Control.u[2] = (V1.v[2] > V2.v[2]) ? 0xFFFFFFFF : 0;
  1537. Control.u[3] = (V1.v[3] > V2.v[3]) ? 0xFFFFFFFF : 0;
  1538. return Control;
  1539. #elif defined(_XM_SSE_INTRINSICS_)
  1540. return _mm_cmpgt_ps( V1, V2 );
  1541. #else // _XM_VMX128_INTRINSICS_
  1542. #endif // _XM_VMX128_INTRINSICS_
  1543. }
  1544. //------------------------------------------------------------------------------
  1545. XMFINLINE XMVECTOR XMVectorGreaterR
  1546. (
  1547. UINT* pCR,
  1548. FXMVECTOR V1,
  1549. FXMVECTOR V2
  1550. )
  1551. {
  1552. #if defined(_XM_NO_INTRINSICS_)
  1553. UINT ux, uy, uz, uw, CR;
  1554. XMVECTOR Control;
  1555. XMASSERT( pCR );
  1556. ux = (V1.x > V2.x) ? 0xFFFFFFFFU : 0;
  1557. uy = (V1.y > V2.y) ? 0xFFFFFFFFU : 0;
  1558. uz = (V1.z > V2.z) ? 0xFFFFFFFFU : 0;
  1559. uw = (V1.w > V2.w) ? 0xFFFFFFFFU : 0;
  1560. CR = 0;
  1561. if (ux&uy&uz&uw)
  1562. {
  1563. // All elements are greater
  1564. CR = XM_CRMASK_CR6TRUE;
  1565. }
  1566. else if (!(ux|uy|uz|uw))
  1567. {
1568. // No elements are greater
  1569. CR = XM_CRMASK_CR6FALSE;
  1570. }
  1571. *pCR = CR;
  1572. Control.u[0] = ux;
  1573. Control.u[1] = uy;
  1574. Control.u[2] = uz;
  1575. Control.u[3] = uw;
  1576. return Control;
  1577. #elif defined(_XM_SSE_INTRINSICS_)
  1578. XMASSERT( pCR );
  1579. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  1580. UINT CR = 0;
  1581. int iTest = _mm_movemask_ps(vTemp);
  1582. if (iTest==0xf)
  1583. {
  1584. CR = XM_CRMASK_CR6TRUE;
  1585. }
  1586. else if (!iTest)
  1587. {
1588. // No elements are greater
  1589. CR = XM_CRMASK_CR6FALSE;
  1590. }
  1591. *pCR = CR;
  1592. return vTemp;
  1593. #else // _XM_VMX128_INTRINSICS_
  1594. #endif // _XM_VMX128_INTRINSICS_
  1595. }
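// Usage sketch (illustrative, not part of the original source): the CR record
// written by the "R" variants is meant for the library's comparison macros
// such as XMComparisonAllTrue / XMComparisonAllFalse.
//
// UINT CR;
// XMVECTOR vMask = XMVectorGreaterR(&CR, V1, V2);
// if (XMComparisonAllTrue(CR))  { /* every component of V1 is greater */ }
// if (XMComparisonAllFalse(CR)) { /* no component of V1 is greater */ }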
  1596. //------------------------------------------------------------------------------
  1597. XMFINLINE XMVECTOR XMVectorGreaterOrEqual
  1598. (
  1599. FXMVECTOR V1,
  1600. FXMVECTOR V2
  1601. )
  1602. {
  1603. #if defined(_XM_NO_INTRINSICS_)
  1604. XMVECTOR Control;
  1605. Control.u[0] = (V1.v[0] >= V2.v[0]) ? 0xFFFFFFFF : 0;
  1606. Control.u[1] = (V1.v[1] >= V2.v[1]) ? 0xFFFFFFFF : 0;
  1607. Control.u[2] = (V1.v[2] >= V2.v[2]) ? 0xFFFFFFFF : 0;
  1608. Control.u[3] = (V1.v[3] >= V2.v[3]) ? 0xFFFFFFFF : 0;
  1609. return Control;
  1610. #elif defined(_XM_SSE_INTRINSICS_)
  1611. return _mm_cmpge_ps( V1, V2 );
  1612. #else // _XM_VMX128_INTRINSICS_
  1613. #endif // _XM_VMX128_INTRINSICS_
  1614. }
  1615. //------------------------------------------------------------------------------
  1616. XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
  1617. (
  1618. UINT* pCR,
  1619. FXMVECTOR V1,
  1620. FXMVECTOR V2
  1621. )
  1622. {
  1623. #if defined(_XM_NO_INTRINSICS_)
  1624. UINT ux, uy, uz, uw, CR;
  1625. XMVECTOR Control;
  1626. XMASSERT( pCR );
  1627. ux = (V1.x >= V2.x) ? 0xFFFFFFFFU : 0;
  1628. uy = (V1.y >= V2.y) ? 0xFFFFFFFFU : 0;
  1629. uz = (V1.z >= V2.z) ? 0xFFFFFFFFU : 0;
  1630. uw = (V1.w >= V2.w) ? 0xFFFFFFFFU : 0;
  1631. CR = 0;
  1632. if (ux&uy&uz&uw)
  1633. {
1634. // All elements are greater or equal
  1635. CR = XM_CRMASK_CR6TRUE;
  1636. }
  1637. else if (!(ux|uy|uz|uw))
  1638. {
1639. // No elements are greater or equal
  1640. CR = XM_CRMASK_CR6FALSE;
  1641. }
  1642. *pCR = CR;
  1643. Control.u[0] = ux;
  1644. Control.u[1] = uy;
  1645. Control.u[2] = uz;
  1646. Control.u[3] = uw;
  1647. return Control;
  1648. #elif defined(_XM_SSE_INTRINSICS_)
  1649. XMASSERT( pCR );
  1650. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  1651. UINT CR = 0;
  1652. int iTest = _mm_movemask_ps(vTemp);
  1653. if (iTest==0xf)
  1654. {
  1655. CR = XM_CRMASK_CR6TRUE;
  1656. }
  1657. else if (!iTest)
  1658. {
1659. // No elements are greater or equal
  1660. CR = XM_CRMASK_CR6FALSE;
  1661. }
  1662. *pCR = CR;
  1663. return vTemp;
  1664. #else // _XM_VMX128_INTRINSICS_
  1665. #endif // _XM_VMX128_INTRINSICS_
  1666. }
  1667. //------------------------------------------------------------------------------
  1668. XMFINLINE XMVECTOR XMVectorLess
  1669. (
  1670. FXMVECTOR V1,
  1671. FXMVECTOR V2
  1672. )
  1673. {
  1674. #if defined(_XM_NO_INTRINSICS_)
  1675. XMVECTOR Control;
  1676. Control.u[0] = (V1.v[0] < V2.v[0]) ? 0xFFFFFFFF : 0;
  1677. Control.u[1] = (V1.v[1] < V2.v[1]) ? 0xFFFFFFFF : 0;
  1678. Control.u[2] = (V1.v[2] < V2.v[2]) ? 0xFFFFFFFF : 0;
  1679. Control.u[3] = (V1.v[3] < V2.v[3]) ? 0xFFFFFFFF : 0;
  1680. return Control;
  1681. #elif defined(_XM_SSE_INTRINSICS_)
  1682. return _mm_cmplt_ps( V1, V2 );
  1683. #else // _XM_VMX128_INTRINSICS_
  1684. #endif // _XM_VMX128_INTRINSICS_
  1685. }
  1686. //------------------------------------------------------------------------------
  1687. XMFINLINE XMVECTOR XMVectorLessOrEqual
  1688. (
  1689. FXMVECTOR V1,
  1690. FXMVECTOR V2
  1691. )
  1692. {
  1693. #if defined(_XM_NO_INTRINSICS_)
  1694. XMVECTOR Control;
  1695. Control.u[0] = (V1.v[0] <= V2.v[0]) ? 0xFFFFFFFF : 0;
  1696. Control.u[1] = (V1.v[1] <= V2.v[1]) ? 0xFFFFFFFF : 0;
  1697. Control.u[2] = (V1.v[2] <= V2.v[2]) ? 0xFFFFFFFF : 0;
  1698. Control.u[3] = (V1.v[3] <= V2.v[3]) ? 0xFFFFFFFF : 0;
  1699. return Control;
  1700. #elif defined(_XM_SSE_INTRINSICS_)
  1701. return _mm_cmple_ps( V1, V2 );
  1702. #else // _XM_VMX128_INTRINSICS_
  1703. #endif // _XM_VMX128_INTRINSICS_
  1704. }
  1705. //------------------------------------------------------------------------------
  1706. XMFINLINE XMVECTOR XMVectorInBounds
  1707. (
  1708. FXMVECTOR V,
  1709. FXMVECTOR Bounds
  1710. )
  1711. {
  1712. #if defined(_XM_NO_INTRINSICS_)
  1713. XMVECTOR Control;
  1714. Control.u[0] = (V.x <= Bounds.x && V.x >= -Bounds.x) ? 0xFFFFFFFF : 0;
  1715. Control.u[1] = (V.y <= Bounds.y && V.y >= -Bounds.y) ? 0xFFFFFFFF : 0;
  1716. Control.u[2] = (V.z <= Bounds.z && V.z >= -Bounds.z) ? 0xFFFFFFFF : 0;
  1717. Control.u[3] = (V.w <= Bounds.w && V.w >= -Bounds.w) ? 0xFFFFFFFF : 0;
  1718. return Control;
  1719. #elif defined(_XM_SSE_INTRINSICS_)
  1720. // Test if less than or equal
  1721. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  1722. // Negate the bounds
  1723. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
1724. // Test if greater than or equal (operands reversed)
  1725. vTemp2 = _mm_cmple_ps(vTemp2,V);
  1726. // Blend answers
  1727. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  1728. return vTemp1;
  1729. #else // _XM_VMX128_INTRINSICS_
  1730. #endif // _XM_VMX128_INTRINSICS_
  1731. }
  1732. //------------------------------------------------------------------------------
  1733. XMFINLINE XMVECTOR XMVectorInBoundsR
  1734. (
  1735. UINT* pCR,
  1736. FXMVECTOR V,
  1737. FXMVECTOR Bounds
  1738. )
  1739. {
  1740. #if defined(_XM_NO_INTRINSICS_)
  1741. UINT ux, uy, uz, uw, CR;
  1742. XMVECTOR Control;
  1743. XMASSERT( pCR != 0 );
  1744. ux = (V.x <= Bounds.x && V.x >= -Bounds.x) ? 0xFFFFFFFFU : 0;
  1745. uy = (V.y <= Bounds.y && V.y >= -Bounds.y) ? 0xFFFFFFFFU : 0;
  1746. uz = (V.z <= Bounds.z && V.z >= -Bounds.z) ? 0xFFFFFFFFU : 0;
  1747. uw = (V.w <= Bounds.w && V.w >= -Bounds.w) ? 0xFFFFFFFFU : 0;
  1748. CR = 0;
  1749. if (ux&uy&uz&uw)
  1750. {
  1751. // All elements are in bounds
  1752. CR = XM_CRMASK_CR6BOUNDS;
  1753. }
  1754. *pCR = CR;
  1755. Control.u[0] = ux;
  1756. Control.u[1] = uy;
  1757. Control.u[2] = uz;
  1758. Control.u[3] = uw;
  1759. return Control;
  1760. #elif defined(_XM_SSE_INTRINSICS_)
  1761. XMASSERT( pCR != 0 );
  1762. // Test if less than or equal
  1763. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  1764. // Negate the bounds
  1765. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
1766. // Test if greater than or equal (operands reversed)
  1767. vTemp2 = _mm_cmple_ps(vTemp2,V);
  1768. // Blend answers
  1769. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  1770. UINT CR = 0;
  1771. if (_mm_movemask_ps(vTemp1)==0xf) {
  1772. // All elements are in bounds
  1773. CR = XM_CRMASK_CR6BOUNDS;
  1774. }
  1775. *pCR = CR;
  1776. return vTemp1;
  1777. #else // _XM_VMX128_INTRINSICS_
  1778. #endif // _XM_VMX128_INTRINSICS_
  1779. }
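// Usage sketch (illustrative, not part of the original source): the bounds
// record pairs with the library's XMComparisonAllInBounds macro.
//
// UINT CR;
// XMVECTOR vBounds = XMVectorReplicate(1.0f);
// XMVECTOR vMask = XMVectorInBoundsR(&CR, V, vBounds);
// if (XMComparisonAllInBounds(CR)) { /* every component lies in [-1, 1] */ }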
  1780. //------------------------------------------------------------------------------
  1781. XMFINLINE XMVECTOR XMVectorIsNaN
  1782. (
  1783. FXMVECTOR V
  1784. )
  1785. {
  1786. #if defined(_XM_NO_INTRINSICS_)
  1787. XMVECTOR Control;
  1788. Control.u[0] = XMISNAN(V.x) ? 0xFFFFFFFFU : 0;
  1789. Control.u[1] = XMISNAN(V.y) ? 0xFFFFFFFFU : 0;
  1790. Control.u[2] = XMISNAN(V.z) ? 0xFFFFFFFFU : 0;
  1791. Control.u[3] = XMISNAN(V.w) ? 0xFFFFFFFFU : 0;
  1792. return Control;
  1793. #elif defined(_XM_SSE_INTRINSICS_)
  1794. // Mask off the exponent
  1795. __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
  1796. // Mask off the mantissa
  1797. __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
  1798. // Are any of the exponents == 0x7F800000?
  1799. vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
1800. // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
1801. vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
1802. // NOT the result so the test is true for NON-zero mantissas
1803. vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
1804. // Lanes that are NaN are all-ones after the merge above
  1805. return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
  1806. #else // _XM_VMX128_INTRINSICS_
  1807. #endif // _XM_VMX128_INTRINSICS_
  1808. }
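// Usage sketch (illustrative, not part of the original source): scrubbing NaN
// lanes out of a result with the library's XMVectorSelect.
//
// XMVECTOR vMask = XMVectorIsNaN(V);
// // Keep V where the mask is clear, substitute zero where a lane is NaN
// XMVECTOR vClean = XMVectorSelect(V, XMVectorZero(), vMask);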
  1809. //------------------------------------------------------------------------------
  1810. XMFINLINE XMVECTOR XMVectorIsInfinite
  1811. (
  1812. FXMVECTOR V
  1813. )
  1814. {
  1815. #if defined(_XM_NO_INTRINSICS_)
  1816. XMVECTOR Control;
  1817. Control.u[0] = XMISINF(V.x) ? 0xFFFFFFFFU : 0;
  1818. Control.u[1] = XMISINF(V.y) ? 0xFFFFFFFFU : 0;
  1819. Control.u[2] = XMISINF(V.z) ? 0xFFFFFFFFU : 0;
  1820. Control.u[3] = XMISINF(V.w) ? 0xFFFFFFFFU : 0;
  1821. return Control;
  1822. #elif defined(_XM_SSE_INTRINSICS_)
  1823. // Mask off the sign bit
  1824. __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
  1825. // Compare to infinity
  1826. vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
  1827. // If any are infinity, the signs are true.
  1828. return vTemp;
  1829. #else // _XM_VMX128_INTRINSICS_
  1830. #endif // _XM_VMX128_INTRINSICS_
  1831. }
  1832. //------------------------------------------------------------------------------
  1833. // Rounding and clamping operations
  1834. //------------------------------------------------------------------------------
  1835. //------------------------------------------------------------------------------
  1836. XMFINLINE XMVECTOR XMVectorMin
  1837. (
  1838. FXMVECTOR V1,
  1839. FXMVECTOR V2
  1840. )
  1841. {
  1842. #if defined(_XM_NO_INTRINSICS_)
  1843. XMVECTOR Result;
  1844. Result.v[0] = (V1.v[0] < V2.v[0]) ? V1.v[0] : V2.v[0];
  1845. Result.v[1] = (V1.v[1] < V2.v[1]) ? V1.v[1] : V2.v[1];
  1846. Result.v[2] = (V1.v[2] < V2.v[2]) ? V1.v[2] : V2.v[2];
  1847. Result.v[3] = (V1.v[3] < V2.v[3]) ? V1.v[3] : V2.v[3];
  1848. return Result;
  1849. #elif defined(_XM_SSE_INTRINSICS_)
  1850. return _mm_min_ps( V1, V2 );
  1851. #else // _XM_VMX128_INTRINSICS_
  1852. #endif // _XM_VMX128_INTRINSICS_
  1853. }
  1854. //------------------------------------------------------------------------------
  1855. XMFINLINE XMVECTOR XMVectorMax
  1856. (
  1857. FXMVECTOR V1,
  1858. FXMVECTOR V2
  1859. )
  1860. {
  1861. #if defined(_XM_NO_INTRINSICS_)
  1862. XMVECTOR Result;
  1863. Result.v[0] = (V1.v[0] > V2.v[0]) ? V1.v[0] : V2.v[0];
  1864. Result.v[1] = (V1.v[1] > V2.v[1]) ? V1.v[1] : V2.v[1];
  1865. Result.v[2] = (V1.v[2] > V2.v[2]) ? V1.v[2] : V2.v[2];
  1866. Result.v[3] = (V1.v[3] > V2.v[3]) ? V1.v[3] : V2.v[3];
  1867. return Result;
  1868. #elif defined(_XM_SSE_INTRINSICS_)
  1869. return _mm_max_ps( V1, V2 );
  1870. #else // _XM_VMX128_INTRINSICS_
  1871. #endif // _XM_VMX128_INTRINSICS_
  1872. }
  1873. //------------------------------------------------------------------------------
  1874. XMFINLINE XMVECTOR XMVectorRound
  1875. (
  1876. FXMVECTOR V
  1877. )
  1878. {
  1879. #if defined(_XM_NO_INTRINSICS_)
  1880. XMVECTOR Result;
  1881. XMVECTOR Bias;
  1882. CONST XMVECTOR Zero = XMVectorZero();
  1883. CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
  1884. CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);
  1885. Bias = XMVectorLess(V, Zero);
  1886. Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
  1887. Result = XMVectorAdd(V, Bias);
  1888. Result = XMVectorTruncate(Result);
  1889. return Result;
  1890. #elif defined(_XM_SSE_INTRINSICS_)
  1891. // To handle NAN, INF and numbers greater than 8388608, use masking
  1892. // Get the abs value
  1893. __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
1894. // Test for |V| < 8388608 (floats at or above 2^23, plus NaN and INF, have no fractional part)
  1895. vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
  1896. // Convert to int and back to float for rounding
  1897. __m128i vInt = _mm_cvtps_epi32(V);
  1898. // Convert back to floats
  1899. XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
  1900. // All numbers less than 8388608 will use the round to int
  1901. vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1902. // All others, use the ORIGINAL value
  1903. vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
  1904. vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1905. return vResult;
  1906. #else // _XM_VMX128_INTRINSICS_
  1907. #endif // _XM_VMX128_INTRINSICS_
  1908. }
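// Why 8388608: it is 2^23, the smallest float magnitude at which the 23-bit
// mantissa can no longer hold a fractional part, so every float at or above
// it is already an integer (and would overflow the int conversion anyway).
// Illustrative sketch of the masking (values are made up; the SSE conversion
// rounds to nearest even):
//
// XMVECTOR vIn = XMVectorSet(1.4f, -1.6f, 1.0e9f, 0.5f);
// XMVECTOR vOut = XMVectorRound(vIn);
// // Lanes 0, 1 and 3 take the int round trip; lane 2 is >= 2^23, so the
// // mask passes the original 1.0e9f through untouched.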
  1909. //------------------------------------------------------------------------------
  1910. XMFINLINE XMVECTOR XMVectorTruncate
  1911. (
  1912. FXMVECTOR V
  1913. )
  1914. {
  1915. #if defined(_XM_NO_INTRINSICS_)
  1916. XMVECTOR Result;
  1917. Result.x = (FLOAT)((INT)V.x);
  1918. Result.y = (FLOAT)((INT)V.y);
  1919. Result.z = (FLOAT)((INT)V.z);
  1920. Result.w = (FLOAT)((INT)V.w);
  1921. return Result;
  1922. #elif defined(_XM_SSE_INTRINSICS_)
  1923. // To handle NAN, INF and numbers greater than 8388608, use masking
  1924. // Get the abs value
  1925. __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
1926. // Test for |V| < 8388608 (floats at or above 2^23, plus NaN and INF, have no fractional part)
  1927. vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
  1928. // Convert to int and back to float for rounding with truncation
  1929. __m128i vInt = _mm_cvttps_epi32(V);
  1930. // Convert back to floats
  1931. XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
  1932. // All numbers less than 8388608 will use the round to int
  1933. vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1934. // All others, use the ORIGINAL value
  1935. vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
  1936. vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1937. return vResult;
  1938. #else // _XM_VMX128_INTRINSICS_
  1939. #endif // _XM_VMX128_INTRINSICS_
  1940. }
  1941. //------------------------------------------------------------------------------
  1942. XMFINLINE XMVECTOR XMVectorFloor
  1943. (
  1944. FXMVECTOR V
  1945. )
  1946. {
  1947. #if defined(_XM_NO_INTRINSICS_)
  1948. XMVECTOR vResult = {
  1949. floorf(V.x),
  1950. floorf(V.y),
  1951. floorf(V.z),
  1952. floorf(V.w)
  1953. };
  1954. return vResult;
  1955. #elif defined(_XM_SSE_INTRINSICS_)
  1956. XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
  1957. __m128i vInt = _mm_cvtps_epi32(vResult);
  1958. vResult = _mm_cvtepi32_ps(vInt);
  1959. return vResult;
  1960. #else // _XM_VMX128_INTRINSICS_
  1961. #endif // _XM_VMX128_INTRINSICS_
  1962. }
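// The SSE path above fakes floorf() with round-to-nearest: subtracting just
// under one half biases the value so the nearest integer is the floor.
// Scalar sketch of the idea (illustrative, approximate constant):
//
// floor( 1.3f) ~ round( 1.3f - 0.49999997f) = round( 0.80000003f) = 1
// floor(-1.3f) ~ round(-1.3f - 0.49999997f) = round(-1.79999995f) = -2
//
// Unlike XMVectorRound, this path does no masking, so it inherits the 2^23
// limit of _mm_cvtps_epi32 for very large inputs.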
  1963. //------------------------------------------------------------------------------
  1964. XMFINLINE XMVECTOR XMVectorCeiling
  1965. (
  1966. FXMVECTOR V
  1967. )
  1968. {
  1969. #if defined(_XM_NO_INTRINSICS_)
  1970. XMVECTOR vResult = {
  1971. ceilf(V.x),
  1972. ceilf(V.y),
  1973. ceilf(V.z),
  1974. ceilf(V.w)
  1975. };
  1976. return vResult;
  1977. #elif defined(_XM_SSE_INTRINSICS_)
  1978. XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
  1979. __m128i vInt = _mm_cvtps_epi32(vResult);
  1980. vResult = _mm_cvtepi32_ps(vInt);
  1981. return vResult;
  1982. #else // _XM_VMX128_INTRINSICS_
  1983. #endif // _XM_VMX128_INTRINSICS_
  1984. }
  1985. //------------------------------------------------------------------------------
  1986. XMFINLINE XMVECTOR XMVectorClamp
  1987. (
  1988. FXMVECTOR V,
  1989. FXMVECTOR Min,
  1990. FXMVECTOR Max
  1991. )
  1992. {
  1993. #if defined(_XM_NO_INTRINSICS_)
  1994. XMVECTOR Result;
  1995. XMASSERT(XMVector4LessOrEqual(Min, Max));
  1996. Result = XMVectorMax(Min, V);
  1997. Result = XMVectorMin(Max, Result);
  1998. return Result;
  1999. #elif defined(_XM_SSE_INTRINSICS_)
  2000. XMVECTOR vResult;
  2001. XMASSERT(XMVector4LessOrEqual(Min, Max));
  2002. vResult = _mm_max_ps(Min,V);
  2003. vResult = _mm_min_ps(vResult,Max);
  2004. return vResult;
  2005. #else // _XM_VMX128_INTRINSICS_
  2006. #endif // _XM_VMX128_INTRINSICS_
  2007. }
  2008. //------------------------------------------------------------------------------
  2009. XMFINLINE XMVECTOR XMVectorSaturate
  2010. (
  2011. FXMVECTOR V
  2012. )
  2013. {
  2014. #if defined(_XM_NO_INTRINSICS_)
  2015. CONST XMVECTOR Zero = XMVectorZero();
  2016. return XMVectorClamp(V, Zero, g_XMOne.v);
  2017. #elif defined(_XM_SSE_INTRINSICS_)
  2018. // Set <0 to 0
  2019. XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2020. // Set >1 to 1
  2021. return _mm_min_ps(vResult,g_XMOne);
  2022. #else // _XM_VMX128_INTRINSICS_
  2023. #endif // _XM_VMX128_INTRINSICS_
  2024. }
  2025. //------------------------------------------------------------------------------
  2026. // Bitwise logical operations
  2027. //------------------------------------------------------------------------------
  2028. XMFINLINE XMVECTOR XMVectorAndInt
  2029. (
  2030. FXMVECTOR V1,
  2031. FXMVECTOR V2
  2032. )
  2033. {
  2034. #if defined(_XM_NO_INTRINSICS_)
  2035. XMVECTOR Result;
  2036. Result.u[0] = V1.u[0] & V2.u[0];
  2037. Result.u[1] = V1.u[1] & V2.u[1];
  2038. Result.u[2] = V1.u[2] & V2.u[2];
  2039. Result.u[3] = V1.u[3] & V2.u[3];
  2040. return Result;
  2041. #elif defined(_XM_SSE_INTRINSICS_)
  2042. return _mm_and_ps(V1,V2);
  2043. #else // _XM_VMX128_INTRINSICS_
  2044. #endif // _XM_VMX128_INTRINSICS_
  2045. }
  2046. //------------------------------------------------------------------------------
  2047. XMFINLINE XMVECTOR XMVectorAndCInt
  2048. (
  2049. FXMVECTOR V1,
  2050. FXMVECTOR V2
  2051. )
  2052. {
  2053. #if defined(_XM_NO_INTRINSICS_)
  2054. XMVECTOR Result;
  2055. Result.u[0] = V1.u[0] & ~V2.u[0];
  2056. Result.u[1] = V1.u[1] & ~V2.u[1];
  2057. Result.u[2] = V1.u[2] & ~V2.u[2];
  2058. Result.u[3] = V1.u[3] & ~V2.u[3];
  2059. return Result;
  2060. #elif defined(_XM_SSE_INTRINSICS_)
  2061. __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
  2062. return reinterpret_cast<__m128 *>(&V)[0];
  2063. #else // _XM_VMX128_INTRINSICS_
  2064. #endif // _XM_VMX128_INTRINSICS_
  2065. }
  2066. //------------------------------------------------------------------------------
  2067. XMFINLINE XMVECTOR XMVectorOrInt
  2068. (
  2069. FXMVECTOR V1,
  2070. FXMVECTOR V2
  2071. )
  2072. {
  2073. #if defined(_XM_NO_INTRINSICS_)
  2074. XMVECTOR Result;
  2075. Result.u[0] = V1.u[0] | V2.u[0];
  2076. Result.u[1] = V1.u[1] | V2.u[1];
  2077. Result.u[2] = V1.u[2] | V2.u[2];
  2078. Result.u[3] = V1.u[3] | V2.u[3];
  2079. return Result;
  2080. #elif defined(_XM_SSE_INTRINSICS_)
  2081. __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
  2082. return reinterpret_cast<__m128 *>(&V)[0];
  2083. #else // _XM_VMX128_INTRINSICS_
  2084. #endif // _XM_VMX128_INTRINSICS_
  2085. }
  2086. //------------------------------------------------------------------------------
  2087. XMFINLINE XMVECTOR XMVectorNorInt
  2088. (
  2089. FXMVECTOR V1,
  2090. FXMVECTOR V2
  2091. )
  2092. {
  2093. #if defined(_XM_NO_INTRINSICS_)
  2094. XMVECTOR Result;
  2095. Result.u[0] = ~(V1.u[0] | V2.u[0]);
  2096. Result.u[1] = ~(V1.u[1] | V2.u[1]);
  2097. Result.u[2] = ~(V1.u[2] | V2.u[2]);
  2098. Result.u[3] = ~(V1.u[3] | V2.u[3]);
  2099. return Result;
  2100. #elif defined(_XM_SSE_INTRINSICS_)
  2101. __m128i Result;
  2102. Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
  2103. Result = _mm_andnot_si128( Result,g_XMNegOneMask);
  2104. return reinterpret_cast<__m128 *>(&Result)[0];
  2105. #else // _XM_VMX128_INTRINSICS_
  2106. #endif // _XM_VMX128_INTRINSICS_
  2107. }
  2108. //------------------------------------------------------------------------------
  2109. XMFINLINE XMVECTOR XMVectorXorInt
  2110. (
  2111. FXMVECTOR V1,
  2112. FXMVECTOR V2
  2113. )
  2114. {
  2115. #if defined(_XM_NO_INTRINSICS_)
  2116. XMVECTOR Result;
  2117. Result.u[0] = V1.u[0] ^ V2.u[0];
  2118. Result.u[1] = V1.u[1] ^ V2.u[1];
  2119. Result.u[2] = V1.u[2] ^ V2.u[2];
  2120. Result.u[3] = V1.u[3] ^ V2.u[3];
  2121. return Result;
  2122. #elif defined(_XM_SSE_INTRINSICS_)
  2123. __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
  2124. return reinterpret_cast<__m128 *>(&V)[0];
  2125. #else // _XM_VMX128_INTRINSICS_
  2126. #endif // _XM_VMX128_INTRINSICS_
  2127. }
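// These integer ops are the building blocks of branchless selection, which is
// how the comparison masks produced earlier are normally consumed. A sketch
// equivalent to the library's XMVectorSelect (illustrative):
//
// XMVECTOR vTemp1 = XMVectorAndCInt(V1, Control); // V1 where Control bits are 0
// XMVECTOR vTemp2 = XMVectorAndInt(V2, Control);  // V2 where Control bits are 1
// XMVECTOR vResult = XMVectorOrInt(vTemp1, vTemp2);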
  2128. //------------------------------------------------------------------------------
  2129. // Computation operations
  2130. //------------------------------------------------------------------------------
  2131. //------------------------------------------------------------------------------
  2132. XMFINLINE XMVECTOR XMVectorNegate
  2133. (
  2134. FXMVECTOR V
  2135. )
  2136. {
  2137. #if defined(_XM_NO_INTRINSICS_)
  2138. XMVECTOR Result;
  2139. Result.v[0] = -V.v[0];
  2140. Result.v[1] = -V.v[1];
  2141. Result.v[2] = -V.v[2];
  2142. Result.v[3] = -V.v[3];
  2143. return Result;
  2144. #elif defined(_XM_SSE_INTRINSICS_)
  2145. XMVECTOR Z;
  2146. Z = _mm_setzero_ps();
  2147. return _mm_sub_ps( Z, V );
  2148. #else // _XM_VMX128_INTRINSICS_
  2149. #endif // _XM_VMX128_INTRINSICS_
  2150. }
  2151. //------------------------------------------------------------------------------
  2152. XMFINLINE XMVECTOR XMVectorAdd
  2153. (
  2154. FXMVECTOR V1,
  2155. FXMVECTOR V2
  2156. )
  2157. {
  2158. #if defined(_XM_NO_INTRINSICS_)
  2159. XMVECTOR Result;
  2160. Result.v[0] = V1.v[0] + V2.v[0];
  2161. Result.v[1] = V1.v[1] + V2.v[1];
  2162. Result.v[2] = V1.v[2] + V2.v[2];
  2163. Result.v[3] = V1.v[3] + V2.v[3];
  2164. return Result;
  2165. #elif defined(_XM_SSE_INTRINSICS_)
  2166. return _mm_add_ps( V1, V2 );
  2167. #else // _XM_VMX128_INTRINSICS_
  2168. #endif // _XM_VMX128_INTRINSICS_
  2169. }
  2170. //------------------------------------------------------------------------------
  2171. XMFINLINE XMVECTOR XMVectorAddAngles
  2172. (
  2173. FXMVECTOR V1,
  2174. FXMVECTOR V2
  2175. )
  2176. {
  2177. #if defined(_XM_NO_INTRINSICS_)
  2178. XMVECTOR Mask;
  2179. XMVECTOR Offset;
  2180. XMVECTOR Result;
  2181. CONST XMVECTOR Zero = XMVectorZero();
  2182. // Add the given angles together. If the range of V1 is such
  2183. // that -Pi <= V1 < Pi and the range of V2 is such that
  2184. // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
  2185. // will be -Pi <= Result < Pi.
  2186. Result = XMVectorAdd(V1, V2);
  2187. Mask = XMVectorLess(Result, g_XMNegativePi.v);
  2188. Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
  2189. Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
  2190. Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
  2191. Result = XMVectorAdd(Result, Offset);
  2192. return Result;
  2193. #elif defined(_XM_SSE_INTRINSICS_)
  2194. // Adjust the angles
  2195. XMVECTOR vResult = _mm_add_ps(V1,V2);
2196. // Less than -Pi?
  2197. XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
  2198. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
  2199. // Add 2Pi to all entries less than -Pi
  2200. vResult = _mm_add_ps(vResult,vOffset);
  2201. // Greater than or equal to Pi?
  2202. vOffset = _mm_cmpge_ps(vResult,g_XMPi);
  2203. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2204. // Subtract 2Pi from all entries greater than or equal to Pi
  2205. vResult = _mm_sub_ps(vResult,vOffset);
  2206. return vResult;
  2207. #else // _XM_VMX128_INTRINSICS_
  2208. #endif // _XM_VMX128_INTRINSICS_
  2209. }
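// Usage sketch (illustrative values): the wrap keeps summed headings in
// [-Pi, Pi).
//
// XMVECTOR vA = XMVectorReplicate(3.0f);  // just under Pi
// XMVECTOR vB = XMVectorReplicate(0.5f);
// // 3.5 exceeds Pi, so 2*Pi is subtracted: ~ -2.7831853f in every lane
// XMVECTOR vSum = XMVectorAddAngles(vA, vB);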
  2210. //------------------------------------------------------------------------------
  2211. XMFINLINE XMVECTOR XMVectorSubtract
  2212. (
  2213. FXMVECTOR V1,
  2214. FXMVECTOR V2
  2215. )
  2216. {
  2217. #if defined(_XM_NO_INTRINSICS_)
  2218. XMVECTOR Result;
  2219. Result.x = V1.x - V2.x;
  2220. Result.y = V1.y - V2.y;
  2221. Result.z = V1.z - V2.z;
  2222. Result.w = V1.w - V2.w;
  2223. return Result;
  2224. #elif defined(_XM_SSE_INTRINSICS_)
  2225. return _mm_sub_ps( V1, V2 );
  2226. #else // _XM_VMX128_INTRINSICS_
  2227. #endif // _XM_VMX128_INTRINSICS_
  2228. }
  2229. //------------------------------------------------------------------------------
  2230. XMFINLINE XMVECTOR XMVectorSubtractAngles
  2231. (
  2232. FXMVECTOR V1,
  2233. FXMVECTOR V2
  2234. )
  2235. {
  2236. #if defined(_XM_NO_INTRINSICS_)
  2237. XMVECTOR Mask;
  2238. XMVECTOR Offset;
  2239. XMVECTOR Result;
  2240. CONST XMVECTOR Zero = XMVectorZero();
  2241. // Subtract the given angles. If the range of V1 is such
  2242. // that -Pi <= V1 < Pi and the range of V2 is such that
  2243. // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
  2244. // will be -Pi <= Result < Pi.
  2245. Result = XMVectorSubtract(V1, V2);
  2246. Mask = XMVectorLess(Result, g_XMNegativePi.v);
  2247. Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
  2248. Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
  2249. Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
  2250. Result = XMVectorAdd(Result, Offset);
  2251. return Result;
  2252. #elif defined(_XM_SSE_INTRINSICS_)
  2253. // Adjust the angles
  2254. XMVECTOR vResult = _mm_sub_ps(V1,V2);
2255. // Less than -Pi?
  2256. XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
  2257. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
  2258. // Add 2Pi to all entries less than -Pi
  2259. vResult = _mm_add_ps(vResult,vOffset);
  2260. // Greater than or equal to Pi?
  2261. vOffset = _mm_cmpge_ps(vResult,g_XMPi);
  2262. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2263. // Subtract 2Pi from all entries greater than or equal to Pi
  2264. vResult = _mm_sub_ps(vResult,vOffset);
  2265. return vResult;
  2266. #else // _XM_VMX128_INTRINSICS_
  2267. #endif // _XM_VMX128_INTRINSICS_
  2268. }
  2269. //------------------------------------------------------------------------------
  2270. XMFINLINE XMVECTOR XMVectorMultiply
  2271. (
  2272. FXMVECTOR V1,
  2273. FXMVECTOR V2
  2274. )
  2275. {
  2276. #if defined(_XM_NO_INTRINSICS_)
  2277. XMVECTOR Result = {
  2278. V1.x * V2.x,
  2279. V1.y * V2.y,
  2280. V1.z * V2.z,
  2281. V1.w * V2.w
  2282. };
  2283. return Result;
  2284. #elif defined(_XM_SSE_INTRINSICS_)
  2285. return _mm_mul_ps( V1, V2 );
  2286. #else // _XM_VMX128_INTRINSICS_
  2287. #endif // _XM_VMX128_INTRINSICS_
  2288. }
  2289. //------------------------------------------------------------------------------
  2290. XMFINLINE XMVECTOR XMVectorMultiplyAdd
  2291. (
  2292. FXMVECTOR V1,
  2293. FXMVECTOR V2,
  2294. FXMVECTOR V3
  2295. )
  2296. {
  2297. #if defined(_XM_NO_INTRINSICS_)
  2298. XMVECTOR vResult = {
  2299. (V1.x * V2.x) + V3.x,
  2300. (V1.y * V2.y) + V3.y,
  2301. (V1.z * V2.z) + V3.z,
  2302. (V1.w * V2.w) + V3.w
  2303. };
  2304. return vResult;
  2305. #elif defined(_XM_SSE_INTRINSICS_)
  2306. XMVECTOR vResult = _mm_mul_ps( V1, V2 );
  2307. return _mm_add_ps(vResult, V3 );
  2308. #else // _XM_VMX128_INTRINSICS_
  2309. #endif // _XM_VMX128_INTRINSICS_
  2310. }
  2311. //------------------------------------------------------------------------------
  2312. XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract
  2313. (
  2314. FXMVECTOR V1,
  2315. FXMVECTOR V2,
  2316. FXMVECTOR V3
  2317. )
  2318. {
  2319. #if defined(_XM_NO_INTRINSICS_)
  2320. XMVECTOR vResult = {
  2321. V3.x - (V1.x * V2.x),
  2322. V3.y - (V1.y * V2.y),
  2323. V3.z - (V1.z * V2.z),
  2324. V3.w - (V1.w * V2.w)
  2325. };
  2326. return vResult;
  2327. #elif defined(_XM_SSE_INTRINSICS_)
  2328. XMVECTOR R = _mm_mul_ps( V1, V2 );
  2329. return _mm_sub_ps( V3, R );
  2330. #else // _XM_VMX128_INTRINSICS_
  2331. #endif // _XM_VMX128_INTRINSICS_
  2332. }
  2333. //------------------------------------------------------------------------------
  2334. XMFINLINE XMVECTOR XMVectorScale
  2335. (
  2336. FXMVECTOR V,
  2337. FLOAT ScaleFactor
  2338. )
  2339. {
  2340. #if defined(_XM_NO_INTRINSICS_)
  2341. XMVECTOR vResult = {
  2342. V.x * ScaleFactor,
  2343. V.y * ScaleFactor,
  2344. V.z * ScaleFactor,
  2345. V.w * ScaleFactor
  2346. };
  2347. return vResult;
  2348. #elif defined(_XM_SSE_INTRINSICS_)
  2349. XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
  2350. return _mm_mul_ps(vResult,V);
2351. #else // _XM_VMX128_INTRINSICS_
  2352. #endif // _XM_VMX128_INTRINSICS_
  2353. }
  2354. //------------------------------------------------------------------------------
  2355. XMFINLINE XMVECTOR XMVectorReciprocalEst
  2356. (
  2357. FXMVECTOR V
  2358. )
  2359. {
  2360. #if defined(_XM_NO_INTRINSICS_)
  2361. XMVECTOR Result;
  2362. UINT i;
  2363. // Avoid C4701
  2364. Result.v[0] = 0.0f;
  2365. for (i = 0; i < 4; i++)
  2366. {
  2367. if (XMISINF(V.v[i]))
  2368. {
  2369. Result.v[i] = (V.v[i] < 0.0f) ? -0.0f : 0.0f;
  2370. }
  2371. else if (V.v[i] == -0.0f)
  2372. {
  2373. Result.u[i] = 0xFF800000;
  2374. }
  2375. else if (V.v[i] == 0.0f)
  2376. {
  2377. Result.u[i] = 0x7F800000;
  2378. }
  2379. else
  2380. {
  2381. Result.v[i] = 1.0f / V.v[i];
  2382. }
  2383. }
  2384. return Result;
  2385. #elif defined(_XM_SSE_INTRINSICS_)
  2386. return _mm_rcp_ps(V);
  2387. #else // _XM_VMX128_INTRINSICS_
  2388. #endif // _XM_VMX128_INTRINSICS_
  2389. }
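// _mm_rcp_ps is only accurate to roughly 12 bits of mantissa. A common
// refinement, not promised by this Est function, is one Newton-Raphson step,
// sketched here with calls defined in this file:
//
// XMVECTOR vEst = XMVectorReciprocalEst(V);
// XMVECTOR vTwo = XMVectorReplicate(2.0f);
// // x1 = x0 * (2 - V * x0) roughly doubles the number of accurate bits
// XMVECTOR vRcp = XMVectorMultiply(vEst,
//                     XMVectorNegativeMultiplySubtract(V, vEst, vTwo));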
  2390. //------------------------------------------------------------------------------
  2391. XMFINLINE XMVECTOR XMVectorReciprocal
  2392. (
  2393. FXMVECTOR V
  2394. )
  2395. {
  2396. #if defined(_XM_NO_INTRINSICS_)
  2397. return XMVectorReciprocalEst(V);
  2398. #elif defined(_XM_SSE_INTRINSICS_)
  2399. return _mm_div_ps(g_XMOne,V);
  2400. #else // _XM_VMX128_INTRINSICS_
  2401. #endif // _XM_VMX128_INTRINSICS_
  2402. }
  2403. //------------------------------------------------------------------------------
  2404. // Return an estimated square root
  2405. XMFINLINE XMVECTOR XMVectorSqrtEst
  2406. (
  2407. FXMVECTOR V
  2408. )
  2409. {
  2410. #if defined(_XM_NO_INTRINSICS_)
  2411. XMVECTOR Select;
  2412. // if (x == +Infinity) sqrt(x) = +Infinity
  2413. // if (x == +0.0f) sqrt(x) = +0.0f
  2414. // if (x == -0.0f) sqrt(x) = -0.0f
  2415. // if (x < -0.0f) sqrt(x) = QNaN
  2416. XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
  2417. XMVECTOR Zero = XMVectorZero();
  2418. XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
  2419. XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
  2420. Result = XMVectorMultiply(V, Result);
  2421. Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
  2422. Result = XMVectorSelect(V, Result, Select);
  2423. return Result;
  2424. #elif defined(_XM_SSE_INTRINSICS_)
  2425. return _mm_sqrt_ps(V);
  2426. #else // _XM_VMX128_INTRINSICS_
  2427. #endif // _XM_VMX128_INTRINSICS_
  2428. }
  2429. //------------------------------------------------------------------------------
  2430. XMFINLINE XMVECTOR XMVectorSqrt
  2431. (
  2432. FXMVECTOR V
  2433. )
  2434. {
  2435. #if defined(_XM_NO_INTRINSICS_)
  2436. XMVECTOR Zero;
  2437. XMVECTOR VEqualsInfinity, VEqualsZero;
  2438. XMVECTOR Select;
  2439. XMVECTOR Result;
  2440. // if (x == +Infinity) sqrt(x) = +Infinity
  2441. // if (x == +0.0f) sqrt(x) = +0.0f
  2442. // if (x == -0.0f) sqrt(x) = -0.0f
  2443. // if (x < -0.0f) sqrt(x) = QNaN
  2444. Result = XMVectorReciprocalSqrt(V);
  2445. Zero = XMVectorZero();
  2446. VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
  2447. VEqualsZero = XMVectorEqual(V, Zero);
  2448. Result = XMVectorMultiply(V, Result);
  2449. Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
  2450. Result = XMVectorSelect(V, Result, Select);
  2451. return Result;
  2452. #elif defined(_XM_SSE_INTRINSICS_)
  2453. return _mm_sqrt_ps(V);
  2454. #else // _XM_VMX128_INTRINSICS_
  2455. #endif // _XM_VMX128_INTRINSICS_
  2456. }
  2457. //------------------------------------------------------------------------------
  2458. XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
  2459. (
  2460. FXMVECTOR V
  2461. )
  2462. {
  2463. #if defined(_XM_NO_INTRINSICS_)
  2464. XMVECTOR Result;
  2465. UINT i;
  2466. // Avoid C4701
  2467. Result.v[0] = 0.0f;
  2468. for (i = 0; i < 4; i++)
  2469. {
  2470. if (V.v[i] == 0.0f)
  2471. {
  2472. Result.u[i] = 0x7F800000;
  2473. }
  2474. else if (V.v[i] == -0.0f)
  2475. {
  2476. Result.u[i] = 0xFF800000;
  2477. }
  2478. else if (V.v[i] < 0.0f)
  2479. {
  2480. Result.u[i] = 0x7FFFFFFF;
  2481. }
  2482. else if (XMISINF(V.v[i]))
  2483. {
  2484. Result.v[i] = 0.0f;
  2485. }
  2486. else
  2487. {
  2488. Result.v[i] = 1.0f / sqrtf(V.v[i]);
  2489. }
  2490. }
  2491. return Result;
  2492. #elif defined(_XM_SSE_INTRINSICS_)
  2493. return _mm_rsqrt_ps(V);
  2494. #else // _XM_VMX128_INTRINSICS_
  2495. #endif // _XM_VMX128_INTRINSICS_
  2496. }
  2497. //------------------------------------------------------------------------------
  2498. XMFINLINE XMVECTOR XMVectorReciprocalSqrt
  2499. (
  2500. FXMVECTOR V
  2501. )
  2502. {
  2503. #if defined(_XM_NO_INTRINSICS_)
  2504. return XMVectorReciprocalSqrtEst(V);
  2505. #elif defined(_XM_SSE_INTRINSICS_)
  2506. XMVECTOR vResult = _mm_sqrt_ps(V);
  2507. vResult = _mm_div_ps(g_XMOne,vResult);
  2508. return vResult;
  2509. #else // _XM_VMX128_INTRINSICS_
  2510. #endif // _XM_VMX128_INTRINSICS_
  2511. }
  2512. //------------------------------------------------------------------------------
  2513. XMFINLINE XMVECTOR XMVectorExpEst
  2514. (
  2515. FXMVECTOR V
  2516. )
  2517. {
  2518. #if defined(_XM_NO_INTRINSICS_)
  2519. XMVECTOR Result;
  2520. Result.x = powf(2.0f, V.x);
  2521. Result.y = powf(2.0f, V.y);
  2522. Result.z = powf(2.0f, V.z);
  2523. Result.w = powf(2.0f, V.w);
  2524. return Result;
  2525. #elif defined(_XM_SSE_INTRINSICS_)
  2526. XMVECTOR vResult = _mm_setr_ps(
  2527. powf(2.0f,XMVectorGetX(V)),
  2528. powf(2.0f,XMVectorGetY(V)),
  2529. powf(2.0f,XMVectorGetZ(V)),
  2530. powf(2.0f,XMVectorGetW(V)));
  2531. return vResult;
  2532. #else // _XM_VMX128_INTRINSICS_
  2533. #endif // _XM_VMX128_INTRINSICS_
  2534. }
  2535. //------------------------------------------------------------------------------
  2536. XMINLINE XMVECTOR XMVectorExp
  2537. (
  2538. FXMVECTOR V
  2539. )
  2540. {
  2541. #if defined(_XM_NO_INTRINSICS_)
  2542. XMVECTOR E, S;
  2543. XMVECTOR R, R2, R3, R4;
  2544. XMVECTOR V0, V1;
  2545. XMVECTOR C0X, C0Y, C0Z, C0W;
  2546. XMVECTOR C1X, C1Y, C1Z, C1W;
  2547. XMVECTOR Result;
  2548. static CONST XMVECTOR C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
  2549. static CONST XMVECTOR C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
  2550. R = XMVectorFloor(V);
  2551. E = XMVectorExpEst(R);
  2552. R = XMVectorSubtract(V, R);
  2553. R2 = XMVectorMultiply(R, R);
  2554. R3 = XMVectorMultiply(R, R2);
  2555. R4 = XMVectorMultiply(R2, R2);
  2556. C0X = XMVectorSplatX(C0);
  2557. C0Y = XMVectorSplatY(C0);
  2558. C0Z = XMVectorSplatZ(C0);
  2559. C0W = XMVectorSplatW(C0);
  2560. C1X = XMVectorSplatX(C1);
  2561. C1Y = XMVectorSplatY(C1);
  2562. C1Z = XMVectorSplatZ(C1);
  2563. C1W = XMVectorSplatW(C1);
  2564. V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
  2565. V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
  2566. V0 = XMVectorMultiplyAdd(R3, C0W, V0);
  2567. V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
  2568. V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
  2569. V1 = XMVectorMultiplyAdd(R3, C1W, V1);
  2570. S = XMVectorMultiplyAdd(R4, V1, V0);
  2571. S = XMVectorReciprocal(S);
  2572. Result = XMVectorMultiply(E, S);
  2573. return Result;
  2574. #elif defined(_XM_SSE_INTRINSICS_)
  2575. static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
  2576. static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
2577. // Get the integer part of the input
2578. XMVECTOR R = XMVectorFloor(V);
2579. // Get the exponent estimate
2580. XMVECTOR E = XMVectorExpEst(R);
2581. // Keep the fractional part only
2582. R = _mm_sub_ps(V,R);
  2583. // Get R^2
  2584. XMVECTOR R2 = _mm_mul_ps(R,R);
  2585. // And R^3
  2586. XMVECTOR R3 = _mm_mul_ps(R,R2);
  2587. XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
  2588. V0 = _mm_mul_ps(V0,R);
  2589. XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
  2590. V0 = _mm_add_ps(V0,vConstants);
  2591. vConstants = _mm_load_ps1(&C0.f[2]);
  2592. vConstants = _mm_mul_ps(vConstants,R2);
  2593. V0 = _mm_add_ps(V0,vConstants);
  2594. vConstants = _mm_load_ps1(&C0.f[3]);
  2595. vConstants = _mm_mul_ps(vConstants,R3);
  2596. V0 = _mm_add_ps(V0,vConstants);
  2597. XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
  2598. V1 = _mm_mul_ps(V1,R);
  2599. vConstants = _mm_load_ps1(&C1.f[0]);
  2600. V1 = _mm_add_ps(V1,vConstants);
  2601. vConstants = _mm_load_ps1(&C1.f[2]);
  2602. vConstants = _mm_mul_ps(vConstants,R2);
  2603. V1 = _mm_add_ps(V1,vConstants);
  2604. vConstants = _mm_load_ps1(&C1.f[3]);
  2605. vConstants = _mm_mul_ps(vConstants,R3);
  2606. V1 = _mm_add_ps(V1,vConstants);
  2607. // R2 = R^4
  2608. R2 = _mm_mul_ps(R2,R2);
  2609. R2 = _mm_mul_ps(R2,V1);
  2610. R2 = _mm_add_ps(R2,V0);
  2611. E = _mm_div_ps(E,R2);
  2612. return E;
  2613. #else // _XM_VMX128_INTRINSICS_
  2614. #endif // _XM_VMX128_INTRINSICS_
  2615. }
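// Note the convention: XMVectorExp raises two (not e) to the given powers,
// as the powf(2.0f, ...) reference path shows. A natural-exponential sketch
// on top of it (the scale constant is log2(e)):
//
// XMVECTOR vExpE = XMVectorExp(XMVectorScale(V, 1.4426950f));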
  2616. //------------------------------------------------------------------------------
  2617. XMFINLINE XMVECTOR XMVectorLogEst
  2618. (
  2619. FXMVECTOR V
  2620. )
  2621. {
  2622. #if defined(_XM_NO_INTRINSICS_)
  2623. FLOAT fScale = (1.0f / logf(2.0f));
  2624. XMVECTOR Result;
  2625. Result.x = logf(V.x)*fScale;
  2626. Result.y = logf(V.y)*fScale;
  2627. Result.z = logf(V.z)*fScale;
  2628. Result.w = logf(V.w)*fScale;
  2629. return Result;
  2630. #elif defined(_XM_SSE_INTRINSICS_)
  2631. XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
  2632. XMVECTOR vResult = _mm_setr_ps(
  2633. logf(XMVectorGetX(V)),
  2634. logf(XMVectorGetY(V)),
  2635. logf(XMVectorGetZ(V)),
  2636. logf(XMVectorGetW(V)));
  2637. vResult = _mm_mul_ps(vResult,vScale);
  2638. return vResult;
  2639. #else // _XM_VMX128_INTRINSICS_
  2640. #endif // _XM_VMX128_INTRINSICS_
  2641. }
  2642. //------------------------------------------------------------------------------
  2643. XMINLINE XMVECTOR XMVectorLog
  2644. (
  2645. FXMVECTOR V
  2646. )
  2647. {
  2648. #if defined(_XM_NO_INTRINSICS_)
  2649. FLOAT fScale = (1.0f / logf(2.0f));
  2650. XMVECTOR Result;
  2651. Result.x = logf(V.x)*fScale;
  2652. Result.y = logf(V.y)*fScale;
  2653. Result.z = logf(V.z)*fScale;
  2654. Result.w = logf(V.w)*fScale;
  2655. return Result;
  2656. #elif defined(_XM_SSE_INTRINSICS_)
  2657. XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
  2658. XMVECTOR vResult = _mm_setr_ps(
  2659. logf(XMVectorGetX(V)),
  2660. logf(XMVectorGetY(V)),
  2661. logf(XMVectorGetZ(V)),
  2662. logf(XMVectorGetW(V)));
  2663. vResult = _mm_mul_ps(vResult,vScale);
  2664. return vResult;
  2665. #else // _XM_VMX128_INTRINSICS_
  2666. #endif // _XM_VMX128_INTRINSICS_
  2667. }
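// Likewise XMVectorLog returns base-2 logarithms (logf scaled by 1/ln(2)).
// A natural-log sketch on top of it (the scale constant is ln(2)):
//
// XMVECTOR vLn = XMVectorScale(XMVectorLog(V), 0.6931472f);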
  2668. //------------------------------------------------------------------------------
  2669. XMFINLINE XMVECTOR XMVectorPowEst
  2670. (
  2671. FXMVECTOR V1,
  2672. FXMVECTOR V2
  2673. )
  2674. {
  2675. #if defined(_XM_NO_INTRINSICS_)
  2676. XMVECTOR Result;
  2677. Result.x = powf(V1.x, V2.x);
  2678. Result.y = powf(V1.y, V2.y);
  2679. Result.z = powf(V1.z, V2.z);
  2680. Result.w = powf(V1.w, V2.w);
  2681. return Result;
  2682. #elif defined(_XM_SSE_INTRINSICS_)
  2683. XMVECTOR vResult = _mm_setr_ps(
  2684. powf(XMVectorGetX(V1),XMVectorGetX(V2)),
  2685. powf(XMVectorGetY(V1),XMVectorGetY(V2)),
  2686. powf(XMVectorGetZ(V1),XMVectorGetZ(V2)),
  2687. powf(XMVectorGetW(V1),XMVectorGetW(V2)));
  2688. return vResult;
  2689. #else // _XM_VMX128_INTRINSICS_
  2690. #endif // _XM_VMX128_INTRINSICS_
  2691. }
  2692. //------------------------------------------------------------------------------
  2693. XMFINLINE XMVECTOR XMVectorPow
  2694. (
  2695. FXMVECTOR V1,
  2696. FXMVECTOR V2
  2697. )
  2698. {
  2699. #if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
  2700. return XMVectorPowEst(V1, V2);
  2701. #else // _XM_VMX128_INTRINSICS_
  2702. #endif // _XM_VMX128_INTRINSICS_
  2703. }
  2704. //------------------------------------------------------------------------------
  2705. XMFINLINE XMVECTOR XMVectorAbs
  2706. (
  2707. FXMVECTOR V
  2708. )
  2709. {
  2710. #if defined(_XM_NO_INTRINSICS_)
  2711. XMVECTOR vResult = {
  2712. fabsf(V.x),
  2713. fabsf(V.y),
  2714. fabsf(V.z),
  2715. fabsf(V.w)
  2716. };
  2717. return vResult;
  2718. #elif defined(_XM_SSE_INTRINSICS_)
  2719. XMVECTOR vResult = _mm_setzero_ps();
  2720. vResult = _mm_sub_ps(vResult,V);
  2721. vResult = _mm_max_ps(vResult,V);
  2722. return vResult;
  2723. #else // _XM_VMX128_INTRINSICS_
  2724. #endif // _XM_VMX128_INTRINSICS_
  2725. }
  2726. //------------------------------------------------------------------------------
  2727. XMFINLINE XMVECTOR XMVectorMod
  2728. (
  2729. FXMVECTOR V1,
  2730. FXMVECTOR V2
  2731. )
  2732. {
  2733. #if defined(_XM_NO_INTRINSICS_)
  2734. XMVECTOR Reciprocal;
  2735. XMVECTOR Quotient;
  2736. XMVECTOR Result;
  2737. // V1 % V2 = V1 - V2 * truncate(V1 / V2)
  2738. Reciprocal = XMVectorReciprocal(V2);
  2739. Quotient = XMVectorMultiply(V1, Reciprocal);
  2740. Quotient = XMVectorTruncate(Quotient);
  2741. Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);
  2742. return Result;
  2743. #elif defined(_XM_SSE_INTRINSICS_)
  2744. XMVECTOR vResult = _mm_div_ps(V1, V2);
  2745. vResult = XMVectorTruncate(vResult);
  2746. vResult = _mm_mul_ps(vResult,V2);
  2747. vResult = _mm_sub_ps(V1,vResult);
  2748. return vResult;
  2749. #else // _XM_VMX128_INTRINSICS_
  2750. #endif // _XM_VMX128_INTRINSICS_
  2751. }
  2752. //------------------------------------------------------------------------------
  2753. XMFINLINE XMVECTOR XMVectorModAngles
  2754. (
  2755. FXMVECTOR Angles
  2756. )
  2757. {
  2758. #if defined(_XM_NO_INTRINSICS_)
  2759. XMVECTOR V;
  2760. XMVECTOR Result;
  2761. // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
  2762. V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
  2763. V = XMVectorRound(V);
  2764. Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);
  2765. return Result;
  2766. #elif defined(_XM_SSE_INTRINSICS_)
  2767. // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
  2768. XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
2769. // Use the inline XMVectorRound, since the rounding logic is nontrivial
  2770. vResult = XMVectorRound(vResult);
  2771. vResult = _mm_mul_ps(vResult,g_XMTwoPi);
  2772. vResult = _mm_sub_ps(Angles,vResult);
  2773. return vResult;
  2774. #else // _XM_VMX128_INTRINSICS_
  2775. #endif // _XM_VMX128_INTRINSICS_
  2776. }
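// Usage sketch (illustrative values): reducing an accumulated rotation into
// the primary range expected by XMVectorSin / XMVectorCos below.
//
// XMVECTOR vAngle = XMVectorReplicate(7.0f);      // a bit over 2*Pi
// XMVECTOR vWrapped = XMVectorModAngles(vAngle);  // ~ 0.7168147f per lane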
  2777. //------------------------------------------------------------------------------
  2778. XMINLINE XMVECTOR XMVectorSin
  2779. (
  2780. FXMVECTOR V
  2781. )
  2782. {
  2783. #if defined(_XM_NO_INTRINSICS_)
  2784. XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
  2785. XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
  2786. XMVECTOR Result;
  2787. V1 = XMVectorModAngles(V);
  2788. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
  2789. // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
  2790. V2 = XMVectorMultiply(V1, V1);
  2791. V3 = XMVectorMultiply(V2, V1);
  2792. V5 = XMVectorMultiply(V3, V2);
  2793. V7 = XMVectorMultiply(V5, V2);
  2794. V9 = XMVectorMultiply(V7, V2);
  2795. V11 = XMVectorMultiply(V9, V2);
  2796. V13 = XMVectorMultiply(V11, V2);
  2797. V15 = XMVectorMultiply(V13, V2);
  2798. V17 = XMVectorMultiply(V15, V2);
  2799. V19 = XMVectorMultiply(V17, V2);
  2800. V21 = XMVectorMultiply(V19, V2);
  2801. V23 = XMVectorMultiply(V21, V2);
  2802. S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
  2803. S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
  2804. S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
  2805. S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
  2806. S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
  2807. S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
  2808. S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
  2809. S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
  2810. S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
  2811. S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
  2812. S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
  2813. Result = XMVectorMultiplyAdd(S1, V3, V1);
  2814. Result = XMVectorMultiplyAdd(S2, V5, Result);
  2815. Result = XMVectorMultiplyAdd(S3, V7, Result);
  2816. Result = XMVectorMultiplyAdd(S4, V9, Result);
  2817. Result = XMVectorMultiplyAdd(S5, V11, Result);
  2818. Result = XMVectorMultiplyAdd(S6, V13, Result);
  2819. Result = XMVectorMultiplyAdd(S7, V15, Result);
  2820. Result = XMVectorMultiplyAdd(S8, V17, Result);
  2821. Result = XMVectorMultiplyAdd(S9, V19, Result);
  2822. Result = XMVectorMultiplyAdd(S10, V21, Result);
  2823. Result = XMVectorMultiplyAdd(S11, V23, Result);
  2824. return Result;
  2825. #elif defined(_XM_SSE_INTRINSICS_)
  2826. // Force the value within the bounds of pi
  2827. XMVECTOR vResult = XMVectorModAngles(V);
2828. // Each one is V raised to the "num" power
  2829. // V2 = V1^2
  2830. XMVECTOR V2 = _mm_mul_ps(vResult,vResult);
  2831. // V1^3
  2832. XMVECTOR vPower = _mm_mul_ps(vResult,V2);
  2833. XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
  2834. vConstants = _mm_mul_ps(vConstants,vPower);
  2835. vResult = _mm_add_ps(vResult,vConstants);
  2836. // V^5
  2837. vPower = _mm_mul_ps(vPower,V2);
  2838. vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
  2839. vConstants = _mm_mul_ps(vConstants,vPower);
  2840. vResult = _mm_add_ps(vResult,vConstants);
  2841. // V^7
  2842. vPower = _mm_mul_ps(vPower,V2);
  2843. vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
  2844. vConstants = _mm_mul_ps(vConstants,vPower);
  2845. vResult = _mm_add_ps(vResult,vConstants);
  2846. // V^9
  2847. vPower = _mm_mul_ps(vPower,V2);
  2848. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
  2849. vConstants = _mm_mul_ps(vConstants,vPower);
  2850. vResult = _mm_add_ps(vResult,vConstants);
  2851. // V^11
  2852. vPower = _mm_mul_ps(vPower,V2);
  2853. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
  2854. vConstants = _mm_mul_ps(vConstants,vPower);
  2855. vResult = _mm_add_ps(vResult,vConstants);
  2856. // V^13
  2857. vPower = _mm_mul_ps(vPower,V2);
  2858. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
  2859. vConstants = _mm_mul_ps(vConstants,vPower);
  2860. vResult = _mm_add_ps(vResult,vConstants);
  2861. // V^15
  2862. vPower = _mm_mul_ps(vPower,V2);
  2863. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
  2864. vConstants = _mm_mul_ps(vConstants,vPower);
  2865. vResult = _mm_add_ps(vResult,vConstants);
  2866. // V^17
  2867. vPower = _mm_mul_ps(vPower,V2);
  2868. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
  2869. vConstants = _mm_mul_ps(vConstants,vPower);
  2870. vResult = _mm_add_ps(vResult,vConstants);
  2871. // V^19
  2872. vPower = _mm_mul_ps(vPower,V2);
  2873. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
  2874. vConstants = _mm_mul_ps(vConstants,vPower);
  2875. vResult = _mm_add_ps(vResult,vConstants);
  2876. // V^21
  2877. vPower = _mm_mul_ps(vPower,V2);
  2878. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
  2879. vConstants = _mm_mul_ps(vConstants,vPower);
  2880. vResult = _mm_add_ps(vResult,vConstants);
  2881. // V^23
  2882. vPower = _mm_mul_ps(vPower,V2);
  2883. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
  2884. vConstants = _mm_mul_ps(vConstants,vPower);
  2885. vResult = _mm_add_ps(vResult,vConstants);
  2886. return vResult;
  2887. #else // _XM_VMX128_INTRINSICS_
  2888. #endif // _XM_VMX128_INTRINSICS_
  2889. }
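// Usage sketch (illustrative): the series evaluates all four lanes at once,
// which is the point of the vector form. XM_PI and XM_PIDIV2 are existing
// library constants.
//
// XMVECTOR vAngles = XMVectorSet(0.0f, XM_PIDIV2, XM_PI * 0.25f, -XM_PIDIV2);
// // ~ (0, 1, 0.7071068f, -1) within the accuracy of the polynomial
// XMVECTOR vSines = XMVectorSin(vAngles);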
  2890. //------------------------------------------------------------------------------
  2891. XMINLINE XMVECTOR XMVectorCos
  2892. (
  2893. FXMVECTOR V
  2894. )
  2895. {
  2896. #if defined(_XM_NO_INTRINSICS_)
  2897. XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
  2898. XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
  2899. XMVECTOR Result;
  2900. V1 = XMVectorModAngles(V);
  2901. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
  2902. // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
  2903. V2 = XMVectorMultiply(V1, V1);
  2904. V4 = XMVectorMultiply(V2, V2);
  2905. V6 = XMVectorMultiply(V4, V2);
  2906. V8 = XMVectorMultiply(V4, V4);
  2907. V10 = XMVectorMultiply(V6, V4);
  2908. V12 = XMVectorMultiply(V6, V6);
  2909. V14 = XMVectorMultiply(V8, V6);
  2910. V16 = XMVectorMultiply(V8, V8);
  2911. V18 = XMVectorMultiply(V10, V8);
  2912. V20 = XMVectorMultiply(V10, V10);
  2913. V22 = XMVectorMultiply(V12, V10);
  2914. C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
  2915. C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
  2916. C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
  2917. C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
  2918. C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
  2919. C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
  2920. C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
  2921. C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
  2922. C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
  2923. C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
  2924. C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
  2925. Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
  2926. Result = XMVectorMultiplyAdd(C2, V4, Result);
  2927. Result = XMVectorMultiplyAdd(C3, V6, Result);
  2928. Result = XMVectorMultiplyAdd(C4, V8, Result);
  2929. Result = XMVectorMultiplyAdd(C5, V10, Result);
  2930. Result = XMVectorMultiplyAdd(C6, V12, Result);
  2931. Result = XMVectorMultiplyAdd(C7, V14, Result);
  2932. Result = XMVectorMultiplyAdd(C8, V16, Result);
  2933. Result = XMVectorMultiplyAdd(C9, V18, Result);
  2934. Result = XMVectorMultiplyAdd(C10, V20, Result);
  2935. Result = XMVectorMultiplyAdd(C11, V22, Result);
  2936. return Result;
  2937. #elif defined(_XM_SSE_INTRINSICS_)
  2938. // Force the value within the bounds of pi
  2939. XMVECTOR V2 = XMVectorModAngles(V);
2940. // Each one is V raised to the "num" power
  2941. // V2 = V1^2
  2942. V2 = _mm_mul_ps(V2,V2);
  2943. // V^2
  2944. XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
  2945. vConstants = _mm_mul_ps(vConstants,V2);
  2946. XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);
  2947. // V^4
  2948. XMVECTOR vPower = _mm_mul_ps(V2,V2);
  2949. vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
  2950. vConstants = _mm_mul_ps(vConstants,vPower);
  2951. vResult = _mm_add_ps(vResult,vConstants);
  2952. // V^6
  2953. vPower = _mm_mul_ps(vPower,V2);
  2954. vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
  2955. vConstants = _mm_mul_ps(vConstants,vPower);
  2956. vResult = _mm_add_ps(vResult,vConstants);
  2957. // V^8
  2958. vPower = _mm_mul_ps(vPower,V2);
  2959. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
  2960. vConstants = _mm_mul_ps(vConstants,vPower);
  2961. vResult = _mm_add_ps(vResult,vConstants);
  2962. // V^10
  2963. vPower = _mm_mul_ps(vPower,V2);
  2964. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
  2965. vConstants = _mm_mul_ps(vConstants,vPower);
  2966. vResult = _mm_add_ps(vResult,vConstants);
  2967. // V^12
  2968. vPower = _mm_mul_ps(vPower,V2);
  2969. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
  2970. vConstants = _mm_mul_ps(vConstants,vPower);
  2971. vResult = _mm_add_ps(vResult,vConstants);
  2972. // V^14
  2973. vPower = _mm_mul_ps(vPower,V2);
  2974. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
  2975. vConstants = _mm_mul_ps(vConstants,vPower);
  2976. vResult = _mm_add_ps(vResult,vConstants);
  2977. // V^16
  2978. vPower = _mm_mul_ps(vPower,V2);
  2979. vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
  2980. vConstants = _mm_mul_ps(vConstants,vPower);
vResult = _mm_add_ps(vResult,vConstants);
// V^18
vPower = _mm_mul_ps(vPower,V2);
vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
vConstants = _mm_mul_ps(vConstants,vPower);
vResult = _mm_add_ps(vResult,vConstants);
// V^20
vPower = _mm_mul_ps(vPower,V2);
vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
vConstants = _mm_mul_ps(vConstants,vPower);
vResult = _mm_add_ps(vResult,vConstants);
// V^22
vPower = _mm_mul_ps(vPower,V2);
vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
vConstants = _mm_mul_ps(vConstants,vPower);
vResult = _mm_add_ps(vResult,vConstants);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMINLINE VOID XMVectorSinCos
(
XMVECTOR* pSin,
XMVECTOR* pCos,
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
XMVECTOR Sin, Cos;
XMASSERT(pSin);
XMASSERT(pCos);
V1 = XMVectorModAngles(V);
// sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
// V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
// cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
// V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
V2 = XMVectorMultiply(V1, V1);
V3 = XMVectorMultiply(V2, V1);
V4 = XMVectorMultiply(V2, V2);
V5 = XMVectorMultiply(V3, V2);
V6 = XMVectorMultiply(V3, V3);
V7 = XMVectorMultiply(V4, V3);
V8 = XMVectorMultiply(V4, V4);
V9 = XMVectorMultiply(V5, V4);
V10 = XMVectorMultiply(V5, V5);
V11 = XMVectorMultiply(V6, V5);
V12 = XMVectorMultiply(V6, V6);
V13 = XMVectorMultiply(V7, V6);
V14 = XMVectorMultiply(V7, V7);
V15 = XMVectorMultiply(V8, V7);
V16 = XMVectorMultiply(V8, V8);
V17 = XMVectorMultiply(V9, V8);
V18 = XMVectorMultiply(V9, V9);
V19 = XMVectorMultiply(V10, V9);
V20 = XMVectorMultiply(V10, V10);
V21 = XMVectorMultiply(V11, V10);
V22 = XMVectorMultiply(V11, V11);
V23 = XMVectorMultiply(V12, V11);
S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
Sin = XMVectorMultiplyAdd(S1, V3, V1);
Sin = XMVectorMultiplyAdd(S2, V5, Sin);
Sin = XMVectorMultiplyAdd(S3, V7, Sin);
Sin = XMVectorMultiplyAdd(S4, V9, Sin);
Sin = XMVectorMultiplyAdd(S5, V11, Sin);
Sin = XMVectorMultiplyAdd(S6, V13, Sin);
Sin = XMVectorMultiplyAdd(S7, V15, Sin);
Sin = XMVectorMultiplyAdd(S8, V17, Sin);
Sin = XMVectorMultiplyAdd(S9, V19, Sin);
Sin = XMVectorMultiplyAdd(S10, V21, Sin);
Sin = XMVectorMultiplyAdd(S11, V23, Sin);
Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
Cos = XMVectorMultiplyAdd(C2, V4, Cos);
Cos = XMVectorMultiplyAdd(C3, V6, Cos);
Cos = XMVectorMultiplyAdd(C4, V8, Cos);
Cos = XMVectorMultiplyAdd(C5, V10, Cos);
Cos = XMVectorMultiplyAdd(C6, V12, Cos);
Cos = XMVectorMultiplyAdd(C7, V14, Cos);
Cos = XMVectorMultiplyAdd(C8, V16, Cos);
Cos = XMVectorMultiplyAdd(C9, V18, Cos);
Cos = XMVectorMultiplyAdd(C10, V20, Cos);
Cos = XMVectorMultiplyAdd(C11, V22, Cos);
*pSin = Sin;
*pCos = Cos;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT(pSin);
XMASSERT(pCos);
XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
XMVECTOR Sin, Cos;
V1 = XMVectorModAngles(V);
// sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
// V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
// cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
// V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
V2 = XMVectorMultiply(V1, V1);
V3 = XMVectorMultiply(V2, V1);
V4 = XMVectorMultiply(V2, V2);
V5 = XMVectorMultiply(V3, V2);
V6 = XMVectorMultiply(V3, V3);
V7 = XMVectorMultiply(V4, V3);
V8 = XMVectorMultiply(V4, V4);
V9 = XMVectorMultiply(V5, V4);
V10 = XMVectorMultiply(V5, V5);
V11 = XMVectorMultiply(V6, V5);
V12 = XMVectorMultiply(V6, V6);
V13 = XMVectorMultiply(V7, V6);
V14 = XMVectorMultiply(V7, V7);
V15 = XMVectorMultiply(V8, V7);
V16 = XMVectorMultiply(V8, V8);
V17 = XMVectorMultiply(V9, V8);
V18 = XMVectorMultiply(V9, V9);
V19 = XMVectorMultiply(V10, V9);
V20 = XMVectorMultiply(V10, V10);
V21 = XMVectorMultiply(V11, V10);
V22 = XMVectorMultiply(V11, V11);
V23 = XMVectorMultiply(V12, V11);
S1 = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
S2 = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
S3 = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
S4 = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
S5 = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
S6 = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
S7 = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
S8 = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
S9 = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
S1 = _mm_mul_ps(S1,V3);
Sin = _mm_add_ps(S1,V1);
Sin = XMVectorMultiplyAdd(S2, V5, Sin);
Sin = XMVectorMultiplyAdd(S3, V7, Sin);
Sin = XMVectorMultiplyAdd(S4, V9, Sin);
Sin = XMVectorMultiplyAdd(S5, V11, Sin);
Sin = XMVectorMultiplyAdd(S6, V13, Sin);
Sin = XMVectorMultiplyAdd(S7, V15, Sin);
Sin = XMVectorMultiplyAdd(S8, V17, Sin);
Sin = XMVectorMultiplyAdd(S9, V19, Sin);
Sin = XMVectorMultiplyAdd(S10, V21, Sin);
Sin = XMVectorMultiplyAdd(S11, V23, Sin);
Cos = _mm_mul_ps(C1,V2);
Cos = _mm_add_ps(Cos,g_XMOne);
Cos = XMVectorMultiplyAdd(C2, V4, Cos);
Cos = XMVectorMultiplyAdd(C3, V6, Cos);
Cos = XMVectorMultiplyAdd(C4, V8, Cos);
Cos = XMVectorMultiplyAdd(C5, V10, Cos);
Cos = XMVectorMultiplyAdd(C6, V12, Cos);
Cos = XMVectorMultiplyAdd(C7, V14, Cos);
Cos = XMVectorMultiplyAdd(C8, V16, Cos);
Cos = XMVectorMultiplyAdd(C9, V18, Cos);
Cos = XMVectorMultiplyAdd(C10, V20, Cos);
Cos = XMVectorMultiplyAdd(C11, V22, Cos);
*pSin = Sin;
*pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
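// A minimal usage sketch for the pair above (illustrative only; the local
// variable names are hypothetical, not part of the library). One call shares
// the odd/even power ladder, which is cheaper than separate XMVectorSin and
// XMVectorCos calls when both results are needed:
//
//     XMVECTOR angles = XMVectorSet(0.0f, XM_PIDIV4, XM_PIDIV2, XM_PI);
//     XMVECTOR s, c;
//     XMVectorSinCos(&s, &c, angles);
//     // s ~= <0, 0.7071, 1, 0> and c ~= <1, 0.7071, 0, -1>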
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorTan
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
// Cody and Waite algorithm to compute tangent.
XMVECTOR VA, VB, VC, VC2;
XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
XMVECTOR C0, C1, TwoDivPi, Epsilon;
XMVECTOR N, D;
XMVECTOR R0, R1;
XMVECTOR VIsZero, VCNearZero, VBIsEven;
XMVECTOR Zero;
XMVECTOR Result;
UINT i;
static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};
TwoDivPi = XMVectorSplatW(TanConstants);
Zero = XMVectorZero();
C0 = XMVectorSplatX(TanConstants);
C1 = XMVectorSplatY(TanConstants);
Epsilon = XMVectorSplatZ(TanConstants);
VA = XMVectorMultiply(V, TwoDivPi);
VA = XMVectorRound(VA);
VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
VB = XMVectorAbs(VA);
VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
for (i = 0; i < 4; i++)
{
VB.u[i] = (UINT)VB.v[i];
}
VC2 = XMVectorMultiply(VC, VC);
T7 = XMVectorSplatW(TanCoefficients1);
T6 = XMVectorSplatZ(TanCoefficients1);
T4 = XMVectorSplatX(TanCoefficients1);
T3 = XMVectorSplatW(TanCoefficients0);
T5 = XMVectorSplatY(TanCoefficients1);
T2 = XMVectorSplatZ(TanCoefficients0);
T1 = XMVectorSplatY(TanCoefficients0);
T0 = XMVectorSplatX(TanCoefficients0);
VBIsEven = XMVectorAndInt(VB, Mask.v);
VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
N = XMVectorMultiplyAdd(VC2, T7, T6);
D = XMVectorMultiplyAdd(VC2, T4, T3);
N = XMVectorMultiplyAdd(VC2, N, T5);
D = XMVectorMultiplyAdd(VC2, D, T2);
N = XMVectorMultiply(VC2, N);
D = XMVectorMultiplyAdd(VC2, D, T1);
N = XMVectorMultiplyAdd(VC, N, VC);
VCNearZero = XMVectorInBounds(VC, Epsilon);
D = XMVectorMultiplyAdd(VC2, D, T0);
N = XMVectorSelect(N, VC, VCNearZero);
D = XMVectorSelect(D, g_XMOne.v, VCNearZero);
R0 = XMVectorNegate(N);
R1 = XMVectorReciprocal(D);
R0 = XMVectorReciprocal(R0);
R1 = XMVectorMultiply(N, R1);
R0 = XMVectorMultiply(D, R0);
VIsZero = XMVectorEqual(V, Zero);
Result = XMVectorSelect(R0, R1, VBIsEven);
Result = XMVectorSelect(Result, Zero, VIsZero);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Cody and Waite algorithm to compute tangent.
XMVECTOR VA, VB, VC, VC2;
XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
XMVECTOR C0, C1, TwoDivPi, Epsilon;
XMVECTOR N, D;
XMVECTOR R0, R1;
XMVECTOR VIsZero, VCNearZero, VBIsEven;
XMVECTOR Zero;
XMVECTOR Result;
static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};
TwoDivPi = XMVectorSplatW(TanConstants);
Zero = XMVectorZero();
C0 = XMVectorSplatX(TanConstants);
C1 = XMVectorSplatY(TanConstants);
Epsilon = XMVectorSplatZ(TanConstants);
VA = XMVectorMultiply(V, TwoDivPi);
VA = XMVectorRound(VA);
VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
VB = XMVectorAbs(VA);
VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);
VC2 = XMVectorMultiply(VC, VC);
T7 = XMVectorSplatW(TanCoefficients1);
T6 = XMVectorSplatZ(TanCoefficients1);
T4 = XMVectorSplatX(TanCoefficients1);
T3 = XMVectorSplatW(TanCoefficients0);
T5 = XMVectorSplatY(TanCoefficients1);
T2 = XMVectorSplatZ(TanCoefficients0);
T1 = XMVectorSplatY(TanCoefficients0);
T0 = XMVectorSplatX(TanCoefficients0);
VBIsEven = XMVectorAndInt(VB,Mask);
VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
N = XMVectorMultiplyAdd(VC2, T7, T6);
D = XMVectorMultiplyAdd(VC2, T4, T3);
N = XMVectorMultiplyAdd(VC2, N, T5);
D = XMVectorMultiplyAdd(VC2, D, T2);
N = XMVectorMultiply(VC2, N);
D = XMVectorMultiplyAdd(VC2, D, T1);
N = XMVectorMultiplyAdd(VC, N, VC);
VCNearZero = XMVectorInBounds(VC, Epsilon);
D = XMVectorMultiplyAdd(VC2, D, T0);
N = XMVectorSelect(N, VC, VCNearZero);
D = XMVectorSelect(D, g_XMOne, VCNearZero);
R0 = XMVectorNegate(N);
R1 = _mm_div_ps(N,D);
R0 = _mm_div_ps(D,R0);
VIsZero = XMVectorEqual(V, Zero);
Result = XMVectorSelect(R0, R1, VBIsEven);
Result = XMVectorSelect(Result, Zero, VIsZero);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
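// Range-reduction recap for the tangent above (a derivation note relying only
// on standard identities): with VA = round(V * 2/Pi), the remainder
// VC = V - VA*C0 - VA*C1 subtracts Pi/2 in a split high/low form (C0 + C1
// together make up Pi/2), so the subtraction loses little precision. Then
//     tan(V) = tan(VC)       when round(V * 2/Pi) is even
//     tan(V) = -1/tan(VC)    when it is odd
// which is why the even mask on VB selects R1 = N/D while odd lanes take
// R0 = D/(-N).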
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorSinH
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
E1 = XMVectorExp(V1);
E2 = XMVectorExp(V2);
Result = XMVectorSubtract(E1, E2);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = _mm_mul_ps(V, Scale);
V1 = _mm_add_ps(V1,g_XMNegativeOne);
V2 = _mm_mul_ps(V, Scale);
V2 = _mm_sub_ps(g_XMNegativeOne,V2);
E1 = XMVectorExp(V1);
E2 = XMVectorExp(V2);
Result = _mm_sub_ps(E1, E2);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
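// Why Scale = 1/ln(2) works here: XMVectorExp is a base-2 exponential, so
// exp2(V*Scale - 1) = e^V / 2 and exp2(-V*Scale - 1) = e^-V / 2. The final
// subtraction therefore yields (e^V - e^-V) / 2 = sinh(V) directly, folding
// the usual halving into the exponent bias instead of an extra multiply.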
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorCosH
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
E1 = XMVectorExp(V1);
E2 = XMVectorExp(V2);
Result = XMVectorAdd(E1, E2);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = _mm_mul_ps(V,Scale);
V1 = _mm_add_ps(V1,g_XMNegativeOne);
V2 = _mm_mul_ps(V, Scale);
V2 = _mm_sub_ps(g_XMNegativeOne,V2);
E1 = XMVectorExp(V1);
E2 = XMVectorExp(V2);
Result = _mm_add_ps(E1, E2);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorTanH
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR E;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
E = XMVectorMultiply(V, Scale.v);
E = XMVectorExp(E);
E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
E = XMVectorReciprocal(E);
Result = XMVectorSubtract(g_XMOne.v, E);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
XMVECTOR E = _mm_mul_ps(V, Scale);
E = XMVectorExp(E);
E = _mm_mul_ps(E,g_XMOneHalf);
E = _mm_add_ps(E,g_XMOneHalf);
E = XMVectorReciprocal(E);
E = _mm_sub_ps(g_XMOne, E);
return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
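// Derivation of the identity used above: with E = exp2(V * 2/ln(2)) = e^(2V),
//     tanh(V) = (e^(2V) - 1) / (e^(2V) + 1)
//             = 1 - 2 / (e^(2V) + 1)
//             = 1 - 1 / (0.5*E + 0.5)
// which is exactly the multiply-add by g_XMOneHalf, the reciprocal, and the
// subtraction from one performed above.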
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorASin
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V2, V3, AbsV;
XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
XMVECTOR R0, R1, R2, R3, R4;
XMVECTOR OneMinusAbsV;
XMVECTOR Rsq;
XMVECTOR Result;
static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
// asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
// V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
AbsV = XMVectorAbs(V);
V2 = XMVectorMultiply(V, V);
V3 = XMVectorMultiply(V2, AbsV);
R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
R0 = XMVectorMultiplyAdd(R2, V3, R0);
R1 = XMVectorMultiplyAdd(R3, V3, R1);
R0 = XMVectorMultiply(V, R0);
R1 = XMVectorMultiply(R4, R1);
Result = XMVectorMultiplyAdd(R1, Rsq, R0);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
// asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
// V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
// Get abs(V)
XMVECTOR vAbsV = _mm_setzero_ps();
vAbsV = _mm_sub_ps(vAbsV,V);
vAbsV = _mm_max_ps(vAbsV,V);
XMVECTOR R0 = vAbsV;
XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
R0 = _mm_mul_ps(R0,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
R0 = _mm_add_ps(R0,vConstants);
XMVECTOR R1 = vAbsV;
vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
R1 = _mm_mul_ps(R1,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
R1 = _mm_add_ps(R1, vConstants);
XMVECTOR R2 = vAbsV;
vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
R2 = _mm_mul_ps(R2,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
R2 = _mm_add_ps(R2, vConstants);
XMVECTOR R3 = vAbsV;
vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
R3 = _mm_mul_ps(R3,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
R3 = _mm_add_ps(R3, vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
R0 = _mm_mul_ps(R0,vAbsV);
R0 = _mm_add_ps(R0,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
R1 = _mm_mul_ps(R1,vAbsV);
R1 = _mm_add_ps(R1,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
R2 = _mm_mul_ps(R2,vAbsV);
R2 = _mm_add_ps(R2,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
R3 = _mm_mul_ps(R3,vAbsV);
R3 = _mm_add_ps(R3,vConstants);
// V3 = V^3
vConstants = _mm_mul_ps(V,V);
vConstants = _mm_mul_ps(vConstants, vAbsV);
// Mul by V^3
R2 = _mm_mul_ps(R2,vConstants);
R3 = _mm_mul_ps(R3,vConstants);
// Merge the results
R0 = _mm_add_ps(R0,R2);
R1 = _mm_add_ps(R1,R3);
R0 = _mm_mul_ps(R0,V);
// vConstants = V-(V^2 retaining sign)
vConstants = _mm_mul_ps(vAbsV, V);
vConstants = _mm_sub_ps(V,vConstants);
R1 = _mm_mul_ps(R1,vConstants);
vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
// Do NOT use rsqrt/mul. This needs the precision
vConstants = _mm_sqrt_ps(vConstants);
R1 = _mm_div_ps(R1,vConstants);
R0 = _mm_add_ps(R0,R1);
return R0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
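// Domain note and sketch (the locals are illustrative, not library names):
// the polynomial pair expects |V| <= 1. OnePlusEpsilon pads the 1 - |V| term
// so the square root stays well-defined when V is exactly +/-1; it does not
// clamp out-of-range inputs.
//
//     XMVECTOR v = XMVectorSet(-1.0f, -0.5f, 0.5f, 1.0f);
//     XMVECTOR a = XMVectorASin(v);
//     // a ~= <-XM_PIDIV2, -0.5236, 0.5236, XM_PIDIV2>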
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorACos
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V2, V3, AbsV;
XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
XMVECTOR R0, R1, R2, R3, R4;
XMVECTOR OneMinusAbsV;
XMVECTOR Rsq;
XMVECTOR Result;
static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
// acos(V) = PI / 2 - asin(V)
AbsV = XMVectorAbs(V);
V2 = XMVectorMultiply(V, V);
V3 = XMVectorMultiply(V2, AbsV);
R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
R0 = XMVectorMultiplyAdd(R2, V3, R0);
R1 = XMVectorMultiplyAdd(R3, V3, R1);
R0 = XMVectorMultiply(V, R0);
R1 = XMVectorMultiply(R4, R1);
Result = XMVectorMultiplyAdd(R1, Rsq, R0);
Result = XMVectorSubtract(g_XMHalfPi.v, Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static CONST XMVECTORF32 s_OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
// Uses only 6 registers for good code on x86 targets
// acos(V) = PI / 2 - asin(V)
// Get abs(V)
XMVECTOR vAbsV = _mm_setzero_ps();
vAbsV = _mm_sub_ps(vAbsV,V);
vAbsV = _mm_max_ps(vAbsV,V);
// Perform the series in precision groups to
// retain precision across 20 bits. (3 bits of imprecision due to operations)
XMVECTOR R0 = vAbsV;
XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
R0 = _mm_mul_ps(R0,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
R0 = _mm_add_ps(R0,vConstants);
R0 = _mm_mul_ps(R0,vAbsV);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
R0 = _mm_add_ps(R0,vConstants);
XMVECTOR R1 = vAbsV;
vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
R1 = _mm_mul_ps(R1,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
R1 = _mm_add_ps(R1,vConstants);
R1 = _mm_mul_ps(R1, vAbsV);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
R1 = _mm_add_ps(R1,vConstants);
XMVECTOR R2 = vAbsV;
vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
R2 = _mm_mul_ps(R2,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
R2 = _mm_add_ps(R2,vConstants);
R2 = _mm_mul_ps(R2, vAbsV);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
R2 = _mm_add_ps(R2,vConstants);
XMVECTOR R3 = vAbsV;
vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
R3 = _mm_mul_ps(R3,vConstants);
vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
R3 = _mm_add_ps(R3,vConstants);
R3 = _mm_mul_ps(R3, vAbsV);
vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
R3 = _mm_add_ps(R3,vConstants);
// vConstants = V^3
vConstants = _mm_mul_ps(V,V);
vConstants = _mm_mul_ps(vConstants,vAbsV);
R2 = _mm_mul_ps(R2,vConstants);
R3 = _mm_mul_ps(R3,vConstants);
// Add the pair of values together here to retain
// as much precision as possible
R0 = _mm_add_ps(R0,R2);
R1 = _mm_add_ps(R1,R3);
R0 = _mm_mul_ps(R0,V);
// vConstants = V-(V*abs(V))
vConstants = _mm_mul_ps(V,vAbsV);
vConstants = _mm_sub_ps(V,vConstants);
R1 = _mm_mul_ps(R1,vConstants);
// Epsilon exists to allow 1.0 as an answer
vConstants = _mm_sub_ps(s_OnePlusEpsilon, vAbsV);
// Use sqrt instead of rsqrt for precision
vConstants = _mm_sqrt_ps(vConstants);
R1 = _mm_div_ps(R1,vConstants);
R1 = _mm_add_ps(R1,R0);
vConstants = _mm_sub_ps(g_XMHalfPi,R1);
return vConstants;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorATan
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
// Cody and Waite algorithm to compute inverse tangent.
XMVECTOR N, D;
XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
XMVECTOR Zero;
XMVECTOR NegativeHalfPi;
XMVECTOR Angle1, Angle2;
XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
XMVECTOR NegativeResult, Result;
XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
Zero = XMVectorZero();
P0 = XMVectorSplatX(ATanConstants0);
P1 = XMVectorSplatY(ATanConstants0);
P2 = XMVectorSplatZ(ATanConstants0);
P3 = XMVectorSplatW(ATanConstants0);
Q0 = XMVectorSplatX(ATanConstants1);
Q1 = XMVectorSplatY(ATanConstants1);
Q2 = XMVectorSplatZ(ATanConstants1);
Q3 = XMVectorSplatW(ATanConstants1);
Sqrt3 = XMVectorSplatX(ATanConstants2);
Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
Epsilon = XMVectorSplatW(ATanConstants2);
HalfPi = XMVectorSplatX(ATanConstants3);
OneThirdPi = XMVectorSplatY(ATanConstants3);
OneSixthPi = XMVectorSplatZ(ATanConstants3);
MaxV = XMVectorSplatW(ATanConstants3);
VF = XMVectorAbs(V);
ReciprocalF = XMVectorReciprocal(VF);
F_GT_One = XMVectorGreater(VF, g_XMOne.v);
VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);
F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);
FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
FA = XMVectorAdd(FA, g_XMNegativeOne.v);
FB = XMVectorAdd(VF, Sqrt3);
FB = XMVectorReciprocal(FB);
FA = XMVectorMultiply(FA, FB);
VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);
AbsF = XMVectorAbs(VF);
AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);
G = XMVectorMultiply(VF, VF);
D = XMVectorAdd(G, Q3);
D = XMVectorMultiplyAdd(D, G, Q2);
D = XMVectorMultiplyAdd(D, G, Q1);
D = XMVectorMultiplyAdd(D, G, Q0);
D = XMVectorReciprocal(D);
N = XMVectorMultiplyAdd(P3, G, P2);
N = XMVectorMultiplyAdd(N, G, P1);
N = XMVectorMultiplyAdd(N, G, P0);
N = XMVectorMultiply(N, G);
Result = XMVectorMultiply(N, D);
Result = XMVectorMultiplyAdd(Result, VF, VF);
Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);
NegativeResult = XMVectorNegate(Result);
Result = XMVectorSelect(Result, NegativeResult, F_GT_One);
Result = XMVectorAdd(Result, Angle1);
V_LT_Zero = XMVectorLess(V, Zero);
NegativeResult = XMVectorNegate(Result);
Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);
MinV = XMVectorNegate(MaxV);
NegativeHalfPi = XMVectorNegate(HalfPi);
V_GT_MaxV = XMVectorGreater(V, MaxV);
V_LT_MinV = XMVectorLess(V, MinV);
Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
XMVECTOR VF = XMVectorAbs(V);
XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
XMVECTOR Zero = XMVectorZero();
XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
// Pi/3
XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
// Pi/6
XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);
// sqrt(3)-1
XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
FA = _mm_mul_ps(FA,VF);
FA = _mm_add_ps(FA,VF);
FA = _mm_add_ps(FA,g_XMNegativeOne);
// sqrt(3)
vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
vConstants = _mm_add_ps(vConstants,VF);
FA = _mm_div_ps(FA,vConstants);
// 2-sqrt(3)
vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
// >2-sqrt(3)?
vConstants = _mm_cmpgt_ps(VF,vConstants);
VF = XMVectorSelect(VF, FA, vConstants);
Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);
XMVECTOR AbsF = XMVectorAbs(VF);
XMVECTOR G = _mm_mul_ps(VF,VF);
XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
D = _mm_add_ps(D,G);
D = _mm_mul_ps(D,G);
vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
D = _mm_add_ps(D,vConstants);
D = _mm_mul_ps(D,G);
vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
D = _mm_add_ps(D,vConstants);
D = _mm_mul_ps(D,G);
vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
D = _mm_add_ps(D,vConstants);
XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
N = _mm_mul_ps(N,G);
vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
N = _mm_add_ps(N,vConstants);
N = _mm_mul_ps(N,G);
vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
N = _mm_add_ps(N,vConstants);
N = _mm_mul_ps(N,G);
vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
N = _mm_add_ps(N,vConstants);
N = _mm_mul_ps(N,G);
XMVECTOR Result = _mm_div_ps(N,D);
Result = _mm_mul_ps(Result,VF);
Result = _mm_add_ps(Result,VF);
// Epsilon
vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
vConstants = _mm_cmpge_ps(vConstants,AbsF);
Result = XMVectorSelect(Result,VF,vConstants);
XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
Result = _mm_add_ps(Result,Angle1);
Zero = _mm_cmpge_ps(Zero,V);
NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
Result = XMVectorSelect(Result,NegativeResult,Zero);
XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
// Negate HalfPi
HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
MaxV = _mm_cmple_ps(MaxV,V);
MinV = _mm_cmpge_ps(MinV,V);
Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
// HalfPi now holds -Pi/2
Result = XMVectorSelect(Result,HalfPi,MinV);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
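// The two Cody and Waite reductions implied by ATanConstants2, stated
// explicitly (a derivation note):
//     |x| > 1           : atan(|x|) = Pi/2 - atan(1/|x|)
//     |x| > 2 - sqrt(3) : atan(x) = Pi/6 + atan((x*sqrt(3) - 1)/(x + sqrt(3)))
// FA forms the second argument as ((sqrt(3)-1)*x + x) - 1, so the sqrt(3)*x
// product keeps a few extra bits before the subtraction. After both
// reductions the rational polynomial N/D only has to cover [0, 2 - sqrt(3)].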
//------------------------------------------------------------------------------
XMINLINE XMVECTOR XMVectorATan2
(
FXMVECTOR Y,
FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)
// Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
// Y == 0 and X is Negative -> Pi with the sign of Y
// Y == 0 and X is Positive -> 0 with the sign of Y
// Y != 0 and X == 0 -> Pi / 2 with the sign of Y
// X == -Infinity and Finite Y > 0 -> Pi with the sign of Y
// X == +Infinity and Finite Y > 0 -> 0 with the sign of Y
// Y == Infinity and X is Finite -> Pi / 2 with the sign of Y
// Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
// Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
// TODO: Return Y / X if the result underflows
XMVECTOR Reciprocal;
XMVECTOR V;
XMVECTOR YSign;
XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
XMVECTOR ATanResultValid;
XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
XMVECTOR Zero;
XMVECTOR Result;
static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
Zero = XMVectorZero();
ATanResultValid = XMVectorTrueInt();
Pi = XMVectorSplatX(ATan2Constants);
PiOverTwo = XMVectorSplatY(ATan2Constants);
PiOverFour = XMVectorSplatZ(ATan2Constants);
ThreePiOverFour = XMVectorSplatW(ATan2Constants);
YEqualsZero = XMVectorEqual(Y, Zero);
XEqualsZero = XMVectorEqual(X, Zero);
XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
YEqualsInfinity = XMVectorIsInfinite(Y);
XEqualsInfinity = XMVectorIsInfinite(X);
FiniteYGreaterZero = XMVectorGreater(Y, Zero);
FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);
YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
Pi = XMVectorOrInt(Pi, YSign);
PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
PiOverFour = XMVectorOrInt(PiOverFour, YSign);
ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
R1 = XMVectorSelect(Pi, YSign, XIsPositive);
R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
R3 = XMVectorSelect(R2, R1, YEqualsZero);
R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
Result = XMVectorSelect(R6, R7, XEqualsInfinity);
ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
Reciprocal = XMVectorReciprocal(X);
V = XMVectorMultiply(Y, Reciprocal);
R0 = XMVectorATan(V);
Result = XMVectorSelect(Result, R0, ATanResultValid);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
// Mask if Y>0 && Y!=INF
XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
// Get the sign of (Y&0x80000000)
XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
// Get the sign bits of X
XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
// Change them to masks
XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
// Get Pi
XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
// Copy the sign of Y
R1 = _mm_or_ps(R1,YSign);
R1 = XMVectorSelect(R1,YSign,XIsPositive);
// Mask for X==0
XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
// Get Pi/2 with the sign of Y
XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
// Mask for Y==0
vConstants = _mm_cmpeq_ps(Y,g_XMZero);
R2 = XMVectorSelect(R2,R1,vConstants);
// Get Pi/4 with sign of Y
XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
PiOverFour = _mm_or_ps(PiOverFour,YSign);
// Get (Pi*3)/4 with sign of Y
XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
// At this point, any entry that's zero will get the result
// from XMVectorATan(), otherwise, return the failsafe value
vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
// Any entries not 0xFFFFFFFF, are considered precalculated
XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
// Let's do the ATan2 function
vConstants = _mm_div_ps(Y,X);
vConstants = XMVectorATan(vConstants);
// Discard entries that have been declared void
vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
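// A small sketch of the quadrant handling (illustrative locals only):
//
//     XMVECTOR y = XMVectorSet(0.0f, 1.0f, 1.0f, -1.0f);
//     XMVECTOR x = XMVectorSet(-1.0f, 0.0f, 1.0f, 1.0f);
//     XMVECTOR a = XMVectorATan2(y, x);
//     // a ~= <XM_PI, XM_PIDIV2, XM_PIDIV4, -XM_PIDIV4>
//
// The first two lanes are produced by the special-case masks above; only the
// last two reach the XMVectorATan(Y/X) path.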
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorSinEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V2, V3, V5, V7;
XMVECTOR S1, S2, S3;
XMVECTOR Result;
// sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
V2 = XMVectorMultiply(V, V);
V3 = XMVectorMultiply(V2, V);
V5 = XMVectorMultiply(V3, V2);
V7 = XMVectorMultiply(V5, V2);
S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
Result = XMVectorMultiplyAdd(S1, V3, V);
Result = XMVectorMultiplyAdd(S2, V5, Result);
Result = XMVectorMultiplyAdd(S3, V7, Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
XMVECTOR V2 = _mm_mul_ps(V,V);
XMVECTOR V3 = _mm_mul_ps(V2,V);
XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
vResult = _mm_mul_ps(vResult,V3);
vResult = _mm_add_ps(vResult,V);
XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
// V^5
V3 = _mm_mul_ps(V3,V2);
vConstants = _mm_mul_ps(vConstants,V3);
vResult = _mm_add_ps(vResult,vConstants);
vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
// V^7
V3 = _mm_mul_ps(V3,V2);
vConstants = _mm_mul_ps(vConstants,V3);
vResult = _mm_add_ps(vResult,vConstants);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
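// The *Est variants trade accuracy for throughput: a 7th-degree series here
// versus the 23rd-degree series in XMVectorSin, and no XMVectorModAngles
// call, so the caller must keep V within [-Pi, Pi]. A hedged sketch for
// unreduced angles (v is a hypothetical caller-side vector):
//
//     XMVECTOR s = XMVectorSinEst(XMVectorModAngles(v));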
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorCosEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V2, V4, V6;
XMVECTOR C0, C1, C2, C3;
XMVECTOR Result;
V2 = XMVectorMultiply(V, V);
V4 = XMVectorMultiply(V2, V2);
V6 = XMVectorMultiply(V4, V2);
C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
Result = XMVectorMultiplyAdd(C1, V2, C0);
Result = XMVectorMultiplyAdd(C2, V4, Result);
Result = XMVectorMultiplyAdd(C3, V6, Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Get V^2
XMVECTOR V2 = _mm_mul_ps(V,V);
XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
vResult = _mm_mul_ps(vResult,V2);
XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
vResult = _mm_add_ps(vResult,vConstants);
vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
// Get V^4
XMVECTOR V4 = _mm_mul_ps(V2, V2);
vConstants = _mm_mul_ps(vConstants,V4);
vResult = _mm_add_ps(vResult,vConstants);
vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
// It's really V^6
V4 = _mm_mul_ps(V4,V2);
vConstants = _mm_mul_ps(vConstants,V4);
vResult = _mm_add_ps(vResult,vConstants);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE VOID XMVectorSinCosEst
(
XMVECTOR* pSin,
XMVECTOR* pCos,
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V2, V3, V4, V5, V6, V7;
XMVECTOR S1, S2, S3;
XMVECTOR C0, C1, C2, C3;
XMVECTOR Sin, Cos;
XMASSERT(pSin);
XMASSERT(pCos);
// sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
// cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
V2 = XMVectorMultiply(V, V);
V3 = XMVectorMultiply(V2, V);
V4 = XMVectorMultiply(V2, V2);
V5 = XMVectorMultiply(V3, V2);
V6 = XMVectorMultiply(V3, V3);
V7 = XMVectorMultiply(V4, V3);
S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
Sin = XMVectorMultiplyAdd(S1, V3, V);
Sin = XMVectorMultiplyAdd(S2, V5, Sin);
Sin = XMVectorMultiplyAdd(S3, V7, Sin);
Cos = XMVectorMultiplyAdd(C1, V2, C0);
Cos = XMVectorMultiplyAdd(C2, V4, Cos);
Cos = XMVectorMultiplyAdd(C3, V6, Cos);
*pSin = Sin;
*pCos = Cos;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT(pSin);
XMASSERT(pCos);
XMVECTOR V2, V3, V4, V5, V6, V7;
XMVECTOR S1, S2, S3;
XMVECTOR C0, C1, C2, C3;
XMVECTOR Sin, Cos;
// sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
// cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
V2 = XMVectorMultiply(V, V);
V3 = XMVectorMultiply(V2, V);
V4 = XMVectorMultiply(V2, V2);
V5 = XMVectorMultiply(V3, V2);
V6 = XMVectorMultiply(V3, V3);
V7 = XMVectorMultiply(V4, V3);
S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
Sin = XMVectorMultiplyAdd(S1, V3, V);
Sin = XMVectorMultiplyAdd(S2, V5, Sin);
Sin = XMVectorMultiplyAdd(S3, V7, Sin);
Cos = XMVectorMultiplyAdd(C1, V2, C0);
Cos = XMVectorMultiplyAdd(C2, V4, Cos);
Cos = XMVectorMultiplyAdd(C3, V6, Cos);
*pSin = Sin;
*pCos = Cos;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorTanEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V1, V2, V1T0, V1T1, V2T2;
XMVECTOR T0, T1, T2;
XMVECTOR N, D;
XMVECTOR OneOverPi;
XMVECTOR Result;
OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);
V1 = XMVectorMultiply(V, OneOverPi);
V1 = XMVectorRound(V1);
V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V);
T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);
V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
V2 = XMVectorMultiply(V1, V1);
V1T0 = XMVectorMultiply(V1, T0);
V1T1 = XMVectorMultiply(V1, T1);
D = XMVectorReciprocalEst(V2T2);
N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
Result = XMVectorMultiply(N, D);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR V1, V2, V1T0, V1T1, V2T2;
XMVECTOR T0, T1, T2;
XMVECTOR N, D;
XMVECTOR OneOverPi;
XMVECTOR Result;
OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);
V1 = XMVectorMultiply(V, OneOverPi);
V1 = XMVectorRound(V1);
V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);
T0 = XMVectorSplatX(g_XMTanEstCoefficients);
T1 = XMVectorSplatY(g_XMTanEstCoefficients);
T2 = XMVectorSplatZ(g_XMTanEstCoefficients);
V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
V2 = XMVectorMultiply(V1, V1);
V1T0 = XMVectorMultiply(V1, T0);
V1T1 = XMVectorMultiply(V1, T1);
D = XMVectorReciprocalEst(V2T2);
N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
Result = XMVectorMultiply(N, D);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
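// TanEst replaces the Cody and Waite machinery with one small rational form
// (a derivation note): after V1 = V - Pi*round(V/Pi) reduces the angle to
// roughly [-Pi/2, Pi/2],
//     tan(V1) ~= V1*(T0 + T1*V1^2) / (T2 - V1^2)
// which matches N = V1*T0 + V1^3*T1 and D ~= 1/(T2 - V1^2) above, with the
// estimated reciprocal being acceptable since the whole function is an
// estimate.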
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorSinHEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
E1 = XMVectorExpEst(V1);
E2 = XMVectorExpEst(V2);
Result = XMVectorSubtract(E1, E2);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = _mm_mul_ps(V,Scale);
V1 = _mm_add_ps(V1,g_XMNegativeOne);
V2 = _mm_mul_ps(V,Scale);
V2 = _mm_sub_ps(g_XMNegativeOne,V2);
E1 = XMVectorExpEst(V1);
E2 = XMVectorExpEst(V2);
Result = _mm_sub_ps(E1, E2);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorCosHEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
E1 = XMVectorExpEst(V1);
E2 = XMVectorExpEst(V2);
Result = XMVectorAdd(E1, E2);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR V1, V2;
XMVECTOR E1, E2;
XMVECTOR Result;
static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
V1 = _mm_mul_ps(V,Scale);
V1 = _mm_add_ps(V1,g_XMNegativeOne);
V2 = _mm_mul_ps(V, Scale);
V2 = _mm_sub_ps(g_XMNegativeOne,V2);
E1 = XMVectorExpEst(V1);
E2 = XMVectorExpEst(V2);
Result = _mm_add_ps(E1, E2);
return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorTanHEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR E;
XMVECTOR Result;
static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
E = XMVectorMultiply(V, Scale);
E = XMVectorExpEst(E);
E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
E = XMVectorReciprocalEst(E);
Result = XMVectorSubtract(g_XMOne.v, E);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
XMVECTOR E = _mm_mul_ps(V, Scale);
E = XMVectorExpEst(E);
E = _mm_mul_ps(E,g_XMOneHalf);
E = _mm_add_ps(E,g_XMOneHalf);
E = XMVectorReciprocalEst(E);
E = _mm_sub_ps(g_XMOne, E);
return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorASinEst
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR AbsV, V2, VD, VC0, V2C3;
XMVECTOR C0, C1, C2, C3;
XMVECTOR D, Rsq, SqrtD;
XMVECTOR OnePlusEps;
XMVECTOR Result;
AbsV = XMVectorAbs(V);
OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
D = XMVectorSubtract(OnePlusEps, AbsV);
Rsq = XMVectorReciprocalSqrtEst(D);
SqrtD = XMVectorMultiply(D, Rsq);
V2 = XMVectorMultiply(V, AbsV);
V2C3 = XMVectorMultiply(V2, C3);
VD = XMVectorMultiply(D, AbsV);
VC0 = XMVectorMultiply(V, C0);
Result = XMVectorMultiply(V, C1);
Result = XMVectorMultiplyAdd(V2, C2, Result);
Result = XMVectorMultiplyAdd(V2C3, VD, Result);
Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// Get abs(V)
XMVECTOR vAbsV = _mm_setzero_ps();
vAbsV = _mm_sub_ps(vAbsV,V);
vAbsV = _mm_max_ps(vAbsV,V);
XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
D = _mm_sub_ps(D,vAbsV);
// Since this is an estimate, rsqrt is okay
XMVECTOR vConstants = _mm_rsqrt_ps(D);
XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
// V2 = V^2 retaining sign
XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
D = _mm_mul_ps(D,vAbsV);
XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
vResult = _mm_mul_ps(vResult,V);
vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
vConstants = _mm_mul_ps(vConstants,V2);
vResult = _mm_add_ps(vResult,vConstants);
vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
vConstants = _mm_mul_ps(vConstants,V2);
vConstants = _mm_mul_ps(vConstants,D);
vResult = _mm_add_ps(vResult,vConstants);
vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
vConstants = _mm_mul_ps(vConstants,V);
vConstants = _mm_mul_ps(vConstants,SqrtD);
vResult = _mm_add_ps(vResult,vConstants);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  4255. //------------------------------------------------------------------------------
  4256. XMFINLINE XMVECTOR XMVectorACosEst
  4257. (
  4258. FXMVECTOR V
  4259. )
  4260. {
  4261. #if defined(_XM_NO_INTRINSICS_)
  4262. XMVECTOR AbsV, V2, VD, VC0, V2C3;
  4263. XMVECTOR C0, C1, C2, C3;
  4264. XMVECTOR D, Rsq, SqrtD;
  4265. XMVECTOR OnePlusEps, HalfPi;
  4266. XMVECTOR Result;
  4267. // acos(V) = PI / 2 - asin(V)
  4268. AbsV = XMVectorAbs(V);
  4269. OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
  4270. HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);
  4271. C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
  4272. C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
  4273. C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
  4274. C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
  4275. D = XMVectorSubtract(OnePlusEps, AbsV);
  4276. Rsq = XMVectorReciprocalSqrtEst(D);
  4277. SqrtD = XMVectorMultiply(D, Rsq);
  4278. V2 = XMVectorMultiply(V, AbsV);
  4279. V2C3 = XMVectorMultiply(V2, C3);
  4280. VD = XMVectorMultiply(D, AbsV);
  4281. VC0 = XMVectorMultiply(V, C0);
  4282. Result = XMVectorMultiply(V, C1);
  4283. Result = XMVectorMultiplyAdd(V2, C2, Result);
  4284. Result = XMVectorMultiplyAdd(V2C3, VD, Result);
  4285. Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
  4286. Result = XMVectorSubtract(HalfPi, Result);
  4287. return Result;
  4288. #elif defined(_XM_SSE_INTRINSICS_)
  4289. // acos(V) = PI / 2 - asin(V)
  4290. // Get abs(V)
  4291. XMVECTOR vAbsV = _mm_setzero_ps();
  4292. vAbsV = _mm_sub_ps(vAbsV,V);
  4293. vAbsV = _mm_max_ps(vAbsV,V);
  4294. // Calc D
  4295. XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
  4296. D = _mm_sub_ps(D,vAbsV);
  4297. // SqrtD = sqrt(D-abs(V)) estimated
  4298. XMVECTOR vConstants = _mm_rsqrt_ps(D);
  4299. XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
  4300. // V2 = V^2 while retaining sign
  4301. XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
  4302. // Drop vAbsV here. D = (Const-abs(V))*abs(V)
  4303. D = _mm_mul_ps(D, vAbsV);
  4304. XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
  4305. vResult = _mm_mul_ps(vResult,V);
  4306. vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
  4307. vConstants = _mm_mul_ps(vConstants,V2);
  4308. vResult = _mm_add_ps(vResult,vConstants);
  4309. vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
  4310. vConstants = _mm_mul_ps(vConstants,V2);
  4311. vConstants = _mm_mul_ps(vConstants,D);
  4312. vResult = _mm_add_ps(vResult,vConstants);
  4313. vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
  4314. vConstants = _mm_mul_ps(vConstants,V);
  4315. vConstants = _mm_mul_ps(vConstants,SqrtD);
  4316. vResult = _mm_add_ps(vResult,vConstants);
  4317. vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
  4318. vResult = _mm_sub_ps(vConstants,vResult);
  4319. return vResult;
  4320. #else // _XM_VMX128_INTRINSICS_
  4321. #endif // _XM_VMX128_INTRINSICS_
  4322. }
  4323. //------------------------------------------------------------------------------
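// Return a per-component estimate of the arctangent, formed as a rational
// approximation N(V)/D(V) built from the g_XMATanEstCoefficients constants.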
  4324. XMFINLINE XMVECTOR XMVectorATanEst
  4325. (
  4326. FXMVECTOR V
  4327. )
  4328. {
  4329. #if defined(_XM_NO_INTRINSICS_)
  4330. XMVECTOR AbsV, V2S2, N, D;
  4331. XMVECTOR S0, S1, S2;
  4332. XMVECTOR HalfPi;
  4333. XMVECTOR Result;
  4334. S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
  4335. S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
  4336. S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
  4337. HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);
  4338. AbsV = XMVectorAbs(V);
  4339. V2S2 = XMVectorMultiplyAdd(V, V, S2);
  4340. N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
  4341. D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
  4342. N = XMVectorMultiply(N, V);
  4343. D = XMVectorReciprocalEst(D);
  4344. Result = XMVectorMultiply(N, D);
  4345. return Result;
  4346. #elif defined(_XM_SSE_INTRINSICS_)
  4347. // Get abs(V)
  4348. XMVECTOR vAbsV = _mm_setzero_ps();
  4349. vAbsV = _mm_sub_ps(vAbsV,V);
  4350. vAbsV = _mm_max_ps(vAbsV,V);
  4351. XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
  4352. vResult = _mm_mul_ps(vResult,vAbsV);
  4353. XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
  4354. vResult = _mm_add_ps(vResult,vConstants);
  4355. vResult = _mm_mul_ps(vResult,V);
  4356. XMVECTOR D = _mm_mul_ps(V,V);
  4357. vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
  4358. D = _mm_add_ps(D,vConstants);
  4359. vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
  4360. vConstants = _mm_mul_ps(vConstants,vAbsV);
  4361. D = _mm_add_ps(D,vConstants);
  4362. vResult = _mm_div_ps(vResult,D);
  4363. return vResult;
  4364. #else // _XM_VMX128_INTRINSICS_
  4365. #endif // _XM_VMX128_INTRINSICS_
  4366. }
  4367. //------------------------------------------------------------------------------
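// Return a per-component estimate of atan2(Y, X). The special cases
// (Y == 0, X == 0, Y or X infinite) are resolved up front with select
// masks; every remaining entry falls through to XMVectorATanEst(Y/X).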
  4368. XMFINLINE XMVECTOR XMVectorATan2Est
  4369. (
  4370. FXMVECTOR Y,
  4371. FXMVECTOR X
  4372. )
  4373. {
  4374. #if defined(_XM_NO_INTRINSICS_)
  4375. XMVECTOR Reciprocal;
  4376. XMVECTOR V;
  4377. XMVECTOR YSign;
  4378. XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
  4379. XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity, FiniteYGreaterZero;
  4380. XMVECTOR ATanResultValid;
  4381. XMVECTOR R0, R1, R2, R3, R4, R5, R6, R7;
  4382. XMVECTOR Zero;
  4383. XMVECTOR Result;
  4384. static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
  4385. Zero = XMVectorZero();
  4386. ATanResultValid = XMVectorTrueInt();
  4387. Pi = XMVectorSplatX(ATan2Constants);
  4388. PiOverTwo = XMVectorSplatY(ATan2Constants);
  4389. PiOverFour = XMVectorSplatZ(ATan2Constants);
  4390. ThreePiOverFour = XMVectorSplatW(ATan2Constants);
  4391. YEqualsZero = XMVectorEqual(Y, Zero);
  4392. XEqualsZero = XMVectorEqual(X, Zero);
  4393. XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
  4394. XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
  4395. YEqualsInfinity = XMVectorIsInfinite(Y);
  4396. XEqualsInfinity = XMVectorIsInfinite(X);
  4397. FiniteYGreaterZero = XMVectorGreater(Y, Zero);
  4398. FiniteYGreaterZero = XMVectorSelect(FiniteYGreaterZero, Zero, YEqualsInfinity);
  4399. YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
  4400. Pi = XMVectorOrInt(Pi, YSign);
  4401. PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
  4402. PiOverFour = XMVectorOrInt(PiOverFour, YSign);
  4403. ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
  4404. R1 = XMVectorSelect(Pi, YSign, XIsPositive);
  4405. R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
  4406. R3 = XMVectorSelect(R2, R1, YEqualsZero);
  4407. R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
  4408. R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
  4409. R6 = XMVectorSelect(R3, R5, YEqualsInfinity);
  4410. R7 = XMVectorSelect(R6, R1, FiniteYGreaterZero);
  4411. Result = XMVectorSelect(R6, R7, XEqualsInfinity);
  4412. ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
  4413. Reciprocal = XMVectorReciprocalEst(X);
  4414. V = XMVectorMultiply(Y, Reciprocal);
  4415. R0 = XMVectorATanEst(V);
  4416. Result = XMVectorSelect(Result, R0, ATanResultValid);
  4417. return Result;
  4418. #elif defined(_XM_SSE_INTRINSICS_)
  4419. static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
  4420. // Mask if Y>0 && Y!=INF
  4421. XMVECTOR FiniteYGreaterZero = _mm_cmpgt_ps(Y,g_XMZero);
  4422. XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
  4423. FiniteYGreaterZero = _mm_andnot_ps(YEqualsInfinity,FiniteYGreaterZero);
  4424. // Get the sign bit of Y (Y & 0x80000000)
  4425. XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
  4426. // Get the sign bits of X
  4427. XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
  4428. // Change them to masks
  4429. XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
  4430. // Get Pi
  4431. XMVECTOR R1 = _mm_load_ps1(&ATan2Constants.f[0]);
  4432. // Copy the sign of Y
  4433. R1 = _mm_or_ps(R1,YSign);
  4434. R1 = XMVectorSelect(R1,YSign,XIsPositive);
  4435. // Mask for X==0
  4436. XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
  4437. // Get Pi/2 with the sign of Y
  4438. XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
  4439. PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
  4440. XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
  4441. // Mask for Y==0
  4442. vConstants = _mm_cmpeq_ps(Y,g_XMZero);
  4443. R2 = XMVectorSelect(R2,R1,vConstants);
  4444. // Get Pi/4 with sign of Y
  4445. XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
  4446. PiOverFour = _mm_or_ps(PiOverFour,YSign);
  4447. // Get (Pi*3)/4 with sign of Y
  4448. XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
  4449. ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
  4450. vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
  4451. XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
  4452. vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
  4453. XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
  4454. vConstants = XMVectorSelect(vResult,R1,FiniteYGreaterZero);
  4455. // At this point, any entry that's 0xFFFFFFFF will get the result
  4456. // from XMVectorATanEst(); otherwise, return the failsafe value
  4457. vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
  4458. // Any entries not 0xFFFFFFFF are considered precalculated
  4459. XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
  4460. // Let's do the ATan2 function
  4461. vConstants = _mm_div_ps(Y,X);
  4462. vConstants = XMVectorATanEst(vConstants);
  4463. // Discard entries that have been declared void
  4464. vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
  4465. return vResult;
  4466. #else // _XM_VMX128_INTRINSICS_
  4467. #endif // _XM_VMX128_INTRINSICS_
  4468. }
  4469. //------------------------------------------------------------------------------
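// Linear interpolation: Result = V0 + t * (V1 - V0). The factor t is not
// clamped, so values outside [0, 1] extrapolate past the endpoints.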
  4470. XMFINLINE XMVECTOR XMVectorLerp
  4471. (
  4472. FXMVECTOR V0,
  4473. FXMVECTOR V1,
  4474. FLOAT t
  4475. )
  4476. {
  4477. #if defined(_XM_NO_INTRINSICS_)
  4478. XMVECTOR Scale;
  4479. XMVECTOR Length;
  4480. XMVECTOR Result;
  4481. // V0 + t * (V1 - V0)
  4482. Scale = XMVectorReplicate(t);
  4483. Length = XMVectorSubtract(V1, V0);
  4484. Result = XMVectorMultiplyAdd(Length, Scale, V0);
  4485. return Result;
  4486. #elif defined(_XM_SSE_INTRINSICS_)
  4487. XMVECTOR L, S;
  4488. XMVECTOR Result;
  4489. L = _mm_sub_ps( V1, V0 );
  4490. S = _mm_set_ps1( t );
  4491. Result = _mm_mul_ps( L, S );
  4492. return _mm_add_ps( Result, V0 );
  4493. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  4494. #endif // _XM_VMX128_INTRINSICS_
  4495. }
  4496. //------------------------------------------------------------------------------
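// Per-component linear interpolation; the vector T supplies an independent
// interpolation factor for each component.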
  4497. XMFINLINE XMVECTOR XMVectorLerpV
  4498. (
  4499. FXMVECTOR V0,
  4500. FXMVECTOR V1,
  4501. FXMVECTOR T
  4502. )
  4503. {
  4504. #if defined(_XM_NO_INTRINSICS_)
  4505. XMVECTOR Length;
  4506. XMVECTOR Result;
  4507. // V0 + T * (V1 - V0)
  4508. Length = XMVectorSubtract(V1, V0);
  4509. Result = XMVectorMultiplyAdd(Length, T, V0);
  4510. return Result;
  4511. #elif defined(_XM_SSE_INTRINSICS_)
  4512. XMVECTOR Length;
  4513. XMVECTOR Result;
  4514. Length = _mm_sub_ps( V1, V0 );
  4515. Result = _mm_mul_ps( Length, T );
  4516. return _mm_add_ps( Result, V0 );
  4517. #else // _XM_VMX128_INTRINSICS_
  4518. #endif // _XM_VMX128_INTRINSICS_
  4519. }
  4520. //------------------------------------------------------------------------------
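// Cubic Hermite interpolation. The four scalar weights computed below are
// the standard Hermite basis functions, derived from the constraints
// P(0) = Position0, P(1) = Position1, P'(0) = Tangent0, P'(1) = Tangent1.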
  4521. XMFINLINE XMVECTOR XMVectorHermite
  4522. (
  4523. FXMVECTOR Position0,
  4524. FXMVECTOR Tangent0,
  4525. FXMVECTOR Position1,
  4526. CXMVECTOR Tangent1,
  4527. FLOAT t
  4528. )
  4529. {
  4530. #if defined(_XM_NO_INTRINSICS_)
  4531. XMVECTOR P0;
  4532. XMVECTOR T0;
  4533. XMVECTOR P1;
  4534. XMVECTOR T1;
  4535. XMVECTOR Result;
  4536. FLOAT t2;
  4537. FLOAT t3;
  4538. // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
  4539. // (t^3 - 2 * t^2 + t) * Tangent0 +
  4540. // (-2 * t^3 + 3 * t^2) * Position1 +
  4541. // (t^3 - t^2) * Tangent1
  4542. t2 = t * t;
  4543. t3 = t * t2;
  4544. P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
  4545. T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
  4546. P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
  4547. T1 = XMVectorReplicate(t3 - t2);
  4548. Result = XMVectorMultiply(P0, Position0);
  4549. Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
  4550. Result = XMVectorMultiplyAdd(P1, Position1, Result);
  4551. Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
  4552. return Result;
  4553. #elif defined(_XM_SSE_INTRINSICS_)
  4554. FLOAT t2 = t * t;
  4555. FLOAT t3 = t * t2;
  4556. XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
  4557. XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
  4558. XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
  4559. XMVECTOR T1 = _mm_set_ps1(t3 - t2);
  4560. XMVECTOR vResult = _mm_mul_ps(P0, Position0);
  4561. XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
  4562. vResult = _mm_add_ps(vResult,vTemp);
  4563. vTemp = _mm_mul_ps(P1, Position1);
  4564. vResult = _mm_add_ps(vResult,vTemp);
  4565. vTemp = _mm_mul_ps(T1, Tangent1);
  4566. vResult = _mm_add_ps(vResult,vTemp);
  4567. return vResult;
  4568. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  4569. #endif // _XM_VMX128_INTRINSICS_
  4570. }
  4571. //------------------------------------------------------------------------------
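// Vector form of XMVectorHermite: each component of T carries its own
// interpolation parameter. The SSE path builds all four basis weights in a
// single vector from the g_XMCatMulT2/g_XMCatMulT3 constants before
// splatting each weight against its control point.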
  4572. XMFINLINE XMVECTOR XMVectorHermiteV
  4573. (
  4574. FXMVECTOR Position0,
  4575. FXMVECTOR Tangent0,
  4576. FXMVECTOR Position1,
  4577. CXMVECTOR Tangent1,
  4578. CXMVECTOR T
  4579. )
  4580. {
  4581. #if defined(_XM_NO_INTRINSICS_)
  4582. XMVECTOR P0;
  4583. XMVECTOR T0;
  4584. XMVECTOR P1;
  4585. XMVECTOR T1;
  4586. XMVECTOR Result;
  4587. XMVECTOR T2;
  4588. XMVECTOR T3;
  4589. // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
  4590. // (t^3 - 2 * t^2 + t) * Tangent0 +
  4591. // (-2 * t^3 + 3 * t^2) * Position1 +
  4592. // (t^3 - t^2) * Tangent1
  4593. T2 = XMVectorMultiply(T, T);
  4594. T3 = XMVectorMultiply(T , T2);
  4595. P0 = XMVectorReplicate(2.0f * T3.v[0] - 3.0f * T2.v[0] + 1.0f);
  4596. T0 = XMVectorReplicate(T3.v[1] - 2.0f * T2.v[1] + T.v[1]);
  4597. P1 = XMVectorReplicate(-2.0f * T3.v[2] + 3.0f * T2.v[2]);
  4598. T1 = XMVectorReplicate(T3.v[3] - T2.v[3]);
  4599. Result = XMVectorMultiply(P0, Position0);
  4600. Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
  4601. Result = XMVectorMultiplyAdd(P1, Position1, Result);
  4602. Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
  4603. return Result;
  4604. #elif defined(_XM_SSE_INTRINSICS_)
  4605. static const XMVECTORF32 g_XMCatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
  4606. static const XMVECTORF32 g_XMCatMulT3 = {2.0f,1.0f,-2.0f,1.0f};
  4607. // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
  4608. // (t^3 - 2 * t^2 + t) * Tangent0 +
  4609. // (-2 * t^3 + 3 * t^2) * Position1 +
  4610. // (t^3 - t^2) * Tangent1
  4611. XMVECTOR T2 = _mm_mul_ps(T,T);
  4612. XMVECTOR T3 = _mm_mul_ps(T,T2);
  4613. // Mul by the constants against t^2
  4614. T2 = _mm_mul_ps(T2,g_XMCatMulT2);
  4615. // Mul by the constants against t^3
  4616. T3 = _mm_mul_ps(T3,g_XMCatMulT3);
  4617. // T3 now has the pre-result.
  4618. T3 = _mm_add_ps(T3,T2);
  4619. // I need to add t.y only
  4620. T2 = _mm_and_ps(T,g_XMMaskY);
  4621. T3 = _mm_add_ps(T3,T2);
  4622. // Add 1.0f to x
  4623. T3 = _mm_add_ps(T3,g_XMIdentityR0);
  4624. // Now, I have the constants created
  4625. // Mul the x constant to Position0
  4626. XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
  4627. vResult = _mm_mul_ps(vResult,Position0);
  4628. // Mul the y constant to Tangent0
  4629. T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
  4630. T2 = _mm_mul_ps(T2,Tangent0);
  4631. vResult = _mm_add_ps(vResult,T2);
  4632. // Mul the z constant to Position1
  4633. T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
  4634. T2 = _mm_mul_ps(T2,Position1);
  4635. vResult = _mm_add_ps(vResult,T2);
  4636. // Mul the w constant to Tangent1
  4637. T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
  4638. T3 = _mm_mul_ps(T3,Tangent1);
  4639. vResult = _mm_add_ps(vResult,T3);
  4640. return vResult;
  4641. #else // _XM_VMX128_INTRINSICS_
  4642. #endif // _XM_VMX128_INTRINSICS_
  4643. }
  4644. //------------------------------------------------------------------------------
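// Catmull-Rom interpolation. The curve passes through Position1 at t = 0
// and Position2 at t = 1, with Position0 and Position3 shaping the tangents
// as neighboring control points.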
  4645. XMFINLINE XMVECTOR XMVectorCatmullRom
  4646. (
  4647. FXMVECTOR Position0,
  4648. FXMVECTOR Position1,
  4649. FXMVECTOR Position2,
  4650. CXMVECTOR Position3,
  4651. FLOAT t
  4652. )
  4653. {
  4654. #if defined(_XM_NO_INTRINSICS_)
  4655. XMVECTOR P0;
  4656. XMVECTOR P1;
  4657. XMVECTOR P2;
  4658. XMVECTOR P3;
  4659. XMVECTOR Result;
  4660. FLOAT t2;
  4661. FLOAT t3;
  4662. // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
  4663. // (3 * t^3 - 5 * t^2 + 2) * Position1 +
  4664. // (-3 * t^3 + 4 * t^2 + t) * Position2 +
  4665. // (t^3 - t^2) * Position3) * 0.5
  4666. t2 = t * t;
  4667. t3 = t * t2;
  4668. P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
  4669. P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
  4670. P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
  4671. P3 = XMVectorReplicate((t3 - t2) * 0.5f);
  4672. Result = XMVectorMultiply(P0, Position0);
  4673. Result = XMVectorMultiplyAdd(P1, Position1, Result);
  4674. Result = XMVectorMultiplyAdd(P2, Position2, Result);
  4675. Result = XMVectorMultiplyAdd(P3, Position3, Result);
  4676. return Result;
  4677. #elif defined(_XM_SSE_INTRINSICS_)
  4678. FLOAT t2 = t * t;
  4679. FLOAT t3 = t * t2;
  4680. XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
  4681. XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
  4682. XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
  4683. XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);
  4684. P0 = _mm_mul_ps(P0, Position0);
  4685. P1 = _mm_mul_ps(P1, Position1);
  4686. P2 = _mm_mul_ps(P2, Position2);
  4687. P3 = _mm_mul_ps(P3, Position3);
  4688. P0 = _mm_add_ps(P0,P1);
  4689. P2 = _mm_add_ps(P2,P3);
  4690. P0 = _mm_add_ps(P0,P2);
  4691. return P0;
  4692. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  4693. #endif // _XM_VMX128_INTRINSICS_
  4694. }
  4695. //------------------------------------------------------------------------------
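// Vector form of XMVectorCatmullRom; each component of T is an independent
// interpolation parameter.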
  4696. XMFINLINE XMVECTOR XMVectorCatmullRomV
  4697. (
  4698. FXMVECTOR Position0,
  4699. FXMVECTOR Position1,
  4700. FXMVECTOR Position2,
  4701. CXMVECTOR Position3,
  4702. CXMVECTOR T
  4703. )
  4704. {
  4705. #if defined(_XM_NO_INTRINSICS_)
  4706. float fx = T.x;
  4707. float fy = T.y;
  4708. float fz = T.z;
  4709. float fw = T.w;
  4710. XMVECTOR vResult = {
  4711. 0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.x+
  4712. (3*fx*fx*fx-5*fx*fx+2)*Position1.x+
  4713. (-3*fx*fx*fx+4*fx*fx+fx)*Position2.x+
  4714. (fx*fx*fx-fx*fx)*Position3.x),
  4715. 0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.y+
  4716. (3*fy*fy*fy-5*fy*fy+2)*Position1.y+
  4717. (-3*fy*fy*fy+4*fy*fy+fy)*Position2.y+
  4718. (fy*fy*fy-fy*fy)*Position3.y),
  4719. 0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.z+
  4720. (3*fz*fz*fz-5*fz*fz+2)*Position1.z+
  4721. (-3*fz*fz*fz+4*fz*fz+fz)*Position2.z+
  4722. (fz*fz*fz-fz*fz)*Position3.z),
  4723. 0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.w+
  4724. (3*fw*fw*fw-5*fw*fw+2)*Position1.w+
  4725. (-3*fw*fw*fw+4*fw*fw+fw)*Position2.w+
  4726. (fw*fw*fw-fw*fw)*Position3.w)
  4727. };
  4728. return vResult;
  4729. #elif defined(_XM_SSE_INTRINSICS_)
  4730. static const XMVECTORF32 g_Catmul2 = {2.0f,2.0f,2.0f,2.0f};
  4731. static const XMVECTORF32 g_Catmul3 = {3.0f,3.0f,3.0f,3.0f};
  4732. static const XMVECTORF32 g_Catmul4 = {4.0f,4.0f,4.0f,4.0f};
  4733. static const XMVECTORF32 g_Catmul5 = {5.0f,5.0f,5.0f,5.0f};
  4734. // Cache T^2 and T^3
  4735. XMVECTOR T2 = _mm_mul_ps(T,T);
  4736. XMVECTOR T3 = _mm_mul_ps(T,T2);
  4737. // Perform the Position0 term
  4738. XMVECTOR vResult = _mm_add_ps(T2,T2);
  4739. vResult = _mm_sub_ps(vResult,T);
  4740. vResult = _mm_sub_ps(vResult,T3);
  4741. vResult = _mm_mul_ps(vResult,Position0);
  4742. // Perform the Position1 term and add
  4743. XMVECTOR vTemp = _mm_mul_ps(T3,g_Catmul3);
  4744. XMVECTOR vTemp2 = _mm_mul_ps(T2,g_Catmul5);
  4745. vTemp = _mm_sub_ps(vTemp,vTemp2);
  4746. vTemp = _mm_add_ps(vTemp,g_Catmul2);
  4747. vTemp = _mm_mul_ps(vTemp,Position1);
  4748. vResult = _mm_add_ps(vResult,vTemp);
  4749. // Perform the Position2 term and add
  4750. vTemp = _mm_mul_ps(T2,g_Catmul4);
  4751. vTemp2 = _mm_mul_ps(T3,g_Catmul3);
  4752. vTemp = _mm_sub_ps(vTemp,vTemp2);
  4753. vTemp = _mm_add_ps(vTemp,T);
  4754. vTemp = _mm_mul_ps(vTemp,Position2);
  4755. vResult = _mm_add_ps(vResult,vTemp);
  4756. // Position3 is the last term
  4757. T3 = _mm_sub_ps(T3,T2);
  4758. T3 = _mm_mul_ps(T3,Position3);
  4759. vResult = _mm_add_ps(vResult,T3);
  4760. // Multiply by 0.5f and exit
  4761. vResult = _mm_mul_ps(vResult,g_XMOneHalf);
  4762. return vResult;
  4763. #else // _XM_VMX128_INTRINSICS_
  4764. #endif // _XM_VMX128_INTRINSICS_
  4765. }
  4766. //------------------------------------------------------------------------------
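// Barycentric interpolation over the triangle (Position0, Position1,
// Position2). The result lies inside the triangle when f >= 0, g >= 0 and
// f + g <= 1.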
  4767. XMFINLINE XMVECTOR XMVectorBaryCentric
  4768. (
  4769. FXMVECTOR Position0,
  4770. FXMVECTOR Position1,
  4771. FXMVECTOR Position2,
  4772. FLOAT f,
  4773. FLOAT g
  4774. )
  4775. {
  4776. #if defined(_XM_NO_INTRINSICS_)
  4777. // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
  4778. XMVECTOR P10;
  4779. XMVECTOR P20;
  4780. XMVECTOR ScaleF;
  4781. XMVECTOR ScaleG;
  4782. XMVECTOR Result;
  4783. P10 = XMVectorSubtract(Position1, Position0);
  4784. ScaleF = XMVectorReplicate(f);
  4785. P20 = XMVectorSubtract(Position2, Position0);
  4786. ScaleG = XMVectorReplicate(g);
  4787. Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
  4788. Result = XMVectorMultiplyAdd(P20, ScaleG, Result);
  4789. return Result;
  4790. #elif defined(_XM_SSE_INTRINSICS_)
  4791. XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
  4792. XMVECTOR SF = _mm_set_ps1(f);
  4793. XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
  4794. XMVECTOR SG = _mm_set_ps1(g);
  4795. R1 = _mm_mul_ps(R1,SF);
  4796. R2 = _mm_mul_ps(R2,SG);
  4797. R1 = _mm_add_ps(R1,Position0);
  4798. R1 = _mm_add_ps(R1,R2);
  4799. return R1;
  4800. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  4801. #endif // _XM_VMX128_INTRINSICS_
  4802. }
  4803. //------------------------------------------------------------------------------
  4804. XMFINLINE XMVECTOR XMVectorBaryCentricV
  4805. (
  4806. FXMVECTOR Position0,
  4807. FXMVECTOR Position1,
  4808. FXMVECTOR Position2,
  4809. CXMVECTOR F,
  4810. CXMVECTOR G
  4811. )
  4812. {
  4813. #if defined(_XM_NO_INTRINSICS_)
  4814. // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
  4815. XMVECTOR P10;
  4816. XMVECTOR P20;
  4817. XMVECTOR Result;
  4818. P10 = XMVectorSubtract(Position1, Position0);
  4819. P20 = XMVectorSubtract(Position2, Position0);
  4820. Result = XMVectorMultiplyAdd(P10, F, Position0);
  4821. Result = XMVectorMultiplyAdd(P20, G, Result);
  4822. return Result;
  4823. #elif defined(_XM_SSE_INTRINSICS_)
  4824. XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
  4825. XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
  4826. R1 = _mm_mul_ps(R1,F);
  4827. R2 = _mm_mul_ps(R2,G);
  4828. R1 = _mm_add_ps(R1,Position0);
  4829. R1 = _mm_add_ps(R1,R2);
  4830. return R1;
  4831. #else // _XM_VMX128_INTRINSICS_
  4832. #endif // _XM_VMX128_INTRINSICS_
  4833. }
  4834. /****************************************************************************
  4835. *
  4836. * 2D Vector
  4837. *
  4838. ****************************************************************************/
  4839. //------------------------------------------------------------------------------
  4840. // Comparison operations
  4841. //------------------------------------------------------------------------------
  4842. //------------------------------------------------------------------------------
  4843. XMFINLINE BOOL XMVector2Equal
  4844. (
  4845. FXMVECTOR V1,
  4846. FXMVECTOR V2
  4847. )
  4848. {
  4849. #if defined(_XM_NO_INTRINSICS_)
  4850. return (((V1.x == V2.x) && (V1.y == V2.y)) != 0);
  4851. #elif defined(_XM_SSE_INTRINSICS_)
  4852. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  4853. // z and w are don't care
  4854. return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
  4855. #else // _XM_VMX128_INTRINSICS_
  4856. return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
  4857. #endif
  4858. }
  4859. //------------------------------------------------------------------------------
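// Compare the x and y components and return a comparison record:
// XM_CRMASK_CR6TRUE when both pairs are equal, XM_CRMASK_CR6FALSE when
// both pairs differ, and 0 for a mixed result.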
  4860. XMFINLINE UINT XMVector2EqualR
  4861. (
  4862. FXMVECTOR V1,
  4863. FXMVECTOR V2
  4864. )
  4865. {
  4866. #if defined(_XM_NO_INTRINSICS_)
  4867. UINT CR = 0;
  4868. if ((V1.x == V2.x) &&
  4869. (V1.y == V2.y))
  4870. {
  4871. CR = XM_CRMASK_CR6TRUE;
  4872. }
  4873. else if ((V1.x != V2.x) &&
  4874. (V1.y != V2.y))
  4875. {
  4876. CR = XM_CRMASK_CR6FALSE;
  4877. }
  4878. return CR;
  4879. #elif defined(_XM_SSE_INTRINSICS_)
  4880. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  4881. // z and w are don't care
  4882. int iTest = _mm_movemask_ps(vTemp)&3;
  4883. UINT CR = 0;
  4884. if (iTest==3)
  4885. {
  4886. CR = XM_CRMASK_CR6TRUE;
  4887. }
  4888. else if (!iTest)
  4889. {
  4890. CR = XM_CRMASK_CR6FALSE;
  4891. }
  4892. return CR;
  4893. #else // _XM_VMX128_INTRINSICS_
  4894. #endif // _XM_VMX128_INTRINSICS_
  4895. }
  4896. //------------------------------------------------------------------------------
  4897. XMFINLINE BOOL XMVector2EqualInt
  4898. (
  4899. FXMVECTOR V1,
  4900. FXMVECTOR V2
  4901. )
  4902. {
  4903. #if defined(_XM_NO_INTRINSICS_)
  4904. return (((V1.u[0] == V2.u[0]) && (V1.u[1] == V2.u[1])) != 0);
  4905. #elif defined(_XM_SSE_INTRINSICS_)
  4906. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  4907. return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
  4908. #else // _XM_VMX128_INTRINSICS_
  4909. return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
  4910. #endif
  4911. }
  4912. //------------------------------------------------------------------------------
  4913. XMFINLINE UINT XMVector2EqualIntR
  4914. (
  4915. FXMVECTOR V1,
  4916. FXMVECTOR V2
  4917. )
  4918. {
  4919. #if defined(_XM_NO_INTRINSICS_)
  4920. UINT CR = 0;
  4921. if ((V1.u[0] == V2.u[0]) &&
  4922. (V1.u[1] == V2.u[1]))
  4923. {
  4924. CR = XM_CRMASK_CR6TRUE;
  4925. }
  4926. else if ((V1.u[0] != V2.u[0]) &&
  4927. (V1.u[1] != V2.u[1]))
  4928. {
  4929. CR = XM_CRMASK_CR6FALSE;
  4930. }
  4931. return CR;
  4932. #elif defined(_XM_SSE_INTRINSICS_)
  4933. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  4934. int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
  4935. UINT CR = 0;
  4936. if (iTest==3)
  4937. {
  4938. CR = XM_CRMASK_CR6TRUE;
  4939. }
  4940. else if (!iTest)
  4941. {
  4942. CR = XM_CRMASK_CR6FALSE;
  4943. }
  4944. return CR;
  4945. #else // _XM_VMX128_INTRINSICS_
  4946. #endif // _XM_VMX128_INTRINSICS_
  4947. }
  4948. //------------------------------------------------------------------------------
  4949. XMFINLINE BOOL XMVector2NearEqual
  4950. (
  4951. FXMVECTOR V1,
  4952. FXMVECTOR V2,
  4953. FXMVECTOR Epsilon
  4954. )
  4955. {
  4956. #if defined(_XM_NO_INTRINSICS_)
  4957. FLOAT dx, dy;
  4958. dx = fabsf(V1.x-V2.x);
  4959. dy = fabsf(V1.y-V2.y);
  4960. return ((dx <= Epsilon.x) &&
  4961. (dy <= Epsilon.y));
  4962. #elif defined(_XM_SSE_INTRINSICS_)
  4963. // Get the difference
  4964. XMVECTOR vDelta = _mm_sub_ps(V1,V2);
  4965. // Get the absolute value of the difference
  4966. XMVECTOR vTemp = _mm_setzero_ps();
  4967. vTemp = _mm_sub_ps(vTemp,vDelta);
  4968. vTemp = _mm_max_ps(vTemp,vDelta);
  4969. vTemp = _mm_cmple_ps(vTemp,Epsilon);
  4970. // z and w are don't care
  4971. return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
  4972. #else // _XM_VMX128_INTRINSICS_
  4973. #endif // _XM_VMX128_INTRINSICS_
  4974. }
  4975. //------------------------------------------------------------------------------
  4976. XMFINLINE BOOL XMVector2NotEqual
  4977. (
  4978. FXMVECTOR V1,
  4979. FXMVECTOR V2
  4980. )
  4981. {
  4982. #if defined(_XM_NO_INTRINSICS_)
  4983. return (((V1.x != V2.x) || (V1.y != V2.y)) != 0);
  4984. #elif defined(_XM_SSE_INTRINSICS_)
  4985. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  4986. // z and w are don't care
  4987. return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
  4988. #else // _XM_VMX128_INTRINSICS_
  4989. return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
  4990. #endif
  4991. }
  4992. //------------------------------------------------------------------------------
  4993. XMFINLINE BOOL XMVector2NotEqualInt
  4994. (
  4995. FXMVECTOR V1,
  4996. FXMVECTOR V2
  4997. )
  4998. {
  4999. #if defined(_XM_NO_INTRINSICS_)
  5000. return (((V1.u[0] != V2.u[0]) || (V1.u[1] != V2.u[1])) != 0);
  5001. #elif defined(_XM_SSE_INTRINSICS_)
  5002. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  5003. return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
  5004. #else // _XM_VMX128_INTRINSICS_
  5005. return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
  5006. #endif
  5007. }
  5008. //------------------------------------------------------------------------------
  5009. XMFINLINE BOOL XMVector2Greater
  5010. (
  5011. FXMVECTOR V1,
  5012. FXMVECTOR V2
  5013. )
  5014. {
  5015. #if defined(_XM_NO_INTRINSICS_)
  5016. return (((V1.x > V2.x) && (V1.y > V2.y)) != 0);
  5017. #elif defined(_XM_SSE_INTRINSICS_)
  5018. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  5019. // z and w are don't care
  5020. return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
  5021. #else // _XM_VMX128_INTRINSICS_
  5022. return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
  5023. #endif
  5024. }
  5025. //------------------------------------------------------------------------------
  5026. XMFINLINE UINT XMVector2GreaterR
  5027. (
  5028. FXMVECTOR V1,
  5029. FXMVECTOR V2
  5030. )
  5031. {
  5032. #if defined(_XM_NO_INTRINSICS_)
  5033. UINT CR = 0;
  5034. if ((V1.x > V2.x) &&
  5035. (V1.y > V2.y))
  5036. {
  5037. CR = XM_CRMASK_CR6TRUE;
  5038. }
  5039. else if ((V1.x <= V2.x) &&
  5040. (V1.y <= V2.y))
  5041. {
  5042. CR = XM_CRMASK_CR6FALSE;
  5043. }
  5044. return CR;
  5045. #elif defined(_XM_SSE_INTRINSICS_)
  5046. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  5047. int iTest = _mm_movemask_ps(vTemp)&3;
  5048. UINT CR = 0;
  5049. if (iTest==3)
  5050. {
  5051. CR = XM_CRMASK_CR6TRUE;
  5052. }
  5053. else if (!iTest)
  5054. {
  5055. CR = XM_CRMASK_CR6FALSE;
  5056. }
  5057. return CR;
  5058. #else // _XM_VMX128_INTRINSICS_
  5059. #endif // _XM_VMX128_INTRINSICS_
  5060. }
  5061. //------------------------------------------------------------------------------
  5062. XMFINLINE BOOL XMVector2GreaterOrEqual
  5063. (
  5064. FXMVECTOR V1,
  5065. FXMVECTOR V2
  5066. )
  5067. {
  5068. #if defined(_XM_NO_INTRINSICS_)
  5069. return (((V1.x >= V2.x) && (V1.y >= V2.y)) != 0);
  5070. #elif defined(_XM_SSE_INTRINSICS_)
  5071. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  5072. return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
  5073. #else // _XM_VMX128_INTRINSICS_
  5074. return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
  5075. #endif
  5076. }
  5077. //------------------------------------------------------------------------------
  5078. XMFINLINE UINT XMVector2GreaterOrEqualR
  5079. (
  5080. FXMVECTOR V1,
  5081. FXMVECTOR V2
  5082. )
  5083. {
  5084. #if defined(_XM_NO_INTRINSICS_)
  5085. UINT CR = 0;
  5086. if ((V1.x >= V2.x) &&
  5087. (V1.y >= V2.y))
  5088. {
  5089. CR = XM_CRMASK_CR6TRUE;
  5090. }
  5091. else if ((V1.x < V2.x) &&
  5092. (V1.y < V2.y))
  5093. {
  5094. CR = XM_CRMASK_CR6FALSE;
  5095. }
  5096. return CR;
  5097. #elif defined(_XM_SSE_INTRINSICS_)
  5098. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  5099. int iTest = _mm_movemask_ps(vTemp)&3;
  5100. UINT CR = 0;
  5101. if (iTest == 3)
  5102. {
  5103. CR = XM_CRMASK_CR6TRUE;
  5104. }
  5105. else if (!iTest)
  5106. {
  5107. CR = XM_CRMASK_CR6FALSE;
  5108. }
  5109. return CR;
  5110. #else // _XM_VMX128_INTRINSICS_
  5111. #endif // _XM_VMX128_INTRINSICS_
  5112. }
  5113. //------------------------------------------------------------------------------
  5114. XMFINLINE BOOL XMVector2Less
  5115. (
  5116. FXMVECTOR V1,
  5117. FXMVECTOR V2
  5118. )
  5119. {
  5120. #if defined(_XM_NO_INTRINSICS_)
  5121. return (((V1.x < V2.x) && (V1.y < V2.y)) != 0);
  5122. #elif defined(_XM_SSE_INTRINSICS_)
  5123. XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
  5124. return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
  5125. #else // _XM_VMX128_INTRINSICS_
  5126. return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
  5127. #endif
  5128. }
  5129. //------------------------------------------------------------------------------
  5130. XMFINLINE BOOL XMVector2LessOrEqual
  5131. (
  5132. FXMVECTOR V1,
  5133. FXMVECTOR V2
  5134. )
  5135. {
  5136. #if defined(_XM_NO_INTRINSICS_)
  5137. return (((V1.x <= V2.x) && (V1.y <= V2.y)) != 0);
  5138. #elif defined(_XM_SSE_INTRINSICS_)
  5139. XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
  5140. return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
  5141. #else // _XM_VMX128_INTRINSICS_
  5142. return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
  5143. #endif
  5144. }
  5145. //------------------------------------------------------------------------------
  5146. XMFINLINE BOOL XMVector2InBounds
  5147. (
  5148. FXMVECTOR V,
  5149. FXMVECTOR Bounds
  5150. )
  5151. {
  5152. #if defined(_XM_NO_INTRINSICS_)
  5153. return (((V.x <= Bounds.x && V.x >= -Bounds.x) &&
  5154. (V.y <= Bounds.y && V.y >= -Bounds.y)) != 0);
  5155. #elif defined(_XM_SSE_INTRINSICS_)
  5156. // Test if less than or equal
  5157. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  5158. // Negate the bounds
  5159. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  5160. // Test if greater or equal (Reversed)
  5161. vTemp2 = _mm_cmple_ps(vTemp2,V);
  5162. // Blend answers
  5163. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  5164. // x and y in bounds? (z and w are don't care)
  5165. return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
  5166. #else // _XM_VMX128_INTRINSICS_
  5167. return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
  5168. #endif
  5169. }
  5170. //------------------------------------------------------------------------------
  5171. XMFINLINE UINT XMVector2InBoundsR
  5172. (
  5173. FXMVECTOR V,
  5174. FXMVECTOR Bounds
  5175. )
  5176. {
  5177. #if defined(_XM_NO_INTRINSICS_)
  5178. UINT CR = 0;
  5179. if ((V.x <= Bounds.x && V.x >= -Bounds.x) &&
  5180. (V.y <= Bounds.y && V.y >= -Bounds.y))
  5181. {
  5182. CR = XM_CRMASK_CR6BOUNDS;
  5183. }
  5184. return CR;
  5185. #elif defined(_XM_SSE_INTRINSICS_)
  5186. // Test if less than or equal
  5187. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  5188. // Negate the bounds
  5189. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  5190. // Test if greater or equal (Reversed)
  5191. vTemp2 = _mm_cmple_ps(vTemp2,V);
  5192. // Blend answers
  5193. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  5194. // x and y in bounds? (z and w are don't care)
  5195. return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
  5196. #else // _XM_VMX128_INTRINSICS_
  5197. #endif // _XM_VMX128_INTRINSICS_
  5198. }
  5199. //------------------------------------------------------------------------------
  5200. XMFINLINE BOOL XMVector2IsNaN
  5201. (
  5202. FXMVECTOR V
  5203. )
  5204. {
  5205. #if defined(_XM_NO_INTRINSICS_)
  5206. return (XMISNAN(V.x) ||
  5207. XMISNAN(V.y));
  5208. #elif defined(_XM_SSE_INTRINSICS_)
  5209. // Mask off the exponent
  5210. __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
  5211. // Mask off the mantissa
  5212. __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
  5213. // Are any of the exponents == 0x7F800000?
  5214. vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
  5215. // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
  5216. vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
  5217. // Perform a not on the NaN test to be true on NON-zero mantissas
  5218. vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
  5219. // If x or y are NaN, the signs are true after the merge above
  5220. return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
  5221. #else // _XM_VMX128_INTRINSICS_
  5222. #endif // _XM_VMX128_INTRINSICS_
  5223. }
  5224. //------------------------------------------------------------------------------
  5225. XMFINLINE BOOL XMVector2IsInfinite
  5226. (
  5227. FXMVECTOR V
  5228. )
  5229. {
  5230. #if defined(_XM_NO_INTRINSICS_)
  5231. return (XMISINF(V.x) ||
  5232. XMISINF(V.y));
  5233. #elif defined(_XM_SSE_INTRINSICS_)
  5234. // Mask off the sign bit
  5235. __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
  5236. // Compare to infinity
  5237. vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
  5238. // If x or y are infinite, the corresponding signs are true.
  5239. return ((_mm_movemask_ps(vTemp)&3) != 0);
  5240. #else // _XM_VMX128_INTRINSICS_
  5241. #endif // _XM_VMX128_INTRINSICS_
  5242. }
  5243. //------------------------------------------------------------------------------
  5244. // Computation operations
  5245. //------------------------------------------------------------------------------
  5246. //------------------------------------------------------------------------------
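// Dot product of the x and y components only; the scalar result is
// replicated into all four components of the return value.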
  5247. XMFINLINE XMVECTOR XMVector2Dot
  5248. (
  5249. FXMVECTOR V1,
  5250. FXMVECTOR V2
  5251. )
  5252. {
  5253. #if defined(_XM_NO_INTRINSICS_)
  5254. XMVECTOR Result;
  5255. Result.v[0] =
  5256. Result.v[1] =
  5257. Result.v[2] =
  5258. Result.v[3] = V1.v[0] * V2.v[0] + V1.v[1] * V2.v[1];
  5259. return Result;
  5260. #elif defined(_XM_SSE_INTRINSICS_)
  5261. // Perform the dot product on x and y
  5262. XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
  5263. // vTemp has y splatted
  5264. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5265. // x+y
  5266. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5267. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5268. return vLengthSq;
  5269. #else // _XM_VMX128_INTRINSICS_
  5270. #endif // _XM_VMX128_INTRINSICS_
  5271. }
  5272. //------------------------------------------------------------------------------
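// The 2D cross product is the scalar V1.x * V2.y - V1.y * V2.x, replicated
// into all four components. Its sign gives the winding of V2 relative to V1.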
  5273. XMFINLINE XMVECTOR XMVector2Cross
  5274. (
  5275. FXMVECTOR V1,
  5276. FXMVECTOR V2
  5277. )
  5278. {
  5279. #if defined(_XM_NO_INTRINSICS_)
  5280. FLOAT fCross = (V1.x * V2.y) - (V1.y * V2.x);
  5281. XMVECTOR vResult = {
  5282. fCross,
  5283. fCross,
  5284. fCross,
  5285. fCross
  5286. };
  5287. return vResult;
  5288. #elif defined(_XM_SSE_INTRINSICS_)
  5289. // Swap x and y
  5290. XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
  5291. // Perform the muls
  5292. vResult = _mm_mul_ps(vResult,V1);
  5293. // Splat y
  5294. XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
  5295. // Sub the values
  5296. vResult = _mm_sub_ss(vResult,vTemp);
  5297. // Splat the cross product
  5298. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
  5299. return vResult;
  5300. #else // _XM_VMX128_INTRINSICS_
  5301. #endif // _XM_VMX128_INTRINSICS_
  5302. }
  5303. //------------------------------------------------------------------------------
  5304. XMFINLINE XMVECTOR XMVector2LengthSq
  5305. (
  5306. FXMVECTOR V
  5307. )
  5308. {
  5309. #if defined(_XM_NO_INTRINSICS_)
  5310. return XMVector2Dot(V, V);
  5311. #elif defined(_XM_SSE_INTRINSICS_)
  5312. // Perform the dot product on x and y
  5313. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5314. // vTemp has y splatted
  5315. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5316. // x+y
  5317. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5318. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5319. return vLengthSq;
  5320. #else // _XM_VMX128_INTRINSICS_
  5321. return XMVector2Dot(V, V);
  5322. #endif
  5323. }
  5324. //------------------------------------------------------------------------------
  5325. XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
  5326. (
  5327. FXMVECTOR V
  5328. )
  5329. {
  5330. #if defined(_XM_NO_INTRINSICS_)
  5331. XMVECTOR Result;
  5332. Result = XMVector2LengthSq(V);
  5333. Result = XMVectorReciprocalSqrtEst(Result);
  5334. return Result;
  5335. #elif defined(_XM_SSE_INTRINSICS_)
  5336. // Perform the dot product on x and y
  5337. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5338. // vTemp has y splatted
  5339. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5340. // x+y
  5341. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5342. vLengthSq = _mm_rsqrt_ss(vLengthSq);
  5343. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5344. return vLengthSq;
  5345. #else // _XM_VMX128_INTRINSICS_
  5346. #endif // _XM_VMX128_INTRINSICS_
  5347. }
  5348. //------------------------------------------------------------------------------
  5349. XMFINLINE XMVECTOR XMVector2ReciprocalLength
  5350. (
  5351. FXMVECTOR V
  5352. )
  5353. {
  5354. #if defined(_XM_NO_INTRINSICS_)
  5355. XMVECTOR Result;
  5356. Result = XMVector2LengthSq(V);
  5357. Result = XMVectorReciprocalSqrt(Result);
  5358. return Result;
  5359. #elif defined(_XM_SSE_INTRINSICS_)
  5360. // Perform the dot product on x and y
  5361. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5362. // vTemp has y splatted
  5363. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5364. // x+y
  5365. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5366. vLengthSq = _mm_sqrt_ss(vLengthSq);
  5367. vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
  5368. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5369. return vLengthSq;
  5370. #else // _XM_VMX128_INTRINSICS_
  5371. #endif // _XM_VMX128_INTRINSICS_
  5372. }
  5373. //------------------------------------------------------------------------------
  5374. XMFINLINE XMVECTOR XMVector2LengthEst
  5375. (
  5376. FXMVECTOR V
  5377. )
  5378. {
  5379. #if defined(_XM_NO_INTRINSICS_)
  5380. XMVECTOR Result;
  5381. Result = XMVector2LengthSq(V);
  5382. Result = XMVectorSqrtEst(Result);
  5383. return Result;
  5384. #elif defined(_XM_SSE_INTRINSICS_)
  5385. // Perform the dot product on x and y
  5386. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5387. // vTemp has y splatted
  5388. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5389. // x+y
  5390. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5391. vLengthSq = _mm_sqrt_ss(vLengthSq);
  5392. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5393. return vLengthSq;
  5394. #else // _XM_VMX128_INTRINSICS_
  5395. #endif // _XM_VMX128_INTRINSICS_
  5396. }
  5397. //------------------------------------------------------------------------------
  5398. XMFINLINE XMVECTOR XMVector2Length
  5399. (
  5400. FXMVECTOR V
  5401. )
  5402. {
  5403. #if defined(_XM_NO_INTRINSICS_)
  5404. XMVECTOR Result;
  5405. Result = XMVector2LengthSq(V);
  5406. Result = XMVectorSqrt(Result);
  5407. return Result;
  5408. #elif defined(_XM_SSE_INTRINSICS_)
  5409. // Perform the dot product on x and y
  5410. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5411. // vTemp has y splatted
  5412. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5413. // x+y
  5414. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5415. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5416. vLengthSq = _mm_sqrt_ps(vLengthSq);
  5417. return vLengthSq;
  5418. #else // _XM_VMX128_INTRINSICS_
  5419. #endif // _XM_VMX128_INTRINSICS_
  5420. }
  5421. //------------------------------------------------------------------------------
  5422. // XMVector2NormalizeEst uses a reciprocal estimate and
  5423. // returns QNaN on zero and infinite vectors.
  5424. XMFINLINE XMVECTOR XMVector2NormalizeEst
  5425. (
  5426. FXMVECTOR V
  5427. )
  5428. {
  5429. #if defined(_XM_NO_INTRINSICS_)
  5430. XMVECTOR Result;
  5431. Result = XMVector2ReciprocalLength(V);
  5432. Result = XMVectorMultiply(V, Result);
  5433. return Result;
  5434. #elif defined(_XM_SSE_INTRINSICS_)
  5435. // Perform the dot product on x and y
  5436. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5437. // vTemp has y splatted
  5438. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5439. // x+y
  5440. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5441. vLengthSq = _mm_rsqrt_ss(vLengthSq);
  5442. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5443. vLengthSq = _mm_mul_ps(vLengthSq,V);
  5444. return vLengthSq;
  5445. #else // _XM_VMX128_INTRINSICS_
  5446. #endif // _XM_VMX128_INTRINSICS_
  5447. }
  5448. //------------------------------------------------------------------------------
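// Normalize the x and y components. In the SSE path a zero-length input
// produces QNaN (0/0) and an infinite-length input is flushed to zero by
// the final mask.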
  5449. XMFINLINE XMVECTOR XMVector2Normalize
  5450. (
  5451. FXMVECTOR V
  5452. )
  5453. {
  5454. #if defined(_XM_NO_INTRINSICS_)
  5455. XMVECTOR LengthSq;
  5456. XMVECTOR Zero;
  5457. XMVECTOR InfiniteLength;
  5458. XMVECTOR ZeroLength;
  5459. XMVECTOR Select;
  5460. XMVECTOR Result;
  5461. LengthSq = XMVector2LengthSq(V);
  5462. Zero = XMVectorZero();
  5463. Result = XMVectorReciprocalSqrt(LengthSq);
  5464. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  5465. ZeroLength = XMVectorEqual(LengthSq, Zero);
  5466. Result = XMVectorMultiply(V, Result);
  5467. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  5468. Result = XMVectorSelect(LengthSq, Result, Select);
  5469. return Result;
  5470. #elif defined(_XM_SSE_INTRINSICS_)
  5471. // Perform the dot product on x and y only
  5472. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  5473. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
  5474. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  5475. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  5476. // Prepare for the division
  5477. XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
  5478. // Failsafe on zero (Or epsilon) length planes
  5479. // If the length is infinity, set the elements to zero
  5480. vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
  5481. // Divide by the length to perform the normalization
  5482. vResult = _mm_div_ps(V,vResult);
  5483. // Any that are infinity, set to zero
  5484. vResult = _mm_and_ps(vResult,vLengthSq);
  5485. return vResult;
  5486. #else // _XM_VMX128_INTRINSICS_
  5487. #endif // _XM_VMX128_INTRINSICS_
  5488. }
  5489. //------------------------------------------------------------------------------
  5490. XMFINLINE XMVECTOR XMVector2ClampLength
  5491. (
  5492. FXMVECTOR V,
  5493. FLOAT LengthMin,
  5494. FLOAT LengthMax
  5495. )
  5496. {
  5497. #if defined(_XM_NO_INTRINSICS_)
  5498. XMVECTOR ClampMax;
  5499. XMVECTOR ClampMin;
  5500. ClampMax = XMVectorReplicate(LengthMax);
  5501. ClampMin = XMVectorReplicate(LengthMin);
  5502. return XMVector2ClampLengthV(V, ClampMin, ClampMax);
  5503. #elif defined(_XM_SSE_INTRINSICS_)
  5504. XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
  5505. XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
  5506. return XMVector2ClampLengthV(V, ClampMin, ClampMax);
  5507. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  5508. #endif // _XM_VMX128_INTRINSICS_
  5509. }
  5510. //------------------------------------------------------------------------------
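// Clamp the length of V to [LengthMin, LengthMax]. Both bounds must be
// non-negative, splatted vectors (x == y) with LengthMax >= LengthMin,
// as enforced by the asserts below.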
  5511. XMFINLINE XMVECTOR XMVector2ClampLengthV
  5512. (
  5513. FXMVECTOR V,
  5514. FXMVECTOR LengthMin,
  5515. FXMVECTOR LengthMax
  5516. )
  5517. {
  5518. #if defined(_XM_NO_INTRINSICS_)
  5519. XMVECTOR ClampLength;
  5520. XMVECTOR LengthSq;
  5521. XMVECTOR RcpLength;
  5522. XMVECTOR Length;
  5523. XMVECTOR Normal;
  5524. XMVECTOR Zero;
  5525. XMVECTOR InfiniteLength;
  5526. XMVECTOR ZeroLength;
  5527. XMVECTOR Select;
  5528. XMVECTOR ControlMax;
  5529. XMVECTOR ControlMin;
  5530. XMVECTOR Control;
  5531. XMVECTOR Result;
  5532. XMASSERT((LengthMin.y == LengthMin.x));
  5533. XMASSERT((LengthMax.y == LengthMax.x));
  5534. XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
  5535. XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
  5536. XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
  5537. LengthSq = XMVector2LengthSq(V);
  5538. Zero = XMVectorZero();
  5539. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  5540. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  5541. ZeroLength = XMVectorEqual(LengthSq, Zero);
  5542. Length = XMVectorMultiply(LengthSq, RcpLength);
  5543. Normal = XMVectorMultiply(V, RcpLength);
  5544. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  5545. Length = XMVectorSelect(LengthSq, Length, Select);
  5546. Normal = XMVectorSelect(LengthSq, Normal, Select);
  5547. ControlMax = XMVectorGreater(Length, LengthMax);
  5548. ControlMin = XMVectorLess(Length, LengthMin);
  5549. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  5550. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  5551. Result = XMVectorMultiply(Normal, ClampLength);
  5552. // Preserve the original vector (with no precision loss) if the length falls within the given range
  5553. Control = XMVectorEqualInt(ControlMax, ControlMin);
  5554. Result = XMVectorSelect(Result, V, Control);
  5555. return Result;
  5556. #elif defined(_XM_SSE_INTRINSICS_)
  5557. XMVECTOR ClampLength;
  5558. XMVECTOR LengthSq;
  5559. XMVECTOR RcpLength;
  5560. XMVECTOR Length;
  5561. XMVECTOR Normal;
  5562. XMVECTOR InfiniteLength;
  5563. XMVECTOR ZeroLength;
  5564. XMVECTOR Select;
  5565. XMVECTOR ControlMax;
  5566. XMVECTOR ControlMin;
  5567. XMVECTOR Control;
  5568. XMVECTOR Result;
  5569. XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
  5570. XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
  5571. XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
  5572. XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
  5573. XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
  5574. LengthSq = XMVector2LengthSq(V);
  5575. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  5576. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
  5577. ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
  5578. Length = _mm_mul_ps(LengthSq, RcpLength);
  5579. Normal = _mm_mul_ps(V, RcpLength);
  5580. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  5581. Length = XMVectorSelect(LengthSq, Length, Select);
  5582. Normal = XMVectorSelect(LengthSq, Normal, Select);
  5583. ControlMax = XMVectorGreater(Length, LengthMax);
  5584. ControlMin = XMVectorLess(Length, LengthMin);
  5585. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  5586. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  5587. Result = _mm_mul_ps(Normal, ClampLength);
  5588. // Preserve the original vector (with no precision loss) if the length falls within the given range
  5589. Control = XMVectorEqualInt(ControlMax, ControlMin);
  5590. Result = XMVectorSelect(Result, V, Control);
  5591. return Result;
  5592. #else // _XM_VMX128_INTRINSICS_
  5593. #endif // _XM_VMX128_INTRINSICS_
  5594. }
  5595. //------------------------------------------------------------------------------
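// Reflect Incident about Normal: Result = Incident - 2*dot(Incident, Normal)*Normal.
// This is the mirror reflection when Normal is unit length.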
  5596. XMFINLINE XMVECTOR XMVector2Reflect
  5597. (
  5598. FXMVECTOR Incident,
  5599. FXMVECTOR Normal
  5600. )
  5601. {
  5602. #if defined(_XM_NO_INTRINSICS_)
  5603. XMVECTOR Result;
  5604. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  5605. Result = XMVector2Dot(Incident, Normal);
  5606. Result = XMVectorAdd(Result, Result);
  5607. Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
  5608. return Result;
  5609. #elif defined(_XM_SSE_INTRINSICS_)
  5610. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  5611. XMVECTOR Result = XMVector2Dot(Incident,Normal);
  5612. Result = _mm_add_ps(Result, Result);
  5613. Result = _mm_mul_ps(Result, Normal);
  5614. Result = _mm_sub_ps(Incident,Result);
  5615. return Result;
  5616. #else // _XM_VMX128_INTRINSICS_
  5617. #endif // _XM_VMX128_INTRINSICS_
  5618. }
  5619. //------------------------------------------------------------------------------
  5620. XMFINLINE XMVECTOR XMVector2Refract
  5621. (
  5622. FXMVECTOR Incident,
  5623. FXMVECTOR Normal,
  5624. FLOAT RefractionIndex
  5625. )
  5626. {
  5627. #if defined(_XM_NO_INTRINSICS_)
  5628. XMVECTOR Index;
  5629. Index = XMVectorReplicate(RefractionIndex);
  5630. return XMVector2RefractV(Incident, Normal, Index);
  5631. #elif defined(_XM_SSE_INTRINSICS_)
  5632. XMVECTOR Index = _mm_set_ps1(RefractionIndex);
  5633. return XMVector2RefractV(Incident,Normal,Index);
  5634. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  5635. #endif // _XM_VMX128_INTRINSICS_
  5636. }
  5637. //------------------------------------------------------------------------------
  5638. // Return the refraction of a 2D vector
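// When total internal reflection occurs (the radicand 1 - n^2*(1 - IdotN^2)
// goes non-positive), the affected components are returned as zero.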
  5639. XMFINLINE XMVECTOR XMVector2RefractV
  5640. (
  5641. FXMVECTOR Incident,
  5642. FXMVECTOR Normal,
  5643. FXMVECTOR RefractionIndex
  5644. )
  5645. {
  5646. #if defined(_XM_NO_INTRINSICS_)
  5647. float IDotN;
  5648. float RX,RY;
  5649. XMVECTOR vResult;
  5650. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  5651. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  5652. IDotN = (Incident.x*Normal.x)+(Incident.y*Normal.y);
  5653. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  5654. RY = 1.0f-(IDotN*IDotN);
  5655. RX = 1.0f-(RY*RefractionIndex.x*RefractionIndex.x);
  5656. RY = 1.0f-(RY*RefractionIndex.y*RefractionIndex.y);
  5657. if (RX>=0.0f) {
  5658. RX = (RefractionIndex.x*Incident.x)-(Normal.x*((RefractionIndex.x*IDotN)+sqrtf(RX)));
  5659. } else {
  5660. RX = 0.0f;
  5661. }
  5662. if (RY>=0.0f) {
  5663. RY = (RefractionIndex.y*Incident.y)-(Normal.y*((RefractionIndex.y*IDotN)+sqrtf(RY)));
  5664. } else {
  5665. RY = 0.0f;
  5666. }
  5667. vResult.x = RX;
  5668. vResult.y = RY;
  5669. vResult.z = 0.0f;
  5670. vResult.w = 0.0f;
  5671. return vResult;
  5672. #elif defined(_XM_SSE_INTRINSICS_)
  5673. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  5674. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  5675. // Get the 2D Dot product of Incident-Normal
  5676. XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
  5677. XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
  5678. IDotN = _mm_add_ss(IDotN,vTemp);
  5679. IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
  5680. // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  5681. vTemp = _mm_mul_ps(IDotN,IDotN);
  5682. vTemp = _mm_sub_ps(g_XMOne,vTemp);
  5683. vTemp = _mm_mul_ps(vTemp,RefractionIndex);
  5684. vTemp = _mm_mul_ps(vTemp,RefractionIndex);
  5685. vTemp = _mm_sub_ps(g_XMOne,vTemp);
  5686. // If any term is <= 0, sqrt() would produce NaN; punt those entries to zero
  5687. XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
  5688. // R = RefractionIndex * IDotN + sqrt(R)
  5689. vTemp = _mm_sqrt_ps(vTemp);
  5690. XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
  5691. vTemp = _mm_add_ps(vTemp,vResult);
  5692. // Result = RefractionIndex * Incident - Normal * R
  5693. vResult = _mm_mul_ps(RefractionIndex,Incident);
  5694. vTemp = _mm_mul_ps(vTemp,Normal);
  5695. vResult = _mm_sub_ps(vResult,vTemp);
  5696. vResult = _mm_and_ps(vResult,vMask);
  5697. return vResult;
  5698. #else // _XM_VMX128_INTRINSICS_
  5699. #endif // _XM_VMX128_INTRINSICS_
  5700. }
  5701. //------------------------------------------------------------------------------
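// Return a vector perpendicular to V: (-V.y, V.x), i.e. V rotated 90
// degrees counter-clockwise.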
  5702. XMFINLINE XMVECTOR XMVector2Orthogonal
  5703. (
  5704. FXMVECTOR V
  5705. )
  5706. {
  5707. #if defined(_XM_NO_INTRINSICS_)
  5708. XMVECTOR Result;
  5709. Result.v[0] = -V.v[1];
  5710. Result.v[1] = V.v[0];
  5711. return Result;
  5712. #elif defined(_XM_SSE_INTRINSICS_)
  5713. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
  5714. vResult = _mm_mul_ps(vResult,g_XMNegateX);
  5715. return vResult;
  5716. #else // _XM_VMX128_INTRINSICS_
  5717. #endif // _XM_VMX128_INTRINSICS_
  5718. }
  5719. //------------------------------------------------------------------------------
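// Estimate the radian angle between two unit-length 2D vectors as
// acos(dot(N1, N2)), with the dot product clamped to [-1, 1] first.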
  5720. XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
  5721. (
  5722. FXMVECTOR N1,
  5723. FXMVECTOR N2
  5724. )
  5725. {
  5726. #if defined(_XM_NO_INTRINSICS_)
  5727. XMVECTOR NegativeOne;
  5728. XMVECTOR One;
  5729. XMVECTOR Result;
  5730. Result = XMVector2Dot(N1, N2);
  5731. NegativeOne = XMVectorSplatConstant(-1, 0);
  5732. One = XMVectorSplatOne();
  5733. Result = XMVectorClamp(Result, NegativeOne, One);
  5734. Result = XMVectorACosEst(Result);
  5735. return Result;
  5736. #elif defined(_XM_SSE_INTRINSICS_)
  5737. XMVECTOR vResult = XMVector2Dot(N1,N2);
  5738. // Clamp to -1.0f to 1.0f
  5739. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
  5740. vResult = _mm_min_ps(vResult,g_XMOne);
  5741. vResult = XMVectorACosEst(vResult);
  5742. return vResult;
  5743. #else // _XM_VMX128_INTRINSICS_
  5744. #endif // _XM_VMX128_INTRINSICS_
  5745. }
  5746. //------------------------------------------------------------------------------
  5747. XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
  5748. (
  5749. FXMVECTOR N1,
  5750. FXMVECTOR N2
  5751. )
  5752. {
  5753. #if defined(_XM_NO_INTRINSICS_)
  5754. XMVECTOR NegativeOne;
  5755. XMVECTOR One;
  5756. XMVECTOR Result;
  5757. Result = XMVector2Dot(N1, N2);
  5758. NegativeOne = XMVectorSplatConstant(-1, 0);
  5759. One = XMVectorSplatOne();
  5760. Result = XMVectorClamp(Result, NegativeOne, One);
  5761. Result = XMVectorACos(Result);
  5762. return Result;
  5763. #elif defined(_XM_SSE_INTRINSICS_)
  5764. XMVECTOR vResult = XMVector2Dot(N1,N2);
  5765. // Clamp to -1.0f to 1.0f
  5766. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
  5767. vResult = _mm_min_ps(vResult,g_XMOne);
  5768. vResult = XMVectorACos(vResult);
  5769. return vResult;
  5770. #else // _XM_VMX128_INTRINSICS_
  5771. #endif // _XM_VMX128_INTRINSICS_
  5772. }
  5773. //------------------------------------------------------------------------------
  5774. XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
  5775. (
  5776. FXMVECTOR V1,
  5777. FXMVECTOR V2
  5778. )
  5779. {
  5780. #if defined(_XM_NO_INTRINSICS_)
  5781. XMVECTOR L1;
  5782. XMVECTOR L2;
  5783. XMVECTOR Dot;
  5784. XMVECTOR CosAngle;
  5785. XMVECTOR NegativeOne;
  5786. XMVECTOR One;
  5787. XMVECTOR Result;
  5788. L1 = XMVector2ReciprocalLength(V1);
  5789. L2 = XMVector2ReciprocalLength(V2);
  5790. Dot = XMVector2Dot(V1, V2);
  5791. L1 = XMVectorMultiply(L1, L2);
  5792. CosAngle = XMVectorMultiply(Dot, L1);
  5793. NegativeOne = XMVectorSplatConstant(-1, 0);
  5794. One = XMVectorSplatOne();
  5795. CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
  5796. Result = XMVectorACos(CosAngle);
  5797. return Result;
  5798. #elif defined(_XM_SSE_INTRINSICS_)
  5799. XMVECTOR L1;
  5800. XMVECTOR L2;
  5801. XMVECTOR Dot;
  5802. XMVECTOR CosAngle;
  5803. XMVECTOR Result;
  5804. L1 = XMVector2ReciprocalLength(V1);
  5805. L2 = XMVector2ReciprocalLength(V2);
  5806. Dot = XMVector2Dot(V1, V2);
  5807. L1 = _mm_mul_ps(L1, L2);
  5808. CosAngle = _mm_mul_ps(Dot, L1);
  5809. CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne,g_XMOne);
  5810. Result = XMVectorACos(CosAngle);
  5811. return Result;
  5812. #else // _XM_VMX128_INTRINSICS_
  5813. #endif // _XM_VMX128_INTRINSICS_
  5814. }
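//------------------------------------------------------------------------------
// Usage sketch (hypothetical; not in the original library): the angle between
// the +X and +Y axes is pi/2 radians, replicated into every component of the
// returned vector.
XMFINLINE FLOAT XMVector2AngleBetweenVectorsExample()
{
XMVECTOR vA = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
XMVECTOR vB = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
XMVECTOR vAngle = XMVector2AngleBetweenVectors(vA, vB);
return XMVectorGetX(vAngle); // ~XM_PIDIV2
}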
  5815. //------------------------------------------------------------------------------
  5816. XMFINLINE XMVECTOR XMVector2LinePointDistance
  5817. (
  5818. FXMVECTOR LinePoint1,
  5819. FXMVECTOR LinePoint2,
  5820. FXMVECTOR Point
  5821. )
  5822. {
  5823. #if defined(_XM_NO_INTRINSICS_)
  5824. XMVECTOR PointVector;
  5825. XMVECTOR LineVector;
  5826. XMVECTOR ReciprocalLengthSq;
  5827. XMVECTOR PointProjectionScale;
  5828. XMVECTOR DistanceVector;
  5829. XMVECTOR Result;
  5830. // Given a vector PointVector from LinePoint1 to Point and a vector
  5831. // LineVector from LinePoint1 to LinePoint2, the scaled distance
  5832. // PointProjectionScale from LinePoint1 to the perpendicular projection
  5833. // of PointVector onto the line is defined as:
  5834. //
  5835. // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
  5836. PointVector = XMVectorSubtract(Point, LinePoint1);
  5837. LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
  5838. ReciprocalLengthSq = XMVector2LengthSq(LineVector);
  5839. ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
  5840. PointProjectionScale = XMVector2Dot(PointVector, LineVector);
  5841. PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
  5842. DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
  5843. DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
  5844. Result = XMVector2Length(DistanceVector);
  5845. return Result;
  5846. #elif defined(_XM_SSE_INTRINSICS_)
  5847. XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
  5848. XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
5849. XMVECTOR LengthSq = XMVector2LengthSq(LineVector);
5850. XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
5851. vResult = _mm_div_ps(vResult,LengthSq);
  5852. vResult = _mm_mul_ps(vResult,LineVector);
  5853. vResult = _mm_sub_ps(PointVector,vResult);
  5854. vResult = XMVector2Length(vResult);
  5855. return vResult;
  5856. #else // _XM_VMX128_INTRINSICS_
  5857. #endif // _XM_VMX128_INTRINSICS_
  5858. }
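//------------------------------------------------------------------------------
// Sketch (hypothetical helper, assuming only functions defined in this
// header): the same projection described in the comments above, but returning
// the closest point on the infinite line instead of the distance to it.
XMFINLINE XMVECTOR XMVector2ClosestPointOnLineExample
(
FXMVECTOR LinePoint1,
FXMVECTOR LinePoint2,
FXMVECTOR Point
)
{
XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1);
XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
// Scale = dot(PointVector, LineVector) / LengthSq(LineVector)
XMVECTOR Scale = XMVector2Dot(PointVector, LineVector);
Scale = XMVectorMultiply(Scale, XMVectorReciprocal(XMVector2LengthSq(LineVector)));
// ClosestPoint = LinePoint1 + LineVector * Scale
return XMVectorMultiplyAdd(LineVector, Scale, LinePoint1);
}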
  5859. //------------------------------------------------------------------------------
  5860. XMFINLINE XMVECTOR XMVector2IntersectLine
  5861. (
  5862. FXMVECTOR Line1Point1,
  5863. FXMVECTOR Line1Point2,
  5864. FXMVECTOR Line2Point1,
  5865. CXMVECTOR Line2Point2
  5866. )
  5867. {
  5868. #if defined(_XM_NO_INTRINSICS_)
  5869. XMVECTOR V1;
  5870. XMVECTOR V2;
  5871. XMVECTOR V3;
  5872. XMVECTOR C1;
  5873. XMVECTOR C2;
  5874. XMVECTOR Result;
  5875. CONST XMVECTOR Zero = XMVectorZero();
  5876. V1 = XMVectorSubtract(Line1Point2, Line1Point1);
  5877. V2 = XMVectorSubtract(Line2Point2, Line2Point1);
  5878. V3 = XMVectorSubtract(Line1Point1, Line2Point1);
  5879. C1 = XMVector2Cross(V1, V2);
  5880. C2 = XMVector2Cross(V2, V3);
  5881. if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
  5882. {
  5883. if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
  5884. {
  5885. // Coincident
  5886. Result = g_XMInfinity.v;
  5887. }
  5888. else
  5889. {
  5890. // Parallel
  5891. Result = g_XMQNaN.v;
  5892. }
  5893. }
  5894. else
  5895. {
  5896. // Intersection point = Line1Point1 + V1 * (C2 / C1)
  5897. XMVECTOR Scale;
  5898. Scale = XMVectorReciprocal(C1);
  5899. Scale = XMVectorMultiply(C2, Scale);
  5900. Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
  5901. }
  5902. return Result;
  5903. #elif defined(_XM_SSE_INTRINSICS_)
  5904. XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
  5905. XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
  5906. XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
  5907. // Generate the cross products
  5908. XMVECTOR C1 = XMVector2Cross(V1, V2);
  5909. XMVECTOR C2 = XMVector2Cross(V2, V3);
5910. // If |C1| is greater than epsilon, use the calculated value
  5911. XMVECTOR vResultMask = _mm_setzero_ps();
  5912. vResultMask = _mm_sub_ps(vResultMask,C1);
  5913. vResultMask = _mm_max_ps(vResultMask,C1);
  5914. // 0xFFFFFFFF if the calculated value is to be used
  5915. vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
5916. // If |C1| is within epsilon of zero, which failure value applies: INFINITY or NAN?
  5917. XMVECTOR vFailMask = _mm_setzero_ps();
  5918. vFailMask = _mm_sub_ps(vFailMask,C2);
  5919. vFailMask = _mm_max_ps(vFailMask,C2);
  5920. vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
  5921. XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
  5922. vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
  5923. // vFail is NAN or INF
  5924. vFail = _mm_or_ps(vFail,vFailMask);
  5925. // Intersection point = Line1Point1 + V1 * (C2 / C1)
  5926. XMVECTOR vResult = _mm_div_ps(C2,C1);
  5927. vResult = _mm_mul_ps(vResult,V1);
  5928. vResult = _mm_add_ps(vResult,Line1Point1);
  5929. // Use result, or failure value
  5930. vResult = _mm_and_ps(vResult,vResultMask);
  5931. vResultMask = _mm_andnot_ps(vResultMask,vFail);
  5932. vResult = _mm_or_ps(vResult,vResultMask);
  5933. return vResult;
  5934. #else // _XM_VMX128_INTRINSICS_
  5935. #endif // _XM_VMX128_INTRINSICS_
  5936. }
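//------------------------------------------------------------------------------
// Sketch (hypothetical): distinguishing the three outcomes encoded by
// XMVector2IntersectLine. Coincident lines come back as infinity, parallel
// lines as QNaN, and everything else is the actual intersection point.
XMFINLINE BOOL XMVector2IntersectLineExample
(
XMVECTOR* pIntersection,
FXMVECTOR Line1Point1,
FXMVECTOR Line1Point2,
FXMVECTOR Line2Point1,
CXMVECTOR Line2Point2
)
{
XMVECTOR vResult = XMVector2IntersectLine(Line1Point1, Line1Point2, Line2Point1, Line2Point2);
if (XMVector2IsInfinite(vResult) || XMVector2IsNaN(vResult))
{
// Coincident or parallel; there is no unique intersection
return FALSE;
}
*pIntersection = vResult;
return TRUE;
}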
  5937. //------------------------------------------------------------------------------
  5938. XMFINLINE XMVECTOR XMVector2Transform
  5939. (
  5940. FXMVECTOR V,
  5941. CXMMATRIX M
  5942. )
  5943. {
  5944. #if defined(_XM_NO_INTRINSICS_)
  5945. XMVECTOR X;
  5946. XMVECTOR Y;
  5947. XMVECTOR Result;
  5948. Y = XMVectorSplatY(V);
  5949. X = XMVectorSplatX(V);
  5950. Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
  5951. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  5952. return Result;
  5953. #elif defined(_XM_SSE_INTRINSICS_)
  5954. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  5955. vResult = _mm_mul_ps(vResult,M.r[0]);
  5956. XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  5957. vTemp = _mm_mul_ps(vTemp,M.r[1]);
  5958. vResult = _mm_add_ps(vResult,vTemp);
  5959. vResult = _mm_add_ps(vResult,M.r[3]);
  5960. return vResult;
  5961. #else // _XM_VMX128_INTRINSICS_
  5962. #endif // _XM_VMX128_INTRINSICS_
  5963. }
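//------------------------------------------------------------------------------
// Worked sketch (hypothetical; XMMatrixTranslation comes from the companion
// matrix header): XMVector2Transform treats the input as (x, y, 0, 1), so the
// translation row M.r[3] participates in the result.
XMFINLINE XMVECTOR XMVector2TransformExample()
{
XMMATRIX M = XMMatrixTranslation(10.0f, 20.0f, 0.0f);
XMVECTOR vPoint = XMVectorSet(1.0f, 2.0f, 0.0f, 0.0f);
// Expected result: (11, 22, 0, 1)
return XMVector2Transform(vPoint, M);
}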
  5964. //------------------------------------------------------------------------------
  5965. XMINLINE XMFLOAT4* XMVector2TransformStream
  5966. (
  5967. XMFLOAT4* pOutputStream,
  5968. UINT OutputStride,
  5969. CONST XMFLOAT2* pInputStream,
  5970. UINT InputStride,
  5971. UINT VectorCount,
  5972. CXMMATRIX M
  5973. )
  5974. {
  5975. #if defined(_XM_NO_INTRINSICS_)
  5976. XMVECTOR V;
  5977. XMVECTOR X;
  5978. XMVECTOR Y;
  5979. XMVECTOR Result;
  5980. UINT i;
  5981. BYTE* pInputVector = (BYTE*)pInputStream;
  5982. BYTE* pOutputVector = (BYTE*)pOutputStream;
  5983. XMASSERT(pOutputStream);
  5984. XMASSERT(pInputStream);
  5985. for (i = 0; i < VectorCount; i++)
  5986. {
  5987. V = XMLoadFloat2((XMFLOAT2*)pInputVector);
  5988. Y = XMVectorSplatY(V);
  5989. X = XMVectorSplatX(V);
  5990. // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
  5991. // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
  5992. Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
  5993. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  5994. XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
  5995. pInputVector += InputStride;
  5996. pOutputVector += OutputStride;
  5997. }
  5998. return pOutputStream;
  5999. #elif defined(_XM_SSE_INTRINSICS_)
  6000. XMASSERT(pOutputStream);
  6001. XMASSERT(pInputStream);
  6002. UINT i;
  6003. const BYTE* pInputVector = (const BYTE*)pInputStream;
  6004. BYTE* pOutputVector = (BYTE*)pOutputStream;
  6005. for (i = 0; i < VectorCount; i++)
  6006. {
  6007. XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
  6008. XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
  6009. vResult = _mm_mul_ps(vResult,M.r[1]);
  6010. vResult = _mm_add_ps(vResult,M.r[3]);
  6011. X = _mm_mul_ps(X,M.r[0]);
  6012. vResult = _mm_add_ps(vResult,X);
  6013. _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
  6014. pInputVector += InputStride;
  6015. pOutputVector += OutputStride;
  6016. }
  6017. return pOutputStream;
  6018. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  6019. #endif // _XM_VMX128_INTRINSICS_
  6020. }
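//------------------------------------------------------------------------------
// Usage sketch (hypothetical): transforming a tightly packed XMFLOAT2 array
// into a packed XMFLOAT4 array. For tightly packed data the stride is the
// element size; a larger stride lets the vectors live inside interleaved
// vertex structures.
XMFINLINE VOID XMVector2TransformStreamExample
(
XMFLOAT4* pOut,
CONST XMFLOAT2* pIn,
UINT Count,
CXMMATRIX M
)
{
XMVector2TransformStream(pOut, sizeof(XMFLOAT4), pIn, sizeof(XMFLOAT2), Count, M);
}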
  6021. //------------------------------------------------------------------------------
  6022. XMINLINE XMFLOAT4* XMVector2TransformStreamNC
  6023. (
  6024. XMFLOAT4* pOutputStream,
  6025. UINT OutputStride,
  6026. CONST XMFLOAT2* pInputStream,
  6027. UINT InputStride,
  6028. UINT VectorCount,
  6029. CXMMATRIX M
  6030. )
  6031. {
  6032. #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
  6033. return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
  6034. #else // _XM_VMX128_INTRINSICS_
  6035. #endif // _XM_VMX128_INTRINSICS_
  6036. }
  6037. //------------------------------------------------------------------------------
  6038. XMFINLINE XMVECTOR XMVector2TransformCoord
  6039. (
  6040. FXMVECTOR V,
  6041. CXMMATRIX M
  6042. )
  6043. {
  6044. #if defined(_XM_NO_INTRINSICS_)
  6045. XMVECTOR X;
  6046. XMVECTOR Y;
  6047. XMVECTOR InverseW;
  6048. XMVECTOR Result;
  6049. Y = XMVectorSplatY(V);
  6050. X = XMVectorSplatX(V);
  6051. Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
  6052. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  6053. InverseW = XMVectorSplatW(Result);
  6054. InverseW = XMVectorReciprocal(InverseW);
  6055. Result = XMVectorMultiply(Result, InverseW);
  6056. return Result;
  6057. #elif defined(_XM_SSE_INTRINSICS_)
  6058. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  6059. vResult = _mm_mul_ps(vResult,M.r[0]);
  6060. XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  6061. vTemp = _mm_mul_ps(vTemp,M.r[1]);
  6062. vResult = _mm_add_ps(vResult,vTemp);
  6063. vResult = _mm_add_ps(vResult,M.r[3]);
  6064. vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
  6065. vResult = _mm_div_ps(vResult,vTemp);
  6066. return vResult;
  6067. #else // _XM_VMX128_INTRINSICS_
  6068. #endif // _XM_VMX128_INTRINSICS_
  6069. }
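//------------------------------------------------------------------------------
// Sketch (hypothetical): XMVector2TransformCoord is equivalent to
// XMVector2Transform followed by a homogeneous divide, which matters when M
// contains a projection. (The SSE path above divides directly; the
// reciprocal-multiply here mirrors the scalar path.)
XMFINLINE XMVECTOR XMVector2TransformCoordExample
(
FXMVECTOR V,
CXMMATRIX M
)
{
XMVECTOR vHomogeneous = XMVector2Transform(V, M);
XMVECTOR vW = XMVectorSplatW(vHomogeneous);
// Same divide the Coord variant performs internally
return XMVectorMultiply(vHomogeneous, XMVectorReciprocal(vW));
}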
  6070. //------------------------------------------------------------------------------
  6071. XMINLINE XMFLOAT2* XMVector2TransformCoordStream
  6072. (
  6073. XMFLOAT2* pOutputStream,
  6074. UINT OutputStride,
  6075. CONST XMFLOAT2* pInputStream,
  6076. UINT InputStride,
  6077. UINT VectorCount,
  6078. CXMMATRIX M
  6079. )
  6080. {
  6081. #if defined(_XM_NO_INTRINSICS_)
  6082. XMVECTOR V;
  6083. XMVECTOR X;
  6084. XMVECTOR Y;
  6085. XMVECTOR InverseW;
  6086. XMVECTOR Result;
  6087. UINT i;
  6088. BYTE* pInputVector = (BYTE*)pInputStream;
  6089. BYTE* pOutputVector = (BYTE*)pOutputStream;
  6090. XMASSERT(pOutputStream);
  6091. XMASSERT(pInputStream);
  6092. for (i = 0; i < VectorCount; i++)
  6093. {
  6094. V = XMLoadFloat2((XMFLOAT2*)pInputVector);
  6095. Y = XMVectorSplatY(V);
  6096. X = XMVectorSplatX(V);
  6097. // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
  6098. // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
  6099. Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
  6100. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  6101. InverseW = XMVectorSplatW(Result);
  6102. InverseW = XMVectorReciprocal(InverseW);
  6103. Result = XMVectorMultiply(Result, InverseW);
  6104. XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
  6105. pInputVector += InputStride;
  6106. pOutputVector += OutputStride;
  6107. }
  6108. return pOutputStream;
  6109. #elif defined(_XM_SSE_INTRINSICS_)
  6110. XMASSERT(pOutputStream);
  6111. XMASSERT(pInputStream);
  6112. UINT i;
6113. const BYTE *pInputVector = (const BYTE*)pInputStream;
  6114. BYTE *pOutputVector = (BYTE*)pOutputStream;
  6115. for (i = 0; i < VectorCount; i++)
  6116. {
  6117. XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
  6118. XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
  6119. vResult = _mm_mul_ps(vResult,M.r[1]);
  6120. vResult = _mm_add_ps(vResult,M.r[3]);
  6121. X = _mm_mul_ps(X,M.r[0]);
  6122. vResult = _mm_add_ps(vResult,X);
  6123. X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
  6124. vResult = _mm_div_ps(vResult,X);
  6125. _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
  6126. pInputVector += InputStride;
  6127. pOutputVector += OutputStride;
  6128. }
  6129. return pOutputStream;
  6130. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  6131. #endif // _XM_VMX128_INTRINSICS_
  6132. }
  6133. //------------------------------------------------------------------------------
  6134. XMFINLINE XMVECTOR XMVector2TransformNormal
  6135. (
  6136. FXMVECTOR V,
  6137. CXMMATRIX M
  6138. )
  6139. {
  6140. #if defined(_XM_NO_INTRINSICS_)
  6141. XMVECTOR X;
  6142. XMVECTOR Y;
  6143. XMVECTOR Result;
  6144. Y = XMVectorSplatY(V);
  6145. X = XMVectorSplatX(V);
  6146. Result = XMVectorMultiply(Y, M.r[1]);
  6147. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  6148. return Result;
  6149. #elif defined(_XM_SSE_INTRINSICS_)
  6150. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  6151. vResult = _mm_mul_ps(vResult,M.r[0]);
  6152. XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  6153. vTemp = _mm_mul_ps(vTemp,M.r[1]);
  6154. vResult = _mm_add_ps(vResult,vTemp);
  6155. return vResult;
  6156. #else // _XM_VMX128_INTRINSICS_
  6157. #endif // _XM_VMX128_INTRINSICS_
  6158. }
  6159. //------------------------------------------------------------------------------
  6160. XMINLINE XMFLOAT2* XMVector2TransformNormalStream
  6161. (
  6162. XMFLOAT2* pOutputStream,
  6163. UINT OutputStride,
  6164. CONST XMFLOAT2* pInputStream,
  6165. UINT InputStride,
  6166. UINT VectorCount,
  6167. CXMMATRIX M
  6168. )
  6169. {
  6170. #if defined(_XM_NO_INTRINSICS_)
  6171. XMVECTOR V;
  6172. XMVECTOR X;
  6173. XMVECTOR Y;
  6174. XMVECTOR Result;
  6175. UINT i;
  6176. BYTE* pInputVector = (BYTE*)pInputStream;
  6177. BYTE* pOutputVector = (BYTE*)pOutputStream;
  6178. XMASSERT(pOutputStream);
  6179. XMASSERT(pInputStream);
  6180. for (i = 0; i < VectorCount; i++)
  6181. {
  6182. V = XMLoadFloat2((XMFLOAT2*)pInputVector);
  6183. Y = XMVectorSplatY(V);
  6184. X = XMVectorSplatX(V);
  6185. // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
  6186. // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
  6187. Result = XMVectorMultiply(Y, M.r[1]);
  6188. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  6189. XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
  6190. pInputVector += InputStride;
  6191. pOutputVector += OutputStride;
  6192. }
  6193. return pOutputStream;
  6194. #elif defined(_XM_SSE_INTRINSICS_)
  6195. XMASSERT(pOutputStream);
  6196. XMASSERT(pInputStream);
  6197. UINT i;
6198. const BYTE* pInputVector = (const BYTE*)pInputStream;
  6199. BYTE *pOutputVector = (BYTE*)pOutputStream;
  6200. for (i = 0; i < VectorCount; i++)
  6201. {
  6202. XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
  6203. XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
  6204. vResult = _mm_mul_ps(vResult,M.r[1]);
  6205. X = _mm_mul_ps(X,M.r[0]);
  6206. vResult = _mm_add_ps(vResult,X);
  6207. _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);
  6208. pInputVector += InputStride;
  6209. pOutputVector += OutputStride;
  6210. }
  6211. return pOutputStream;
  6212. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  6213. #endif // _XM_VMX128_INTRINSICS_
  6214. }
  6215. /****************************************************************************
  6216. *
  6217. * 3D Vector
  6218. *
  6219. ****************************************************************************/
  6220. //------------------------------------------------------------------------------
  6221. // Comparison operations
  6222. //------------------------------------------------------------------------------
  6223. //------------------------------------------------------------------------------
  6224. XMFINLINE BOOL XMVector3Equal
  6225. (
  6226. FXMVECTOR V1,
  6227. FXMVECTOR V2
  6228. )
  6229. {
  6230. #if defined(_XM_NO_INTRINSICS_)
  6231. return (((V1.x == V2.x) && (V1.y == V2.y) && (V1.z == V2.z)) != 0);
  6232. #elif defined(_XM_SSE_INTRINSICS_)
  6233. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  6234. return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
  6235. #else // _XM_VMX128_INTRINSICS_
  6236. return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
  6237. #endif
  6238. }
  6239. //------------------------------------------------------------------------------
  6240. XMFINLINE UINT XMVector3EqualR
  6241. (
  6242. FXMVECTOR V1,
  6243. FXMVECTOR V2
  6244. )
  6245. {
  6246. #if defined(_XM_NO_INTRINSICS_)
  6247. UINT CR = 0;
  6248. if ((V1.x == V2.x) &&
  6249. (V1.y == V2.y) &&
  6250. (V1.z == V2.z))
  6251. {
  6252. CR = XM_CRMASK_CR6TRUE;
  6253. }
  6254. else if ((V1.x != V2.x) &&
  6255. (V1.y != V2.y) &&
  6256. (V1.z != V2.z))
  6257. {
  6258. CR = XM_CRMASK_CR6FALSE;
  6259. }
  6260. return CR;
  6261. #elif defined(_XM_SSE_INTRINSICS_)
  6262. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  6263. int iTest = _mm_movemask_ps(vTemp)&7;
  6264. UINT CR = 0;
  6265. if (iTest==7)
  6266. {
  6267. CR = XM_CRMASK_CR6TRUE;
  6268. }
  6269. else if (!iTest)
  6270. {
  6271. CR = XM_CRMASK_CR6FALSE;
  6272. }
  6273. return CR;
  6274. #else // _XM_VMX128_INTRINSICS_
  6275. #endif // _XM_VMX128_INTRINSICS_
  6276. }
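//------------------------------------------------------------------------------
// Sketch (hypothetical): the R variants return a comparison record rather
// than a BOOL, so one comparison can answer several questions through the
// CR6 helper macros.
XMFINLINE VOID XMVector3EqualRExample
(
BOOL* pAllEqual,
BOOL* pAllDifferent,
FXMVECTOR V1,
FXMVECTOR V2
)
{
UINT CR = XMVector3EqualR(V1, V2);
*pAllEqual = XMComparisonAllTrue(CR); // all of x, y and z equal
*pAllDifferent = XMComparisonAllFalse(CR); // none of x, y and z equal
}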
  6277. //------------------------------------------------------------------------------
  6278. XMFINLINE BOOL XMVector3EqualInt
  6279. (
  6280. FXMVECTOR V1,
  6281. FXMVECTOR V2
  6282. )
  6283. {
  6284. #if defined(_XM_NO_INTRINSICS_)
  6285. return (((V1.u[0] == V2.u[0]) && (V1.u[1] == V2.u[1]) && (V1.u[2] == V2.u[2])) != 0);
  6286. #elif defined(_XM_SSE_INTRINSICS_)
  6287. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  6288. return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
  6289. #else // _XM_VMX128_INTRINSICS_
  6290. return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
  6291. #endif
  6292. }
  6293. //------------------------------------------------------------------------------
  6294. XMFINLINE UINT XMVector3EqualIntR
  6295. (
  6296. FXMVECTOR V1,
  6297. FXMVECTOR V2
  6298. )
  6299. {
  6300. #if defined(_XM_NO_INTRINSICS_)
  6301. UINT CR = 0;
  6302. if ((V1.u[0] == V2.u[0]) &&
  6303. (V1.u[1] == V2.u[1]) &&
  6304. (V1.u[2] == V2.u[2]))
  6305. {
  6306. CR = XM_CRMASK_CR6TRUE;
  6307. }
  6308. else if ((V1.u[0] != V2.u[0]) &&
  6309. (V1.u[1] != V2.u[1]) &&
  6310. (V1.u[2] != V2.u[2]))
  6311. {
  6312. CR = XM_CRMASK_CR6FALSE;
  6313. }
  6314. return CR;
  6315. #elif defined(_XM_SSE_INTRINSICS_)
  6316. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  6317. int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
  6318. UINT CR = 0;
  6319. if (iTemp==7)
  6320. {
  6321. CR = XM_CRMASK_CR6TRUE;
  6322. }
  6323. else if (!iTemp)
  6324. {
  6325. CR = XM_CRMASK_CR6FALSE;
  6326. }
  6327. return CR;
  6328. #else // _XM_VMX128_INTRINSICS_
  6329. #endif // _XM_VMX128_INTRINSICS_
  6330. }
  6331. //------------------------------------------------------------------------------
  6332. XMFINLINE BOOL XMVector3NearEqual
  6333. (
  6334. FXMVECTOR V1,
  6335. FXMVECTOR V2,
  6336. FXMVECTOR Epsilon
  6337. )
  6338. {
  6339. #if defined(_XM_NO_INTRINSICS_)
  6340. FLOAT dx, dy, dz;
  6341. dx = fabsf(V1.x-V2.x);
  6342. dy = fabsf(V1.y-V2.y);
  6343. dz = fabsf(V1.z-V2.z);
  6344. return (((dx <= Epsilon.x) &&
  6345. (dy <= Epsilon.y) &&
  6346. (dz <= Epsilon.z)) != 0);
  6347. #elif defined(_XM_SSE_INTRINSICS_)
  6348. // Get the difference
  6349. XMVECTOR vDelta = _mm_sub_ps(V1,V2);
  6350. // Get the absolute value of the difference
  6351. XMVECTOR vTemp = _mm_setzero_ps();
  6352. vTemp = _mm_sub_ps(vTemp,vDelta);
  6353. vTemp = _mm_max_ps(vTemp,vDelta);
  6354. vTemp = _mm_cmple_ps(vTemp,Epsilon);
  6355. // w is don't care
  6356. return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
  6357. #else // _XM_VMX128_INTRINSICS_
  6358. #endif // _XM_VMX128_INTRINSICS_
  6359. }
  6360. //------------------------------------------------------------------------------
  6361. XMFINLINE BOOL XMVector3NotEqual
  6362. (
  6363. FXMVECTOR V1,
  6364. FXMVECTOR V2
  6365. )
  6366. {
  6367. #if defined(_XM_NO_INTRINSICS_)
  6368. return (((V1.x != V2.x) || (V1.y != V2.y) || (V1.z != V2.z)) != 0);
  6369. #elif defined(_XM_SSE_INTRINSICS_)
  6370. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  6371. return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
  6372. #else // _XM_VMX128_INTRINSICS_
  6373. return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
  6374. #endif
  6375. }
  6376. //------------------------------------------------------------------------------
  6377. XMFINLINE BOOL XMVector3NotEqualInt
  6378. (
  6379. FXMVECTOR V1,
  6380. FXMVECTOR V2
  6381. )
  6382. {
  6383. #if defined(_XM_NO_INTRINSICS_)
  6384. return (((V1.u[0] != V2.u[0]) || (V1.u[1] != V2.u[1]) || (V1.u[2] != V2.u[2])) != 0);
  6385. #elif defined(_XM_SSE_INTRINSICS_)
  6386. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  6387. return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
  6388. #else // _XM_VMX128_INTRINSICS_
  6389. return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
  6390. #endif
  6391. }
  6392. //------------------------------------------------------------------------------
  6393. XMFINLINE BOOL XMVector3Greater
  6394. (
  6395. FXMVECTOR V1,
  6396. FXMVECTOR V2
  6397. )
  6398. {
  6399. #if defined(_XM_NO_INTRINSICS_)
  6400. return (((V1.x > V2.x) && (V1.y > V2.y) && (V1.z > V2.z)) != 0);
  6401. #elif defined(_XM_SSE_INTRINSICS_)
  6402. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  6403. return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
  6404. #else // _XM_VMX128_INTRINSICS_
  6405. return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
  6406. #endif
  6407. }
  6408. //------------------------------------------------------------------------------
  6409. XMFINLINE UINT XMVector3GreaterR
  6410. (
  6411. FXMVECTOR V1,
  6412. FXMVECTOR V2
  6413. )
  6414. {
  6415. #if defined(_XM_NO_INTRINSICS_)
  6416. UINT CR = 0;
  6417. if ((V1.x > V2.x) &&
  6418. (V1.y > V2.y) &&
  6419. (V1.z > V2.z))
  6420. {
  6421. CR = XM_CRMASK_CR6TRUE;
  6422. }
  6423. else if ((V1.x <= V2.x) &&
  6424. (V1.y <= V2.y) &&
  6425. (V1.z <= V2.z))
  6426. {
  6427. CR = XM_CRMASK_CR6FALSE;
  6428. }
  6429. return CR;
  6430. #elif defined(_XM_SSE_INTRINSICS_)
  6431. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  6432. UINT CR = 0;
  6433. int iTest = _mm_movemask_ps(vTemp)&7;
  6434. if (iTest==7)
  6435. {
  6436. CR = XM_CRMASK_CR6TRUE;
  6437. }
  6438. else if (!iTest)
  6439. {
  6440. CR = XM_CRMASK_CR6FALSE;
  6441. }
  6442. return CR;
  6443. #else // _XM_VMX128_INTRINSICS_
  6444. #endif // _XM_VMX128_INTRINSICS_
  6445. }
  6446. //------------------------------------------------------------------------------
  6447. XMFINLINE BOOL XMVector3GreaterOrEqual
  6448. (
  6449. FXMVECTOR V1,
  6450. FXMVECTOR V2
  6451. )
  6452. {
  6453. #if defined(_XM_NO_INTRINSICS_)
  6454. return (((V1.x >= V2.x) && (V1.y >= V2.y) && (V1.z >= V2.z)) != 0);
  6455. #elif defined(_XM_SSE_INTRINSICS_)
  6456. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  6457. return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
  6458. #else // _XM_VMX128_INTRINSICS_
  6459. return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
  6460. #endif
  6461. }
  6462. //------------------------------------------------------------------------------
  6463. XMFINLINE UINT XMVector3GreaterOrEqualR
  6464. (
  6465. FXMVECTOR V1,
  6466. FXMVECTOR V2
  6467. )
  6468. {
  6469. #if defined(_XM_NO_INTRINSICS_)
  6470. UINT CR = 0;
  6471. if ((V1.x >= V2.x) &&
  6472. (V1.y >= V2.y) &&
  6473. (V1.z >= V2.z))
  6474. {
  6475. CR = XM_CRMASK_CR6TRUE;
  6476. }
  6477. else if ((V1.x < V2.x) &&
  6478. (V1.y < V2.y) &&
  6479. (V1.z < V2.z))
  6480. {
  6481. CR = XM_CRMASK_CR6FALSE;
  6482. }
  6483. return CR;
  6484. #elif defined(_XM_SSE_INTRINSICS_)
  6485. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  6486. UINT CR = 0;
  6487. int iTest = _mm_movemask_ps(vTemp)&7;
  6488. if (iTest==7)
  6489. {
  6490. CR = XM_CRMASK_CR6TRUE;
  6491. }
  6492. else if (!iTest)
  6493. {
  6494. CR = XM_CRMASK_CR6FALSE;
  6495. }
  6496. return CR;
  6497. #else // _XM_VMX128_INTRINSICS_
  6498. #endif // _XM_VMX128_INTRINSICS_
  6499. }
  6500. //------------------------------------------------------------------------------
  6501. XMFINLINE BOOL XMVector3Less
  6502. (
  6503. FXMVECTOR V1,
  6504. FXMVECTOR V2
  6505. )
  6506. {
  6507. #if defined(_XM_NO_INTRINSICS_)
  6508. return (((V1.x < V2.x) && (V1.y < V2.y) && (V1.z < V2.z)) != 0);
  6509. #elif defined(_XM_SSE_INTRINSICS_)
  6510. XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
  6511. return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
  6512. #else // _XM_VMX128_INTRINSICS_
  6513. return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
  6514. #endif
  6515. }
  6516. //------------------------------------------------------------------------------
  6517. XMFINLINE BOOL XMVector3LessOrEqual
  6518. (
  6519. FXMVECTOR V1,
  6520. FXMVECTOR V2
  6521. )
  6522. {
  6523. #if defined(_XM_NO_INTRINSICS_)
  6524. return (((V1.x <= V2.x) && (V1.y <= V2.y) && (V1.z <= V2.z)) != 0);
  6525. #elif defined(_XM_SSE_INTRINSICS_)
  6526. XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
  6527. return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
  6528. #else // _XM_VMX128_INTRINSICS_
  6529. return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
  6530. #endif
  6531. }
  6532. //------------------------------------------------------------------------------
  6533. XMFINLINE BOOL XMVector3InBounds
  6534. (
  6535. FXMVECTOR V,
  6536. FXMVECTOR Bounds
  6537. )
  6538. {
  6539. #if defined(_XM_NO_INTRINSICS_)
  6540. return (((V.x <= Bounds.x && V.x >= -Bounds.x) &&
  6541. (V.y <= Bounds.y && V.y >= -Bounds.y) &&
  6542. (V.z <= Bounds.z && V.z >= -Bounds.z)) != 0);
  6543. #elif defined(_XM_SSE_INTRINSICS_)
  6544. // Test if less than or equal
  6545. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  6546. // Negate the bounds
  6547. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  6548. // Test if greater or equal (Reversed)
  6549. vTemp2 = _mm_cmple_ps(vTemp2,V);
  6550. // Blend answers
  6551. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  6552. // x,y and z in bounds? (w is don't care)
  6553. return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
  6554. #else
  6555. return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
  6556. #endif
  6557. }
  6558. //------------------------------------------------------------------------------
  6559. XMFINLINE UINT XMVector3InBoundsR
  6560. (
  6561. FXMVECTOR V,
  6562. FXMVECTOR Bounds
  6563. )
  6564. {
  6565. #if defined(_XM_NO_INTRINSICS_)
  6566. UINT CR = 0;
  6567. if ((V.x <= Bounds.x && V.x >= -Bounds.x) &&
  6568. (V.y <= Bounds.y && V.y >= -Bounds.y) &&
  6569. (V.z <= Bounds.z && V.z >= -Bounds.z))
  6570. {
  6571. CR = XM_CRMASK_CR6BOUNDS;
  6572. }
  6573. return CR;
  6574. #elif defined(_XM_SSE_INTRINSICS_)
  6575. // Test if less than or equal
  6576. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  6577. // Negate the bounds
  6578. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  6579. // Test if greater or equal (Reversed)
  6580. vTemp2 = _mm_cmple_ps(vTemp2,V);
  6581. // Blend answers
  6582. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  6583. // x,y and z in bounds? (w is don't care)
  6584. return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
  6585. #else // _XM_VMX128_INTRINSICS_
  6586. #endif // _XM_VMX128_INTRINSICS_
  6587. }
  6588. //------------------------------------------------------------------------------
  6589. XMFINLINE BOOL XMVector3IsNaN
  6590. (
  6591. FXMVECTOR V
  6592. )
  6593. {
  6594. #if defined(_XM_NO_INTRINSICS_)
  6595. return (XMISNAN(V.x) ||
  6596. XMISNAN(V.y) ||
  6597. XMISNAN(V.z));
  6598. #elif defined(_XM_SSE_INTRINSICS_)
  6599. // Mask off the exponent
  6600. __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
  6601. // Mask off the mantissa
  6602. __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
  6603. // Are any of the exponents == 0x7F800000?
  6604. vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
6605. // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
  6606. vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
  6607. // Perform a not on the NaN test to be true on NON-zero mantissas
  6608. vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
  6609. // If x, y or z are NaN, the signs are true after the merge above
  6610. return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
  6611. #else // _XM_VMX128_INTRINSICS_
  6612. #endif // _XM_VMX128_INTRINSICS_
  6613. }
  6614. //------------------------------------------------------------------------------
  6615. XMFINLINE BOOL XMVector3IsInfinite
  6616. (
  6617. FXMVECTOR V
  6618. )
  6619. {
  6620. #if defined(_XM_NO_INTRINSICS_)
  6621. return (XMISINF(V.x) ||
  6622. XMISINF(V.y) ||
  6623. XMISINF(V.z));
  6624. #elif defined(_XM_SSE_INTRINSICS_)
  6625. // Mask off the sign bit
  6626. __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
  6627. // Compare to infinity
  6628. vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
  6629. // If x,y or z are infinity, the signs are true.
  6630. return ((_mm_movemask_ps(vTemp)&7) != 0);
  6631. #else // _XM_VMX128_INTRINSICS_
  6632. #endif // _XM_VMX128_INTRINSICS_
  6633. }
  6634. //------------------------------------------------------------------------------
  6635. // Computation operations
  6636. //------------------------------------------------------------------------------
  6637. //------------------------------------------------------------------------------
  6638. XMFINLINE XMVECTOR XMVector3Dot
  6639. (
  6640. FXMVECTOR V1,
  6641. FXMVECTOR V2
  6642. )
  6643. {
  6644. #if defined(_XM_NO_INTRINSICS_)
  6645. FLOAT fValue = V1.x * V2.x + V1.y * V2.y + V1.z * V2.z;
  6646. XMVECTOR vResult = {
  6647. fValue,
  6648. fValue,
  6649. fValue,
  6650. fValue
  6651. };
  6652. return vResult;
  6653. #elif defined(_XM_SSE_INTRINSICS_)
  6654. // Perform the dot product
  6655. XMVECTOR vDot = _mm_mul_ps(V1,V2);
  6656. // x=Dot.y, y=Dot.z
  6657. XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
  6658. // Result.x = x+y
  6659. vDot = _mm_add_ss(vDot,vTemp);
  6660. // x=Dot.z
  6661. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6662. // Result.x = (x+y)+z
  6663. vDot = _mm_add_ss(vDot,vTemp);
  6664. // Splat x
  6665. return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
  6666. #else // _XM_VMX128_INTRINSICS_
  6667. #endif // _XM_VMX128_INTRINSICS_
  6668. }
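//------------------------------------------------------------------------------
// Worked sketch (hypothetical): the 3D dot product ignores w and is splatted
// into all four lanes, so any component accessor yields the scalar result.
XMFINLINE FLOAT XMVector3DotExample()
{
XMVECTOR vA = XMVectorSet(1.0f, 2.0f, 3.0f, 99.0f);
XMVECTOR vB = XMVectorSet(4.0f, -5.0f, 6.0f, 99.0f);
// 1*4 + 2*(-5) + 3*6 = 12
return XMVectorGetX(XMVector3Dot(vA, vB));
}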
  6669. //------------------------------------------------------------------------------
  6670. XMFINLINE XMVECTOR XMVector3Cross
  6671. (
  6672. FXMVECTOR V1,
  6673. FXMVECTOR V2
  6674. )
  6675. {
  6676. #if defined(_XM_NO_INTRINSICS_)
  6677. XMVECTOR vResult = {
  6678. (V1.y * V2.z) - (V1.z * V2.y),
  6679. (V1.z * V2.x) - (V1.x * V2.z),
  6680. (V1.x * V2.y) - (V1.y * V2.x),
  6681. 0.0f
  6682. };
  6683. return vResult;
  6684. #elif defined(_XM_SSE_INTRINSICS_)
  6685. // y1,z1,x1,w1
  6686. XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
  6687. // z2,x2,y2,w2
  6688. XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
  6689. // Perform the left operation
  6690. XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
  6691. // z1,x1,y1,w1
  6692. vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
  6693. // y2,z2,x2,w2
  6694. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
  6695. // Perform the right operation
  6696. vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
6697. // Subtract the right from the left, and return the answer
  6698. vResult = _mm_sub_ps(vResult,vTemp1);
  6699. // Set w to zero
  6700. return _mm_and_ps(vResult,g_XMMask3);
  6701. #else // _XM_VMX128_INTRINSICS_
  6702. #endif // _XM_VMX128_INTRINSICS_
  6703. }
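//------------------------------------------------------------------------------
// Worked sketch (hypothetical): crossing the +X and +Y axes yields the +Z
// axis with w forced to zero, matching the shuffle-based formula above.
XMFINLINE XMVECTOR XMVector3CrossExample()
{
XMVECTOR vX = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
XMVECTOR vY = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
// Expected result: (0, 0, 1, 0)
return XMVector3Cross(vX, vY);
}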
  6704. //------------------------------------------------------------------------------
  6705. XMFINLINE XMVECTOR XMVector3LengthSq
  6706. (
  6707. FXMVECTOR V
  6708. )
  6709. {
  6710. return XMVector3Dot(V, V);
  6711. }
  6712. //------------------------------------------------------------------------------
  6713. XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
  6714. (
  6715. FXMVECTOR V
  6716. )
  6717. {
  6718. #if defined(_XM_NO_INTRINSICS_)
  6719. XMVECTOR Result;
  6720. Result = XMVector3LengthSq(V);
  6721. Result = XMVectorReciprocalSqrtEst(Result);
  6722. return Result;
  6723. #elif defined(_XM_SSE_INTRINSICS_)
  6724. // Perform the dot product on x,y and z
  6725. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  6726. // vTemp has z and y
  6727. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
  6728. // x+z, y
  6729. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6730. // y,y,y,y
  6731. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6732. // x+z+y,??,??,??
  6733. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6734. // Splat the length squared
  6735. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  6736. // Get the reciprocal
  6737. vLengthSq = _mm_rsqrt_ps(vLengthSq);
  6738. return vLengthSq;
  6739. #else // _XM_VMX128_INTRINSICS_
  6740. #endif // _XM_VMX128_INTRINSICS_
  6741. }
  6742. //------------------------------------------------------------------------------
  6743. XMFINLINE XMVECTOR XMVector3ReciprocalLength
  6744. (
  6745. FXMVECTOR V
  6746. )
  6747. {
  6748. #if defined(_XM_NO_INTRINSICS_)
  6749. XMVECTOR Result;
  6750. Result = XMVector3LengthSq(V);
  6751. Result = XMVectorReciprocalSqrt(Result);
  6752. return Result;
  6753. #elif defined(_XM_SSE_INTRINSICS_)
  6754. // Perform the dot product
  6755. XMVECTOR vDot = _mm_mul_ps(V,V);
  6756. // x=Dot.y, y=Dot.z
  6757. XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
  6758. // Result.x = x+y
  6759. vDot = _mm_add_ss(vDot,vTemp);
  6760. // x=Dot.z
  6761. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6762. // Result.x = (x+y)+z
  6763. vDot = _mm_add_ss(vDot,vTemp);
  6764. // Splat x
  6765. vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
6766. // Get the length
  6767. vDot = _mm_sqrt_ps(vDot);
  6768. // Get the reciprocal
  6769. vDot = _mm_div_ps(g_XMOne,vDot);
  6770. return vDot;
  6771. #else // _XM_VMX128_INTRINSICS_
  6772. #endif // _XM_VMX128_INTRINSICS_
  6773. }
  6774. //------------------------------------------------------------------------------
  6775. XMFINLINE XMVECTOR XMVector3LengthEst
  6776. (
  6777. FXMVECTOR V
  6778. )
  6779. {
  6780. #if defined(_XM_NO_INTRINSICS_)
  6781. XMVECTOR Result;
  6782. Result = XMVector3LengthSq(V);
  6783. Result = XMVectorSqrtEst(Result);
  6784. return Result;
  6785. #elif defined(_XM_SSE_INTRINSICS_)
  6786. // Perform the dot product on x,y and z
  6787. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  6788. // vTemp has z and y
  6789. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
  6790. // x+z, y
  6791. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6792. // y,y,y,y
  6793. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6794. // x+z+y,??,??,??
  6795. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6796. // Splat the length squared
  6797. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  6798. // Get the length
  6799. vLengthSq = _mm_sqrt_ps(vLengthSq);
  6800. return vLengthSq;
  6801. #else // _XM_VMX128_INTRINSICS_
  6802. #endif // _XM_VMX128_INTRINSICS_
  6803. }
  6804. //------------------------------------------------------------------------------
  6805. XMFINLINE XMVECTOR XMVector3Length
  6806. (
  6807. FXMVECTOR V
  6808. )
  6809. {
  6810. #if defined(_XM_NO_INTRINSICS_)
  6811. XMVECTOR Result;
  6812. Result = XMVector3LengthSq(V);
  6813. Result = XMVectorSqrt(Result);
  6814. return Result;
  6815. #elif defined(_XM_SSE_INTRINSICS_)
  6816. // Perform the dot product on x,y and z
  6817. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  6818. // vTemp has z and y
  6819. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
  6820. // x+z, y
  6821. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6822. // y,y,y,y
  6823. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6824. // x+z+y,??,??,??
  6825. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6826. // Splat the length squared
  6827. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  6828. // Get the length
  6829. vLengthSq = _mm_sqrt_ps(vLengthSq);
  6830. return vLengthSq;
  6831. #else // _XM_VMX128_INTRINSICS_
  6832. #endif // _XM_VMX128_INTRINSICS_
  6833. }
  6834. //------------------------------------------------------------------------------
  6835. // XMVector3NormalizeEst uses a reciprocal estimate and
  6836. // returns QNaN on zero and infinite vectors.
  6837. XMFINLINE XMVECTOR XMVector3NormalizeEst
  6838. (
  6839. FXMVECTOR V
  6840. )
  6841. {
  6842. #if defined(_XM_NO_INTRINSICS_)
  6843. XMVECTOR Result;
  6844. Result = XMVector3ReciprocalLength(V);
  6845. Result = XMVectorMultiply(V, Result);
  6846. return Result;
  6847. #elif defined(_XM_SSE_INTRINSICS_)
  6848. // Perform the dot product
  6849. XMVECTOR vDot = _mm_mul_ps(V,V);
  6850. // x=Dot.y, y=Dot.z
  6851. XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
  6852. // Result.x = x+y
  6853. vDot = _mm_add_ss(vDot,vTemp);
  6854. // x=Dot.z
  6855. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6856. // Result.x = (x+y)+z
  6857. vDot = _mm_add_ss(vDot,vTemp);
  6858. // Splat x
  6859. vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
  6860. // Get the reciprocal
  6861. vDot = _mm_rsqrt_ps(vDot);
  6862. // Perform the normalization
  6863. vDot = _mm_mul_ps(vDot,V);
  6864. return vDot;
  6865. #else // _XM_VMX128_INTRINSICS_
  6866. #endif // _XM_VMX128_INTRINSICS_
  6867. }
  6868. //------------------------------------------------------------------------------
  6869. XMFINLINE XMVECTOR XMVector3Normalize
  6870. (
  6871. FXMVECTOR V
  6872. )
  6873. {
  6874. #if defined(_XM_NO_INTRINSICS_)
6875. FLOAT fLength;
6876. XMVECTOR vResult;
6877. fLength = sqrtf((V.x*V.x)+(V.y*V.y)+(V.z*V.z));
6878. // Prevent divide by zero
6879. if (fLength) {
6880. fLength = 1.0f/fLength;
6881. }
6882. vResult.x = V.x*fLength;
6883. vResult.y = V.y*fLength;
6884. vResult.z = V.z*fLength;
6885. vResult.w = V.w*fLength;
  6886. return vResult;
  6887. #elif defined(_XM_SSE_INTRINSICS_)
  6888. // Perform the dot product on x,y and z only
  6889. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  6890. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
  6891. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6892. vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
  6893. vLengthSq = _mm_add_ss(vLengthSq,vTemp);
  6894. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
  6895. // Prepare for the division
  6896. XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
6897. // Failsafe on zero (or epsilon) length vectors
6898. // If the length is infinity, set the elements to zero
  6899. vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
  6900. // Divide to perform the normalization
  6901. vResult = _mm_div_ps(V,vResult);
  6902. // Any that are infinity, set to zero
  6903. vResult = _mm_and_ps(vResult,vLengthSq);
  6904. return vResult;
  6905. #else // _XM_VMX128_INTRINSICS_
  6906. #endif // _XM_VMX128_INTRINSICS_
  6907. }
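//------------------------------------------------------------------------------
// Worked sketch (hypothetical): normalizing a 3-4-5 vector. Note that the
// scalar path above guards a zero length and returns the zero vector, while
// the SSE path produces QNaN for a zero-length input (0/0), so callers that
// may pass degenerate vectors should check the length first.
XMFINLINE XMVECTOR XMVector3NormalizeExample()
{
XMVECTOR vV = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f);
// Length is 5, so the expected result is (0.6, 0.0, 0.8, 0.0)
return XMVector3Normalize(vV);
}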
  6908. //------------------------------------------------------------------------------
  6909. XMFINLINE XMVECTOR XMVector3ClampLength
  6910. (
  6911. FXMVECTOR V,
  6912. FLOAT LengthMin,
  6913. FLOAT LengthMax
  6914. )
  6915. {
  6916. #if defined(_XM_NO_INTRINSICS_)
  6917. XMVECTOR ClampMax;
  6918. XMVECTOR ClampMin;
  6919. ClampMax = XMVectorReplicate(LengthMax);
  6920. ClampMin = XMVectorReplicate(LengthMin);
  6921. return XMVector3ClampLengthV(V, ClampMin, ClampMax);
  6922. #elif defined(_XM_SSE_INTRINSICS_)
  6923. XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
  6924. XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
  6925. return XMVector3ClampLengthV(V,ClampMin,ClampMax);
  6926. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  6927. #endif // _XM_VMX128_INTRINSICS_
  6928. }
  6929. //------------------------------------------------------------------------------
  6930. XMFINLINE XMVECTOR XMVector3ClampLengthV
  6931. (
  6932. FXMVECTOR V,
  6933. FXMVECTOR LengthMin,
  6934. FXMVECTOR LengthMax
  6935. )
  6936. {
  6937. #if defined(_XM_NO_INTRINSICS_)
  6938. XMVECTOR ClampLength;
  6939. XMVECTOR LengthSq;
  6940. XMVECTOR RcpLength;
  6941. XMVECTOR Length;
  6942. XMVECTOR Normal;
  6943. XMVECTOR Zero;
  6944. XMVECTOR InfiniteLength;
  6945. XMVECTOR ZeroLength;
  6946. XMVECTOR Select;
  6947. XMVECTOR ControlMax;
  6948. XMVECTOR ControlMin;
  6949. XMVECTOR Control;
  6950. XMVECTOR Result;
  6951. XMASSERT((LengthMin.y == LengthMin.x) && (LengthMin.z == LengthMin.x));
  6952. XMASSERT((LengthMax.y == LengthMax.x) && (LengthMax.z == LengthMax.x));
  6953. XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
  6954. XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
  6955. XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
  6956. LengthSq = XMVector3LengthSq(V);
  6957. Zero = XMVectorZero();
  6958. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  6959. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  6960. ZeroLength = XMVectorEqual(LengthSq, Zero);
  6961. Normal = XMVectorMultiply(V, RcpLength);
  6962. Length = XMVectorMultiply(LengthSq, RcpLength);
  6963. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  6964. Length = XMVectorSelect(LengthSq, Length, Select);
  6965. Normal = XMVectorSelect(LengthSq, Normal, Select);
  6966. ControlMax = XMVectorGreater(Length, LengthMax);
  6967. ControlMin = XMVectorLess(Length, LengthMin);
  6968. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  6969. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  6970. Result = XMVectorMultiply(Normal, ClampLength);
  6971. // Preserve the original vector (with no precision loss) if the length falls within the given range
  6972. Control = XMVectorEqualInt(ControlMax, ControlMin);
  6973. Result = XMVectorSelect(Result, V, Control);
  6974. return Result;
  6975. #elif defined(_XM_SSE_INTRINSICS_)
  6976. XMVECTOR ClampLength;
  6977. XMVECTOR LengthSq;
  6978. XMVECTOR RcpLength;
  6979. XMVECTOR Length;
  6980. XMVECTOR Normal;
  6981. XMVECTOR InfiniteLength;
  6982. XMVECTOR ZeroLength;
  6983. XMVECTOR Select;
  6984. XMVECTOR ControlMax;
  6985. XMVECTOR ControlMin;
  6986. XMVECTOR Control;
  6987. XMVECTOR Result;
  6988. XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
  6989. XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
  6990. XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
  6991. XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
  6992. XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
  6993. LengthSq = XMVector3LengthSq(V);
  6994. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  6995. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
  6996. ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
  6997. Normal = _mm_mul_ps(V, RcpLength);
  6998. Length = _mm_mul_ps(LengthSq, RcpLength);
  6999. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  7000. Length = XMVectorSelect(LengthSq, Length, Select);
  7001. Normal = XMVectorSelect(LengthSq, Normal, Select);
  7002. ControlMax = XMVectorGreater(Length, LengthMax);
  7003. ControlMin = XMVectorLess(Length, LengthMin);
  7004. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  7005. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  7006. Result = _mm_mul_ps(Normal, ClampLength);
  7007. // Preserve the original vector (with no precision loss) if the length falls within the given range
  7008. Control = XMVectorEqualInt(ControlMax, ControlMin);
  7009. Result = XMVectorSelect(Result, V, Control);
  7010. return Result;
  7011. #else // _XM_VMX128_INTRINSICS_
  7012. #endif // _XM_VMX128_INTRINSICS_
  7013. }
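//------------------------------------------------------------------------------
// Usage sketch (hypothetical): a common use of length clamping is capping a
// steering or physics force while leaving shorter vectors untouched.
XMFINLINE XMVECTOR XMVector3ClampMaxLengthExample
(
FXMVECTOR Force,
FLOAT MaxLength
)
{
// A minimum of zero means only the upper bound can take effect
return XMVector3ClampLength(Force, 0.0f, MaxLength);
}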
  7014. //------------------------------------------------------------------------------
  7015. XMFINLINE XMVECTOR XMVector3Reflect
  7016. (
  7017. FXMVECTOR Incident,
  7018. FXMVECTOR Normal
  7019. )
  7020. {
  7021. #if defined(_XM_NO_INTRINSICS_)
  7022. XMVECTOR Result;
  7023. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  7024. Result = XMVector3Dot(Incident, Normal);
  7025. Result = XMVectorAdd(Result, Result);
  7026. Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
  7027. return Result;
  7028. #elif defined(_XM_SSE_INTRINSICS_)
  7029. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  7030. XMVECTOR Result = XMVector3Dot(Incident, Normal);
  7031. Result = _mm_add_ps(Result, Result);
  7032. Result = _mm_mul_ps(Result, Normal);
  7033. Result = _mm_sub_ps(Incident,Result);
  7034. return Result;
  7035. #else // _XM_VMX128_INTRINSICS_
  7036. #endif // _XM_VMX128_INTRINSICS_
  7037. }
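//------------------------------------------------------------------------------
// Worked sketch (hypothetical): reflecting a falling direction off an upward
// unit normal. The Normal argument is assumed to be unit length; the formula
// in the comments above does not normalize it.
XMFINLINE XMVECTOR XMVector3ReflectExample()
{
XMVECTOR vIncident = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);
XMVECTOR vUp = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
// dot(Incident, Up) = -1, so the result is (1, 1, 0, 0)
return XMVector3Reflect(vIncident, vUp);
}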
  7038. //------------------------------------------------------------------------------
  7039. XMFINLINE XMVECTOR XMVector3Refract
  7040. (
  7041. FXMVECTOR Incident,
  7042. FXMVECTOR Normal,
  7043. FLOAT RefractionIndex
  7044. )
  7045. {
  7046. #if defined(_XM_NO_INTRINSICS_)
  7047. XMVECTOR Index;
  7048. Index = XMVectorReplicate(RefractionIndex);
  7049. return XMVector3RefractV(Incident, Normal, Index);
  7050. #elif defined(_XM_SSE_INTRINSICS_)
  7051. XMVECTOR Index = _mm_set_ps1(RefractionIndex);
  7052. return XMVector3RefractV(Incident,Normal,Index);
  7053. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  7054. #endif // _XM_VMX128_INTRINSICS_
  7055. }
  7056. //------------------------------------------------------------------------------
  7057. XMFINLINE XMVECTOR XMVector3RefractV
  7058. (
  7059. FXMVECTOR Incident,
  7060. FXMVECTOR Normal,
  7061. FXMVECTOR RefractionIndex
  7062. )
  7063. {
  7064. #if defined(_XM_NO_INTRINSICS_)
  7065. XMVECTOR IDotN;
  7066. XMVECTOR R;
  7067. CONST XMVECTOR Zero = XMVectorZero();
  7068. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  7069. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  7070. IDotN = XMVector3Dot(Incident, Normal);
  7071. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  7072. R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
  7073. R = XMVectorMultiply(R, RefractionIndex);
  7074. R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
  7075. if (XMVector4LessOrEqual(R, Zero))
  7076. {
  7077. // Total internal reflection
  7078. return Zero;
  7079. }
  7080. else
  7081. {
  7082. XMVECTOR Result;
  7083. // R = RefractionIndex * IDotN + sqrt(R)
  7084. R = XMVectorSqrt(R);
  7085. R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
  7086. // Result = RefractionIndex * Incident - Normal * R
  7087. Result = XMVectorMultiply(RefractionIndex, Incident);
  7088. Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
  7089. return Result;
  7090. }
  7091. #elif defined(_XM_SSE_INTRINSICS_)
  7092. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  7093. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  7094. XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
  7095. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  7096. XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
  7097. R = _mm_sub_ps(g_XMOne,R);
  7098. R = _mm_mul_ps(R, RefractionIndex);
  7099. R = _mm_mul_ps(R, RefractionIndex);
  7100. R = _mm_sub_ps(g_XMOne,R);
  7101. XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
  7102. if (_mm_movemask_ps(vResult)==0x0f)
  7103. {
  7104. // Total internal reflection
  7105. vResult = g_XMZero;
  7106. }
  7107. else
  7108. {
  7109. // R = RefractionIndex * IDotN + sqrt(R)
  7110. R = _mm_sqrt_ps(R);
  7111. vResult = _mm_mul_ps(RefractionIndex,IDotN);
  7112. R = _mm_add_ps(R,vResult);
  7113. // Result = RefractionIndex * Incident - Normal * R
  7114. vResult = _mm_mul_ps(RefractionIndex, Incident);
  7115. R = _mm_mul_ps(R,Normal);
  7116. vResult = _mm_sub_ps(vResult,R);
  7117. }
  7118. return vResult;
  7119. #else // _XM_VMX128_INTRINSICS_
  7120. #endif // _XM_VMX128_INTRINSICS_
  7121. }
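//------------------------------------------------------------------------------
// Usage sketch (hypothetical): RefractionIndex is the ratio n1/n2 of the two
// media. Leaving a denser medium (for example glass to air, 1.5f) can trigger
// total internal reflection, which the functions above signal by returning
// the zero vector.
XMFINLINE XMVECTOR XMVector3RefractOrReflectExample
(
FXMVECTOR Incident,
FXMVECTOR Normal
)
{
XMVECTOR vRefracted = XMVector3Refract(Incident, Normal, 1.5f);
if (XMVector3Equal(vRefracted, XMVectorZero()))
{
// Total internal reflection: fall back to a mirror bounce
return XMVector3Reflect(Incident, Normal);
}
return vRefracted;
}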
  7122. //------------------------------------------------------------------------------
  7123. XMFINLINE XMVECTOR XMVector3Orthogonal
  7124. (
  7125. FXMVECTOR V
  7126. )
  7127. {
  7128. #if defined(_XM_NO_INTRINSICS_)
  7129. XMVECTOR NegativeV;
  7130. XMVECTOR Z, YZYY;
  7131. XMVECTOR ZIsNegative, YZYYIsNegative;
  7132. XMVECTOR S, D;
  7133. XMVECTOR R0, R1;
  7134. XMVECTOR Select;
  7135. XMVECTOR Zero;
  7136. XMVECTOR Result;
  7137. static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
7138. static CONST XMVECTORU32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
  7139. Zero = XMVectorZero();
  7140. Z = XMVectorSplatZ(V);
  7141. YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);
  7142. NegativeV = XMVectorSubtract(Zero, V);
  7143. ZIsNegative = XMVectorLess(Z, Zero);
  7144. YZYYIsNegative = XMVectorLess(YZYY, Zero);
  7145. S = XMVectorAdd(YZYY, Z);
  7146. D = XMVectorSubtract(YZYY, Z);
  7147. Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
  7148. R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
  7149. R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);
  7150. Result = XMVectorSelect(R1, R0, Select);
  7151. return Result;
  7152. #elif defined(_XM_SSE_INTRINSICS_)
  7153. XMVECTOR NegativeV;
  7154. XMVECTOR Z, YZYY;
  7155. XMVECTOR ZIsNegative, YZYYIsNegative;
  7156. XMVECTOR S, D;
  7157. XMVECTOR R0, R1;
  7158. XMVECTOR Select;
  7159. XMVECTOR Zero;
  7160. XMVECTOR Result;
  7161. static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
7162. static CONST XMVECTORI32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};
  7163. Zero = XMVectorZero();
  7164. Z = XMVectorSplatZ(V);
  7165. YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);
  7166. NegativeV = _mm_sub_ps(Zero, V);
  7167. ZIsNegative = XMVectorLess(Z, Zero);
  7168. YZYYIsNegative = XMVectorLess(YZYY, Zero);
  7169. S = _mm_add_ps(YZYY, Z);
  7170. D = _mm_sub_ps(YZYY, Z);
  7171. Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
  7172. R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
  7173. R1 = XMVectorPermute(V, D,Permute1X0X0X0X);
  7174. Result = XMVectorSelect(R1, R0, Select);
  7175. return Result;
  7176. #else // _XM_VMX128_INTRINSICS_
  7177. #endif // _XM_VMX128_INTRINSICS_
  7178. }
  7179. //------------------------------------------------------------------------------
  7180. XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
  7181. (
  7182. FXMVECTOR N1,
  7183. FXMVECTOR N2
  7184. )
  7185. {
  7186. #if defined(_XM_NO_INTRINSICS_)
  7187. XMVECTOR Result;
  7188. XMVECTOR NegativeOne;
  7189. XMVECTOR One;
  7190. Result = XMVector3Dot(N1, N2);
  7191. NegativeOne = XMVectorSplatConstant(-1, 0);
  7192. One = XMVectorSplatOne();
  7193. Result = XMVectorClamp(Result, NegativeOne, One);
  7194. Result = XMVectorACosEst(Result);
  7195. return Result;
  7196. #elif defined(_XM_SSE_INTRINSICS_)
  7197. XMVECTOR vResult = XMVector3Dot(N1,N2);
  7198. // Clamp to -1.0f to 1.0f
  7199. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
  7200. vResult = _mm_min_ps(vResult,g_XMOne);
  7201. vResult = XMVectorACosEst(vResult);
  7202. return vResult;
  7203. #else // _XM_VMX128_INTRINSICS_
  7204. #endif // _XM_VMX128_INTRINSICS_
  7205. }
  7206. //------------------------------------------------------------------------------
  7207. XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
  7208. (
  7209. FXMVECTOR N1,
  7210. FXMVECTOR N2
  7211. )
  7212. {
  7213. #if defined(_XM_NO_INTRINSICS_)
  7214. XMVECTOR Result;
  7215. XMVECTOR NegativeOne;
  7216. XMVECTOR One;
  7217. Result = XMVector3Dot(N1, N2);
  7218. NegativeOne = XMVectorSplatConstant(-1, 0);
  7219. One = XMVectorSplatOne();
  7220. Result = XMVectorClamp(Result, NegativeOne, One);
  7221. Result = XMVectorACos(Result);
  7222. return Result;
  7223. #elif defined(_XM_SSE_INTRINSICS_)
  7224. XMVECTOR vResult = XMVector3Dot(N1,N2);
  7225. // Clamp to -1.0f to 1.0f
  7226. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
  7227. vResult = _mm_min_ps(vResult,g_XMOne);
  7228. vResult = XMVectorACos(vResult);
  7229. return vResult;
  7230. #else // _XM_VMX128_INTRINSICS_
  7231. #endif // _XM_VMX128_INTRINSICS_
  7232. }
  7233. //------------------------------------------------------------------------------
  7234. XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
  7235. (
  7236. FXMVECTOR V1,
  7237. FXMVECTOR V2
  7238. )
  7239. {
  7240. #if defined(_XM_NO_INTRINSICS_)
  7241. XMVECTOR L1;
  7242. XMVECTOR L2;
  7243. XMVECTOR Dot;
  7244. XMVECTOR CosAngle;
  7245. XMVECTOR NegativeOne;
  7246. XMVECTOR One;
  7247. XMVECTOR Result;
  7248. L1 = XMVector3ReciprocalLength(V1);
  7249. L2 = XMVector3ReciprocalLength(V2);
  7250. Dot = XMVector3Dot(V1, V2);
  7251. L1 = XMVectorMultiply(L1, L2);
  7252. NegativeOne = XMVectorSplatConstant(-1, 0);
  7253. One = XMVectorSplatOne();
  7254. CosAngle = XMVectorMultiply(Dot, L1);
  7255. CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
  7256. Result = XMVectorACos(CosAngle);
  7257. return Result;
  7258. #elif defined(_XM_SSE_INTRINSICS_)
  7259. XMVECTOR L1;
  7260. XMVECTOR L2;
  7261. XMVECTOR Dot;
  7262. XMVECTOR CosAngle;
  7263. XMVECTOR Result;
  7264. L1 = XMVector3ReciprocalLength(V1);
  7265. L2 = XMVector3ReciprocalLength(V2);
  7266. Dot = XMVector3Dot(V1, V2);
  7267. L1 = _mm_mul_ps(L1, L2);
  7268. CosAngle = _mm_mul_ps(Dot, L1);
  7269. CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
  7270. Result = XMVectorACos(CosAngle);
  7271. return Result;
  7272. #else // _XM_VMX128_INTRINSICS_
  7273. #endif // _XM_VMX128_INTRINSICS_
  7274. }
  7275. //------------------------------------------------------------------------------
  7276. XMFINLINE XMVECTOR XMVector3LinePointDistance
  7277. (
  7278. FXMVECTOR LinePoint1,
  7279. FXMVECTOR LinePoint2,
  7280. FXMVECTOR Point
  7281. )
  7282. {
  7283. #if defined(_XM_NO_INTRINSICS_)
  7284. XMVECTOR PointVector;
  7285. XMVECTOR LineVector;
  7286. XMVECTOR ReciprocalLengthSq;
  7287. XMVECTOR PointProjectionScale;
  7288. XMVECTOR DistanceVector;
  7289. XMVECTOR Result;
  7290. // Given a vector PointVector from LinePoint1 to Point and a vector
  7291. // LineVector from LinePoint1 to LinePoint2, the scaled distance
  7292. // PointProjectionScale from LinePoint1 to the perpendicular projection
  7293. // of PointVector onto the line is defined as:
  7294. //
  7295. // PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
  7296. PointVector = XMVectorSubtract(Point, LinePoint1);
  7297. LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
  7298. ReciprocalLengthSq = XMVector3LengthSq(LineVector);
  7299. ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
  7300. PointProjectionScale = XMVector3Dot(PointVector, LineVector);
  7301. PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
  7302. DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
  7303. DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
  7304. Result = XMVector3Length(DistanceVector);
  7305. return Result;
  7306. #elif defined(_XM_SSE_INTRINSICS_)
  7307. XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
  7308. XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
7309. XMVECTOR LengthSq = XMVector3LengthSq(LineVector); // not a reciprocal here; the divide below takes its place
7310. XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
7311. vResult = _mm_div_ps(vResult,LengthSq);
  7312. vResult = _mm_mul_ps(vResult,LineVector);
  7313. vResult = _mm_sub_ps(PointVector,vResult);
  7314. vResult = XMVector3Length(vResult);
  7315. return vResult;
  7316. #else // _XM_VMX128_INTRINSICS_
  7317. #endif // _XM_VMX128_INTRINSICS_
  7318. }
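//------------------------------------------------------------------------------
// Worked usage sketch (illustrative addition, not part of the original
// library), tracing the projection formula in the comment above: for the line
// through (0,0,0) and (1,0,0) and the point (3,4,0), the projection scale is
// dot((3,4,0),(1,0,0)) / 1 = 3, the projection is (3,0,0), and the remaining
// perpendicular component (0,4,0) has length 4. Names are hypothetical.
XMFINLINE FLOAT XMExampleLinePointDistance()
{
    XMVECTOR vLineP0 = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR vLineP1 = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR vPoint  = XMVectorSet(3.0f, 4.0f, 0.0f, 0.0f);
    // Returns 4.0f for this configuration
    return XMVectorGetX(XMVector3LinePointDistance(vLineP0, vLineP1, vPoint));
}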
  7319. //------------------------------------------------------------------------------
  7320. XMFINLINE VOID XMVector3ComponentsFromNormal
  7321. (
  7322. XMVECTOR* pParallel,
  7323. XMVECTOR* pPerpendicular,
  7324. FXMVECTOR V,
  7325. FXMVECTOR Normal
  7326. )
  7327. {
  7328. #if defined(_XM_NO_INTRINSICS_)
  7329. XMVECTOR Parallel;
  7330. XMVECTOR Scale;
  7331. XMASSERT(pParallel);
  7332. XMASSERT(pPerpendicular);
  7333. Scale = XMVector3Dot(V, Normal);
  7334. Parallel = XMVectorMultiply(Normal, Scale);
  7335. *pParallel = Parallel;
  7336. *pPerpendicular = XMVectorSubtract(V, Parallel);
  7337. #elif defined(_XM_SSE_INTRINSICS_)
  7338. XMASSERT(pParallel);
  7339. XMASSERT(pPerpendicular);
  7340. XMVECTOR Scale = XMVector3Dot(V, Normal);
  7341. XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
  7342. *pParallel = Parallel;
  7343. *pPerpendicular = _mm_sub_ps(V,Parallel);
  7344. #else // _XM_VMX128_INTRINSICS_
  7345. #endif // _XM_VMX128_INTRINSICS_
  7346. }
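//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// Splitting V = (1,2,3) against the unit normal (0,0,1) yields a parallel part
// of (0,0,3) and a perpendicular part of (1,2,0). Normal must be unit length
// for the decomposition to be exact, since the dot product is used directly as
// the projection scale. Names are hypothetical.
XMFINLINE VOID XMExampleSplitAgainstNormal()
{
    XMVECTOR vParallel;
    XMVECTOR vPerpendicular;
    XMVECTOR vV = XMVectorSet(1.0f, 2.0f, 3.0f, 0.0f);
    XMVECTOR vN = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    XMVector3ComponentsFromNormal(&vParallel, &vPerpendicular, vV, vN);
    // vParallel == (0,0,3), vPerpendicular == (1,2,0)
}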
  7347. //------------------------------------------------------------------------------
  7348. // Transform a vector using a rotation expressed as a unit quaternion
  7349. XMFINLINE XMVECTOR XMVector3Rotate
  7350. (
  7351. FXMVECTOR V,
  7352. FXMVECTOR RotationQuaternion
  7353. )
  7354. {
  7355. #if defined(_XM_NO_INTRINSICS_)
  7356. XMVECTOR A;
  7357. XMVECTOR Q;
  7358. XMVECTOR Result;
  7359. A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
  7360. Q = XMQuaternionConjugate(RotationQuaternion);
  7361. Result = XMQuaternionMultiply(Q, A);
  7362. Result = XMQuaternionMultiply(Result, RotationQuaternion);
  7363. return Result;
  7364. #elif defined(_XM_SSE_INTRINSICS_)
  7365. XMVECTOR A;
  7366. XMVECTOR Q;
  7367. XMVECTOR Result;
  7368. A = _mm_and_ps(V,g_XMMask3);
  7369. Q = XMQuaternionConjugate(RotationQuaternion);
  7370. Result = XMQuaternionMultiply(Q, A);
  7371. Result = XMQuaternionMultiply(Result, RotationQuaternion);
  7372. return Result;
  7373. #else // _XM_VMX128_INTRINSICS_
  7374. #endif // _XM_VMX128_INTRINSICS_
  7375. }
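//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// Rotates the X axis a quarter turn about Z using the quaternion sandwich
// product q* v q implemented above; the result is approximately the Y axis.
// Names are hypothetical, and the quaternion must be unit length.
XMFINLINE XMVECTOR XMExampleRotateAboutZ()
{
    XMVECTOR vAxis = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    XMVECTOR qRotation = XMQuaternionRotationAxis(vAxis, XM_PIDIV2);
    XMVECTOR vX = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
    // Approximately (0,1,0,0)
    return XMVector3Rotate(vX, qRotation);
}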
  7376. //------------------------------------------------------------------------------
  7377. // Transform a vector using the inverse of a rotation expressed as a unit quaternion
  7378. XMFINLINE XMVECTOR XMVector3InverseRotate
  7379. (
  7380. FXMVECTOR V,
  7381. FXMVECTOR RotationQuaternion
  7382. )
  7383. {
  7384. #if defined(_XM_NO_INTRINSICS_)
  7385. XMVECTOR A;
  7386. XMVECTOR Q;
  7387. XMVECTOR Result;
  7388. A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
  7389. Result = XMQuaternionMultiply(RotationQuaternion, A);
  7390. Q = XMQuaternionConjugate(RotationQuaternion);
  7391. Result = XMQuaternionMultiply(Result, Q);
  7392. return Result;
  7393. #elif defined(_XM_SSE_INTRINSICS_)
  7394. XMVECTOR A;
  7395. XMVECTOR Q;
  7396. XMVECTOR Result;
  7397. A = _mm_and_ps(V,g_XMMask3);
  7398. Result = XMQuaternionMultiply(RotationQuaternion, A);
  7399. Q = XMQuaternionConjugate(RotationQuaternion);
  7400. Result = XMQuaternionMultiply(Result, Q);
  7401. return Result;
  7402. #else // _XM_VMX128_INTRINSICS_
  7403. #endif // _XM_VMX128_INTRINSICS_
  7404. }
  7405. //------------------------------------------------------------------------------
  7406. XMFINLINE XMVECTOR XMVector3Transform
  7407. (
  7408. FXMVECTOR V,
  7409. CXMMATRIX M
  7410. )
  7411. {
  7412. #if defined(_XM_NO_INTRINSICS_)
  7413. XMVECTOR X;
  7414. XMVECTOR Y;
  7415. XMVECTOR Z;
  7416. XMVECTOR Result;
  7417. Z = XMVectorSplatZ(V);
  7418. Y = XMVectorSplatY(V);
  7419. X = XMVectorSplatX(V);
  7420. Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
  7421. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  7422. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  7423. return Result;
  7424. #elif defined(_XM_SSE_INTRINSICS_)
  7425. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  7426. vResult = _mm_mul_ps(vResult,M.r[0]);
  7427. XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  7428. vTemp = _mm_mul_ps(vTemp,M.r[1]);
  7429. vResult = _mm_add_ps(vResult,vTemp);
  7430. vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
  7431. vTemp = _mm_mul_ps(vTemp,M.r[2]);
  7432. vResult = _mm_add_ps(vResult,vTemp);
  7433. vResult = _mm_add_ps(vResult,M.r[3]);
  7434. return vResult;
  7435. #else // _XM_VMX128_INTRINSICS_
  7436. #endif // _XM_VMX128_INTRINSICS_
  7437. }
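//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// XMVector3Transform treats the input as a point with an implicit w of 1, so
// the translation row M.r[3] always contributes and the returned w is left
// undivided. Names are hypothetical.
XMFINLINE XMVECTOR XMExampleTranslatePoint()
{
    XMMATRIX mTranslate = XMMatrixTranslation(10.0f, 0.0f, 0.0f);
    XMVECTOR vPoint = XMVectorSet(1.0f, 2.0f, 3.0f, 0.0f);
    // Yields (11,2,3,1); the input w is ignored
    return XMVector3Transform(vPoint, mTranslate);
}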
  7438. //------------------------------------------------------------------------------
  7439. XMINLINE XMFLOAT4* XMVector3TransformStream
  7440. (
  7441. XMFLOAT4* pOutputStream,
  7442. UINT OutputStride,
  7443. CONST XMFLOAT3* pInputStream,
  7444. UINT InputStride,
  7445. UINT VectorCount,
  7446. CXMMATRIX M
  7447. )
  7448. {
  7449. #if defined(_XM_NO_INTRINSICS_)
  7450. XMVECTOR V;
  7451. XMVECTOR X;
  7452. XMVECTOR Y;
  7453. XMVECTOR Z;
  7454. XMVECTOR Result;
  7455. UINT i;
  7456. BYTE* pInputVector = (BYTE*)pInputStream;
  7457. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7458. XMASSERT(pOutputStream);
  7459. XMASSERT(pInputStream);
  7460. for (i = 0; i < VectorCount; i++)
  7461. {
  7462. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  7463. Z = XMVectorSplatZ(V);
  7464. Y = XMVectorSplatY(V);
  7465. X = XMVectorSplatX(V);
  7466. Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
  7467. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  7468. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  7469. XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
  7470. pInputVector += InputStride;
  7471. pOutputVector += OutputStride;
  7472. }
  7473. return pOutputStream;
  7474. #elif defined(_XM_SSE_INTRINSICS_)
  7475. XMASSERT(pOutputStream);
  7476. XMASSERT(pInputStream);
  7477. UINT i;
  7478. const BYTE* pInputVector = (const BYTE*)pInputStream;
  7479. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7480. for (i = 0; i < VectorCount; i++)
  7481. {
  7482. XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
  7483. XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
  7484. XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
  7485. vResult = _mm_mul_ps(vResult,M.r[2]);
  7486. vResult = _mm_add_ps(vResult,M.r[3]);
  7487. Y = _mm_mul_ps(Y,M.r[1]);
  7488. vResult = _mm_add_ps(vResult,Y);
  7489. X = _mm_mul_ps(X,M.r[0]);
  7490. vResult = _mm_add_ps(vResult,X);
  7491. _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
  7492. pInputVector += InputStride;
  7493. pOutputVector += OutputStride;
  7494. }
  7495. return pOutputStream;
  7496. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  7497. #endif // _XM_VMX128_INTRINSICS_
  7498. }
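//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// The stride arguments are byte distances between consecutive elements, so the
// stream functions can walk tightly packed arrays as well as interleaved
// vertex structures. This minimal call transforms a packed XMFLOAT3 array into
// a packed XMFLOAT4 array; buffer names and the count are hypothetical.
XMFINLINE VOID XMExampleTransformPackedStream(CXMMATRIX M)
{
    XMFLOAT3 vIn[4] = { XMFLOAT3(0,0,0), XMFLOAT3(1,0,0), XMFLOAT3(0,1,0), XMFLOAT3(0,0,1) };
    XMFLOAT4 vOut[4];
    XMVector3TransformStream(vOut, sizeof(XMFLOAT4), vIn, sizeof(XMFLOAT3), 4, M);
}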
  7499. //------------------------------------------------------------------------------
  7500. XMINLINE XMFLOAT4* XMVector3TransformStreamNC
  7501. (
  7502. XMFLOAT4* pOutputStream,
  7503. UINT OutputStride,
  7504. CONST XMFLOAT3* pInputStream,
  7505. UINT InputStride,
  7506. UINT VectorCount,
  7507. CXMMATRIX M
  7508. )
  7509. {
  7510. #if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
  7511. return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
  7512. #else // _XM_VMX128_INTRINSICS_
  7513. #endif // _XM_VMX128_INTRINSICS_
  7514. }
  7515. //------------------------------------------------------------------------------
  7516. XMFINLINE XMVECTOR XMVector3TransformCoord
  7517. (
  7518. FXMVECTOR V,
  7519. CXMMATRIX M
  7520. )
  7521. {
  7522. #if defined(_XM_NO_INTRINSICS_)
  7523. XMVECTOR X;
  7524. XMVECTOR Y;
  7525. XMVECTOR Z;
  7526. XMVECTOR InverseW;
  7527. XMVECTOR Result;
  7528. Z = XMVectorSplatZ(V);
  7529. Y = XMVectorSplatY(V);
  7530. X = XMVectorSplatX(V);
  7531. Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
  7532. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  7533. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  7534. InverseW = XMVectorSplatW(Result);
  7535. InverseW = XMVectorReciprocal(InverseW);
  7536. Result = XMVectorMultiply(Result, InverseW);
  7537. return Result;
  7538. #elif defined(_XM_SSE_INTRINSICS_)
  7539. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  7540. vResult = _mm_mul_ps(vResult,M.r[0]);
  7541. XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  7542. vTemp = _mm_mul_ps(vTemp,M.r[1]);
  7543. vResult = _mm_add_ps(vResult,vTemp);
  7544. vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
  7545. vTemp = _mm_mul_ps(vTemp,M.r[2]);
  7546. vResult = _mm_add_ps(vResult,vTemp);
  7547. vResult = _mm_add_ps(vResult,M.r[3]);
  7548. vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
  7549. vResult = _mm_div_ps(vResult,vTemp);
  7550. return vResult;
  7551. #else // _XM_VMX128_INTRINSICS_
  7552. #endif // _XM_VMX128_INTRINSICS_
  7553. }
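//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// Unlike XMVector3Transform, XMVector3TransformCoord divides the transformed
// point by its w component, which is the perspective divide a projection
// matrix requires; the returned w is therefore 1. The projection parameters
// below are arbitrary example values.
XMFINLINE XMVECTOR XMExamplePerspectiveDivide(FXMVECTOR vViewPoint)
{
    XMMATRIX mProj = XMMatrixPerspectiveFovLH(XM_PIDIV2, 16.0f / 9.0f, 0.1f, 100.0f);
    // Result is in normalized device coordinates
    return XMVector3TransformCoord(vViewPoint, mProj);
}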
  7554. //------------------------------------------------------------------------------
  7555. XMINLINE XMFLOAT3* XMVector3TransformCoordStream
  7556. (
  7557. XMFLOAT3* pOutputStream,
  7558. UINT OutputStride,
  7559. CONST XMFLOAT3* pInputStream,
  7560. UINT InputStride,
  7561. UINT VectorCount,
  7562. CXMMATRIX M
  7563. )
  7564. {
  7565. #if defined(_XM_NO_INTRINSICS_)
  7566. XMVECTOR V;
  7567. XMVECTOR X;
  7568. XMVECTOR Y;
  7569. XMVECTOR Z;
  7570. XMVECTOR InverseW;
  7571. XMVECTOR Result;
  7572. UINT i;
  7573. BYTE* pInputVector = (BYTE*)pInputStream;
  7574. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7575. XMASSERT(pOutputStream);
  7576. XMASSERT(pInputStream);
  7577. for (i = 0; i < VectorCount; i++)
  7578. {
  7579. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  7580. Z = XMVectorSplatZ(V);
  7581. Y = XMVectorSplatY(V);
  7582. X = XMVectorSplatX(V);
  7583. // Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
  7584. // Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
  7585. // X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
  7586. Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
  7587. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  7588. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  7589. InverseW = XMVectorSplatW(Result);
  7590. InverseW = XMVectorReciprocal(InverseW);
  7591. Result = XMVectorMultiply(Result, InverseW);
  7592. XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
  7593. pInputVector += InputStride;
  7594. pOutputVector += OutputStride;
  7595. }
  7596. return pOutputStream;
  7597. #elif defined(_XM_SSE_INTRINSICS_)
  7598. XMASSERT(pOutputStream);
  7599. XMASSERT(pInputStream);
  7600. UINT i;
  7601. const BYTE *pInputVector = (BYTE*)pInputStream;
  7602. BYTE *pOutputVector = (BYTE*)pOutputStream;
  7603. for (i = 0; i < VectorCount; i++)
  7604. {
  7605. XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
  7606. XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
  7607. XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
  7608. vResult = _mm_mul_ps(vResult,M.r[2]);
  7609. vResult = _mm_add_ps(vResult,M.r[3]);
  7610. Y = _mm_mul_ps(Y,M.r[1]);
  7611. vResult = _mm_add_ps(vResult,Y);
  7612. X = _mm_mul_ps(X,M.r[0]);
  7613. vResult = _mm_add_ps(vResult,X);
  7614. X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
  7615. vResult = _mm_div_ps(vResult,X);
  7616. _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
  7617. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
  7618. _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
  7619. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
  7620. _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
  7621. pInputVector += InputStride;
  7622. pOutputVector += OutputStride;
  7623. }
  7624. return pOutputStream;
  7625. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  7626. #endif // _XM_VMX128_INTRINSICS_
  7627. }
  7628. //------------------------------------------------------------------------------
  7629. XMFINLINE XMVECTOR XMVector3TransformNormal
  7630. (
  7631. FXMVECTOR V,
  7632. CXMMATRIX M
  7633. )
  7634. {
  7635. #if defined(_XM_NO_INTRINSICS_)
  7636. XMVECTOR X;
  7637. XMVECTOR Y;
  7638. XMVECTOR Z;
  7639. XMVECTOR Result;
  7640. Z = XMVectorSplatZ(V);
  7641. Y = XMVectorSplatY(V);
  7642. X = XMVectorSplatX(V);
  7643. Result = XMVectorMultiply(Z, M.r[2]);
  7644. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  7645. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  7646. return Result;
  7647. #elif defined(_XM_SSE_INTRINSICS_)
  7648. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  7649. vResult = _mm_mul_ps(vResult,M.r[0]);
  7650. XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  7651. vTemp = _mm_mul_ps(vTemp,M.r[1]);
  7652. vResult = _mm_add_ps(vResult,vTemp);
  7653. vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
  7654. vTemp = _mm_mul_ps(vTemp,M.r[2]);
  7655. vResult = _mm_add_ps(vResult,vTemp);
  7656. return vResult;
  7657. #else // _XM_VMX128_INTRINSICS_
  7658. #endif // _XM_VMX128_INTRINSICS_
  7659. }
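//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// XMVector3TransformNormal skips the translation row (w is treated as 0), so a
// direction is rotated and scaled but never shifted; compare XMVector3Transform
// above, which adds M.r[3]. Names are hypothetical.
XMFINLINE XMVECTOR XMExampleTransformDirection()
{
    XMMATRIX mTranslate = XMMatrixTranslation(10.0f, 0.0f, 0.0f);
    XMVECTOR vDir = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    // Still (0,0,1,0): the translation has no effect on a direction
    return XMVector3TransformNormal(vDir, mTranslate);
}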
  7660. //------------------------------------------------------------------------------
  7661. XMINLINE XMFLOAT3* XMVector3TransformNormalStream
  7662. (
  7663. XMFLOAT3* pOutputStream,
  7664. UINT OutputStride,
  7665. CONST XMFLOAT3* pInputStream,
  7666. UINT InputStride,
  7667. UINT VectorCount,
  7668. CXMMATRIX M
  7669. )
  7670. {
  7671. #if defined(_XM_NO_INTRINSICS_)
  7672. XMVECTOR V;
  7673. XMVECTOR X;
  7674. XMVECTOR Y;
  7675. XMVECTOR Z;
  7676. XMVECTOR Result;
  7677. UINT i;
  7678. BYTE* pInputVector = (BYTE*)pInputStream;
  7679. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7680. XMASSERT(pOutputStream);
  7681. XMASSERT(pInputStream);
  7682. for (i = 0; i < VectorCount; i++)
  7683. {
  7684. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  7685. Z = XMVectorSplatZ(V);
  7686. Y = XMVectorSplatY(V);
  7687. X = XMVectorSplatX(V);
  7688. // Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
  7689. // Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
  7690. // X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
  7691. Result = XMVectorMultiply(Z, M.r[2]);
  7692. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  7693. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  7694. XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
  7695. pInputVector += InputStride;
  7696. pOutputVector += OutputStride;
  7697. }
  7698. return pOutputStream;
  7699. #elif defined(_XM_SSE_INTRINSICS_)
  7700. XMASSERT(pOutputStream);
  7701. XMASSERT(pInputStream);
  7702. UINT i;
  7703. const BYTE *pInputVector = (BYTE*)pInputStream;
  7704. BYTE *pOutputVector = (BYTE*)pOutputStream;
  7705. for (i = 0; i < VectorCount; i++)
  7706. {
  7707. XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
  7708. XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
  7709. XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
  7710. vResult = _mm_mul_ps(vResult,M.r[2]);
  7711. Y = _mm_mul_ps(Y,M.r[1]);
  7712. vResult = _mm_add_ps(vResult,Y);
  7713. X = _mm_mul_ps(X,M.r[0]);
  7714. vResult = _mm_add_ps(vResult,X);
  7715. _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
  7716. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
  7717. _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
  7718. vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
  7719. _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
  7720. pInputVector += InputStride;
  7721. pOutputVector += OutputStride;
  7722. }
  7723. return pOutputStream;
  7724. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  7725. #endif // _XM_VMX128_INTRINSICS_
  7726. }
  7727. //------------------------------------------------------------------------------
  7728. XMINLINE XMVECTOR XMVector3Project
  7729. (
  7730. FXMVECTOR V,
  7731. FLOAT ViewportX,
  7732. FLOAT ViewportY,
  7733. FLOAT ViewportWidth,
  7734. FLOAT ViewportHeight,
  7735. FLOAT ViewportMinZ,
  7736. FLOAT ViewportMaxZ,
  7737. CXMMATRIX Projection,
  7738. CXMMATRIX View,
  7739. CXMMATRIX World
  7740. )
  7741. {
  7742. #if defined(_XM_NO_INTRINSICS_)
  7743. XMMATRIX Transform;
  7744. XMVECTOR Scale;
  7745. XMVECTOR Offset;
  7746. XMVECTOR Result;
  7747. FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
  7748. FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
  7749. Scale = XMVectorSet(HalfViewportWidth,
  7750. -HalfViewportHeight,
  7751. ViewportMaxZ - ViewportMinZ,
  7752. 0.0f);
  7753. Offset = XMVectorSet(ViewportX + HalfViewportWidth,
  7754. ViewportY + HalfViewportHeight,
  7755. ViewportMinZ,
  7756. 0.0f);
  7757. Transform = XMMatrixMultiply(World, View);
  7758. Transform = XMMatrixMultiply(Transform, Projection);
  7759. Result = XMVector3TransformCoord(V, Transform);
  7760. Result = XMVectorMultiplyAdd(Result, Scale, Offset);
  7761. return Result;
  7762. #elif defined(_XM_SSE_INTRINSICS_)
  7763. XMMATRIX Transform;
  7764. XMVECTOR Scale;
  7765. XMVECTOR Offset;
  7766. XMVECTOR Result;
  7767. FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
  7768. FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
  7769. Scale = XMVectorSet(HalfViewportWidth,
  7770. -HalfViewportHeight,
  7771. ViewportMaxZ - ViewportMinZ,
  7772. 0.0f);
  7773. Offset = XMVectorSet(ViewportX + HalfViewportWidth,
  7774. ViewportY + HalfViewportHeight,
  7775. ViewportMinZ,
  7776. 0.0f);
  7777. Transform = XMMatrixMultiply(World, View);
  7778. Transform = XMMatrixMultiply(Transform, Projection);
  7779. Result = XMVector3TransformCoord(V, Transform);
  7780. Result = _mm_mul_ps(Result,Scale);
  7781. Result = _mm_add_ps(Result,Offset);
  7782. return Result;
  7783. #else // _XM_VMX128_INTRINSICS_
  7784. #endif // _XM_VMX128_INTRINSICS_
  7785. }
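//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// Maps a world-space point to pixel coordinates for a 1280x720 viewport with
// the full 0..1 depth range. The viewport values and matrix arguments are
// hypothetical placeholders for whatever the caller renders with.
XMFINLINE XMVECTOR XMExampleWorldToScreen(FXMVECTOR vWorldPos, CXMMATRIX mView, CXMMATRIX mProj)
{
    return XMVector3Project(vWorldPos,
        0.0f, 0.0f, 1280.0f, 720.0f, // viewport origin and size in pixels
        0.0f, 1.0f,                  // viewport min/max depth
        mProj, mView, XMMatrixIdentity());
}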
  7786. //------------------------------------------------------------------------------
  7787. XMINLINE XMFLOAT3* XMVector3ProjectStream
  7788. (
  7789. XMFLOAT3* pOutputStream,
  7790. UINT OutputStride,
  7791. CONST XMFLOAT3* pInputStream,
  7792. UINT InputStride,
  7793. UINT VectorCount,
  7794. FLOAT ViewportX,
  7795. FLOAT ViewportY,
  7796. FLOAT ViewportWidth,
  7797. FLOAT ViewportHeight,
  7798. FLOAT ViewportMinZ,
  7799. FLOAT ViewportMaxZ,
  7800. CXMMATRIX Projection,
  7801. CXMMATRIX View,
  7802. CXMMATRIX World
  7803. )
  7804. {
  7805. #if defined(_XM_NO_INTRINSICS_)
  7806. XMMATRIX Transform;
  7807. XMVECTOR V;
  7808. XMVECTOR Scale;
  7809. XMVECTOR Offset;
  7810. XMVECTOR Result;
  7811. UINT i;
  7812. FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
  7813. FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
  7814. BYTE* pInputVector = (BYTE*)pInputStream;
  7815. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7816. XMASSERT(pOutputStream);
  7817. XMASSERT(pInputStream);
  7818. Scale = XMVectorSet(HalfViewportWidth,
  7819. -HalfViewportHeight,
  7820. ViewportMaxZ - ViewportMinZ,
  7821. 1.0f);
  7822. Offset = XMVectorSet(ViewportX + HalfViewportWidth,
  7823. ViewportY + HalfViewportHeight,
  7824. ViewportMinZ,
  7825. 0.0f);
  7826. Transform = XMMatrixMultiply(World, View);
  7827. Transform = XMMatrixMultiply(Transform, Projection);
  7828. for (i = 0; i < VectorCount; i++)
  7829. {
  7830. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  7831. Result = XMVector3TransformCoord(V, Transform);
  7832. Result = XMVectorMultiplyAdd(Result, Scale, Offset);
  7833. XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
  7834. pInputVector += InputStride;
  7835. pOutputVector += OutputStride;
  7836. }
  7837. return pOutputStream;
  7838. #elif defined(_XM_SSE_INTRINSICS_)
  7839. XMASSERT(pOutputStream);
  7840. XMASSERT(pInputStream);
  7841. XMMATRIX Transform;
  7842. XMVECTOR V;
  7843. XMVECTOR Scale;
  7844. XMVECTOR Offset;
  7845. XMVECTOR Result;
  7846. UINT i;
  7847. FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
  7848. FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
  7849. BYTE* pInputVector = (BYTE*)pInputStream;
  7850. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7851. Scale = XMVectorSet(HalfViewportWidth,
  7852. -HalfViewportHeight,
  7853. ViewportMaxZ - ViewportMinZ,
  7854. 1.0f);
  7855. Offset = XMVectorSet(ViewportX + HalfViewportWidth,
  7856. ViewportY + HalfViewportHeight,
  7857. ViewportMinZ,
  7858. 0.0f);
  7859. Transform = XMMatrixMultiply(World, View);
  7860. Transform = XMMatrixMultiply(Transform, Projection);
  7861. for (i = 0; i < VectorCount; i++)
  7862. {
  7863. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  7864. Result = XMVector3TransformCoord(V, Transform);
  7865. Result = _mm_mul_ps(Result,Scale);
  7866. Result = _mm_add_ps(Result,Offset);
  7867. XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
  7868. pInputVector += InputStride;
  7869. pOutputVector += OutputStride;
  7870. }
  7871. return pOutputStream;
  7872. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  7873. #endif // _XM_VMX128_INTRINSICS_
  7874. }
  7875. //------------------------------------------------------------------------------
  7876. XMFINLINE XMVECTOR XMVector3Unproject
  7877. (
  7878. FXMVECTOR V,
  7879. FLOAT ViewportX,
  7880. FLOAT ViewportY,
  7881. FLOAT ViewportWidth,
  7882. FLOAT ViewportHeight,
  7883. FLOAT ViewportMinZ,
  7884. FLOAT ViewportMaxZ,
  7885. CXMMATRIX Projection,
  7886. CXMMATRIX View,
  7887. CXMMATRIX World
  7888. )
  7889. {
  7890. #if defined(_XM_NO_INTRINSICS_)
  7891. XMMATRIX Transform;
  7892. XMVECTOR Scale;
  7893. XMVECTOR Offset;
  7894. XMVECTOR Determinant;
  7895. XMVECTOR Result;
  7896. CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);
  7897. Scale = XMVectorSet(ViewportWidth * 0.5f,
  7898. -ViewportHeight * 0.5f,
  7899. ViewportMaxZ - ViewportMinZ,
  7900. 1.0f);
  7901. Scale = XMVectorReciprocal(Scale);
  7902. Offset = XMVectorSet(-ViewportX,
  7903. -ViewportY,
  7904. -ViewportMinZ,
  7905. 0.0f);
  7906. Offset = XMVectorMultiplyAdd(Scale, Offset, D);
  7907. Transform = XMMatrixMultiply(World, View);
  7908. Transform = XMMatrixMultiply(Transform, Projection);
  7909. Transform = XMMatrixInverse(&Determinant, Transform);
  7910. Result = XMVectorMultiplyAdd(V, Scale, Offset);
  7911. Result = XMVector3TransformCoord(Result, Transform);
  7912. return Result;
  7913. #elif defined(_XM_SSE_INTRINSICS_)
  7914. XMMATRIX Transform;
  7915. XMVECTOR Scale;
  7916. XMVECTOR Offset;
  7917. XMVECTOR Determinant;
  7918. XMVECTOR Result;
  7919. CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};
  7920. Scale = XMVectorSet(ViewportWidth * 0.5f,
  7921. -ViewportHeight * 0.5f,
  7922. ViewportMaxZ - ViewportMinZ,
  7923. 1.0f);
  7924. Scale = XMVectorReciprocal(Scale);
  7925. Offset = XMVectorSet(-ViewportX,
  7926. -ViewportY,
  7927. -ViewportMinZ,
  7928. 0.0f);
  7929. Offset = _mm_mul_ps(Offset,Scale);
  7930. Offset = _mm_add_ps(Offset,D);
  7931. Transform = XMMatrixMultiply(World, View);
  7932. Transform = XMMatrixMultiply(Transform, Projection);
  7933. Transform = XMMatrixInverse(&Determinant, Transform);
  7934. Result = _mm_mul_ps(V,Scale);
  7935. Result = _mm_add_ps(Result,Offset);
  7936. Result = XMVector3TransformCoord(Result, Transform);
  7937. return Result;
  7938. #else // _XM_VMX128_INTRINSICS_
  7939. #endif // _XM_VMX128_INTRINSICS_
  7940. }
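//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// A common picking pattern: unproject the cursor at the near plane (z = 0) and
// the far plane (z = 1) to build a world-space ray through that pixel. The
// viewport values must match those used for rendering; all names here are
// hypothetical.
XMFINLINE VOID XMExamplePickRay(FLOAT fPixelX, FLOAT fPixelY, CXMMATRIX mProj, CXMMATRIX mView, XMVECTOR* pRayStart, XMVECTOR* pRayEnd)
{
    XMVECTOR vNear = XMVectorSet(fPixelX, fPixelY, 0.0f, 0.0f);
    XMVECTOR vFar = XMVectorSet(fPixelX, fPixelY, 1.0f, 0.0f);
    *pRayStart = XMVector3Unproject(vNear, 0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f, mProj, mView, XMMatrixIdentity());
    *pRayEnd = XMVector3Unproject(vFar, 0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f, mProj, mView, XMMatrixIdentity());
}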
  7941. //------------------------------------------------------------------------------
  7942. XMINLINE XMFLOAT3* XMVector3UnprojectStream
  7943. (
  7944. XMFLOAT3* pOutputStream,
  7945. UINT OutputStride,
  7946. CONST XMFLOAT3* pInputStream,
  7947. UINT InputStride,
  7948. UINT VectorCount,
  7949. FLOAT ViewportX,
  7950. FLOAT ViewportY,
  7951. FLOAT ViewportWidth,
  7952. FLOAT ViewportHeight,
  7953. FLOAT ViewportMinZ,
  7954. FLOAT ViewportMaxZ,
  7955. CXMMATRIX Projection,
  7956. CXMMATRIX View,
  7957. CXMMATRIX World)
  7958. {
  7959. #if defined(_XM_NO_INTRINSICS_)
  7960. XMMATRIX Transform;
  7961. XMVECTOR Scale;
  7962. XMVECTOR Offset;
  7963. XMVECTOR V;
  7964. XMVECTOR Determinant;
  7965. XMVECTOR Result;
  7966. UINT i;
  7967. BYTE* pInputVector = (BYTE*)pInputStream;
  7968. BYTE* pOutputVector = (BYTE*)pOutputStream;
  7969. CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);
  7970. XMASSERT(pOutputStream);
  7971. XMASSERT(pInputStream);
  7972. Scale = XMVectorSet(ViewportWidth * 0.5f,
  7973. -ViewportHeight * 0.5f,
  7974. ViewportMaxZ - ViewportMinZ,
  7975. 1.0f);
  7976. Scale = XMVectorReciprocal(Scale);
  7977. Offset = XMVectorSet(-ViewportX,
  7978. -ViewportY,
  7979. -ViewportMinZ,
  7980. 0.0f);
  7981. Offset = XMVectorMultiplyAdd(Scale, Offset, D);
  7982. Transform = XMMatrixMultiply(World, View);
  7983. Transform = XMMatrixMultiply(Transform, Projection);
  7984. Transform = XMMatrixInverse(&Determinant, Transform);
  7985. for (i = 0; i < VectorCount; i++)
  7986. {
  7987. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  7988. Result = XMVectorMultiplyAdd(V, Scale, Offset);
  7989. Result = XMVector3TransformCoord(Result, Transform);
  7990. XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
  7991. pInputVector += InputStride;
  7992. pOutputVector += OutputStride;
  7993. }
  7994. return pOutputStream;
  7995. #elif defined(_XM_SSE_INTRINSICS_)
  7996. XMASSERT(pOutputStream);
  7997. XMASSERT(pInputStream);
  7998. XMMATRIX Transform;
  7999. XMVECTOR Scale;
  8000. XMVECTOR Offset;
  8001. XMVECTOR V;
  8002. XMVECTOR Determinant;
  8003. XMVECTOR Result;
  8004. UINT i;
  8005. BYTE* pInputVector = (BYTE*)pInputStream;
  8006. BYTE* pOutputVector = (BYTE*)pOutputStream;
  8007. CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};
  8008. Scale = XMVectorSet(ViewportWidth * 0.5f,
  8009. -ViewportHeight * 0.5f,
  8010. ViewportMaxZ - ViewportMinZ,
  8011. 1.0f);
  8012. Scale = XMVectorReciprocal(Scale);
  8013. Offset = XMVectorSet(-ViewportX,
  8014. -ViewportY,
  8015. -ViewportMinZ,
  8016. 0.0f);
  8017. Offset = _mm_mul_ps(Offset,Scale);
  8018. Offset = _mm_add_ps(Offset,D);
  8019. Transform = XMMatrixMultiply(World, View);
  8020. Transform = XMMatrixMultiply(Transform, Projection);
  8021. Transform = XMMatrixInverse(&Determinant, Transform);
  8022. for (i = 0; i < VectorCount; i++)
  8023. {
  8024. V = XMLoadFloat3((XMFLOAT3*)pInputVector);
  8025. Result = XMVectorMultiplyAdd(V, Scale, Offset);
  8026. Result = XMVector3TransformCoord(Result, Transform);
  8027. XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
  8028. pInputVector += InputStride;
  8029. pOutputVector += OutputStride;
  8030. }
  8031. return pOutputStream;
  8032. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  8033. #endif // _XM_VMX128_INTRINSICS_
  8034. }
  8035. /****************************************************************************
  8036. *
  8037. * 4D Vector
  8038. *
  8039. ****************************************************************************/
  8040. //------------------------------------------------------------------------------
  8041. // Comparison operations
  8042. //------------------------------------------------------------------------------
  8043. //------------------------------------------------------------------------------
  8044. XMFINLINE BOOL XMVector4Equal
  8045. (
  8046. FXMVECTOR V1,
  8047. FXMVECTOR V2
  8048. )
  8049. {
  8050. #if defined(_XM_NO_INTRINSICS_)
  8051. return (((V1.x == V2.x) && (V1.y == V2.y) && (V1.z == V2.z) && (V1.w == V2.w)) != 0);
  8052. #elif defined(_XM_SSE_INTRINSICS_)
  8053. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  8054. return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
  8055. #else
  8056. return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
  8057. #endif
  8058. }
  8059. //------------------------------------------------------------------------------
  8060. XMFINLINE UINT XMVector4EqualR
  8061. (
  8062. FXMVECTOR V1,
  8063. FXMVECTOR V2
  8064. )
  8065. {
  8066. #if defined(_XM_NO_INTRINSICS_)
  8067. UINT CR = 0;
  8068. if ((V1.x == V2.x) &&
  8069. (V1.y == V2.y) &&
  8070. (V1.z == V2.z) &&
  8071. (V1.w == V2.w))
  8072. {
  8073. CR = XM_CRMASK_CR6TRUE;
  8074. }
  8075. else if ((V1.x != V2.x) &&
  8076. (V1.y != V2.y) &&
  8077. (V1.z != V2.z) &&
  8078. (V1.w != V2.w))
  8079. {
  8080. CR = XM_CRMASK_CR6FALSE;
  8081. }
  8082. return CR;
  8083. #elif defined(_XM_SSE_INTRINSICS_)
  8084. XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
  8085. int iTest = _mm_movemask_ps(vTemp);
  8086. UINT CR = 0;
  8087. if (iTest==0xf) // All equal?
  8088. {
  8089. CR = XM_CRMASK_CR6TRUE;
  8090. }
  8091. else if (iTest==0) // All not equal?
  8092. {
  8093. CR = XM_CRMASK_CR6FALSE;
  8094. }
  8095. return CR;
  8096. #else // _XM_VMX128_INTRINSICS_
  8097. #endif // _XM_VMX128_INTRINSICS_
  8098. }
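//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// The R-suffixed comparisons fold the per-component result into a CR6-style
// mask, so one call can answer both "all equal?" and "all different?" through
// the XMComparison helpers. Names are hypothetical.
XMFINLINE VOID XMExampleClassifyComparison(FXMVECTOR V1, FXMVECTOR V2)
{
    UINT uCR = XMVector4EqualR(V1, V2);
    if (XMComparisonAllTrue(uCR))
    {
        // Every component matched
    }
    else if (XMComparisonAllFalse(uCR))
    {
        // Every component differed
    }
    // Otherwise the comparison was mixed
}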
  8099. //------------------------------------------------------------------------------
  8100. XMFINLINE BOOL XMVector4EqualInt
  8101. (
  8102. FXMVECTOR V1,
  8103. FXMVECTOR V2
  8104. )
  8105. {
  8106. #if defined(_XM_NO_INTRINSICS_)
  8107. return (((V1.u[0] == V2.u[0]) && (V1.u[1] == V2.u[1]) && (V1.u[2] == V2.u[2]) && (V1.u[3] == V2.u[3])) != 0);
  8108. #elif defined(_XM_SSE_INTRINSICS_)
  8109. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  8110. return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
  8111. #else
  8112. return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
  8113. #endif
  8114. }
  8115. //------------------------------------------------------------------------------
  8116. XMFINLINE UINT XMVector4EqualIntR
  8117. (
  8118. FXMVECTOR V1,
  8119. FXMVECTOR V2
  8120. )
  8121. {
  8122. #if defined(_XM_NO_INTRINSICS_)
  8123. UINT CR = 0;
  8124. if (V1.u[0] == V2.u[0] &&
  8125. V1.u[1] == V2.u[1] &&
  8126. V1.u[2] == V2.u[2] &&
  8127. V1.u[3] == V2.u[3])
  8128. {
  8129. CR = XM_CRMASK_CR6TRUE;
  8130. }
  8131. else if (V1.u[0] != V2.u[0] &&
  8132. V1.u[1] != V2.u[1] &&
  8133. V1.u[2] != V2.u[2] &&
  8134. V1.u[3] != V2.u[3])
  8135. {
  8136. CR = XM_CRMASK_CR6FALSE;
  8137. }
  8138. return CR;
  8139. #elif defined(_XM_SSE_INTRINSICS_)
  8140. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  8141. int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
  8142. UINT CR = 0;
  8143. if (iTest==0xf) // All equal?
  8144. {
  8145. CR = XM_CRMASK_CR6TRUE;
  8146. }
  8147. else if (iTest==0) // All not equal?
  8148. {
  8149. CR = XM_CRMASK_CR6FALSE;
  8150. }
  8151. return CR;
  8152. #else // _XM_VMX128_INTRINSICS_
  8153. #endif // _XM_VMX128_INTRINSICS_
  8154. }
//------------------------------------------------------------------------------
8155. XMFINLINE BOOL XMVector4NearEqual
  8156. (
  8157. FXMVECTOR V1,
  8158. FXMVECTOR V2,
  8159. FXMVECTOR Epsilon
  8160. )
  8161. {
  8162. #if defined(_XM_NO_INTRINSICS_)
  8163. FLOAT dx, dy, dz, dw;
  8164. dx = fabsf(V1.x-V2.x);
  8165. dy = fabsf(V1.y-V2.y);
  8166. dz = fabsf(V1.z-V2.z);
  8167. dw = fabsf(V1.w-V2.w);
  8168. return (((dx <= Epsilon.x) &&
  8169. (dy <= Epsilon.y) &&
  8170. (dz <= Epsilon.z) &&
  8171. (dw <= Epsilon.w)) != 0);
  8172. #elif defined(_XM_SSE_INTRINSICS_)
  8173. // Get the difference
  8174. XMVECTOR vDelta = _mm_sub_ps(V1,V2);
  8175. // Get the absolute value of the difference
  8176. XMVECTOR vTemp = _mm_setzero_ps();
  8177. vTemp = _mm_sub_ps(vTemp,vDelta);
  8178. vTemp = _mm_max_ps(vTemp,vDelta);
  8179. vTemp = _mm_cmple_ps(vTemp,Epsilon);
  8180. return ((_mm_movemask_ps(vTemp)==0xf) != 0);
  8181. #else // _XM_VMX128_INTRINSICS_
  8182. #endif // _XM_VMX128_INTRINSICS_
  8183. }
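//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// Floating point results rarely compare exactly equal after arithmetic, so a
// per-component epsilon test is the usual way to validate them. The tolerance
// below is an arbitrary example value, not a library constant.
XMFINLINE BOOL XMExampleRoughlyEqual(FXMVECTOR V1, FXMVECTOR V2)
{
    XMVECTOR vEpsilon = XMVectorReplicate(1.0e-4f);
    return XMVector4NearEqual(V1, V2, vEpsilon);
}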
  8184. //------------------------------------------------------------------------------
  8185. XMFINLINE BOOL XMVector4NotEqual
  8186. (
  8187. FXMVECTOR V1,
  8188. FXMVECTOR V2
  8189. )
  8190. {
  8191. #if defined(_XM_NO_INTRINSICS_)
  8192. return (((V1.x != V2.x) || (V1.y != V2.y) || (V1.z != V2.z) || (V1.w != V2.w)) != 0);
  8193. #elif defined(_XM_SSE_INTRINSICS_)
  8194. XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
  8195. return ((_mm_movemask_ps(vTemp)) != 0);
  8196. #else
  8197. return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
  8198. #endif
  8199. }
  8200. //------------------------------------------------------------------------------
  8201. XMFINLINE BOOL XMVector4NotEqualInt
  8202. (
  8203. FXMVECTOR V1,
  8204. FXMVECTOR V2
  8205. )
  8206. {
  8207. #if defined(_XM_NO_INTRINSICS_)
  8208. return (((V1.u[0] != V2.u[0]) || (V1.u[1] != V2.u[1]) || (V1.u[2] != V2.u[2]) || (V1.u[3] != V2.u[3])) != 0);
  8209. #elif defined(_XM_SSE_INTRINSICS_)
  8210. __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
  8211. return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
  8212. #else
  8213. return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
  8214. #endif
  8215. }
  8216. //------------------------------------------------------------------------------
  8217. XMFINLINE BOOL XMVector4Greater
  8218. (
  8219. FXMVECTOR V1,
  8220. FXMVECTOR V2
  8221. )
  8222. {
  8223. #if defined(_XM_NO_INTRINSICS_)
  8224. return (((V1.x > V2.x) && (V1.y > V2.y) && (V1.z > V2.z) && (V1.w > V2.w)) != 0);
  8225. #elif defined(_XM_SSE_INTRINSICS_)
  8226. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  8227. return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
  8228. #else
  8229. return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
  8230. #endif
  8231. }
  8232. //------------------------------------------------------------------------------
  8233. XMFINLINE UINT XMVector4GreaterR
  8234. (
  8235. FXMVECTOR V1,
  8236. FXMVECTOR V2
  8237. )
  8238. {
  8239. #if defined(_XM_NO_INTRINSICS_)
  8240. UINT CR = 0;
  8241. if (V1.x > V2.x &&
  8242. V1.y > V2.y &&
  8243. V1.z > V2.z &&
  8244. V1.w > V2.w)
  8245. {
  8246. CR = XM_CRMASK_CR6TRUE;
  8247. }
  8248. else if (V1.x <= V2.x &&
  8249. V1.y <= V2.y &&
  8250. V1.z <= V2.z &&
  8251. V1.w <= V2.w)
  8252. {
  8253. CR = XM_CRMASK_CR6FALSE;
  8254. }
  8255. return CR;
  8256. #elif defined(_XM_SSE_INTRINSICS_)
  8257. UINT CR = 0;
  8258. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  8259. int iTest = _mm_movemask_ps(vTemp);
  8260. if (iTest==0xf) {
  8261. CR = XM_CRMASK_CR6TRUE;
  8262. }
  8263. else if (!iTest)
  8264. {
  8265. CR = XM_CRMASK_CR6FALSE;
  8266. }
  8267. return CR;
  8268. #else // _XM_VMX128_INTRINSICS_
  8269. #endif // _XM_VMX128_INTRINSICS_
  8270. }
  8271. //------------------------------------------------------------------------------
  8272. XMFINLINE BOOL XMVector4GreaterOrEqual
  8273. (
  8274. FXMVECTOR V1,
  8275. FXMVECTOR V2
  8276. )
  8277. {
  8278. #if defined(_XM_NO_INTRINSICS_)
  8279. return (((V1.x >= V2.x) && (V1.y >= V2.y) && (V1.z >= V2.z) && (V1.w >= V2.w)) != 0);
  8280. #elif defined(_XM_SSE_INTRINSICS_)
  8281. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  8282. return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
  8283. #else
  8284. return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
  8285. #endif
  8286. }
  8287. //------------------------------------------------------------------------------
  8288. XMFINLINE UINT XMVector4GreaterOrEqualR
  8289. (
  8290. FXMVECTOR V1,
  8291. FXMVECTOR V2
  8292. )
  8293. {
  8294. #if defined(_XM_NO_INTRINSICS_)
  8295. UINT CR = 0;
  8296. if ((V1.x >= V2.x) &&
  8297. (V1.y >= V2.y) &&
  8298. (V1.z >= V2.z) &&
  8299. (V1.w >= V2.w))
  8300. {
  8301. CR = XM_CRMASK_CR6TRUE;
  8302. }
  8303. else if ((V1.x < V2.x) &&
  8304. (V1.y < V2.y) &&
  8305. (V1.z < V2.z) &&
  8306. (V1.w < V2.w))
  8307. {
  8308. CR = XM_CRMASK_CR6FALSE;
  8309. }
  8310. return CR;
  8311. #elif defined(_XM_SSE_INTRINSICS_)
  8312. UINT CR = 0;
  8313. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  8314. int iTest = _mm_movemask_ps(vTemp);
  8315. if (iTest==0x0f)
  8316. {
  8317. CR = XM_CRMASK_CR6TRUE;
  8318. }
  8319. else if (!iTest)
  8320. {
  8321. CR = XM_CRMASK_CR6FALSE;
  8322. }
  8323. return CR;
  8324. #else // _XM_VMX128_INTRINSICS_
  8325. #endif // _XM_VMX128_INTRINSICS_
  8326. }
  8327. //------------------------------------------------------------------------------
  8328. XMFINLINE BOOL XMVector4Less
  8329. (
  8330. FXMVECTOR V1,
  8331. FXMVECTOR V2
  8332. )
  8333. {
  8334. #if defined(_XM_NO_INTRINSICS_)
  8335. return (((V1.x < V2.x) && (V1.y < V2.y) && (V1.z < V2.z) && (V1.w < V2.w)) != 0);
  8336. #elif defined(_XM_SSE_INTRINSICS_)
  8337. XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
  8338. return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
  8339. #else
  8340. return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
  8341. #endif
  8342. }
  8343. //------------------------------------------------------------------------------
  8344. XMFINLINE BOOL XMVector4LessOrEqual
  8345. (
  8346. FXMVECTOR V1,
  8347. FXMVECTOR V2
  8348. )
  8349. {
  8350. #if defined(_XM_NO_INTRINSICS_)
  8351. return (((V1.x <= V2.x) && (V1.y <= V2.y) && (V1.z <= V2.z) && (V1.w <= V2.w)) != 0);
  8352. #elif defined(_XM_SSE_INTRINSICS_)
  8353. XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
  8354. return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
  8355. #else
  8356. return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
  8357. #endif
  8358. }
  8359. //------------------------------------------------------------------------------
  8360. XMFINLINE BOOL XMVector4InBounds
  8361. (
  8362. FXMVECTOR V,
  8363. FXMVECTOR Bounds
  8364. )
  8365. {
  8366. #if defined(_XM_NO_INTRINSICS_)
  8367. return (((V.x <= Bounds.x && V.x >= -Bounds.x) &&
  8368. (V.y <= Bounds.y && V.y >= -Bounds.y) &&
  8369. (V.z <= Bounds.z && V.z >= -Bounds.z) &&
  8370. (V.w <= Bounds.w && V.w >= -Bounds.w)) != 0);
  8371. #elif defined(_XM_SSE_INTRINSICS_)
  8372. // Test if less than or equal
  8373. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  8374. // Negate the bounds
  8375. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  8376. // Test if greater or equal (Reversed)
  8377. vTemp2 = _mm_cmple_ps(vTemp2,V);
  8378. // Blend answers
  8379. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  8380. // All in bounds?
  8381. return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
  8382. #else
  8383. return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
  8384. #endif
  8385. }
  8386. //------------------------------------------------------------------------------
  8387. XMFINLINE UINT XMVector4InBoundsR
  8388. (
  8389. FXMVECTOR V,
  8390. FXMVECTOR Bounds
  8391. )
  8392. {
  8393. #if defined(_XM_NO_INTRINSICS_)
  8394. UINT CR = 0;
  8395. if ((V.x <= Bounds.x && V.x >= -Bounds.x) &&
  8396. (V.y <= Bounds.y && V.y >= -Bounds.y) &&
  8397. (V.z <= Bounds.z && V.z >= -Bounds.z) &&
  8398. (V.w <= Bounds.w && V.w >= -Bounds.w))
  8399. {
  8400. CR = XM_CRMASK_CR6BOUNDS;
  8401. }
  8402. return CR;
  8403. #elif defined(_XM_SSE_INTRINSICS_)
  8404. // Test if less than or equal
  8405. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  8406. // Negate the bounds
  8407. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  8408. // Test if greater or equal (Reversed)
  8409. vTemp2 = _mm_cmple_ps(vTemp2,V);
  8410. // Blend answers
  8411. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  8412. // All in bounds?
  8413. return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
  8414. #else // _XM_VMX128_INTRINSICS_
  8415. #endif // _XM_VMX128_INTRINSICS_
  8416. }
  8417. //------------------------------------------------------------------------------
  8418. XMFINLINE BOOL XMVector4IsNaN
  8419. (
  8420. FXMVECTOR V
  8421. )
  8422. {
  8423. #if defined(_XM_NO_INTRINSICS_)
  8424. return (XMISNAN(V.x) ||
  8425. XMISNAN(V.y) ||
  8426. XMISNAN(V.z) ||
  8427. XMISNAN(V.w));
  8428. #elif defined(_XM_SSE_INTRINSICS_)
  8429. // Test against itself. NaN is always not equal
  8430. XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
  8431. // If any are NaN, the mask is non-zero
  8432. return (_mm_movemask_ps(vTempNan)!=0);
  8433. #else // _XM_VMX128_INTRINSICS_
  8434. #endif // _XM_VMX128_INTRINSICS_
  8435. }
  8436. //------------------------------------------------------------------------------
  8437. XMFINLINE BOOL XMVector4IsInfinite
  8438. (
  8439. FXMVECTOR V
  8440. )
  8441. {
  8442. #if defined(_XM_NO_INTRINSICS_)
  8443. return (XMISINF(V.x) ||
  8444. XMISINF(V.y) ||
  8445. XMISINF(V.z) ||
  8446. XMISINF(V.w));
  8447. #elif defined(_XM_SSE_INTRINSICS_)
  8448. // Mask off the sign bit
  8449. XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
  8450. // Compare to infinity
  8451. vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
8452. // If any lane compared equal to infinity, the mask is non-zero
  8453. return (_mm_movemask_ps(vTemp) != 0);
  8454. #else // _XM_VMX128_INTRINSICS_
  8455. #endif // _XM_VMX128_INTRINSICS_
  8456. }
  8457. //------------------------------------------------------------------------------
  8458. // Computation operations
  8459. //------------------------------------------------------------------------------
  8460. //------------------------------------------------------------------------------
  8461. XMFINLINE XMVECTOR XMVector4Dot
  8462. (
  8463. FXMVECTOR V1,
  8464. FXMVECTOR V2
  8465. )
  8466. {
  8467. #if defined(_XM_NO_INTRINSICS_)
  8468. XMVECTOR Result;
  8469. Result.v[0] =
  8470. Result.v[1] =
  8471. Result.v[2] =
  8472. Result.v[3] = V1.v[0] * V2.v[0] + V1.v[1] * V2.v[1] + V1.v[2] * V2.v[2] + V1.v[3] * V2.v[3];
  8473. return Result;
  8474. #elif defined(_XM_SSE_INTRINSICS_)
  8475. XMVECTOR vTemp2 = V2;
  8476. XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
  8477. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
  8478. vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
  8479. vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
  8480. vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
  8481. return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
  8482. #else // _XM_VMX128_INTRINSICS_
  8483. #endif // _XM_VMX128_INTRINSICS_
  8484. }
  8485. //------------------------------------------------------------------------------
  8486. XMFINLINE XMVECTOR XMVector4Cross
  8487. (
  8488. FXMVECTOR V1,
  8489. FXMVECTOR V2,
  8490. FXMVECTOR V3
  8491. )
  8492. {
  8493. #if defined(_XM_NO_INTRINSICS_)
  8494. XMVECTOR Result;
  8495. Result.x = (((V2.z*V3.w)-(V2.w*V3.z))*V1.y)-(((V2.y*V3.w)-(V2.w*V3.y))*V1.z)+(((V2.y*V3.z)-(V2.z*V3.y))*V1.w);
  8496. Result.y = (((V2.w*V3.z)-(V2.z*V3.w))*V1.x)-(((V2.w*V3.x)-(V2.x*V3.w))*V1.z)+(((V2.z*V3.x)-(V2.x*V3.z))*V1.w);
  8497. Result.z = (((V2.y*V3.w)-(V2.w*V3.y))*V1.x)-(((V2.x*V3.w)-(V2.w*V3.x))*V1.y)+(((V2.x*V3.y)-(V2.y*V3.x))*V1.w);
  8498. Result.w = (((V2.z*V3.y)-(V2.y*V3.z))*V1.x)-(((V2.z*V3.x)-(V2.x*V3.z))*V1.y)+(((V2.y*V3.x)-(V2.x*V3.y))*V1.z);
  8499. return Result;
  8500. #elif defined(_XM_SSE_INTRINSICS_)
  8501. // V2zwyz * V3wzwy
  8502. XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
  8503. XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
  8504. vResult = _mm_mul_ps(vResult,vTemp3);
  8505. // - V2wzwy * V3zwyz
  8506. XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
  8507. vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
  8508. vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
  8509. vResult = _mm_sub_ps(vResult,vTemp2);
  8510. // term1 * V1yxxx
  8511. XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
  8512. vResult = _mm_mul_ps(vResult,vTemp1);
  8513. // V2ywxz * V3wxwx
  8514. vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
  8515. vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
  8516. vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
  8517. // - V2wxwx * V3ywxz
  8518. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
  8519. vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
  8520. vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
  8521. vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
  8522. // vResult - temp * V1zzyy
  8523. vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
  8524. vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
  8525. vResult = _mm_sub_ps(vResult,vTemp1);
  8526. // V2yzxy * V3zxyx
  8527. vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
  8528. vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
  8529. vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
  8530. // - V2zxyx * V3yzxy
  8531. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
  8532. vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
  8533. vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
  8534. vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
  8535. // vResult + term * V1wwwz
  8536. vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
  8537. vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
  8538. vResult = _mm_add_ps(vResult,vTemp3);
  8539. return vResult;
  8540. #else // _XM_VMX128_INTRINSICS_
  8541. #endif // _XM_VMX128_INTRINSICS_
  8542. }
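//------------------------------------------------------------------------------
// Usage sketch (illustrative addition, not part of the original library).
// The 4D "cross product" above is the determinant expansion with V1, V2, V3 as
// rows, so the result is orthogonal to all three inputs. Feeding it the X, Y
// and Z axes recovers the remaining axis. Names are hypothetical.
XMFINLINE XMVECTOR XMExampleOrthogonalToThree()
{
    XMVECTOR vX = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR vY = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
    XMVECTOR vZ = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
    // Evaluates to (0,0,0,-1), which dots to zero against all three inputs
    return XMVector4Cross(vX, vY, vZ);
}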
  8543. //------------------------------------------------------------------------------
  8544. XMFINLINE XMVECTOR XMVector4LengthSq
  8545. (
  8546. FXMVECTOR V
  8547. )
  8548. {
  8549. return XMVector4Dot(V, V);
  8550. }
  8551. //------------------------------------------------------------------------------
  8552. XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
  8553. (
  8554. FXMVECTOR V
  8555. )
  8556. {
  8557. #if defined(_XM_NO_INTRINSICS_)
  8558. XMVECTOR Result;
  8559. Result = XMVector4LengthSq(V);
  8560. Result = XMVectorReciprocalSqrtEst(Result);
  8561. return Result;
  8562. #elif defined(_XM_SSE_INTRINSICS_)
  8563. // Perform the dot product on x,y,z and w
  8564. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8565. // vTemp has z and w
  8566. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8567. // x+z, y+w
  8568. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8569. // x+z,x+z,x+z,y+w
  8570. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8571. // ??,??,y+w,y+w
  8572. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8573. // ??,??,x+z+y+w,??
  8574. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8575. // Splat the length
  8576. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
  8577. // Get the reciprocal
  8578. vLengthSq = _mm_rsqrt_ps(vLengthSq);
  8579. return vLengthSq;
  8580. #else // _XM_VMX128_INTRINSICS_
  8581. #endif // _XM_VMX128_INTRINSICS_
  8582. }
  8583. //------------------------------------------------------------------------------
  8584. XMFINLINE XMVECTOR XMVector4ReciprocalLength
  8585. (
  8586. FXMVECTOR V
  8587. )
  8588. {
  8589. #if defined(_XM_NO_INTRINSICS_)
  8590. XMVECTOR Result;
  8591. Result = XMVector4LengthSq(V);
  8592. Result = XMVectorReciprocalSqrt(Result);
  8593. return Result;
  8594. #elif defined(_XM_SSE_INTRINSICS_)
  8595. // Perform the dot product on x,y,z and w
  8596. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8597. // vTemp has z and w
  8598. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8599. // x+z, y+w
  8600. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8601. // x+z,x+z,x+z,y+w
  8602. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8603. // ??,??,y+w,y+w
  8604. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8605. // ??,??,x+z+y+w,??
  8606. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8607. // Splat the length
  8608. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
  8609. // Get the reciprocal
  8610. vLengthSq = _mm_sqrt_ps(vLengthSq);
8611. // Full-precision reciprocal via divide (unlike the _mm_rsqrt_ps estimate)
  8612. vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
  8613. return vLengthSq;
  8614. #else // _XM_VMX128_INTRINSICS_
  8615. #endif // _XM_VMX128_INTRINSICS_
  8616. }
  8617. //------------------------------------------------------------------------------
  8618. XMFINLINE XMVECTOR XMVector4LengthEst
  8619. (
  8620. FXMVECTOR V
  8621. )
  8622. {
  8623. #if defined(_XM_NO_INTRINSICS_)
  8624. XMVECTOR Result;
  8625. Result = XMVector4LengthSq(V);
  8626. Result = XMVectorSqrtEst(Result);
  8627. return Result;
  8628. #elif defined(_XM_SSE_INTRINSICS_)
  8629. // Perform the dot product on x,y,z and w
  8630. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8631. // vTemp has z and w
  8632. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8633. // x+z, y+w
  8634. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8635. // x+z,x+z,x+z,y+w
  8636. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8637. // ??,??,y+w,y+w
  8638. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8639. // ??,??,x+z+y+w,??
  8640. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8641. // Splat the length
  8642. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
8643. // Take the square root (no divide follows; this is the final length)
  8644. vLengthSq = _mm_sqrt_ps(vLengthSq);
  8645. return vLengthSq;
  8646. #else // _XM_VMX128_INTRINSICS_
  8647. #endif // _XM_VMX128_INTRINSICS_
  8648. }
  8649. //------------------------------------------------------------------------------
  8650. XMFINLINE XMVECTOR XMVector4Length
  8651. (
  8652. FXMVECTOR V
  8653. )
  8654. {
  8655. #if defined(_XM_NO_INTRINSICS_)
  8656. XMVECTOR Result;
  8657. Result = XMVector4LengthSq(V);
  8658. Result = XMVectorSqrt(Result);
  8659. return Result;
  8660. #elif defined(_XM_SSE_INTRINSICS_)
  8661. // Perform the dot product on x,y,z and w
  8662. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8663. // vTemp has z and w
  8664. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8665. // x+z, y+w
  8666. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8667. // x+z,x+z,x+z,y+w
  8668. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8669. // ??,??,y+w,y+w
  8670. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8671. // ??,??,x+z+y+w,??
  8672. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8673. // Splat the length
  8674. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
8675. // Take the square root (no divide follows; this is the final length)
  8676. vLengthSq = _mm_sqrt_ps(vLengthSq);
  8677. return vLengthSq;
  8678. #else // _XM_VMX128_INTRINSICS_
  8679. #endif // _XM_VMX128_INTRINSICS_
  8680. }
  8681. //------------------------------------------------------------------------------
  8682. // XMVector4NormalizeEst uses a reciprocal estimate and
  8683. // returns QNaN on zero and infinite vectors.
  8684. XMFINLINE XMVECTOR XMVector4NormalizeEst
  8685. (
  8686. FXMVECTOR V
  8687. )
  8688. {
  8689. #if defined(_XM_NO_INTRINSICS_)
  8690. XMVECTOR Result;
  8691. Result = XMVector4ReciprocalLength(V);
  8692. Result = XMVectorMultiply(V, Result);
  8693. return Result;
  8694. #elif defined(_XM_SSE_INTRINSICS_)
  8695. // Perform the dot product on x,y,z and w
  8696. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8697. // vTemp has z and w
  8698. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8699. // x+z, y+w
  8700. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8701. // x+z,x+z,x+z,y+w
  8702. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8703. // ??,??,y+w,y+w
  8704. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8705. // ??,??,x+z+y+w,??
  8706. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8707. // Splat the length
  8708. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
8709. // Reciprocal square root estimate; a multiply below replaces the divide
  8710. XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
8711. // Failsafe on zero (or epsilon) length vectors
  8712. // If the length is infinity, set the elements to zero
  8713. vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
  8714. // Reciprocal mul to perform the normalization
  8715. vResult = _mm_mul_ps(vResult,V);
  8716. // Any that are infinity, set to zero
  8717. vResult = _mm_and_ps(vResult,vLengthSq);
  8718. return vResult;
  8719. #else // _XM_VMX128_INTRINSICS_
  8720. #endif // _XM_VMX128_INTRINSICS_
  8721. }
  8722. //------------------------------------------------------------------------------
  8723. XMFINLINE XMVECTOR XMVector4Normalize
  8724. (
  8725. FXMVECTOR V
  8726. )
  8727. {
  8728. #if defined(_XM_NO_INTRINSICS_)
  8729. XMVECTOR LengthSq;
  8730. XMVECTOR Zero;
  8731. XMVECTOR InfiniteLength;
  8732. XMVECTOR ZeroLength;
  8733. XMVECTOR Select;
  8734. XMVECTOR Result;
  8735. LengthSq = XMVector4LengthSq(V);
  8736. Zero = XMVectorZero();
  8737. Result = XMVectorReciprocalSqrt(LengthSq);
  8738. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  8739. ZeroLength = XMVectorEqual(LengthSq, Zero);
  8740. Result = XMVectorMultiply(V, Result);
  8741. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  8742. Result = XMVectorSelect(LengthSq, Result, Select);
  8743. return Result;
  8744. #elif defined(_XM_SSE_INTRINSICS_)
  8745. // Perform the dot product on x,y,z and w
  8746. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8747. // vTemp has z and w
  8748. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8749. // x+z, y+w
  8750. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8751. // x+z,x+z,x+z,y+w
  8752. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8753. // ??,??,y+w,y+w
  8754. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8755. // ??,??,x+z+y+w,??
  8756. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8757. // Splat the length
  8758. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
  8759. // Prepare for the division
  8760. XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
8761. // Failsafe on zero (or epsilon) length vectors
  8762. // If the length is infinity, set the elements to zero
  8763. vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
  8764. // Divide to perform the normalization
  8765. vResult = _mm_div_ps(V,vResult);
  8766. // Any that are infinity, set to zero
  8767. vResult = _mm_and_ps(vResult,vLengthSq);
  8768. return vResult;
  8769. #else // _XM_VMX128_INTRINSICS_
  8770. #endif // _XM_VMX128_INTRINSICS_
  8771. }
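//------------------------------------------------------------------------------
// Usage note (illustrative addition, not part of the original library).
// XMVector4Normalize pays for a full square root and divide and carries the
// failsafe masking above for degenerate lengths, while XMVector4NormalizeEst
// uses the fast _mm_rsqrt_ps estimate and, per its header comment, returns
// QNaN on zero and infinite vectors. A common split is the Est variant for
// cosmetic per-frame math and the full version for gameplay logic. The
// wrapper name is hypothetical.
XMFINLINE XMVECTOR XMExampleUnitDirection(FXMVECTOR vDelta)
{
    // Prefer the full-precision path where downstream code assumes unit length
    return XMVector4Normalize(vDelta);
}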
  8772. //------------------------------------------------------------------------------
  8773. XMFINLINE XMVECTOR XMVector4ClampLength
  8774. (
  8775. FXMVECTOR V,
  8776. FLOAT LengthMin,
  8777. FLOAT LengthMax
  8778. )
  8779. {
  8780. #if defined(_XM_NO_INTRINSICS_)
  8781. XMVECTOR ClampMax;
  8782. XMVECTOR ClampMin;
  8783. ClampMax = XMVectorReplicate(LengthMax);
  8784. ClampMin = XMVectorReplicate(LengthMin);
  8785. return XMVector4ClampLengthV(V, ClampMin, ClampMax);
  8786. #elif defined(_XM_SSE_INTRINSICS_)
  8787. XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
  8788. XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
  8789. return XMVector4ClampLengthV(V, ClampMin, ClampMax);
  8790. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  8791. #endif // _XM_VMX128_INTRINSICS_
  8792. }
  8793. //------------------------------------------------------------------------------
  8794. XMFINLINE XMVECTOR XMVector4ClampLengthV
  8795. (
  8796. FXMVECTOR V,
  8797. FXMVECTOR LengthMin,
  8798. FXMVECTOR LengthMax
  8799. )
  8800. {
  8801. #if defined(_XM_NO_INTRINSICS_)
  8802. XMVECTOR ClampLength;
  8803. XMVECTOR LengthSq;
  8804. XMVECTOR RcpLength;
  8805. XMVECTOR Length;
  8806. XMVECTOR Normal;
  8807. XMVECTOR Zero;
  8808. XMVECTOR InfiniteLength;
  8809. XMVECTOR ZeroLength;
  8810. XMVECTOR Select;
  8811. XMVECTOR ControlMax;
  8812. XMVECTOR ControlMin;
  8813. XMVECTOR Control;
  8814. XMVECTOR Result;
  8815. XMASSERT((LengthMin.y == LengthMin.x) && (LengthMin.z == LengthMin.x) && (LengthMin.w == LengthMin.x));
  8816. XMASSERT((LengthMax.y == LengthMax.x) && (LengthMax.z == LengthMax.x) && (LengthMax.w == LengthMax.x));
  8817. XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
  8818. XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
  8819. XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
  8820. LengthSq = XMVector4LengthSq(V);
  8821. Zero = XMVectorZero();
  8822. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  8823. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  8824. ZeroLength = XMVectorEqual(LengthSq, Zero);
  8825. Normal = XMVectorMultiply(V, RcpLength);
  8826. Length = XMVectorMultiply(LengthSq, RcpLength);
  8827. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  8828. Length = XMVectorSelect(LengthSq, Length, Select);
  8829. Normal = XMVectorSelect(LengthSq, Normal, Select);
  8830. ControlMax = XMVectorGreater(Length, LengthMax);
  8831. ControlMin = XMVectorLess(Length, LengthMin);
  8832. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  8833. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  8834. Result = XMVectorMultiply(Normal, ClampLength);
  8835. // Preserve the original vector (with no precision loss) if the length falls within the given range
  8836. Control = XMVectorEqualInt(ControlMax, ControlMin);
  8837. Result = XMVectorSelect(Result, V, Control);
  8838. return Result;
  8839. #elif defined(_XM_SSE_INTRINSICS_)
  8840. XMVECTOR ClampLength;
  8841. XMVECTOR LengthSq;
  8842. XMVECTOR RcpLength;
  8843. XMVECTOR Length;
  8844. XMVECTOR Normal;
  8845. XMVECTOR Zero;
  8846. XMVECTOR InfiniteLength;
  8847. XMVECTOR ZeroLength;
  8848. XMVECTOR Select;
  8849. XMVECTOR ControlMax;
  8850. XMVECTOR ControlMin;
  8851. XMVECTOR Control;
  8852. XMVECTOR Result;
  8853. XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
  8854. XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
  8855. XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
  8856. XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
  8857. XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
  8858. LengthSq = XMVector4LengthSq(V);
  8859. Zero = XMVectorZero();
  8860. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  8861. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
  8862. ZeroLength = XMVectorEqual(LengthSq, Zero);
  8863. Normal = _mm_mul_ps(V, RcpLength);
  8864. Length = _mm_mul_ps(LengthSq, RcpLength);
  8865. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  8866. Length = XMVectorSelect(LengthSq, Length, Select);
  8867. Normal = XMVectorSelect(LengthSq, Normal, Select);
  8868. ControlMax = XMVectorGreater(Length, LengthMax);
  8869. ControlMin = XMVectorLess(Length, LengthMin);
  8870. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  8871. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  8872. Result = _mm_mul_ps(Normal, ClampLength);
  8873. // Preserve the original vector (with no precision loss) if the length falls within the given range
  8874. Control = XMVectorEqualInt(ControlMax,ControlMin);
  8875. Result = XMVectorSelect(Result,V,Control);
  8876. return Result;
  8877. #else // _XM_VMX128_INTRINSICS_
  8878. #endif // _XM_VMX128_INTRINSICS_
  8879. }
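//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper, not part of the original
// library): clamping a length-5 vector into the range [1,2] rescales it to
// length 2 while preserving its direction.
XMFINLINE XMVECTOR XMSampleVector4ClampLengthUsage()
{
    XMVECTOR V = XMVectorSet(3.0f, 0.0f, 4.0f, 0.0f); // length 5
    // Expected result: (1.2f, 0.0f, 1.6f, 0.0f), i.e. length 2
    return XMVector4ClampLength(V, 1.0f, 2.0f);
}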
  8880. //------------------------------------------------------------------------------
  8881. XMFINLINE XMVECTOR XMVector4Reflect
  8882. (
  8883. FXMVECTOR Incident,
  8884. FXMVECTOR Normal
  8885. )
  8886. {
  8887. #if defined(_XM_NO_INTRINSICS_)
  8888. XMVECTOR Result;
  8889. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  8890. Result = XMVector4Dot(Incident, Normal);
  8891. Result = XMVectorAdd(Result, Result);
  8892. Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
  8893. return Result;
  8894. #elif defined(_XM_SSE_INTRINSICS_)
  8895. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  8896. XMVECTOR Result = XMVector4Dot(Incident,Normal);
  8897. Result = _mm_add_ps(Result,Result);
  8898. Result = _mm_mul_ps(Result,Normal);
  8899. Result = _mm_sub_ps(Incident,Result);
  8900. return Result;
  8901. #else // _XM_VMX128_INTRINSICS_
  8902. #endif // _XM_VMX128_INTRINSICS_
  8903. }
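//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): reflecting an incident ray
// about a surface normal flips the component along the normal, per
// I - 2*dot(I,N)*N above.
XMFINLINE XMVECTOR XMSampleVector4ReflectUsage()
{
    XMVECTOR Incident = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);
    XMVECTOR Normal   = XMVectorSet(0.0f,  1.0f, 0.0f, 0.0f);
    // dot(Incident, Normal) == -1, so the expected result is (1, 1, 0, 0)
    return XMVector4Reflect(Incident, Normal);
}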
  8904. //------------------------------------------------------------------------------
  8905. XMFINLINE XMVECTOR XMVector4Refract
  8906. (
  8907. FXMVECTOR Incident,
  8908. FXMVECTOR Normal,
  8909. FLOAT RefractionIndex
  8910. )
  8911. {
  8912. #if defined(_XM_NO_INTRINSICS_)
  8913. XMVECTOR Index;
  8914. Index = XMVectorReplicate(RefractionIndex);
  8915. return XMVector4RefractV(Incident, Normal, Index);
  8916. #elif defined(_XM_SSE_INTRINSICS_)
  8917. XMVECTOR Index = _mm_set_ps1(RefractionIndex);
  8918. return XMVector4RefractV(Incident,Normal,Index);
8919. #else // _XM_VMX128_INTRINSICS_
  8920. #endif // _XM_VMX128_INTRINSICS_
  8921. }
  8922. //------------------------------------------------------------------------------
  8923. XMFINLINE XMVECTOR XMVector4RefractV
  8924. (
  8925. FXMVECTOR Incident,
  8926. FXMVECTOR Normal,
  8927. FXMVECTOR RefractionIndex
  8928. )
  8929. {
  8930. #if defined(_XM_NO_INTRINSICS_)
  8931. XMVECTOR IDotN;
  8932. XMVECTOR R;
  8933. CONST XMVECTOR Zero = XMVectorZero();
  8934. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  8935. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  8936. IDotN = XMVector4Dot(Incident, Normal);
  8937. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  8938. R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
  8939. R = XMVectorMultiply(R, RefractionIndex);
  8940. R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
  8941. if (XMVector4LessOrEqual(R, Zero))
  8942. {
  8943. // Total internal reflection
  8944. return Zero;
  8945. }
  8946. else
  8947. {
  8948. XMVECTOR Result;
  8949. // R = RefractionIndex * IDotN + sqrt(R)
  8950. R = XMVectorSqrt(R);
  8951. R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
  8952. // Result = RefractionIndex * Incident - Normal * R
  8953. Result = XMVectorMultiply(RefractionIndex, Incident);
  8954. Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
  8955. return Result;
  8956. }
  8957. #elif defined(_XM_SSE_INTRINSICS_)
  8958. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  8959. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  8960. XMVECTOR IDotN = XMVector4Dot(Incident,Normal);
  8961. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  8962. XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
  8963. R = _mm_sub_ps(g_XMOne,R);
  8964. R = _mm_mul_ps(R, RefractionIndex);
  8965. R = _mm_mul_ps(R, RefractionIndex);
  8966. R = _mm_sub_ps(g_XMOne,R);
  8967. XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
  8968. if (_mm_movemask_ps(vResult)==0x0f)
  8969. {
  8970. // Total internal reflection
  8971. vResult = g_XMZero;
  8972. }
  8973. else
  8974. {
  8975. // R = RefractionIndex * IDotN + sqrt(R)
  8976. R = _mm_sqrt_ps(R);
  8977. vResult = _mm_mul_ps(RefractionIndex, IDotN);
  8978. R = _mm_add_ps(R,vResult);
  8979. // Result = RefractionIndex * Incident - Normal * R
  8980. vResult = _mm_mul_ps(RefractionIndex, Incident);
  8981. R = _mm_mul_ps(R,Normal);
  8982. vResult = _mm_sub_ps(vResult,R);
  8983. }
  8984. return vResult;
  8985. #else // _XM_VMX128_INTRINSICS_
  8986. #endif // _XM_VMX128_INTRINSICS_
  8987. }
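//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): at normal incidence the
// refracted ray passes straight through for any refraction index, and the
// function returns a zero vector on total internal reflection.
XMFINLINE XMVECTOR XMSampleVector4RefractUsage()
{
    XMVECTOR Incident = XMVectorSet(0.0f, -1.0f, 0.0f, 0.0f);
    XMVECTOR Normal   = XMVectorSet(0.0f,  1.0f, 0.0f, 0.0f);
    // Air-to-glass index ratio of about 1/1.5; expected result: (0, -1, 0, 0)
    return XMVector4Refract(Incident, Normal, 1.0f / 1.5f);
}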
  8988. //------------------------------------------------------------------------------
  8989. XMFINLINE XMVECTOR XMVector4Orthogonal
  8990. (
  8991. FXMVECTOR V
  8992. )
  8993. {
  8994. #if defined(_XM_NO_INTRINSICS_)
  8995. XMVECTOR Result;
  8996. Result.v[0] = V.v[2];
  8997. Result.v[1] = V.v[3];
  8998. Result.v[2] = -V.v[0];
  8999. Result.v[3] = -V.v[1];
  9000. return Result;
  9001. #elif defined(_XM_SSE_INTRINSICS_)
  9002. static const XMVECTORF32 g_XMFlipZW = {1.0f,1.0f,-1.0f,-1.0f};
  9003. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
  9004. vResult = _mm_mul_ps(vResult,g_XMFlipZW);
  9005. return vResult;
  9006. #else // _XM_VMX128_INTRINSICS_
  9007. #endif // _XM_VMX128_INTRINSICS_
  9008. }
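//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): the (z, w, -x, -y) swizzle
// above is always perpendicular to the input, since
// dot(V, result) == x*z + y*w - z*x - w*y == 0.
XMFINLINE XMVECTOR XMSampleVector4OrthogonalUsage()
{
    XMVECTOR V = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
    // Expected result: (3, 4, -1, -2); XMVector4Dot(V, result) is zero
    return XMVector4Orthogonal(V);
}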
  9009. //------------------------------------------------------------------------------
  9010. XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
  9011. (
  9012. FXMVECTOR N1,
  9013. FXMVECTOR N2
  9014. )
  9015. {
  9016. #if defined(_XM_NO_INTRINSICS_)
  9017. XMVECTOR NegativeOne;
  9018. XMVECTOR One;
  9019. XMVECTOR Result;
  9020. Result = XMVector4Dot(N1, N2);
  9021. NegativeOne = XMVectorSplatConstant(-1, 0);
  9022. One = XMVectorSplatOne();
  9023. Result = XMVectorClamp(Result, NegativeOne, One);
  9024. Result = XMVectorACosEst(Result);
  9025. return Result;
  9026. #elif defined(_XM_SSE_INTRINSICS_)
  9027. XMVECTOR vResult = XMVector4Dot(N1,N2);
  9028. // Clamp to -1.0f to 1.0f
  9029. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
9030. vResult = _mm_min_ps(vResult,g_XMOne);
  9031. vResult = XMVectorACosEst(vResult);
  9032. return vResult;
  9033. #else // _XM_VMX128_INTRINSICS_
  9034. #endif // _XM_VMX128_INTRINSICS_
  9035. }
  9036. //------------------------------------------------------------------------------
  9037. XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
  9038. (
  9039. FXMVECTOR N1,
  9040. FXMVECTOR N2
  9041. )
  9042. {
  9043. #if defined(_XM_NO_INTRINSICS_)
  9044. XMVECTOR NegativeOne;
  9045. XMVECTOR One;
  9046. XMVECTOR Result;
  9047. Result = XMVector4Dot(N1, N2);
  9048. NegativeOne = XMVectorSplatConstant(-1, 0);
  9049. One = XMVectorSplatOne();
  9050. Result = XMVectorClamp(Result, NegativeOne, One);
  9051. Result = XMVectorACos(Result);
  9052. return Result;
  9053. #elif defined(_XM_SSE_INTRINSICS_)
  9054. XMVECTOR vResult = XMVector4Dot(N1,N2);
  9055. // Clamp to -1.0f to 1.0f
  9056. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
9057. vResult = _mm_min_ps(vResult,g_XMOne);
  9058. vResult = XMVectorACos(vResult);
  9059. return vResult;
  9060. #else // _XM_VMX128_INTRINSICS_
  9061. #endif // _XM_VMX128_INTRINSICS_
  9062. }
  9063. //------------------------------------------------------------------------------
  9064. XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
  9065. (
  9066. FXMVECTOR V1,
  9067. FXMVECTOR V2
  9068. )
  9069. {
  9070. #if defined(_XM_NO_INTRINSICS_)
  9071. XMVECTOR L1;
  9072. XMVECTOR L2;
  9073. XMVECTOR Dot;
  9074. XMVECTOR CosAngle;
  9075. XMVECTOR NegativeOne;
  9076. XMVECTOR One;
  9077. XMVECTOR Result;
  9078. L1 = XMVector4ReciprocalLength(V1);
  9079. L2 = XMVector4ReciprocalLength(V2);
  9080. Dot = XMVector4Dot(V1, V2);
  9081. L1 = XMVectorMultiply(L1, L2);
  9082. CosAngle = XMVectorMultiply(Dot, L1);
  9083. NegativeOne = XMVectorSplatConstant(-1, 0);
  9084. One = XMVectorSplatOne();
  9085. CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
  9086. Result = XMVectorACos(CosAngle);
  9087. return Result;
  9088. #elif defined(_XM_SSE_INTRINSICS_)
  9089. XMVECTOR L1;
  9090. XMVECTOR L2;
  9091. XMVECTOR Dot;
  9092. XMVECTOR CosAngle;
  9093. XMVECTOR Result;
  9094. L1 = XMVector4ReciprocalLength(V1);
  9095. L2 = XMVector4ReciprocalLength(V2);
  9096. Dot = XMVector4Dot(V1, V2);
  9097. L1 = _mm_mul_ps(L1,L2);
  9098. CosAngle = _mm_mul_ps(Dot,L1);
  9099. CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
  9100. Result = XMVectorACos(CosAngle);
  9101. return Result;
  9102. #else // _XM_VMX128_INTRINSICS_
  9103. #endif // _XM_VMX128_INTRINSICS_
  9104. }
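//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): unlike the *Normals variants
// above, XMVector4AngleBetweenVectors divides out the input lengths first, so
// the arguments need not be pre-normalized.
XMFINLINE XMVECTOR XMSampleVector4AngleUsage()
{
    XMVECTOR V1 = XMVectorSet(2.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR V2 = XMVectorSet(0.0f, 5.0f, 0.0f, 0.0f);
    // Perpendicular inputs: every component of the result is pi/2 radians
    return XMVector4AngleBetweenVectors(V1, V2);
}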
  9105. //------------------------------------------------------------------------------
  9106. XMFINLINE XMVECTOR XMVector4Transform
  9107. (
  9108. FXMVECTOR V,
  9109. CXMMATRIX M
  9110. )
  9111. {
  9112. #if defined(_XM_NO_INTRINSICS_)
  9113. FLOAT fX = (M.m[0][0]*V.x)+(M.m[1][0]*V.y)+(M.m[2][0]*V.z)+(M.m[3][0]*V.w);
  9114. FLOAT fY = (M.m[0][1]*V.x)+(M.m[1][1]*V.y)+(M.m[2][1]*V.z)+(M.m[3][1]*V.w);
  9115. FLOAT fZ = (M.m[0][2]*V.x)+(M.m[1][2]*V.y)+(M.m[2][2]*V.z)+(M.m[3][2]*V.w);
  9116. FLOAT fW = (M.m[0][3]*V.x)+(M.m[1][3]*V.y)+(M.m[2][3]*V.z)+(M.m[3][3]*V.w);
  9117. XMVECTOR vResult = {
  9118. fX,
  9119. fY,
  9120. fZ,
  9121. fW
  9122. };
  9123. return vResult;
  9124. #elif defined(_XM_SSE_INTRINSICS_)
  9125. // Splat x,y,z and w
  9126. XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  9127. XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  9128. XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
  9129. XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
  9130. // Mul by the matrix
  9131. vTempX = _mm_mul_ps(vTempX,M.r[0]);
  9132. vTempY = _mm_mul_ps(vTempY,M.r[1]);
  9133. vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
  9134. vTempW = _mm_mul_ps(vTempW,M.r[3]);
  9135. // Add them all together
  9136. vTempX = _mm_add_ps(vTempX,vTempY);
  9137. vTempZ = _mm_add_ps(vTempZ,vTempW);
  9138. vTempX = _mm_add_ps(vTempX,vTempZ);
  9139. return vTempX;
  9140. #else // _XM_VMX128_INTRINSICS_
  9141. #endif // _XM_VMX128_INTRINSICS_
  9142. }
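//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): the input is treated as a
// row vector, so the result is x*r[0] + y*r[1] + z*r[2] + w*r[3]. With w == 1,
// row 3 of the matrix therefore acts as a translation.
XMFINLINE XMVECTOR XMSampleVector4TransformUsage(CXMMATRIX M)
{
    // For M == XMMatrixTranslation(tx, ty, tz) the expected result is
    // (1+tx, 2+ty, 3+tz, 1)
    XMVECTOR P = XMVectorSet(1.0f, 2.0f, 3.0f, 1.0f);
    return XMVector4Transform(P, M);
}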
  9143. //------------------------------------------------------------------------------
  9144. XMINLINE XMFLOAT4* XMVector4TransformStream
  9145. (
  9146. XMFLOAT4* pOutputStream,
  9147. UINT OutputStride,
  9148. CONST XMFLOAT4* pInputStream,
  9149. UINT InputStride,
  9150. UINT VectorCount,
  9151. CXMMATRIX M
  9152. )
  9153. {
  9154. #if defined(_XM_NO_INTRINSICS_)
  9155. XMVECTOR V;
  9156. XMVECTOR X;
  9157. XMVECTOR Y;
  9158. XMVECTOR Z;
  9159. XMVECTOR W;
  9160. XMVECTOR Result;
  9161. UINT i;
  9162. BYTE* pInputVector = (BYTE*)pInputStream;
  9163. BYTE* pOutputVector = (BYTE*)pOutputStream;
  9164. XMASSERT(pOutputStream);
  9165. XMASSERT(pInputStream);
  9166. for (i = 0; i < VectorCount; i++)
  9167. {
  9168. V = XMLoadFloat4((XMFLOAT4*)pInputVector);
  9169. W = XMVectorSplatW(V);
  9170. Z = XMVectorSplatZ(V);
  9171. Y = XMVectorSplatY(V);
  9172. X = XMVectorSplatX(V);
  9173. // W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
  9174. // Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
  9175. // Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
  9176. // X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);
  9177. Result = XMVectorMultiply(W, M.r[3]);
  9178. Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
  9179. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  9180. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  9181. XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
  9182. pInputVector += InputStride;
  9183. pOutputVector += OutputStride;
  9184. }
  9185. return pOutputStream;
  9186. #elif defined(_XM_SSE_INTRINSICS_)
  9187. UINT i;
  9188. XMASSERT(pOutputStream);
  9189. XMASSERT(pInputStream);
9190. const BYTE* pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
  9191. BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
  9192. for (i = 0; i < VectorCount; i++)
  9193. {
  9194. // Fetch the row and splat it
  9195. XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
  9196. XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
  9197. XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
  9198. XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
  9199. vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
  9200. vTempx = _mm_mul_ps(vTempx,M.r[0]);
  9201. vTempy = _mm_mul_ps(vTempy,M.r[1]);
  9202. vTempz = _mm_mul_ps(vTempz,M.r[2]);
  9203. vTempw = _mm_mul_ps(vTempw,M.r[3]);
  9204. vTempx = _mm_add_ps(vTempx,vTempy);
  9205. vTempw = _mm_add_ps(vTempw,vTempz);
  9206. vTempw = _mm_add_ps(vTempw,vTempx);
  9207. // Store the transformed vector
  9208. _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);
  9209. pInputVector += InputStride;
  9210. pOutputVector += OutputStride;
  9211. }
  9212. return pOutputStream;
9213. #else // _XM_VMX128_INTRINSICS_
  9214. #endif // _XM_VMX128_INTRINSICS_
  9215. }
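//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): transforming a tightly
// packed array of XMFLOAT4 in place; both strides are then simply
// sizeof(XMFLOAT4). Each iteration loads a vector before storing its result,
// so input and output may alias like this.
XMFINLINE XMFLOAT4* XMSampleVector4TransformStreamUsage
(
    XMFLOAT4* pData,
    UINT Count,
    CXMMATRIX M
)
{
    return XMVector4TransformStream(pData, sizeof(XMFLOAT4),
                                    pData, sizeof(XMFLOAT4),
                                    Count, M);
}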
  9216. #ifdef __cplusplus
  9217. /****************************************************************************
  9218. *
  9219. * XMVECTOR operators
  9220. *
  9221. ****************************************************************************/
  9222. #ifndef XM_NO_OPERATOR_OVERLOADS
  9223. //------------------------------------------------------------------------------
  9224. XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
  9225. {
  9226. return V;
  9227. }
  9228. //------------------------------------------------------------------------------
  9229. XMFINLINE XMVECTOR operator- (FXMVECTOR V)
  9230. {
  9231. return XMVectorNegate(V);
  9232. }
  9233. //------------------------------------------------------------------------------
  9234. XMFINLINE XMVECTOR& operator+=
  9235. (
  9236. XMVECTOR& V1,
  9237. FXMVECTOR V2
  9238. )
  9239. {
  9240. V1 = XMVectorAdd(V1, V2);
  9241. return V1;
  9242. }
  9243. //------------------------------------------------------------------------------
  9244. XMFINLINE XMVECTOR& operator-=
  9245. (
  9246. XMVECTOR& V1,
  9247. FXMVECTOR V2
  9248. )
  9249. {
  9250. V1 = XMVectorSubtract(V1, V2);
  9251. return V1;
  9252. }
  9253. //------------------------------------------------------------------------------
  9254. XMFINLINE XMVECTOR& operator*=
  9255. (
  9256. XMVECTOR& V1,
  9257. FXMVECTOR V2
  9258. )
  9259. {
  9260. V1 = XMVectorMultiply(V1, V2);
  9261. return V1;
  9262. }
  9263. //------------------------------------------------------------------------------
  9264. XMFINLINE XMVECTOR& operator/=
  9265. (
  9266. XMVECTOR& V1,
  9267. FXMVECTOR V2
  9268. )
  9269. {
  9270. XMVECTOR InvV = XMVectorReciprocal(V2);
  9271. V1 = XMVectorMultiply(V1, InvV);
  9272. return V1;
  9273. }
  9274. //------------------------------------------------------------------------------
  9275. XMFINLINE XMVECTOR& operator*=
  9276. (
  9277. XMVECTOR& V,
  9278. CONST FLOAT S
  9279. )
  9280. {
  9281. V = XMVectorScale(V, S);
  9282. return V;
  9283. }
  9284. //------------------------------------------------------------------------------
  9285. XMFINLINE XMVECTOR& operator/=
  9286. (
  9287. XMVECTOR& V,
  9288. CONST FLOAT S
  9289. )
  9290. {
  9291. V = XMVectorScale(V, 1.0f / S);
  9292. return V;
  9293. }
  9294. //------------------------------------------------------------------------------
  9295. XMFINLINE XMVECTOR operator+
  9296. (
  9297. FXMVECTOR V1,
  9298. FXMVECTOR V2
  9299. )
  9300. {
  9301. return XMVectorAdd(V1, V2);
  9302. }
  9303. //------------------------------------------------------------------------------
  9304. XMFINLINE XMVECTOR operator-
  9305. (
  9306. FXMVECTOR V1,
  9307. FXMVECTOR V2
  9308. )
  9309. {
  9310. return XMVectorSubtract(V1, V2);
  9311. }
  9312. //------------------------------------------------------------------------------
  9313. XMFINLINE XMVECTOR operator*
  9314. (
  9315. FXMVECTOR V1,
  9316. FXMVECTOR V2
  9317. )
  9318. {
  9319. return XMVectorMultiply(V1, V2);
  9320. }
  9321. //------------------------------------------------------------------------------
  9322. XMFINLINE XMVECTOR operator/
  9323. (
  9324. FXMVECTOR V1,
  9325. FXMVECTOR V2
  9326. )
  9327. {
  9328. XMVECTOR InvV = XMVectorReciprocal(V2);
  9329. return XMVectorMultiply(V1, InvV);
  9330. }
  9331. //------------------------------------------------------------------------------
  9332. XMFINLINE XMVECTOR operator*
  9333. (
  9334. FXMVECTOR V,
  9335. CONST FLOAT S
  9336. )
  9337. {
  9338. return XMVectorScale(V, S);
  9339. }
  9340. //------------------------------------------------------------------------------
  9341. XMFINLINE XMVECTOR operator/
  9342. (
  9343. FXMVECTOR V,
  9344. CONST FLOAT S
  9345. )
  9346. {
  9347. return XMVectorScale(V, 1.0f / S);
  9348. }
  9349. //------------------------------------------------------------------------------
  9350. XMFINLINE XMVECTOR operator*
  9351. (
  9352. FLOAT S,
  9353. FXMVECTOR V
  9354. )
  9355. {
  9356. return XMVectorScale(V, S);
  9357. }
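//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): with the overloads above,
// vector expressions read naturally and expand to the same XMVectorAdd /
// XMVectorScale / XMVectorSubtract calls.
XMFINLINE XMVECTOR XMSampleOperatorUsage
(
    FXMVECTOR A,
    FXMVECTOR B
)
{
    XMVECTOR C = A + B * 2.0f; // XMVectorAdd(A, XMVectorScale(B, 2.0f))
    C -= A;                    // XMVectorSubtract(C, A)
    return C;
}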
  9358. #endif // !XM_NO_OPERATOR_OVERLOADS
  9359. /****************************************************************************
  9360. *
  9361. * XMFLOAT2 operators
  9362. *
  9363. ****************************************************************************/
  9364. //------------------------------------------------------------------------------
  9365. XMFINLINE _XMFLOAT2::_XMFLOAT2
  9366. (
  9367. FLOAT _x,
  9368. FLOAT _y
  9369. )
  9370. {
  9371. x = _x;
  9372. y = _y;
  9373. }
  9374. //------------------------------------------------------------------------------
  9375. XMFINLINE _XMFLOAT2::_XMFLOAT2
  9376. (
  9377. CONST FLOAT* pArray
  9378. )
  9379. {
  9380. x = pArray[0];
  9381. y = pArray[1];
  9382. }
  9383. //------------------------------------------------------------------------------
  9384. XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
  9385. (
  9386. CONST _XMFLOAT2& Float2
  9387. )
  9388. {
  9389. x = Float2.x;
  9390. y = Float2.y;
  9391. return *this;
  9392. }
  9393. /****************************************************************************
  9394. *
  9395. * XMHALF2 operators
  9396. *
  9397. ****************************************************************************/
  9398. //------------------------------------------------------------------------------
  9399. XMFINLINE _XMHALF2::_XMHALF2
  9400. (
  9401. HALF _x,
  9402. HALF _y
  9403. )
  9404. {
  9405. x = _x;
  9406. y = _y;
  9407. }
  9408. //------------------------------------------------------------------------------
  9409. XMFINLINE _XMHALF2::_XMHALF2
  9410. (
  9411. CONST HALF* pArray
  9412. )
  9413. {
  9414. x = pArray[0];
  9415. y = pArray[1];
  9416. }
  9417. //------------------------------------------------------------------------------
  9418. XMFINLINE _XMHALF2::_XMHALF2
  9419. (
  9420. FLOAT _x,
  9421. FLOAT _y
  9422. )
  9423. {
  9424. x = XMConvertFloatToHalf(_x);
  9425. y = XMConvertFloatToHalf(_y);
  9426. }
  9427. //------------------------------------------------------------------------------
  9428. XMFINLINE _XMHALF2::_XMHALF2
  9429. (
  9430. CONST FLOAT* pArray
  9431. )
  9432. {
  9433. x = XMConvertFloatToHalf(pArray[0]);
  9434. y = XMConvertFloatToHalf(pArray[1]);
  9435. }
  9436. //------------------------------------------------------------------------------
  9437. XMFINLINE _XMHALF2& _XMHALF2::operator=
  9438. (
  9439. CONST _XMHALF2& Half2
  9440. )
  9441. {
  9442. x = Half2.x;
  9443. y = Half2.y;
  9444. return *this;
  9445. }
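//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): the FLOAT constructor above
// routes through XMConvertFloatToHalf, so a value exactly representable in
// half precision round-trips unchanged.
XMFINLINE FLOAT XMSampleHalf2Usage()
{
    XMHALF2 H(1.5f, -0.25f);
    return XMConvertHalfToFloat(H.x); // 1.5f
}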
  9446. /****************************************************************************
  9447. *
  9448. * XMSHORTN2 operators
  9449. *
  9450. ****************************************************************************/
  9451. //------------------------------------------------------------------------------
  9452. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9453. (
  9454. SHORT _x,
  9455. SHORT _y
  9456. )
  9457. {
  9458. x = _x;
  9459. y = _y;
  9460. }
  9461. //------------------------------------------------------------------------------
  9462. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9463. (
  9464. CONST SHORT* pArray
  9465. )
  9466. {
  9467. x = pArray[0];
  9468. y = pArray[1];
  9469. }
  9470. //------------------------------------------------------------------------------
  9471. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9472. (
  9473. FLOAT _x,
  9474. FLOAT _y
  9475. )
  9476. {
  9477. XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9478. }
  9479. //------------------------------------------------------------------------------
  9480. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9481. (
  9482. CONST FLOAT* pArray
  9483. )
  9484. {
  9485. XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9486. }
  9487. //------------------------------------------------------------------------------
  9488. XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
  9489. (
  9490. CONST _XMSHORTN2& ShortN2
  9491. )
  9492. {
  9493. x = ShortN2.x;
  9494. y = ShortN2.y;
  9495. return *this;
  9496. }
  9497. /****************************************************************************
  9498. *
  9499. * XMSHORT2 operators
  9500. *
  9501. ****************************************************************************/
  9502. //------------------------------------------------------------------------------
  9503. XMFINLINE _XMSHORT2::_XMSHORT2
  9504. (
  9505. SHORT _x,
  9506. SHORT _y
  9507. )
  9508. {
  9509. x = _x;
  9510. y = _y;
  9511. }
  9512. //------------------------------------------------------------------------------
  9513. XMFINLINE _XMSHORT2::_XMSHORT2
  9514. (
  9515. CONST SHORT* pArray
  9516. )
  9517. {
  9518. x = pArray[0];
  9519. y = pArray[1];
  9520. }
  9521. //------------------------------------------------------------------------------
  9522. XMFINLINE _XMSHORT2::_XMSHORT2
  9523. (
  9524. FLOAT _x,
  9525. FLOAT _y
  9526. )
  9527. {
  9528. XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9529. }
  9530. //------------------------------------------------------------------------------
  9531. XMFINLINE _XMSHORT2::_XMSHORT2
  9532. (
  9533. CONST FLOAT* pArray
  9534. )
  9535. {
  9536. XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9537. }
  9538. //------------------------------------------------------------------------------
  9539. XMFINLINE _XMSHORT2& _XMSHORT2::operator=
  9540. (
  9541. CONST _XMSHORT2& Short2
  9542. )
  9543. {
  9544. x = Short2.x;
  9545. y = Short2.y;
  9546. return *this;
  9547. }
  9548. /****************************************************************************
  9549. *
  9550. * XMUSHORTN2 operators
  9551. *
  9552. ****************************************************************************/
  9553. //------------------------------------------------------------------------------
  9554. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9555. (
  9556. USHORT _x,
  9557. USHORT _y
  9558. )
  9559. {
  9560. x = _x;
  9561. y = _y;
  9562. }
  9563. //------------------------------------------------------------------------------
  9564. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9565. (
  9566. CONST USHORT* pArray
  9567. )
  9568. {
  9569. x = pArray[0];
  9570. y = pArray[1];
  9571. }
  9572. //------------------------------------------------------------------------------
  9573. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9574. (
  9575. FLOAT _x,
  9576. FLOAT _y
  9577. )
  9578. {
  9579. XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9580. }
  9581. //------------------------------------------------------------------------------
  9582. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9583. (
  9584. CONST FLOAT* pArray
  9585. )
  9586. {
  9587. XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9588. }
  9589. //------------------------------------------------------------------------------
  9590. XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
  9591. (
  9592. CONST _XMUSHORTN2& UShortN2
  9593. )
  9594. {
  9595. x = UShortN2.x;
  9596. y = UShortN2.y;
  9597. return *this;
  9598. }
  9599. /****************************************************************************
  9600. *
  9601. * XMUSHORT2 operators
  9602. *
  9603. ****************************************************************************/
  9604. //------------------------------------------------------------------------------
  9605. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9606. (
  9607. USHORT _x,
  9608. USHORT _y
  9609. )
  9610. {
  9611. x = _x;
  9612. y = _y;
  9613. }
  9614. //------------------------------------------------------------------------------
  9615. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9616. (
  9617. CONST USHORT* pArray
  9618. )
  9619. {
  9620. x = pArray[0];
  9621. y = pArray[1];
  9622. }
  9623. //------------------------------------------------------------------------------
  9624. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9625. (
  9626. FLOAT _x,
  9627. FLOAT _y
  9628. )
  9629. {
  9630. XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9631. }
  9632. //------------------------------------------------------------------------------
  9633. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9634. (
  9635. CONST FLOAT* pArray
  9636. )
  9637. {
  9638. XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9639. }
  9640. //------------------------------------------------------------------------------
  9641. XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
  9642. (
  9643. CONST _XMUSHORT2& UShort2
  9644. )
  9645. {
  9646. x = UShort2.x;
  9647. y = UShort2.y;
  9648. return *this;
  9649. }
  9650. /****************************************************************************
  9651. *
  9652. * XMFLOAT3 operators
  9653. *
  9654. ****************************************************************************/
  9655. //------------------------------------------------------------------------------
  9656. XMFINLINE _XMFLOAT3::_XMFLOAT3
  9657. (
  9658. FLOAT _x,
  9659. FLOAT _y,
  9660. FLOAT _z
  9661. )
  9662. {
  9663. x = _x;
  9664. y = _y;
  9665. z = _z;
  9666. }
  9667. //------------------------------------------------------------------------------
  9668. XMFINLINE _XMFLOAT3::_XMFLOAT3
  9669. (
  9670. CONST FLOAT* pArray
  9671. )
  9672. {
  9673. x = pArray[0];
  9674. y = pArray[1];
  9675. z = pArray[2];
  9676. }
  9677. //------------------------------------------------------------------------------
  9678. XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
  9679. (
  9680. CONST _XMFLOAT3& Float3
  9681. )
  9682. {
  9683. x = Float3.x;
  9684. y = Float3.y;
  9685. z = Float3.z;
  9686. return *this;
  9687. }
  9688. /****************************************************************************
  9689. *
  9690. * XMHENDN3 operators
  9691. *
  9692. ****************************************************************************/
  9693. //------------------------------------------------------------------------------
  9694. XMFINLINE _XMHENDN3::_XMHENDN3
  9695. (
  9696. UINT Packed
  9697. )
  9698. {
  9699. v = Packed;
  9700. }
  9701. //------------------------------------------------------------------------------
  9702. XMFINLINE _XMHENDN3::_XMHENDN3
  9703. (
  9704. FLOAT _x,
  9705. FLOAT _y,
  9706. FLOAT _z
  9707. )
  9708. {
  9709. XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9710. }
  9711. //------------------------------------------------------------------------------
  9712. XMFINLINE _XMHENDN3::_XMHENDN3
  9713. (
  9714. CONST FLOAT* pArray
  9715. )
  9716. {
  9717. XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9718. }
  9719. //------------------------------------------------------------------------------
  9720. XMFINLINE _XMHENDN3::operator UINT ()
  9721. {
  9722. return v;
  9723. }
  9724. //------------------------------------------------------------------------------
  9725. XMFINLINE _XMHENDN3& _XMHENDN3::operator=
  9726. (
  9727. CONST _XMHENDN3& HenDN3
  9728. )
  9729. {
  9730. v = HenDN3.v;
  9731. return *this;
  9732. }
  9733. //------------------------------------------------------------------------------
  9734. XMFINLINE _XMHENDN3& _XMHENDN3::operator=
  9735. (
  9736. CONST UINT Packed
  9737. )
  9738. {
  9739. v = Packed;
  9740. return *this;
  9741. }
  9742. /****************************************************************************
  9743. *
  9744. * XMHEND3 operators
  9745. *
  9746. ****************************************************************************/
  9747. //------------------------------------------------------------------------------
  9748. XMFINLINE _XMHEND3::_XMHEND3
  9749. (
  9750. UINT Packed
  9751. )
  9752. {
  9753. v = Packed;
  9754. }
  9755. //------------------------------------------------------------------------------
  9756. XMFINLINE _XMHEND3::_XMHEND3
  9757. (
  9758. FLOAT _x,
  9759. FLOAT _y,
  9760. FLOAT _z
  9761. )
  9762. {
  9763. XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9764. }
  9765. //------------------------------------------------------------------------------
  9766. XMFINLINE _XMHEND3::_XMHEND3
  9767. (
  9768. CONST FLOAT* pArray
  9769. )
  9770. {
  9771. XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9772. }
  9773. //------------------------------------------------------------------------------
  9774. XMFINLINE _XMHEND3::operator UINT ()
  9775. {
  9776. return v;
  9777. }
  9778. //------------------------------------------------------------------------------
  9779. XMFINLINE _XMHEND3& _XMHEND3::operator=
  9780. (
  9781. CONST _XMHEND3& HenD3
  9782. )
  9783. {
  9784. v = HenD3.v;
  9785. return *this;
  9786. }
  9787. //------------------------------------------------------------------------------
  9788. XMFINLINE _XMHEND3& _XMHEND3::operator=
  9789. (
  9790. CONST UINT Packed
  9791. )
  9792. {
  9793. v = Packed;
  9794. return *this;
  9795. }
  9796. /****************************************************************************
  9797. *
  9798. * XMUHENDN3 operators
  9799. *
  9800. ****************************************************************************/
  9801. //------------------------------------------------------------------------------
  9802. XMFINLINE _XMUHENDN3::_XMUHENDN3
  9803. (
  9804. UINT Packed
  9805. )
  9806. {
  9807. v = Packed;
  9808. }
  9809. //------------------------------------------------------------------------------
  9810. XMFINLINE _XMUHENDN3::_XMUHENDN3
  9811. (
  9812. FLOAT _x,
  9813. FLOAT _y,
  9814. FLOAT _z
  9815. )
  9816. {
  9817. XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9818. }
  9819. //------------------------------------------------------------------------------
  9820. XMFINLINE _XMUHENDN3::_XMUHENDN3
  9821. (
  9822. CONST FLOAT* pArray
  9823. )
  9824. {
  9825. XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9826. }
  9827. //------------------------------------------------------------------------------
  9828. XMFINLINE _XMUHENDN3::operator UINT ()
  9829. {
  9830. return v;
  9831. }
  9832. //------------------------------------------------------------------------------
  9833. XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
  9834. (
  9835. CONST _XMUHENDN3& UHenDN3
  9836. )
  9837. {
  9838. v = UHenDN3.v;
  9839. return *this;
  9840. }
  9841. //------------------------------------------------------------------------------
  9842. XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
  9843. (
  9844. CONST UINT Packed
  9845. )
  9846. {
  9847. v = Packed;
  9848. return *this;
  9849. }
  9850. /****************************************************************************
  9851. *
  9852. * XMUHEND3 operators
  9853. *
  9854. ****************************************************************************/
  9855. //------------------------------------------------------------------------------
  9856. XMFINLINE _XMUHEND3::_XMUHEND3
  9857. (
  9858. UINT Packed
  9859. )
  9860. {
  9861. v = Packed;
  9862. }
  9863. //------------------------------------------------------------------------------
  9864. XMFINLINE _XMUHEND3::_XMUHEND3
  9865. (
  9866. FLOAT _x,
  9867. FLOAT _y,
  9868. FLOAT _z
  9869. )
  9870. {
  9871. XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9872. }
  9873. //------------------------------------------------------------------------------
  9874. XMFINLINE _XMUHEND3::_XMUHEND3
  9875. (
  9876. CONST FLOAT* pArray
  9877. )
  9878. {
  9879. XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9880. }
  9881. //------------------------------------------------------------------------------
  9882. XMFINLINE _XMUHEND3::operator UINT ()
  9883. {
  9884. return v;
  9885. }
  9886. //------------------------------------------------------------------------------
  9887. XMFINLINE _XMUHEND3& _XMUHEND3::operator=
  9888. (
  9889. CONST _XMUHEND3& UHenD3
  9890. )
  9891. {
  9892. v = UHenD3.v;
  9893. return *this;
  9894. }
  9895. //------------------------------------------------------------------------------
  9896. XMFINLINE _XMUHEND3& _XMUHEND3::operator=
  9897. (
  9898. CONST UINT Packed
  9899. )
  9900. {
  9901. v = Packed;
  9902. return *this;
  9903. }
  9904. /****************************************************************************
  9905. *
  9906. * XMDHENN3 operators
  9907. *
  9908. ****************************************************************************/
  9909. //------------------------------------------------------------------------------
  9910. XMFINLINE _XMDHENN3::_XMDHENN3
  9911. (
  9912. UINT Packed
  9913. )
  9914. {
  9915. v = Packed;
  9916. }
  9917. //------------------------------------------------------------------------------
  9918. XMFINLINE _XMDHENN3::_XMDHENN3
  9919. (
  9920. FLOAT _x,
  9921. FLOAT _y,
  9922. FLOAT _z
  9923. )
  9924. {
  9925. XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9926. }
  9927. //------------------------------------------------------------------------------
  9928. XMFINLINE _XMDHENN3::_XMDHENN3
  9929. (
  9930. CONST FLOAT* pArray
  9931. )
  9932. {
  9933. XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9934. }
  9935. //------------------------------------------------------------------------------
  9936. XMFINLINE _XMDHENN3::operator UINT ()
  9937. {
  9938. return v;
  9939. }
  9940. //------------------------------------------------------------------------------
  9941. XMFINLINE _XMDHENN3& _XMDHENN3::operator=
  9942. (
  9943. CONST _XMDHENN3& DHenN3
  9944. )
  9945. {
  9946. v = DHenN3.v;
  9947. return *this;
  9948. }
  9949. //------------------------------------------------------------------------------
  9950. XMFINLINE _XMDHENN3& _XMDHENN3::operator=
  9951. (
  9952. CONST UINT Packed
  9953. )
  9954. {
  9955. v = Packed;
  9956. return *this;
  9957. }
  9958. /****************************************************************************
  9959. *
  9960. * XMDHEN3 operators
  9961. *
  9962. ****************************************************************************/
  9963. //------------------------------------------------------------------------------
  9964. XMFINLINE _XMDHEN3::_XMDHEN3
  9965. (
  9966. UINT Packed
  9967. )
  9968. {
  9969. v = Packed;
  9970. }
  9971. //------------------------------------------------------------------------------
  9972. XMFINLINE _XMDHEN3::_XMDHEN3
  9973. (
  9974. FLOAT _x,
  9975. FLOAT _y,
  9976. FLOAT _z
  9977. )
  9978. {
  9979. XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9980. }
  9981. //------------------------------------------------------------------------------
  9982. XMFINLINE _XMDHEN3::_XMDHEN3
  9983. (
  9984. CONST FLOAT* pArray
  9985. )
  9986. {
  9987. XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9988. }
  9989. //------------------------------------------------------------------------------
  9990. XMFINLINE _XMDHEN3::operator UINT ()
  9991. {
  9992. return v;
  9993. }
  9994. //------------------------------------------------------------------------------
  9995. XMFINLINE _XMDHEN3& _XMDHEN3::operator=
  9996. (
  9997. CONST _XMDHEN3& DHen3
  9998. )
  9999. {
  10000. v = DHen3.v;
  10001. return *this;
  10002. }
  10003. //------------------------------------------------------------------------------
  10004. XMFINLINE _XMDHEN3& _XMDHEN3::operator=
  10005. (
  10006. CONST UINT Packed
  10007. )
  10008. {
  10009. v = Packed;
  10010. return *this;
  10011. }
  10012. /****************************************************************************
  10013. *
  10014. * XMUDHENN3 operators
  10015. *
  10016. ****************************************************************************/
  10017. //------------------------------------------------------------------------------
  10018. XMFINLINE _XMUDHENN3::_XMUDHENN3
  10019. (
  10020. UINT Packed
  10021. )
  10022. {
  10023. v = Packed;
  10024. }
  10025. //------------------------------------------------------------------------------
  10026. XMFINLINE _XMUDHENN3::_XMUDHENN3
  10027. (
  10028. FLOAT _x,
  10029. FLOAT _y,
  10030. FLOAT _z
  10031. )
  10032. {
  10033. XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  10034. }
  10035. //------------------------------------------------------------------------------
  10036. XMFINLINE _XMUDHENN3::_XMUDHENN3
  10037. (
  10038. CONST FLOAT* pArray
  10039. )
  10040. {
  10041. XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  10042. }
  10043. //------------------------------------------------------------------------------
  10044. XMFINLINE _XMUDHENN3::operator UINT ()
  10045. {
  10046. return v;
  10047. }
  10048. //------------------------------------------------------------------------------
  10049. XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
  10050. (
  10051. CONST _XMUDHENN3& UDHenN3
  10052. )
  10053. {
  10054. v = UDHenN3.v;
  10055. return *this;
  10056. }
  10057. //------------------------------------------------------------------------------
  10058. XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
  10059. (
  10060. CONST UINT Packed
  10061. )
  10062. {
  10063. v = Packed;
  10064. return *this;
  10065. }
  10066. /****************************************************************************
  10067. *
  10068. * XMUDHEN3 operators
  10069. *
  10070. ****************************************************************************/
  10071. //------------------------------------------------------------------------------
  10072. XMFINLINE _XMUDHEN3::_XMUDHEN3
  10073. (
  10074. UINT Packed
  10075. )
  10076. {
  10077. v = Packed;
  10078. }
  10079. //------------------------------------------------------------------------------
  10080. XMFINLINE _XMUDHEN3::_XMUDHEN3
  10081. (
  10082. FLOAT _x,
  10083. FLOAT _y,
  10084. FLOAT _z
  10085. )
  10086. {
  10087. XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
  10088. }
  10089. //------------------------------------------------------------------------------
  10090. XMFINLINE _XMUDHEN3::_XMUDHEN3
  10091. (
  10092. CONST FLOAT* pArray
  10093. )
  10094. {
  10095. XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  10096. }
  10097. //------------------------------------------------------------------------------
  10098. XMFINLINE _XMUDHEN3::operator UINT ()
  10099. {
  10100. return v;
  10101. }
  10102. //------------------------------------------------------------------------------
  10103. XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
  10104. (
  10105. CONST _XMUDHEN3& UDHen3
  10106. )
  10107. {
  10108. v = UDHen3.v;
  10109. return *this;
  10110. }
  10111. //------------------------------------------------------------------------------
  10112. XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
  10113. (
  10114. CONST UINT Packed
  10115. )
  10116. {
  10117. v = Packed;
  10118. return *this;
  10119. }
  10120. /****************************************************************************
  10121. *
  10122. * XMFLOAT4 operators
  10123. *
  10124. ****************************************************************************/
  10125. //------------------------------------------------------------------------------
  10126. XMFINLINE _XMFLOAT4::_XMFLOAT4
  10127. (
  10128. FLOAT _x,
  10129. FLOAT _y,
  10130. FLOAT _z,
  10131. FLOAT _w
  10132. )
  10133. {
  10134. x = _x;
  10135. y = _y;
  10136. z = _z;
  10137. w = _w;
  10138. }
  10139. //------------------------------------------------------------------------------
  10140. XMFINLINE _XMFLOAT4::_XMFLOAT4
  10141. (
  10142. CONST FLOAT* pArray
  10143. )
  10144. {
  10145. x = pArray[0];
  10146. y = pArray[1];
  10147. z = pArray[2];
  10148. w = pArray[3];
  10149. }
  10150. //------------------------------------------------------------------------------
  10151. XMFINLINE _XMFLOAT4& _XMFLOAT4::operator=
  10152. (
  10153. CONST _XMFLOAT4& Float4
  10154. )
  10155. {
  10156. x = Float4.x;
  10157. y = Float4.y;
  10158. z = Float4.z;
  10159. w = Float4.w;
  10160. return *this;
  10161. }
  10162. /****************************************************************************
  10163. *
  10164. * XMHALF4 operators
  10165. *
  10166. ****************************************************************************/
  10167. //------------------------------------------------------------------------------
  10168. XMFINLINE _XMHALF4::_XMHALF4
  10169. (
  10170. HALF _x,
  10171. HALF _y,
  10172. HALF _z,
  10173. HALF _w
  10174. )
  10175. {
  10176. x = _x;
  10177. y = _y;
  10178. z = _z;
  10179. w = _w;
  10180. }
  10181. //------------------------------------------------------------------------------
  10182. XMFINLINE _XMHALF4::_XMHALF4
  10183. (
  10184. CONST HALF* pArray
  10185. )
  10186. {
  10187. x = pArray[0];
  10188. y = pArray[1];
  10189. z = pArray[2];
  10190. w = pArray[3];
  10191. }
  10192. //------------------------------------------------------------------------------
  10193. XMFINLINE _XMHALF4::_XMHALF4
  10194. (
  10195. FLOAT _x,
  10196. FLOAT _y,
  10197. FLOAT _z,
  10198. FLOAT _w
  10199. )
  10200. {
  10201. x = XMConvertFloatToHalf(_x);
  10202. y = XMConvertFloatToHalf(_y);
  10203. z = XMConvertFloatToHalf(_z);
  10204. w = XMConvertFloatToHalf(_w);
  10205. }
  10206. //------------------------------------------------------------------------------
  10207. XMFINLINE _XMHALF4::_XMHALF4
  10208. (
  10209. CONST FLOAT* pArray
  10210. )
  10211. {
  10212. XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4);
  10213. }
  10214. //------------------------------------------------------------------------------
  10215. XMFINLINE _XMHALF4& _XMHALF4::operator=
  10216. (
  10217. CONST _XMHALF4& Half4
  10218. )
  10219. {
  10220. x = Half4.x;
  10221. y = Half4.y;
  10222. z = Half4.z;
  10223. w = Half4.w;
  10224. return *this;
  10225. }
  10226. /****************************************************************************
  10227. *
  10228. * XMSHORTN4 operators
  10229. *
  10230. ****************************************************************************/
  10231. //------------------------------------------------------------------------------
  10232. XMFINLINE _XMSHORTN4::_XMSHORTN4
  10233. (
  10234. SHORT _x,
  10235. SHORT _y,
  10236. SHORT _z,
  10237. SHORT _w
  10238. )
  10239. {
  10240. x = _x;
  10241. y = _y;
  10242. z = _z;
  10243. w = _w;
  10244. }
  10245. //------------------------------------------------------------------------------
  10246. XMFINLINE _XMSHORTN4::_XMSHORTN4
  10247. (
  10248. CONST SHORT* pArray
  10249. )
  10250. {
  10251. x = pArray[0];
  10252. y = pArray[1];
  10253. z = pArray[2];
  10254. w = pArray[3];
  10255. }
  10256. //------------------------------------------------------------------------------
  10257. XMFINLINE _XMSHORTN4::_XMSHORTN4
  10258. (
  10259. FLOAT _x,
  10260. FLOAT _y,
  10261. FLOAT _z,
  10262. FLOAT _w
  10263. )
  10264. {
  10265. XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
  10266. }
  10267. //------------------------------------------------------------------------------
  10268. XMFINLINE _XMSHORTN4::_XMSHORTN4
  10269. (
  10270. CONST FLOAT* pArray
  10271. )
  10272. {
  10273. XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
  10274. }
  10275. //------------------------------------------------------------------------------
  10276. XMFINLINE _XMSHORTN4& _XMSHORTN4::operator=
  10277. (
  10278. CONST _XMSHORTN4& ShortN4
  10279. )
  10280. {
  10281. x = ShortN4.x;
  10282. y = ShortN4.y;
  10283. z = ShortN4.z;
  10284. w = ShortN4.w;
  10285. return *this;
  10286. }
  10287. /****************************************************************************
  10288. *
  10289. * XMSHORT4 operators
  10290. *
  10291. ****************************************************************************/
  10292. //------------------------------------------------------------------------------
  10293. XMFINLINE _XMSHORT4::_XMSHORT4
  10294. (
  10295. SHORT _x,
  10296. SHORT _y,
  10297. SHORT _z,
  10298. SHORT _w
  10299. )
  10300. {
  10301. x = _x;
  10302. y = _y;
  10303. z = _z;
  10304. w = _w;
  10305. }
  10306. //------------------------------------------------------------------------------
  10307. XMFINLINE _XMSHORT4::_XMSHORT4
  10308. (
  10309. CONST SHORT* pArray
  10310. )
  10311. {
  10312. x = pArray[0];
  10313. y = pArray[1];
  10314. z = pArray[2];
  10315. w = pArray[3];
  10316. }
  10317. //------------------------------------------------------------------------------
  10318. XMFINLINE _XMSHORT4::_XMSHORT4
  10319. (
  10320. FLOAT _x,
  10321. FLOAT _y,
  10322. FLOAT _z,
  10323. FLOAT _w
  10324. )
  10325. {
  10326. XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
  10327. }
  10328. //------------------------------------------------------------------------------
  10329. XMFINLINE _XMSHORT4::_XMSHORT4
  10330. (
  10331. CONST FLOAT* pArray
  10332. )
  10333. {
  10334. XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
  10335. }
  10336. //------------------------------------------------------------------------------
  10337. XMFINLINE _XMSHORT4& _XMSHORT4::operator=
  10338. (
  10339. CONST _XMSHORT4& Short4
  10340. )
  10341. {
  10342. x = Short4.x;
  10343. y = Short4.y;
  10344. z = Short4.z;
  10345. w = Short4.w;
  10346. return *this;
  10347. }
  10348. /****************************************************************************
  10349. *
  10350. * XMUSHORTN4 operators
  10351. *
  10352. ****************************************************************************/
  10353. //------------------------------------------------------------------------------
  10354. XMFINLINE _XMUSHORTN4::_XMUSHORTN4
  10355. (
  10356. USHORT _x,
  10357. USHORT _y,
  10358. USHORT _z,
  10359. USHORT _w
  10360. )
  10361. {
  10362. x = _x;
  10363. y = _y;
  10364. z = _z;
  10365. w = _w;
  10366. }
  10367. //------------------------------------------------------------------------------
  10368. XMFINLINE _XMUSHORTN4::_XMUSHORTN4
  10369. (
  10370. CONST USHORT* pArray
  10371. )
  10372. {
  10373. x = pArray[0];
  10374. y = pArray[1];
  10375. z = pArray[2];
  10376. w = pArray[3];
  10377. }
  10378. //------------------------------------------------------------------------------
  10379. XMFINLINE _XMUSHORTN4::_XMUSHORTN4
  10380. (
  10381. FLOAT _x,
  10382. FLOAT _y,
  10383. FLOAT _z,
  10384. FLOAT _w
  10385. )
  10386. {
  10387. XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
  10388. }
  10389. //------------------------------------------------------------------------------
  10390. XMFINLINE _XMUSHORTN4::_XMUSHORTN4
  10391. (
  10392. CONST FLOAT* pArray
  10393. )
  10394. {
  10395. XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
  10396. }
  10397. //------------------------------------------------------------------------------
  10398. XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator=
  10399. (
  10400. CONST _XMUSHORTN4& UShortN4
  10401. )
  10402. {
  10403. x = UShortN4.x;
  10404. y = UShortN4.y;
  10405. z = UShortN4.z;
  10406. w = UShortN4.w;
  10407. return *this;
  10408. }
  10409. /****************************************************************************
  10410. *
  10411. * XMUSHORT4 operators
  10412. *
  10413. ****************************************************************************/
  10414. //------------------------------------------------------------------------------
  10415. XMFINLINE _XMUSHORT4::_XMUSHORT4
  10416. (
  10417. USHORT _x,
  10418. USHORT _y,
  10419. USHORT _z,
  10420. USHORT _w
  10421. )
  10422. {
  10423. x = _x;
  10424. y = _y;
  10425. z = _z;
  10426. w = _w;
  10427. }
  10428. //------------------------------------------------------------------------------
  10429. XMFINLINE _XMUSHORT4::_XMUSHORT4
  10430. (
  10431. CONST USHORT* pArray
  10432. )
  10433. {
  10434. x = pArray[0];
  10435. y = pArray[1];
  10436. z = pArray[2];
  10437. w = pArray[3];
  10438. }
  10439. //------------------------------------------------------------------------------
  10440. XMFINLINE _XMUSHORT4::_XMUSHORT4
  10441. (
  10442. FLOAT _x,
  10443. FLOAT _y,
  10444. FLOAT _z,
  10445. FLOAT _w
  10446. )
  10447. {
  10448. XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
  10449. }
  10450. //------------------------------------------------------------------------------
  10451. XMFINLINE _XMUSHORT4::_XMUSHORT4
  10452. (
  10453. CONST FLOAT* pArray
  10454. )
  10455. {
  10456. XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
  10457. }
  10458. //------------------------------------------------------------------------------
  10459. XMFINLINE _XMUSHORT4& _XMUSHORT4::operator=
  10460. (
  10461. CONST _XMUSHORT4& UShort4
  10462. )
  10463. {
  10464. x = UShort4.x;
  10465. y = UShort4.y;
  10466. z = UShort4.z;
  10467. w = UShort4.w;
  10468. return *this;
  10469. }
  10470. /****************************************************************************
  10471. *
  10472. * XMXDECN4 operators
  10473. *
  10474. ****************************************************************************/
  10475. //------------------------------------------------------------------------------
  10476. XMFINLINE _XMXDECN4::_XMXDECN4
  10477. (
  10478. UINT Packed
  10479. )
  10480. {
  10481. v = Packed;
  10482. }
  10483. //------------------------------------------------------------------------------
  10484. XMFINLINE _XMXDECN4::_XMXDECN4
  10485. (
  10486. FLOAT _x,
  10487. FLOAT _y,
  10488. FLOAT _z,
  10489. FLOAT _w
  10490. )
  10491. {
  10492. XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
  10493. }
  10494. //------------------------------------------------------------------------------
  10495. XMFINLINE _XMXDECN4::_XMXDECN4
  10496. (
  10497. CONST FLOAT* pArray
  10498. )
  10499. {
  10500. XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
  10501. }
  10502. //------------------------------------------------------------------------------
  10503. XMFINLINE _XMXDECN4::operator UINT ()
  10504. {
  10505. return v;
  10506. }
  10507. //------------------------------------------------------------------------------
  10508. XMFINLINE _XMXDECN4& _XMXDECN4::operator=
  10509. (
  10510. CONST _XMXDECN4& XDecN4
  10511. )
  10512. {
  10513. v = XDecN4.v;
  10514. return *this;
  10515. }
  10516. //------------------------------------------------------------------------------
  10517. XMFINLINE _XMXDECN4& _XMXDECN4::operator=
  10518. (
  10519. CONST UINT Packed
  10520. )
  10521. {
  10522. v = Packed;
  10523. return *this;
  10524. }
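//------------------------------------------------------------------------------
// Illustrative usage sketch (hypothetical helper): the FLOAT constructor above
// packs the normalized components into 10:10:10:2 bits through XMStoreXDecN4,
// and the packed word is read back via the UINT conversion operator.
XMFINLINE UINT XMSampleXDecN4Usage()
{
    XMXDECN4 Packed(1.0f, 0.0f, -1.0f, 1.0f);
    return Packed; // the raw 10:10:10:2 packed representation
}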
  10525. /****************************************************************************
  10526. *
  10527. * XMXDEC4 operators
  10528. *
  10529. ****************************************************************************/
//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    UINT Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::_XMXDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4::operator UINT ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST _XMXDEC4& XDec4
)
{
    v = XDec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMDECN4 operators
 *
 ****************************************************************************/
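// XMDECN4 stores all four components as signed normalized values in [-1, 1]:
// 10 bits each for x, y and z, and 2 bits for w.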
//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    UINT Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::_XMDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4::operator UINT ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST _XMDECN4& DecN4
)
{
    v = DecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMDEC4 operators
 *
 ****************************************************************************/
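// XMDEC4 stores x, y and z as 10 bit signed integers and w as a 2 bit signed
// integer, packed into 32 bits.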
//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    UINT Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::_XMDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4::operator UINT ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST _XMDEC4& Dec4
)
{
    v = Dec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMUDECN4 operators
 *
 ****************************************************************************/
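// XMUDECN4 is the unsigned normalized 10:10:10:2 layout: x, y and z use
// 10 bits and w uses 2 bits, each mapping to [0, 1].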
//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    UINT Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::_XMUDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4::operator UINT ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST _XMUDECN4& UDecN4
)
{
    v = UDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMUDEC4 operators
 *
 ****************************************************************************/
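// XMUDEC4 stores x, y and z as 10 bit unsigned integers ([0, 1023]) and w as
// a 2 bit unsigned integer ([0, 3]).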
//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    UINT Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::_XMUDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4::operator UINT ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST _XMUDEC4& UDec4
)
{
    v = UDec4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMXICON4 operators
 *
 ****************************************************************************/
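// The Ico family widens the Dec layout to 64 bits (20:20:20:4). XMXICON4
// holds 20 bit signed normalized x, y and z and a 4 bit unsigned normalized w.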
//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    UINT64 Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::_XMXICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4::operator UINT64 ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST _XMXICON4& XIcoN4
)
{
    v = XIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMXICO4 operators
 *
 ****************************************************************************/
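// XMXICO4 holds 20 bit signed integer x, y and z components and a 4 bit
// unsigned integer w in one 64 bit word.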
//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    UINT64 Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::_XMXICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4::operator UINT64 ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST _XMXICO4& XIco4
)
{
    v = XIco4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMICON4 operators
 *
 ****************************************************************************/
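// XMICON4 stores all four components as signed normalized values:
// 20 bits each for x, y and z, and 4 bits for w.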
//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    UINT64 Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::_XMICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4::operator UINT64 ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST _XMICON4& IcoN4
)
{
    v = IcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMICO4 operators
 *
 ****************************************************************************/
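// XMICO4 stores x, y and z as 20 bit signed integers and w as a 4 bit signed
// integer.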
//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    UINT64 Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::_XMICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4::operator UINT64 ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST _XMICO4& Ico4
)
{
    v = Ico4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMUICON4 operators
 *
 ****************************************************************************/
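// XMUICON4 is the unsigned normalized 20:20:20:4 layout; every component
// maps to [0, 1].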
//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    UINT64 Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::_XMUICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4::operator UINT64 ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST _XMUICON4& UIcoN4
)
{
    v = UIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMUICO4 operators
 *
 ****************************************************************************/
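// XMUICO4 stores x, y and z as 20 bit unsigned integers and w as a 4 bit
// unsigned integer.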
//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    UINT64 Packed
)
{
    v = Packed;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::_XMUICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4::operator UINT64 ()
{
    return v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST _XMUICO4& UIco4
)
{
    v = UIco4.v;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}
/****************************************************************************
 *
 * XMCOLOR operators
 *
 ****************************************************************************/
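// XMCOLOR is a 32 bit ARGB color: four 8 bit unsigned normalized channels
// with alpha in the high byte (a << 24 | r << 16 | g << 8 | b).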
//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    UINT Color
)
{
    c = Color;
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreColor(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::_XMCOLOR
(
    CONST FLOAT* pArray
)
{
    XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR::operator UINT ()
{
    return c;
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST _XMCOLOR& Color
)
{
    c = Color.c;
    return *this;
}

//------------------------------------------------------------------------------

XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST UINT Color
)
{
    c = Color;
    return *this;
}
/****************************************************************************
 *
 * XMBYTEN4 operators
 *
 ****************************************************************************/
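// XMBYTEN4 stores four 8 bit signed normalized components, each mapping
// to [-1, 1].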
//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CHAR _x,
    CHAR _y,
    CHAR _z,
    CHAR _w
)
{
    x = _x;
    y = _y;
    z = _z;
    w = _w;
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    UINT _v
)
{
    v = _v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTEN4& _XMBYTEN4::operator=
(
    CONST _XMBYTEN4& ByteN4
)
{
    x = ByteN4.x;
    y = ByteN4.y;
    z = ByteN4.z;
    w = ByteN4.w;
    return *this;
}
/****************************************************************************
 *
 * XMBYTE4 operators
 *
 ****************************************************************************/
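// XMBYTE4 stores four 8 bit signed integer components.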
//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CHAR _x,
    CHAR _y,
    CHAR _z,
    CHAR _w
)
{
    x = _x;
    y = _y;
    z = _z;
    w = _w;
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    UINT _v
)
{
    v = _v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMBYTE4& _XMBYTE4::operator=
(
    CONST _XMBYTE4& Byte4
)
{
    x = Byte4.x;
    y = Byte4.y;
    z = Byte4.z;
    w = Byte4.w;
    return *this;
}
/****************************************************************************
 *
 * XMUBYTEN4 operators
 *
 ****************************************************************************/
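// XMUBYTEN4 stores four 8 bit unsigned normalized components, each mapping
// to [0, 1].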
//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    BYTE _x,
    BYTE _y,
    BYTE _z,
    BYTE _w
)
{
    x = _x;
    y = _y;
    z = _z;
    w = _w;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    UINT _v
)
{
    v = _v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator=
(
    CONST _XMUBYTEN4& UByteN4
)
{
    x = UByteN4.x;
    y = UByteN4.y;
    z = UByteN4.z;
    w = UByteN4.w;
    return *this;
}
/****************************************************************************
 *
 * XMUBYTE4 operators
 *
 ****************************************************************************/
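// XMUBYTE4 stores four 8 bit unsigned integer components ([0, 255]).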
//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    BYTE _x,
    BYTE _y,
    BYTE _z,
    BYTE _w
)
{
    x = _x;
    y = _y;
    z = _z;
    w = _w;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    UINT _v
)
{
    v = _v;
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------

XMFINLINE _XMUBYTE4& _XMUBYTE4::operator=
(
    CONST _XMUBYTE4& UByte4
)
{
    x = UByte4.x;
    y = UByte4.y;
    z = UByte4.z;
    w = UByte4.w;
    return *this;
}
#endif // __cplusplus

#if defined(_XM_NO_INTRINSICS_)
#undef XMISNAN
#undef XMISINF
#endif

#endif // __XNAMATHVECTOR_INL__