Counter Strike : Global Offensive Source Code

/*++
Copyright (c) Microsoft Corporation. All rights reserved.
Module Name:
xnamathvector.inl
Abstract:
XNA math library for Windows and Xbox 360: Vector functions
--*/
#if defined(_MSC_VER) && (_MSC_VER > 1000)
#pragma once
#endif
#ifndef __XNAMATHVECTOR_INL__
#define __XNAMATHVECTOR_INL__
#if defined(_XM_NO_INTRINSICS_)
#define XMISNAN(x) ((*(UINT*)&(x) & 0x7F800000) == 0x7F800000 && (*(UINT*)&(x) & 0x7FFFFF) != 0)
#define XMISINF(x) ((*(UINT*)&(x) & 0x7FFFFFFF) == 0x7F800000)
#endif
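// Editor's note: a brief illustration of the bit tests above (not part of the
// original header). 0x7F800000 is the IEEE-754 single-precision exponent mask,
// so XMISNAN fires when every exponent bit is set and the mantissa is non-zero,
// while XMISINF fires when the exponent is all ones and the mantissa is zero:
//
//     float f = 3.4e38f * 10.0f;   // overflows to +INF
//     assert(XMISINF(f));          // exponent all ones, mantissa zero
//     f = f - f;                   // INF - INF produces a quiet NaN
//     assert(XMISNAN(f));          // exponent all ones, mantissa non-zero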
/****************************************************************************
*
* General Vector
*
****************************************************************************/
//------------------------------------------------------------------------------
// Assignment operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// Return a vector with all elements equaling zero
XMFINLINE XMVECTOR XMVectorZero()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with four floating point values
XMFINLINE XMVECTOR XMVectorSet
(
FLOAT x,
FLOAT y,
FLOAT z,
FLOAT w
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 vResult = {x,y,z,w};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_set_ps( w, z, y, x );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
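// Editor's note (illustrative, not part of the original header): _mm_set_ps
// takes its arguments highest lane first, so the call above passes (w,z,y,x)
// to produce the conventional {x,y,z,w} lane layout. Typical usage:
//
//     XMVECTOR v = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     // XMVectorGetX(v) == 1.0f, XMVectorGetW(v) == 4.0f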
//------------------------------------------------------------------------------
// Initialize a vector with four integer values
XMFINLINE XMVECTOR XMVectorSetInt
(
UINT x,
UINT y,
UINT z,
UINT w
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORU32 vResult = {x,y,z,w};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_set_epi32( w, z, y, x );
return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value
XMFINLINE XMVECTOR XMVectorReplicate
(
FLOAT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
XMVECTORF32 vResult = {Value,Value,Value,Value};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_set_ps1( Value );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicatePtr
(
CONST FLOAT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
FLOAT Value = pValue[0];
XMVECTORF32 vResult = {Value,Value,Value,Value};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_load_ps1( pValue );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value
XMFINLINE XMVECTOR XMVectorReplicateInt
(
UINT Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
XMVECTORU32 vResult = {Value,Value,Value,Value};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i vTemp = _mm_set1_epi32( Value );
return reinterpret_cast<const __m128 *>(&vTemp)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with a replicated integer value passed by pointer
XMFINLINE XMVECTOR XMVectorReplicateIntPtr
(
CONST UINT *pValue
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
UINT Value = pValue[0];
XMVECTORU32 vResult = {Value,Value,Value,Value};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_load_ps1(reinterpret_cast<const float *>(pValue));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with all bits set (true mask)
XMFINLINE XMVECTOR XMVectorTrueInt()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORU32 vResult = {0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU,0xFFFFFFFFU};
return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_set1_epi32(-1);
return reinterpret_cast<__m128 *>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Initialize a vector with all bits clear (false mask)
XMFINLINE XMVECTOR XMVectorFalseInt()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult = {0.0f,0.0f,0.0f,0.0f};
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_setzero_ps();
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the x component of the vector
XMFINLINE XMVECTOR XMVectorSplatX
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_f32[0] =
vResult.vector4_f32[1] =
vResult.vector4_f32[2] =
vResult.vector4_f32[3] = V.vector4_f32[0];
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_shuffle_ps( V, V, _MM_SHUFFLE(0, 0, 0, 0) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the y component of the vector
XMFINLINE XMVECTOR XMVectorSplatY
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_f32[0] =
vResult.vector4_f32[1] =
vResult.vector4_f32[2] =
vResult.vector4_f32[3] = V.vector4_f32[1];
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_shuffle_ps( V, V, _MM_SHUFFLE(1, 1, 1, 1) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the z component of the vector
XMFINLINE XMVECTOR XMVectorSplatZ
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_f32[0] =
vResult.vector4_f32[1] =
vResult.vector4_f32[2] =
vResult.vector4_f32[3] = V.vector4_f32[2];
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_shuffle_ps( V, V, _MM_SHUFFLE(2, 2, 2, 2) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Replicate the w component of the vector
XMFINLINE XMVECTOR XMVectorSplatW
(
FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_f32[0] =
vResult.vector4_f32[1] =
vResult.vector4_f32[2] =
vResult.vector4_f32[3] = V.vector4_f32[3];
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_shuffle_ps( V, V, _MM_SHUFFLE(3, 3, 3, 3) );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
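// Editor's note (illustrative, not part of the original header): _MM_SHUFFLE
// packs four 2-bit lane selectors, so _MM_SHUFFLE(n,n,n,n) broadcasts lane n
// into every component. For example:
//
//     XMVECTOR v = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     XMVECTOR y = XMVectorSplatY(v);   // {2,2,2,2}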
//------------------------------------------------------------------------------
// Return a vector of 1.0f,1.0f,1.0f,1.0f
XMFINLINE XMVECTOR XMVectorSplatOne()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_f32[0] =
vResult.vector4_f32[1] =
vResult.vector4_f32[2] =
vResult.vector4_f32[3] = 1.0f;
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return g_XMOne;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of INF,INF,INF,INF
XMFINLINE XMVECTOR XMVectorSplatInfinity()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_u32[0] =
vResult.vector4_u32[1] =
vResult.vector4_u32[2] =
vResult.vector4_u32[3] = 0x7F800000;
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return g_XMInfinity;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN
XMFINLINE XMVECTOR XMVectorSplatQNaN()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_u32[0] =
vResult.vector4_u32[1] =
vResult.vector4_u32[2] =
vResult.vector4_u32[3] = 0x7FC00000;
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return g_XMQNaN;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f
XMFINLINE XMVECTOR XMVectorSplatEpsilon()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_u32[0] =
vResult.vector4_u32[1] =
vResult.vector4_u32[2] =
vResult.vector4_u32[3] = 0x34000000;
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
return g_XMEpsilon;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f
XMFINLINE XMVECTOR XMVectorSplatSignMask()
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vResult;
vResult.vector4_u32[0] =
vResult.vector4_u32[1] =
vResult.vector4_u32[2] =
vResult.vector4_u32[3] = 0x80000000U;
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
__m128i V = _mm_set1_epi32( 0x80000000 );
return reinterpret_cast<__m128*>(&V)[0];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return a floating point value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE FLOAT XMVectorGetByIndex(FXMVECTOR V,UINT i)
{
XMASSERT( i <= 3 );
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
return V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return the X component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
return _mm_cvtss_f32(V);
#else
return V.m128_f32[0];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Y component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
return _mm_cvtss_f32(vTemp);
#else
return V.m128_f32[1];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Z component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
return _mm_cvtss_f32(vTemp);
#else
return V.m128_f32[2];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the W component in an FPU register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE FLOAT XMVectorGetW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER>=1500)
XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
return _mm_cvtss_f32(vTemp);
#else
return V.m128_f32[3];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
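// Editor's note (illustrative, not part of the original header): the accessor
// pattern above is broadcast-then-extract. _mm_cvtss_f32 only reads lane 0, so
// the desired lane is first shuffled into lane 0; on compilers older than the
// _MSC_VER >= 1500 check the m128_f32 union fallback is used instead.
//
//     XMVECTOR v = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     FLOAT z = XMVectorGetZ(v);   // 3.0f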
//------------------------------------------------------------------------------
// Store a component indexed by i into a 32 bit float location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetByIndexPtr(FLOAT *f,FXMVECTOR V,UINT i)
{
XMASSERT( f != 0 );
XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
*f = V.vector4_f32[i];
#elif defined(_XM_SSE_INTRINSICS_)
*f = V.m128_f32[i];
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store the X component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetXPtr(FLOAT *x,FXMVECTOR V)
{
XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
*x = V.vector4_f32[0];
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_ss(x,V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Y component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetYPtr(FLOAT *y,FXMVECTOR V)
{
XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
*y = V.vector4_f32[1];
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
_mm_store_ss(y,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Z component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetZPtr(FLOAT *z,FXMVECTOR V)
{
XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
*z = V.vector4_f32[2];
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss(z,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the W component into a 32 bit float location in memory.
XMFINLINE VOID XMVectorGetWPtr(FLOAT *w,FXMVECTOR V)
{
XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
*w = V.vector4_f32[3];
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
_mm_store_ss(w,vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return an integer value via an index. This is not a recommended
// function to use due to performance loss.
XMFINLINE UINT XMVectorGetIntByIndex(FXMVECTOR V, UINT i)
{
XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
XMVECTORU32 tmp;
tmp.v = V;
return tmp.u[i];
#else
return V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return the X component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntX(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
return static_cast<UINT>(_mm_cvtsi128_si32(reinterpret_cast<const __m128i *>(&V)[0]));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Y component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntY(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
__m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(1,1,1,1));
return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the Z component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntZ(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
__m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(2,2,2,2));
return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Return the W component in an integer register.
// This causes Load/Hit/Store on VMX targets
XMFINLINE UINT XMVectorGetIntW(FXMVECTOR V)
{
#if defined(_XM_NO_INTRINSICS_)
return V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
__m128i vResulti = _mm_shuffle_epi32(reinterpret_cast<const __m128i *>(&V)[0],_MM_SHUFFLE(3,3,3,3));
return static_cast<UINT>(_mm_cvtsi128_si32(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store a component indexed by i into a 32 bit integer location in memory.
// This causes Load/Hit/Store on VMX targets
XMFINLINE VOID XMVectorGetIntByIndexPtr(UINT *x,FXMVECTOR V,UINT i)
{
XMASSERT( x != 0 );
XMASSERT( i < 4 );
#if defined(_XM_NO_INTRINSICS_)
*x = V.vector4_u32[i];
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_MSC_VER) && (_MSC_VER<1400)
XMVECTORU32 tmp;
tmp.v = V;
*x = tmp.u[i];
#else
*x = V.m128_u32[i];
#endif
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Store the X component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntXPtr(UINT *x,FXMVECTOR V)
{
XMASSERT( x != 0 );
#if defined(_XM_NO_INTRINSICS_)
*x = V.vector4_u32[0];
#elif defined(_XM_SSE_INTRINSICS_)
_mm_store_ss(reinterpret_cast<float *>(x),V);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Y component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntYPtr(UINT *y,FXMVECTOR V)
{
XMASSERT( y != 0 );
#if defined(_XM_NO_INTRINSICS_)
*y = V.vector4_u32[1];
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
_mm_store_ss(reinterpret_cast<float *>(y),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the Z component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntZPtr(UINT *z,FXMVECTOR V)
{
XMASSERT( z != 0 );
#if defined(_XM_NO_INTRINSICS_)
*z = V.vector4_u32[2];
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
_mm_store_ss(reinterpret_cast<float *>(z),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Store the W component into a 32 bit integer location in memory.
XMFINLINE VOID XMVectorGetIntWPtr(UINT *w,FXMVECTOR V)
{
XMASSERT( w != 0 );
#if defined(_XM_NO_INTRINSICS_)
*w = V.vector4_u32[3];
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
_mm_store_ss(reinterpret_cast<float *>(w),vResult);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Set a single indexed floating point component
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndex(FXMVECTOR V, FLOAT f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( i <= 3 );
U = V;
U.vector4_f32[i] = f;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( i <= 3 );
XMVECTOR U = V;
U.m128_f32[i] = f;
return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetX(FXMVECTOR V, FLOAT x)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_f32[0] = x;
U.vector4_f32[1] = V.vector4_f32[1];
U.vector4_f32[2] = V.vector4_f32[2];
U.vector4_f32[3] = V.vector4_f32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_f32[0] = x;
return vResult;
#else
XMVECTOR vResult = _mm_set_ss(x);
vResult = _mm_move_ss(V,vResult);
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetY(FXMVECTOR V, FLOAT y)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_f32[0] = V.vector4_f32[0];
U.vector4_f32[1] = y;
U.vector4_f32[2] = V.vector4_f32[2];
U.vector4_f32[3] = V.vector4_f32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_f32[1] = y;
return vResult;
#else
// Swap y and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
// Convert input to vector
XMVECTOR vTemp = _mm_set_ss(y);
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap y and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetZ(FXMVECTOR V, FLOAT z)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_f32[0] = V.vector4_f32[0];
U.vector4_f32[1] = V.vector4_f32[1];
U.vector4_f32[2] = z;
U.vector4_f32[3] = V.vector4_f32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_f32[2] = z;
return vResult;
#else
// Swap z and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
// Convert input to vector
XMVECTOR vTemp = _mm_set_ss(z);
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap z and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to a passed floating point value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetW(FXMVECTOR V, FLOAT w)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_f32[0] = V.vector4_f32[0];
U.vector4_f32[1] = V.vector4_f32[1];
U.vector4_f32[2] = V.vector4_f32[2];
U.vector4_f32[3] = w;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_f32[3] = w;
return vResult;
#else
// Swap w and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
// Convert input to vector
XMVECTOR vTemp = _mm_set_ss(w);
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap w and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
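// Editor's note (illustrative, not part of the original header): _mm_move_ss
// can only replace lane 0, so the setters above use a three-step dance: swap
// the target lane into lane 0, overwrite lane 0 with the new scalar, then swap
// back. The same shuffle mask works for both swaps because each permutation is
// its own inverse, e.g. _MM_SHUFFLE(3,2,0,1) exchanges x and y both times:
//
//     XMVECTOR v = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
//     v = XMVectorSetY(v, 9.0f);   // {1,9,3,4}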
//------------------------------------------------------------------------------
// Sets a component of a vector to a floating point value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetByIndexPtr(FXMVECTOR V,CONST FLOAT *f,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( f != 0 );
XMASSERT( i <= 3 );
U = V;
U.vector4_f32[i] = *f;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( f != 0 );
XMASSERT( i <= 3 );
XMVECTOR U = V;
U.m128_f32[i] = *f;
return U;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetXPtr(FXMVECTOR V,CONST FLOAT *x)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( x != 0 );
U.vector4_f32[0] = *x;
U.vector4_f32[1] = V.vector4_f32[1];
U.vector4_f32[2] = V.vector4_f32[2];
U.vector4_f32[3] = V.vector4_f32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( x != 0 );
XMVECTOR vResult = _mm_load_ss(x);
vResult = _mm_move_ss(V,vResult);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetYPtr(FXMVECTOR V,CONST FLOAT *y)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( y != 0 );
U.vector4_f32[0] = V.vector4_f32[0];
U.vector4_f32[1] = *y;
U.vector4_f32[2] = V.vector4_f32[2];
U.vector4_f32[3] = V.vector4_f32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( y != 0 );
// Swap y and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
// Convert input to vector
XMVECTOR vTemp = _mm_load_ss(y);
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap y and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetZPtr(FXMVECTOR V,CONST FLOAT *z)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( z != 0 );
U.vector4_f32[0] = V.vector4_f32[0];
U.vector4_f32[1] = V.vector4_f32[1];
U.vector4_f32[2] = *z;
U.vector4_f32[3] = V.vector4_f32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( z != 0 );
// Swap z and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
// Convert input to vector
XMVECTOR vTemp = _mm_load_ss(z);
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap z and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to a floating point value passed by pointer
XMFINLINE XMVECTOR XMVectorSetWPtr(FXMVECTOR V,CONST FLOAT *w)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( w != 0 );
U.vector4_f32[0] = V.vector4_f32[0];
U.vector4_f32[1] = V.vector4_f32[1];
U.vector4_f32[2] = V.vector4_f32[2];
U.vector4_f32[3] = *w;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( w != 0 );
// Swap w and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
// Convert input to vector
XMVECTOR vTemp = _mm_load_ss(w);
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap w and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets a component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndex(FXMVECTOR V, UINT x, UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( i <= 3 );
U = V;
U.vector4_u32[i] = x;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( i <= 3 );
XMVECTORU32 tmp;
tmp.v = V;
tmp.u[i] = x;
return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntX(FXMVECTOR V, UINT x)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_u32[0] = x;
U.vector4_u32[1] = V.vector4_u32[1];
U.vector4_u32[2] = V.vector4_u32[2];
U.vector4_u32[3] = V.vector4_u32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_i32[0] = x;
return vResult;
#else
__m128i vTemp = _mm_cvtsi32_si128(x);
XMVECTOR vResult = _mm_move_ss(V,reinterpret_cast<const __m128 *>(&vTemp)[0]);
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntY(FXMVECTOR V, UINT y)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_u32[0] = V.vector4_u32[0];
U.vector4_u32[1] = y;
U.vector4_u32[2] = V.vector4_u32[2];
U.vector4_u32[3] = V.vector4_u32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_i32[1] = y;
return vResult;
#else
// Swap y and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
// Convert input to vector
__m128i vTemp = _mm_cvtsi32_si128(y);
// Replace the x component
vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
// Swap y and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntZ(FXMVECTOR V, UINT z)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_u32[0] = V.vector4_u32[0];
U.vector4_u32[1] = V.vector4_u32[1];
U.vector4_u32[2] = z;
U.vector4_u32[3] = V.vector4_u32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_i32[2] = z;
return vResult;
#else
// Swap z and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
// Convert input to vector
__m128i vTemp = _mm_cvtsi32_si128(z);
// Replace the x component
vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
// Swap z and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to an integer passed by value
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntW(FXMVECTOR V, UINT w)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
U.vector4_u32[0] = V.vector4_u32[0];
U.vector4_u32[1] = V.vector4_u32[1];
U.vector4_u32[2] = V.vector4_u32[2];
U.vector4_u32[3] = w;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_ISVS2005_)
XMVECTOR vResult = V;
vResult.m128_i32[3] = w;
return vResult;
#else
// Swap w and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
// Convert input to vector
__m128i vTemp = _mm_cvtsi32_si128(w);
// Replace the x component
vResult = _mm_move_ss(vResult,reinterpret_cast<const __m128 *>(&vTemp)[0]);
// Swap w and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
return vResult;
#endif // _XM_ISVS2005_
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets a component of a vector to an integer value passed by pointer
// This causes Load/Hit/Store on VMX targets
XMFINLINE XMVECTOR XMVectorSetIntByIndexPtr(FXMVECTOR V, CONST UINT *x,UINT i)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( x != 0 );
XMASSERT( i <= 3 );
U = V;
U.vector4_u32[i] = *x;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( x != 0 );
XMASSERT( i <= 3 );
XMVECTORU32 tmp;
tmp.v = V;
tmp.u[i] = *x;
return tmp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Sets the X component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntXPtr(FXMVECTOR V,CONST UINT *x)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( x != 0 );
U.vector4_u32[0] = *x;
U.vector4_u32[1] = V.vector4_u32[1];
U.vector4_u32[2] = V.vector4_u32[2];
U.vector4_u32[3] = V.vector4_u32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( x != 0 );
XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(x));
XMVECTOR vResult = _mm_move_ss(V,vTemp);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Y component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntYPtr(FXMVECTOR V,CONST UINT *y)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( y != 0 );
U.vector4_u32[0] = V.vector4_u32[0];
U.vector4_u32[1] = *y;
U.vector4_u32[2] = V.vector4_u32[2];
U.vector4_u32[3] = V.vector4_u32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( y != 0 );
// Swap y and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
// Convert input to vector
XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(y));
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap y and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,2,0,1));
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the Z component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntZPtr(FXMVECTOR V,CONST UINT *z)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( z != 0 );
U.vector4_u32[0] = V.vector4_u32[0];
U.vector4_u32[1] = V.vector4_u32[1];
U.vector4_u32[2] = *z;
U.vector4_u32[3] = V.vector4_u32[3];
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( z != 0 );
// Swap z and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,0,1,2));
// Convert input to vector
XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(z));
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap z and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,0,1,2));
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
// Sets the W component of a vector to an integer value passed by pointer
XMFINLINE XMVECTOR XMVectorSetIntWPtr(FXMVECTOR V,CONST UINT *w)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR U;
XMASSERT( w != 0 );
U.vector4_u32[0] = V.vector4_u32[0];
U.vector4_u32[1] = V.vector4_u32[1];
U.vector4_u32[2] = V.vector4_u32[2];
U.vector4_u32[3] = *w;
return U;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( w != 0 );
// Swap w and x
XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,2,1,3));
// Convert input to vector
XMVECTOR vTemp = _mm_load_ss(reinterpret_cast<const float *>(w));
// Replace the x component
vResult = _mm_move_ss(vResult,vTemp);
// Swap w and x again
vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,2,1,3));
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorPermute
// operations. Visualize the two vectors V1 and V2 given
// in a permute as arranged back to back in a linear fashion,
// such that they form an array of 8 floating point values.
// The four integers specified in XMVectorPermuteControl
// will serve as indices into the array to select components
// from the two vectors. ElementIndex0 is used to select
// an element from the vectors to be placed in the first
// component of the resulting vector, ElementIndex1 is used
// to select an element for the second component, etc.
XMFINLINE XMVECTOR XMVectorPermuteControl
(
UINT ElementIndex0,
UINT ElementIndex1,
UINT ElementIndex2,
UINT ElementIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) || defined(_XM_NO_INTRINSICS_)
XMVECTORU32 vControl;
static CONST UINT ControlElement[] = {
XM_PERMUTE_0X,
XM_PERMUTE_0Y,
XM_PERMUTE_0Z,
XM_PERMUTE_0W,
XM_PERMUTE_1X,
XM_PERMUTE_1Y,
XM_PERMUTE_1Z,
XM_PERMUTE_1W
};
XMASSERT(ElementIndex0 < 8);
XMASSERT(ElementIndex1 < 8);
XMASSERT(ElementIndex2 < 8);
XMASSERT(ElementIndex3 < 8);
vControl.u[0] = ControlElement[ElementIndex0];
vControl.u[1] = ControlElement[ElementIndex1];
vControl.u[2] = ControlElement[ElementIndex2];
vControl.u[3] = ControlElement[ElementIndex3];
return vControl.v;
#else
#endif
}
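// Editor's note (illustrative, not part of the original header): indices 0-3
// select components of V1 and 4-7 select components of V2. For example, a
// control that builds {V1.x, V1.y, V2.x, V2.y}:
//
//     XMVECTOR ctrl = XMVectorPermuteControl(0, 1, 4, 5);
//     XMVECTOR mixed = XMVectorPermute(v1, v2, ctrl);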
//------------------------------------------------------------------------------
// Using a control vector made up of 16 bytes from 0-31, remap V1 and V2's byte
// entries into a single 16 byte vector and return it. Index 0-15 = V1,
// 16-31 = V2
XMFINLINE XMVECTOR XMVectorPermute
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)
const BYTE *aByte[2];
XMVECTOR Result;
UINT i, uIndex, VectorIndex;
const BYTE *pControl;
BYTE *pWork;
// Indices must be in range from 0 to 31
XMASSERT((Control.vector4_u32[0] & 0xE0E0E0E0) == 0);
XMASSERT((Control.vector4_u32[1] & 0xE0E0E0E0) == 0);
XMASSERT((Control.vector4_u32[2] & 0xE0E0E0E0) == 0);
XMASSERT((Control.vector4_u32[3] & 0xE0E0E0E0) == 0);
// 0-15 = V1, 16-31 = V2
aByte[0] = (const BYTE*)(&V1);
aByte[1] = (const BYTE*)(&V2);
i = 16;
pControl = (const BYTE *)(&Control);
pWork = (BYTE *)(&Result);
do {
// Get the byte to map from
uIndex = pControl[0];
++pControl;
VectorIndex = (uIndex>>4)&1;
uIndex &= 0x0F;
#if defined(_XM_LITTLEENDIAN_)
uIndex ^= 3; // Swap byte ordering on little endian machines
#endif
pWork[0] = aByte[VectorIndex][uIndex];
++pWork;
} while (--i);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_PREFAST_) || defined(XMDEBUG)
// Indices must be in range from 0 to 31
static const XMVECTORI32 PermuteTest = {0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0,0xE0E0E0E0};
XMVECTOR vAssert = _mm_and_ps(Control,PermuteTest);
__m128i vAsserti = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&vAssert)[0],g_XMZero);
XMASSERT(_mm_movemask_ps(*reinterpret_cast<const __m128 *>(&vAsserti)) == 0xf);
#endif
// Store the vectors onto local memory on the stack
XMVECTOR Array[2];
Array[0] = V1;
Array[1] = V2;
// Output vector, on the stack
XMVECTORU8 vResult;
// Get pointer to the two vectors on the stack
const BYTE *pInput = reinterpret_cast<const BYTE *>(Array);
// Store the Control vector on the stack to access the bytes.
// Don't read Control directly; that can force a register variable to spill to the stack.
XMVECTORU8 vControl;
vControl.v = Control; // Write to memory
UINT i = 0;
do {
UINT ComponentIndex = vControl.u[i] & 0x1FU;
ComponentIndex ^= 3; // Swap byte ordering
vResult.u[i] = pInput[ComponentIndex];
} while (++i<16);
return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
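// Editor's note (illustrative, not part of the original header): this SSE path
// has no single-instruction equivalent (the byte shuffle PSHUFB is SSSE3, which
// this header does not assume), so both vectors are spilled to the stack and
// the 16 output bytes are gathered one at a time. The ^= 3 corrects for
// little-endian byte order within each 32-bit lane, matching the
// _XM_LITTLEENDIAN_ branch of the no-intrinsics path above.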
//------------------------------------------------------------------------------
// Define a control vector to be used in XMVectorSelect
// operations. The four integers specified in XMVectorSelectControl
// serve as indices to select between components in two vectors.
// The first index controls selection for the first component of
// the vectors involved in a select operation, the second index
// controls selection for the second component etc. A value of
// zero for an index causes the corresponding component from the first
// vector to be selected whereas a one causes the component from the
// second vector to be selected instead.
XMFINLINE XMVECTOR XMVectorSelectControl
(
UINT VectorIndex0,
UINT VectorIndex1,
UINT VectorIndex2,
UINT VectorIndex3
)
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
// x=Index0,y=Index1,z=Index2,w=Index3
__m128i vTemp = _mm_set_epi32(VectorIndex3,VectorIndex2,VectorIndex1,VectorIndex0);
// Any non-zero entries become 0xFFFFFFFF else 0
vTemp = _mm_cmpgt_epi32(vTemp,g_XMZero);
return reinterpret_cast<__m128 *>(&vTemp)[0];
#else
XMVECTOR ControlVector;
CONST UINT ControlElement[] =
{
XM_SELECT_0,
XM_SELECT_1
};
XMASSERT(VectorIndex0 < 2);
XMASSERT(VectorIndex1 < 2);
XMASSERT(VectorIndex2 < 2);
XMASSERT(VectorIndex3 < 2);
ControlVector.vector4_u32[0] = ControlElement[VectorIndex0];
ControlVector.vector4_u32[1] = ControlElement[VectorIndex1];
ControlVector.vector4_u32[2] = ControlElement[VectorIndex2];
ControlVector.vector4_u32[3] = ControlElement[VectorIndex3];
return ControlVector;
#endif
}
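// Editor's note (illustrative, not part of the original header): a
// per-component blend of two vectors using a select control:
//
//     XMVECTOR ctrl = XMVectorSelectControl(0, 1, 0, 1); // x,z from a; y,w from b
//     XMVECTOR r = XMVectorSelect(a, b, ctrl);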
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorSelect
(
FXMVECTOR V1,
FXMVECTOR V2,
FXMVECTOR Control
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR Result;
Result.vector4_u32[0] = (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | (V2.vector4_u32[0] & Control.vector4_u32[0]);
Result.vector4_u32[1] = (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | (V2.vector4_u32[1] & Control.vector4_u32[1]);
Result.vector4_u32[2] = (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | (V2.vector4_u32[2] & Control.vector4_u32[2]);
Result.vector4_u32[3] = (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | (V2.vector4_u32[3] & Control.vector4_u32[3]);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp1 = _mm_andnot_ps(Control,V1);
XMVECTOR vTemp2 = _mm_and_ps(V2,Control);
return _mm_or_ps(vTemp1,vTemp2);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
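// Editor's note (illustrative, not part of the original header): the select is
// the classic branchless bit-blend r = (v1 & ~c) | (v2 & c), applied to all
// 128 bits at once. Because the comparison functions return all-ones or
// all-zeros lane masks, it composes directly with them:
//
//     XMVECTOR mask = XMVectorGreater(a, b);        // per-lane a > b
//     XMVECTOR maxAB = XMVectorSelect(b, a, mask);  // per-lane maximum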
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorMergeXY
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR Result;
Result.vector4_u32[0] = V1.vector4_u32[0];
Result.vector4_u32[1] = V2.vector4_u32[0];
Result.vector4_u32[2] = V1.vector4_u32[1];
Result.vector4_u32[3] = V2.vector4_u32[1];
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_unpacklo_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorMergeZW
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR Result;
Result.vector4_u32[0] = V1.vector4_u32[2];
Result.vector4_u32[1] = V2.vector4_u32[2];
Result.vector4_u32[2] = V1.vector4_u32[3];
Result.vector4_u32[3] = V2.vector4_u32[3];
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_unpackhi_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorEqual
(
FXMVECTOR V1,
FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR Control;
Control.vector4_u32[0] = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
Control.vector4_u32[1] = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
Control.vector4_u32[2] = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
Control.vector4_u32[3] = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
return Control;
#elif defined(_XM_SSE_INTRINSICS_)
return _mm_cmpeq_ps( V1, V2 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorEqualR
(
UINT* pCR,
FXMVECTOR V1,
FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
UINT ux, uy, uz, uw, CR;
XMVECTOR Control;
XMASSERT( pCR );
ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
CR = 0;
if (ux&uy&uz&uw)
{
// All elements are equal
CR = XM_CRMASK_CR6TRUE;
}
else if (!(ux|uy|uz|uw))
{
// No elements are equal
CR = XM_CRMASK_CR6FALSE;
}
*pCR = CR;
Control.vector4_u32[0] = ux;
Control.vector4_u32[1] = uy;
Control.vector4_u32[2] = uz;
Control.vector4_u32[3] = uw;
return Control;
#elif defined(_XM_SSE_INTRINSICS_)
XMASSERT( pCR );
XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
UINT CR = 0;
int iTest = _mm_movemask_ps(vTemp);
if (iTest==0xf)
{
// All elements are equal
CR = XM_CRMASK_CR6TRUE;
}
else if (!iTest)
{
// No elements are equal
CR = XM_CRMASK_CR6FALSE;
}
*pCR = CR;
return vTemp;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
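// Editor's note (illustrative, not part of the original header): the "R"
// variants also return a condition-record mask mirroring PowerPC CR6
// semantics: XM_CRMASK_CR6TRUE when the comparison holds in every lane,
// XM_CRMASK_CR6FALSE when it holds in none, and neither bit otherwise. A
// sketch using the library's comparison-record helpers:
//
//     UINT cr;
//     XMVectorEqualR(&cr, a, b);
//     if (XMComparisonAllTrue(cr)) { /* every component of a equals b */ }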
  1384. //------------------------------------------------------------------------------
  1385. // Treat the components of the vectors as unsigned integers and
  1386. // compare individual bits between the two. This is useful for
  1387. // comparing control vectors and result vectors returned from
  1388. // other comparison operations.
  1389. XMFINLINE XMVECTOR XMVectorEqualInt
  1390. (
  1391. FXMVECTOR V1,
  1392. FXMVECTOR V2
  1393. )
  1394. {
  1395. #if defined(_XM_NO_INTRINSICS_)
  1396. XMVECTOR Control;
  1397. Control.vector4_u32[0] = (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0;
  1398. Control.vector4_u32[1] = (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0;
  1399. Control.vector4_u32[2] = (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0;
  1400. Control.vector4_u32[3] = (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0;
  1401. return Control;
  1402. #elif defined(_XM_SSE_INTRINSICS_)
  1403. __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
  1404. return reinterpret_cast<__m128 *>(&V)[0];
  1405. #else // _XM_VMX128_INTRINSICS_
  1406. #endif // _XM_VMX128_INTRINSICS_
  1407. }
  1408. //------------------------------------------------------------------------------
  1409. XMFINLINE XMVECTOR XMVectorEqualIntR
  1410. (
  1411. UINT* pCR,
  1412. FXMVECTOR V1,
  1413. FXMVECTOR V2
  1414. )
  1415. {
  1416. #if defined(_XM_NO_INTRINSICS_)
  1417. XMVECTOR Control;
  1418. XMASSERT(pCR);
  1419. Control = XMVectorEqualInt(V1, V2);
  1420. *pCR = 0;
  1421. if (XMVector4EqualInt(Control, XMVectorTrueInt()))
  1422. {
  1423. // All elements are equal
  1424. *pCR |= XM_CRMASK_CR6TRUE;
  1425. }
  1426. else if (XMVector4EqualInt(Control, XMVectorFalseInt()))
  1427. {
  1428. // All elements are not equal
  1429. *pCR |= XM_CRMASK_CR6FALSE;
  1430. }
  1431. return Control;
  1432. #elif defined(_XM_SSE_INTRINSICS_)
  1433. XMASSERT(pCR);
  1434. __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
  1435. int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128*>(&V)[0]);
  1436. UINT CR = 0;
  1437. if (iTemp==0x0F)
  1438. {
  1439. CR = XM_CRMASK_CR6TRUE;
  1440. }
  1441. else if (!iTemp)
  1442. {
  1443. CR = XM_CRMASK_CR6FALSE;
  1444. }
  1445. *pCR = CR;
  1446. return reinterpret_cast<__m128 *>(&V)[0];
  1447. #else // _XM_VMX128_INTRINSICS_
  1448. #endif // _XM_VMX128_INTRINSICS_
  1449. }
  1450. //------------------------------------------------------------------------------
  1451. XMFINLINE XMVECTOR XMVectorNearEqual
  1452. (
  1453. FXMVECTOR V1,
  1454. FXMVECTOR V2,
  1455. FXMVECTOR Epsilon
  1456. )
  1457. {
  1458. #if defined(_XM_NO_INTRINSICS_)
  1459. FLOAT fDeltax, fDeltay, fDeltaz, fDeltaw;
  1460. XMVECTOR Control;
  1461. fDeltax = V1.vector4_f32[0]-V2.vector4_f32[0];
  1462. fDeltay = V1.vector4_f32[1]-V2.vector4_f32[1];
  1463. fDeltaz = V1.vector4_f32[2]-V2.vector4_f32[2];
  1464. fDeltaw = V1.vector4_f32[3]-V2.vector4_f32[3];
  1465. fDeltax = fabsf(fDeltax);
  1466. fDeltay = fabsf(fDeltay);
  1467. fDeltaz = fabsf(fDeltaz);
  1468. fDeltaw = fabsf(fDeltaw);
  1469. Control.vector4_u32[0] = (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
  1470. Control.vector4_u32[1] = (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
  1471. Control.vector4_u32[2] = (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
  1472. Control.vector4_u32[3] = (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
  1473. return Control;
  1474. #elif defined(_XM_SSE_INTRINSICS_)
  1475. // Get the difference
  1476. XMVECTOR vDelta = _mm_sub_ps(V1,V2);
  1477. // Get the absolute value of the difference
  1478. XMVECTOR vTemp = _mm_setzero_ps();
  1479. vTemp = _mm_sub_ps(vTemp,vDelta);
  1480. vTemp = _mm_max_ps(vTemp,vDelta);
  1481. vTemp = _mm_cmple_ps(vTemp,Epsilon);
  1482. return vTemp;
  1483. #else // _XM_VMX128_INTRINSICS_
  1484. #endif // _XM_VMX128_INTRINSICS_
  1485. }
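//------------------------------------------------------------------------------
// Editorial sketch: because Epsilon is a full vector, each component may carry
// its own tolerance; replicating a scalar gives the common uniform test. The
// wrapper name below is hypothetical.
XMFINLINE XMVECTOR XMVectorNearEqualUniform(FXMVECTOR V1, FXMVECTOR V2, FLOAT Tolerance)
{
    return XMVectorNearEqual(V1, V2, XMVectorReplicate(Tolerance));
}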
  1486. //------------------------------------------------------------------------------
  1487. XMFINLINE XMVECTOR XMVectorNotEqual
  1488. (
  1489. FXMVECTOR V1,
  1490. FXMVECTOR V2
  1491. )
  1492. {
  1493. #if defined(_XM_NO_INTRINSICS_)
  1494. XMVECTOR Control;
  1495. Control.vector4_u32[0] = (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
  1496. Control.vector4_u32[1] = (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
  1497. Control.vector4_u32[2] = (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
  1498. Control.vector4_u32[3] = (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
  1499. return Control;
  1500. #elif defined(_XM_SSE_INTRINSICS_)
  1501. return _mm_cmpneq_ps( V1, V2 );
  1502. #else // _XM_VMX128_INTRINSICS_
  1503. #endif // _XM_VMX128_INTRINSICS_
  1504. }
  1505. //------------------------------------------------------------------------------
  1506. XMFINLINE XMVECTOR XMVectorNotEqualInt
  1507. (
  1508. FXMVECTOR V1,
  1509. FXMVECTOR V2
  1510. )
  1511. {
  1512. #if defined(_XM_NO_INTRINSICS_)
  1513. XMVECTOR Control;
  1514. Control.vector4_u32[0] = (V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0;
  1515. Control.vector4_u32[1] = (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0;
  1516. Control.vector4_u32[2] = (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0;
  1517. Control.vector4_u32[3] = (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0;
  1518. return Control;
  1519. #elif defined(_XM_SSE_INTRINSICS_)
  1520. __m128i V = _mm_cmpeq_epi32( reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0] );
  1521. return _mm_xor_ps(reinterpret_cast<__m128 *>(&V)[0],g_XMNegOneMask);
  1522. #else // _XM_VMX128_INTRINSICS_
  1523. #endif // _XM_VMX128_INTRINSICS_
  1524. }
  1525. //------------------------------------------------------------------------------
  1526. XMFINLINE XMVECTOR XMVectorGreater
  1527. (
  1528. FXMVECTOR V1,
  1529. FXMVECTOR V2
  1530. )
  1531. {
  1532. #if defined(_XM_NO_INTRINSICS_)
  1533. XMVECTOR Control;
  1534. Control.vector4_u32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
  1535. Control.vector4_u32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
  1536. Control.vector4_u32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
  1537. Control.vector4_u32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
  1538. return Control;
  1539. #elif defined(_XM_SSE_INTRINSICS_)
  1540. return _mm_cmpgt_ps( V1, V2 );
  1541. #else // _XM_VMX128_INTRINSICS_
  1542. #endif // _XM_VMX128_INTRINSICS_
  1543. }
  1544. //------------------------------------------------------------------------------
  1545. XMFINLINE XMVECTOR XMVectorGreaterR
  1546. (
  1547. UINT* pCR,
  1548. FXMVECTOR V1,
  1549. FXMVECTOR V2
  1550. )
  1551. {
  1552. #if defined(_XM_NO_INTRINSICS_)
  1553. UINT ux, uy, uz, uw, CR;
  1554. XMVECTOR Control;
  1555. XMASSERT( pCR );
  1556. ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
  1557. uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
  1558. uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
  1559. uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
  1560. CR = 0;
  1561. if (ux&uy&uz&uw)
  1562. {
  1563. // All elements are greater
  1564. CR = XM_CRMASK_CR6TRUE;
  1565. }
  1566. else if (!(ux|uy|uz|uw))
  1567. {
1568. // No elements are greater
  1569. CR = XM_CRMASK_CR6FALSE;
  1570. }
  1571. *pCR = CR;
  1572. Control.vector4_u32[0] = ux;
  1573. Control.vector4_u32[1] = uy;
  1574. Control.vector4_u32[2] = uz;
  1575. Control.vector4_u32[3] = uw;
  1576. return Control;
  1577. #elif defined(_XM_SSE_INTRINSICS_)
  1578. XMASSERT( pCR );
  1579. XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
  1580. UINT CR = 0;
  1581. int iTest = _mm_movemask_ps(vTemp);
  1582. if (iTest==0xf)
  1583. {
  1584. CR = XM_CRMASK_CR6TRUE;
  1585. }
  1586. else if (!iTest)
  1587. {
1588. // No elements are greater
  1589. CR = XM_CRMASK_CR6FALSE;
  1590. }
  1591. *pCR = CR;
  1592. return vTemp;
  1593. #else // _XM_VMX128_INTRINSICS_
  1594. #endif // _XM_VMX128_INTRINSICS_
  1595. }
  1596. //------------------------------------------------------------------------------
  1597. XMFINLINE XMVECTOR XMVectorGreaterOrEqual
  1598. (
  1599. FXMVECTOR V1,
  1600. FXMVECTOR V2
  1601. )
  1602. {
  1603. #if defined(_XM_NO_INTRINSICS_)
  1604. XMVECTOR Control;
  1605. Control.vector4_u32[0] = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
  1606. Control.vector4_u32[1] = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
  1607. Control.vector4_u32[2] = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
  1608. Control.vector4_u32[3] = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
  1609. return Control;
  1610. #elif defined(_XM_SSE_INTRINSICS_)
  1611. return _mm_cmpge_ps( V1, V2 );
  1612. #else // _XM_VMX128_INTRINSICS_
  1613. #endif // _XM_VMX128_INTRINSICS_
  1614. }
  1615. //------------------------------------------------------------------------------
  1616. XMFINLINE XMVECTOR XMVectorGreaterOrEqualR
  1617. (
  1618. UINT* pCR,
  1619. FXMVECTOR V1,
  1620. FXMVECTOR V2
  1621. )
  1622. {
  1623. #if defined(_XM_NO_INTRINSICS_)
  1624. UINT ux, uy, uz, uw, CR;
  1625. XMVECTOR Control;
  1626. XMASSERT( pCR );
  1627. ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
  1628. uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
  1629. uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
  1630. uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
  1631. CR = 0;
  1632. if (ux&uy&uz&uw)
  1633. {
1634. // All elements are greater or equal
  1635. CR = XM_CRMASK_CR6TRUE;
  1636. }
  1637. else if (!(ux|uy|uz|uw))
  1638. {
1639. // No elements are greater or equal
  1640. CR = XM_CRMASK_CR6FALSE;
  1641. }
  1642. *pCR = CR;
  1643. Control.vector4_u32[0] = ux;
  1644. Control.vector4_u32[1] = uy;
  1645. Control.vector4_u32[2] = uz;
  1646. Control.vector4_u32[3] = uw;
  1647. return Control;
  1648. #elif defined(_XM_SSE_INTRINSICS_)
  1649. XMASSERT( pCR );
  1650. XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
  1651. UINT CR = 0;
  1652. int iTest = _mm_movemask_ps(vTemp);
  1653. if (iTest==0xf)
  1654. {
  1655. CR = XM_CRMASK_CR6TRUE;
  1656. }
  1657. else if (!iTest)
  1658. {
1659. // No elements are greater or equal
  1660. CR = XM_CRMASK_CR6FALSE;
  1661. }
  1662. *pCR = CR;
  1663. return vTemp;
  1664. #else // _XM_VMX128_INTRINSICS_
  1665. #endif // _XM_VMX128_INTRINSICS_
  1666. }
  1667. //------------------------------------------------------------------------------
  1668. XMFINLINE XMVECTOR XMVectorLess
  1669. (
  1670. FXMVECTOR V1,
  1671. FXMVECTOR V2
  1672. )
  1673. {
  1674. #if defined(_XM_NO_INTRINSICS_)
  1675. XMVECTOR Control;
  1676. Control.vector4_u32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
  1677. Control.vector4_u32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
  1678. Control.vector4_u32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
  1679. Control.vector4_u32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
  1680. return Control;
  1681. #elif defined(_XM_SSE_INTRINSICS_)
  1682. return _mm_cmplt_ps( V1, V2 );
  1683. #else // _XM_VMX128_INTRINSICS_
  1684. #endif // _XM_VMX128_INTRINSICS_
  1685. }
  1686. //------------------------------------------------------------------------------
  1687. XMFINLINE XMVECTOR XMVectorLessOrEqual
  1688. (
  1689. FXMVECTOR V1,
  1690. FXMVECTOR V2
  1691. )
  1692. {
  1693. #if defined(_XM_NO_INTRINSICS_)
  1694. XMVECTOR Control;
  1695. Control.vector4_u32[0] = (V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0;
  1696. Control.vector4_u32[1] = (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0;
  1697. Control.vector4_u32[2] = (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0;
  1698. Control.vector4_u32[3] = (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0;
  1699. return Control;
  1700. #elif defined(_XM_SSE_INTRINSICS_)
  1701. return _mm_cmple_ps( V1, V2 );
  1702. #else // _XM_VMX128_INTRINSICS_
  1703. #endif // _XM_VMX128_INTRINSICS_
  1704. }
  1705. //------------------------------------------------------------------------------
  1706. XMFINLINE XMVECTOR XMVectorInBounds
  1707. (
  1708. FXMVECTOR V,
  1709. FXMVECTOR Bounds
  1710. )
  1711. {
  1712. #if defined(_XM_NO_INTRINSICS_)
  1713. XMVECTOR Control;
  1714. Control.vector4_u32[0] = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFF : 0;
  1715. Control.vector4_u32[1] = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFF : 0;
  1716. Control.vector4_u32[2] = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFF : 0;
  1717. Control.vector4_u32[3] = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFF : 0;
  1718. return Control;
  1719. #elif defined(_XM_SSE_INTRINSICS_)
  1720. // Test if less than or equal
  1721. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  1722. // Negate the bounds
  1723. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  1724. // Test if greater or equal (Reversed)
  1725. vTemp2 = _mm_cmple_ps(vTemp2,V);
  1726. // Blend answers
  1727. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  1728. return vTemp1;
  1729. #else // _XM_VMX128_INTRINSICS_
  1730. #endif // _XM_VMX128_INTRINSICS_
  1731. }
  1732. //------------------------------------------------------------------------------
  1733. XMFINLINE XMVECTOR XMVectorInBoundsR
  1734. (
  1735. UINT* pCR,
  1736. FXMVECTOR V,
  1737. FXMVECTOR Bounds
  1738. )
  1739. {
  1740. #if defined(_XM_NO_INTRINSICS_)
  1741. UINT ux, uy, uz, uw, CR;
  1742. XMVECTOR Control;
  1743. XMASSERT( pCR != 0 );
  1744. ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
  1745. uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
  1746. uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
  1747. uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
  1748. CR = 0;
  1749. if (ux&uy&uz&uw)
  1750. {
  1751. // All elements are in bounds
  1752. CR = XM_CRMASK_CR6BOUNDS;
  1753. }
  1754. *pCR = CR;
  1755. Control.vector4_u32[0] = ux;
  1756. Control.vector4_u32[1] = uy;
  1757. Control.vector4_u32[2] = uz;
  1758. Control.vector4_u32[3] = uw;
  1759. return Control;
  1760. #elif defined(_XM_SSE_INTRINSICS_)
  1761. XMASSERT( pCR != 0 );
  1762. // Test if less than or equal
  1763. XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
  1764. // Negate the bounds
  1765. XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
  1766. // Test if greater or equal (Reversed)
  1767. vTemp2 = _mm_cmple_ps(vTemp2,V);
  1768. // Blend answers
  1769. vTemp1 = _mm_and_ps(vTemp1,vTemp2);
  1770. UINT CR = 0;
  1771. if (_mm_movemask_ps(vTemp1)==0xf) {
  1772. // All elements are in bounds
  1773. CR = XM_CRMASK_CR6BOUNDS;
  1774. }
  1775. *pCR = CR;
  1776. return vTemp1;
  1777. #else // _XM_VMX128_INTRINSICS_
  1778. #endif // _XM_VMX128_INTRINSICS_
  1779. }
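//------------------------------------------------------------------------------
// Editorial sketch: a symmetric point-in-box test reduces to one InBoundsR call
// plus the XMComparisonAllInBounds macro from xnamath.h (assumed available).
// Note that all four lanes are tested, so pick the w extent generously (e.g.
// 1.0f for points with w == 1). The helper name is hypothetical.
XMFINLINE BOOL XMPointInCenteredBox(FXMVECTOR Point, FXMVECTOR Extents)
{
    UINT CR;
    XMVectorInBoundsR(&CR, Point, Extents);
    return XMComparisonAllInBounds(CR) ? TRUE : FALSE;
}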
  1780. //------------------------------------------------------------------------------
  1781. XMFINLINE XMVECTOR XMVectorIsNaN
  1782. (
  1783. FXMVECTOR V
  1784. )
  1785. {
  1786. #if defined(_XM_NO_INTRINSICS_)
  1787. XMVECTOR Control;
  1788. Control.vector4_u32[0] = XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
  1789. Control.vector4_u32[1] = XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
  1790. Control.vector4_u32[2] = XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
  1791. Control.vector4_u32[3] = XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
  1792. return Control;
  1793. #elif defined(_XM_SSE_INTRINSICS_)
  1794. // Mask off the exponent
  1795. __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
  1796. // Mask off the mantissa
  1797. __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
  1798. // Are any of the exponents == 0x7F800000?
  1799. vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
1800. // Are any of the mantissas zero? (SSE2 doesn't have a not-equal test)
  1801. vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
1802. // And-not with the NaN test so only lanes with a NON-zero mantissa stay set
1803. vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
1804. // Lanes that hold NaN are all-ones masks after the merge above
  1805. return reinterpret_cast<const XMVECTOR *>(&vTempNan)[0];
  1806. #else // _XM_VMX128_INTRINSICS_
  1807. #endif // _XM_VMX128_INTRINSICS_
  1808. }
  1809. //------------------------------------------------------------------------------
  1810. XMFINLINE XMVECTOR XMVectorIsInfinite
  1811. (
  1812. FXMVECTOR V
  1813. )
  1814. {
  1815. #if defined(_XM_NO_INTRINSICS_)
  1816. XMVECTOR Control;
  1817. Control.vector4_u32[0] = XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0;
  1818. Control.vector4_u32[1] = XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0;
  1819. Control.vector4_u32[2] = XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0;
  1820. Control.vector4_u32[3] = XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0;
  1821. return Control;
  1822. #elif defined(_XM_SSE_INTRINSICS_)
  1823. // Mask off the sign bit
  1824. __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
  1825. // Compare to infinity
  1826. vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
1827. // Lanes that hold infinity are all-ones masks.
  1828. return vTemp;
  1829. #else // _XM_VMX128_INTRINSICS_
  1830. #endif // _XM_VMX128_INTRINSICS_
  1831. }
  1832. //------------------------------------------------------------------------------
  1833. // Rounding and clamping operations
  1834. //------------------------------------------------------------------------------
  1835. //------------------------------------------------------------------------------
  1836. XMFINLINE XMVECTOR XMVectorMin
  1837. (
  1838. FXMVECTOR V1,
  1839. FXMVECTOR V2
  1840. )
  1841. {
  1842. #if defined(_XM_NO_INTRINSICS_)
  1843. XMVECTOR Result;
  1844. Result.vector4_f32[0] = (V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
  1845. Result.vector4_f32[1] = (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
  1846. Result.vector4_f32[2] = (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
  1847. Result.vector4_f32[3] = (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
  1848. return Result;
  1849. #elif defined(_XM_SSE_INTRINSICS_)
  1850. return _mm_min_ps( V1, V2 );
  1851. #else // _XM_VMX128_INTRINSICS_
  1852. #endif // _XM_VMX128_INTRINSICS_
  1853. }
  1854. //------------------------------------------------------------------------------
  1855. XMFINLINE XMVECTOR XMVectorMax
  1856. (
  1857. FXMVECTOR V1,
  1858. FXMVECTOR V2
  1859. )
  1860. {
  1861. #if defined(_XM_NO_INTRINSICS_)
  1862. XMVECTOR Result;
  1863. Result.vector4_f32[0] = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] : V2.vector4_f32[0];
  1864. Result.vector4_f32[1] = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] : V2.vector4_f32[1];
  1865. Result.vector4_f32[2] = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] : V2.vector4_f32[2];
  1866. Result.vector4_f32[3] = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] : V2.vector4_f32[3];
  1867. return Result;
  1868. #elif defined(_XM_SSE_INTRINSICS_)
  1869. return _mm_max_ps( V1, V2 );
  1870. #else // _XM_VMX128_INTRINSICS_
  1871. #endif // _XM_VMX128_INTRINSICS_
  1872. }
  1873. //------------------------------------------------------------------------------
  1874. XMFINLINE XMVECTOR XMVectorRound
  1875. (
  1876. FXMVECTOR V
  1877. )
  1878. {
  1879. #if defined(_XM_NO_INTRINSICS_)
  1880. XMVECTOR Result;
  1881. XMVECTOR Bias;
  1882. CONST XMVECTOR Zero = XMVectorZero();
  1883. CONST XMVECTOR BiasPos = XMVectorReplicate(0.5f);
  1884. CONST XMVECTOR BiasNeg = XMVectorReplicate(-0.5f);
  1885. Bias = XMVectorLess(V, Zero);
  1886. Bias = XMVectorSelect(BiasPos, BiasNeg, Bias);
  1887. Result = XMVectorAdd(V, Bias);
  1888. Result = XMVectorTruncate(Result);
  1889. return Result;
  1890. #elif defined(_XM_SSE_INTRINSICS_)
  1891. // To handle NAN, INF and numbers greater than 8388608, use masking
  1892. // Get the abs value
  1893. __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
1894. // Test for magnitude less than 8388608 (2^23); values at or above it, plus NAN and INF, have no fractional bits and fail this test
  1895. vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
  1896. // Convert to int and back to float for rounding
  1897. __m128i vInt = _mm_cvtps_epi32(V);
  1898. // Convert back to floats
  1899. XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
  1900. // All numbers less than 8388608 will use the round to int
  1901. vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1902. // All others, use the ORIGINAL value
  1903. vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
  1904. vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1905. return vResult;
  1906. #else // _XM_VMX128_INTRINSICS_
  1907. #endif // _XM_VMX128_INTRINSICS_
  1908. }
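//------------------------------------------------------------------------------
// Editorial worked example: 8388608.0f is 2^23, the first float whose spacing
// is a full 1.0f, so anything at or beyond that magnitude is already integral
// and must pass through untouched. Under the default MXCSR rounding mode,
//   XMVectorRound(XMVectorSet(2.5f, -2.5f, 8388609.0f, 0.75f))
// yields { 2.0f, -2.0f, 8388609.0f, 1.0f } on the SSE path (cvtps_epi32 rounds
// ties to even), while the no-intrinsics bias-and-truncate path above would
// give 3.0f and -3.0f in the first two lanes.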
  1909. //------------------------------------------------------------------------------
  1910. XMFINLINE XMVECTOR XMVectorTruncate
  1911. (
  1912. FXMVECTOR V
  1913. )
  1914. {
  1915. #if defined(_XM_NO_INTRINSICS_)
  1916. XMVECTOR Result;
  1917. UINT i;
  1918. // Avoid C4701
  1919. Result.vector4_f32[0] = 0.0f;
  1920. for (i = 0; i < 4; i++)
  1921. {
  1922. if (XMISNAN(V.vector4_f32[i]))
  1923. {
  1924. Result.vector4_u32[i] = 0x7FC00000;
  1925. }
  1926. else if (fabsf(V.vector4_f32[i]) < 8388608.0f)
  1927. {
  1928. Result.vector4_f32[i] = (FLOAT)((INT)V.vector4_f32[i]);
  1929. }
  1930. else
  1931. {
  1932. Result.vector4_f32[i] = V.vector4_f32[i];
  1933. }
  1934. }
  1935. return Result;
  1936. #elif defined(_XM_SSE_INTRINSICS_)
  1937. // To handle NAN, INF and numbers greater than 8388608, use masking
  1938. // Get the abs value
  1939. __m128i vTest = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMAbsMask);
1940. // Test for magnitude less than 8388608 (2^23); values at or above it, plus NAN and INF, have no fractional bits and fail this test
  1941. vTest = _mm_cmplt_epi32(vTest,g_XMNoFraction);
  1942. // Convert to int and back to float for rounding with truncation
  1943. __m128i vInt = _mm_cvttps_epi32(V);
  1944. // Convert back to floats
  1945. XMVECTOR vResult = _mm_cvtepi32_ps(vInt);
  1946. // All numbers less than 8388608 will use the round to int
  1947. vResult = _mm_and_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1948. // All others, use the ORIGINAL value
  1949. vTest = _mm_andnot_si128(vTest,reinterpret_cast<const __m128i *>(&V)[0]);
  1950. vResult = _mm_or_ps(vResult,reinterpret_cast<const XMVECTOR *>(&vTest)[0]);
  1951. return vResult;
  1952. #else // _XM_VMX128_INTRINSICS_
  1953. #endif // _XM_VMX128_INTRINSICS_
  1954. }
  1955. //------------------------------------------------------------------------------
  1956. XMFINLINE XMVECTOR XMVectorFloor
  1957. (
  1958. FXMVECTOR V
  1959. )
  1960. {
  1961. #if defined(_XM_NO_INTRINSICS_)
  1962. XMVECTOR vResult = {
  1963. floorf(V.vector4_f32[0]),
  1964. floorf(V.vector4_f32[1]),
  1965. floorf(V.vector4_f32[2]),
  1966. floorf(V.vector4_f32[3])
  1967. };
  1968. return vResult;
  1969. #elif defined(_XM_SSE_INTRINSICS_)
  1970. XMVECTOR vResult = _mm_sub_ps(V,g_XMOneHalfMinusEpsilon);
  1971. __m128i vInt = _mm_cvtps_epi32(vResult);
  1972. vResult = _mm_cvtepi32_ps(vInt);
  1973. return vResult;
  1974. #else // _XM_VMX128_INTRINSICS_
  1975. #endif // _XM_VMX128_INTRINSICS_
  1976. }
  1977. //------------------------------------------------------------------------------
  1978. XMFINLINE XMVECTOR XMVectorCeiling
  1979. (
  1980. FXMVECTOR V
  1981. )
  1982. {
  1983. #if defined(_XM_NO_INTRINSICS_)
  1984. XMVECTOR vResult = {
  1985. ceilf(V.vector4_f32[0]),
  1986. ceilf(V.vector4_f32[1]),
  1987. ceilf(V.vector4_f32[2]),
  1988. ceilf(V.vector4_f32[3])
  1989. };
  1990. return vResult;
  1991. #elif defined(_XM_SSE_INTRINSICS_)
  1992. XMVECTOR vResult = _mm_add_ps(V,g_XMOneHalfMinusEpsilon);
  1993. __m128i vInt = _mm_cvtps_epi32(vResult);
  1994. vResult = _mm_cvtepi32_ps(vInt);
  1995. return vResult;
  1996. #else // _XM_VMX128_INTRINSICS_
  1997. #endif // _XM_VMX128_INTRINSICS_
  1998. }
  1999. //------------------------------------------------------------------------------
  2000. XMFINLINE XMVECTOR XMVectorClamp
  2001. (
  2002. FXMVECTOR V,
  2003. FXMVECTOR Min,
  2004. FXMVECTOR Max
  2005. )
  2006. {
  2007. #if defined(_XM_NO_INTRINSICS_)
  2008. XMVECTOR Result;
  2009. XMASSERT(XMVector4LessOrEqual(Min, Max));
  2010. Result = XMVectorMax(Min, V);
  2011. Result = XMVectorMin(Max, Result);
  2012. return Result;
  2013. #elif defined(_XM_SSE_INTRINSICS_)
  2014. XMVECTOR vResult;
  2015. XMASSERT(XMVector4LessOrEqual(Min, Max));
  2016. vResult = _mm_max_ps(Min,V);
  2017. vResult = _mm_min_ps(vResult,Max);
  2018. return vResult;
  2019. #else // _XM_VMX128_INTRINSICS_
  2020. #endif // _XM_VMX128_INTRINSICS_
  2021. }
  2022. //------------------------------------------------------------------------------
  2023. XMFINLINE XMVECTOR XMVectorSaturate
  2024. (
  2025. FXMVECTOR V
  2026. )
  2027. {
  2028. #if defined(_XM_NO_INTRINSICS_)
  2029. CONST XMVECTOR Zero = XMVectorZero();
  2030. return XMVectorClamp(V, Zero, g_XMOne.v);
  2031. #elif defined(_XM_SSE_INTRINSICS_)
2032. // Set < 0 to 0
2033. XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2034. // Set > 1 to 1
  2035. return _mm_min_ps(vResult,g_XMOne);
  2036. #else // _XM_VMX128_INTRINSICS_
  2037. #endif // _XM_VMX128_INTRINSICS_
  2038. }
  2039. //------------------------------------------------------------------------------
  2040. // Bitwise logical operations
  2041. //------------------------------------------------------------------------------
  2042. XMFINLINE XMVECTOR XMVectorAndInt
  2043. (
  2044. FXMVECTOR V1,
  2045. FXMVECTOR V2
  2046. )
  2047. {
  2048. #if defined(_XM_NO_INTRINSICS_)
  2049. XMVECTOR Result;
  2050. Result.vector4_u32[0] = V1.vector4_u32[0] & V2.vector4_u32[0];
  2051. Result.vector4_u32[1] = V1.vector4_u32[1] & V2.vector4_u32[1];
  2052. Result.vector4_u32[2] = V1.vector4_u32[2] & V2.vector4_u32[2];
  2053. Result.vector4_u32[3] = V1.vector4_u32[3] & V2.vector4_u32[3];
  2054. return Result;
  2055. #elif defined(_XM_SSE_INTRINSICS_)
  2056. return _mm_and_ps(V1,V2);
  2057. #else // _XM_VMX128_INTRINSICS_
  2058. #endif // _XM_VMX128_INTRINSICS_
  2059. }
  2060. //------------------------------------------------------------------------------
  2061. XMFINLINE XMVECTOR XMVectorAndCInt
  2062. (
  2063. FXMVECTOR V1,
  2064. FXMVECTOR V2
  2065. )
  2066. {
  2067. #if defined(_XM_NO_INTRINSICS_)
  2068. XMVECTOR Result;
  2069. Result.vector4_u32[0] = V1.vector4_u32[0] & ~V2.vector4_u32[0];
  2070. Result.vector4_u32[1] = V1.vector4_u32[1] & ~V2.vector4_u32[1];
  2071. Result.vector4_u32[2] = V1.vector4_u32[2] & ~V2.vector4_u32[2];
  2072. Result.vector4_u32[3] = V1.vector4_u32[3] & ~V2.vector4_u32[3];
  2073. return Result;
  2074. #elif defined(_XM_SSE_INTRINSICS_)
  2075. __m128i V = _mm_andnot_si128( reinterpret_cast<const __m128i *>(&V2)[0], reinterpret_cast<const __m128i *>(&V1)[0] );
  2076. return reinterpret_cast<__m128 *>(&V)[0];
  2077. #else // _XM_VMX128_INTRINSICS_
  2078. #endif // _XM_VMX128_INTRINSICS_
  2079. }
  2080. //------------------------------------------------------------------------------
  2081. XMFINLINE XMVECTOR XMVectorOrInt
  2082. (
  2083. FXMVECTOR V1,
  2084. FXMVECTOR V2
  2085. )
  2086. {
  2087. #if defined(_XM_NO_INTRINSICS_)
  2088. XMVECTOR Result;
  2089. Result.vector4_u32[0] = V1.vector4_u32[0] | V2.vector4_u32[0];
  2090. Result.vector4_u32[1] = V1.vector4_u32[1] | V2.vector4_u32[1];
  2091. Result.vector4_u32[2] = V1.vector4_u32[2] | V2.vector4_u32[2];
  2092. Result.vector4_u32[3] = V1.vector4_u32[3] | V2.vector4_u32[3];
  2093. return Result;
  2094. #elif defined(_XM_SSE_INTRINSICS_)
  2095. __m128i V = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
  2096. return reinterpret_cast<__m128 *>(&V)[0];
  2097. #else // _XM_VMX128_INTRINSICS_
  2098. #endif // _XM_VMX128_INTRINSICS_
  2099. }
  2100. //------------------------------------------------------------------------------
  2101. XMFINLINE XMVECTOR XMVectorNorInt
  2102. (
  2103. FXMVECTOR V1,
  2104. FXMVECTOR V2
  2105. )
  2106. {
  2107. #if defined(_XM_NO_INTRINSICS_)
  2108. XMVECTOR Result;
  2109. Result.vector4_u32[0] = ~(V1.vector4_u32[0] | V2.vector4_u32[0]);
  2110. Result.vector4_u32[1] = ~(V1.vector4_u32[1] | V2.vector4_u32[1]);
  2111. Result.vector4_u32[2] = ~(V1.vector4_u32[2] | V2.vector4_u32[2]);
  2112. Result.vector4_u32[3] = ~(V1.vector4_u32[3] | V2.vector4_u32[3]);
  2113. return Result;
  2114. #elif defined(_XM_SSE_INTRINSICS_)
  2115. __m128i Result;
  2116. Result = _mm_or_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
  2117. Result = _mm_andnot_si128( Result,g_XMNegOneMask);
  2118. return reinterpret_cast<__m128 *>(&Result)[0];
  2119. #else // _XM_VMX128_INTRINSICS_
  2120. #endif // _XM_VMX128_INTRINSICS_
  2121. }
  2122. //------------------------------------------------------------------------------
  2123. XMFINLINE XMVECTOR XMVectorXorInt
  2124. (
  2125. FXMVECTOR V1,
  2126. FXMVECTOR V2
  2127. )
  2128. {
  2129. #if defined(_XM_NO_INTRINSICS_)
  2130. XMVECTOR Result;
  2131. Result.vector4_u32[0] = V1.vector4_u32[0] ^ V2.vector4_u32[0];
  2132. Result.vector4_u32[1] = V1.vector4_u32[1] ^ V2.vector4_u32[1];
  2133. Result.vector4_u32[2] = V1.vector4_u32[2] ^ V2.vector4_u32[2];
  2134. Result.vector4_u32[3] = V1.vector4_u32[3] ^ V2.vector4_u32[3];
  2135. return Result;
  2136. #elif defined(_XM_SSE_INTRINSICS_)
  2137. __m128i V = _mm_xor_si128( reinterpret_cast<const __m128i *>(&V1)[0], reinterpret_cast<const __m128i *>(&V2)[0] );
  2138. return reinterpret_cast<__m128 *>(&V)[0];
  2139. #else // _XM_VMX128_INTRINSICS_
  2140. #endif // _XM_VMX128_INTRINSICS_
  2141. }
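//------------------------------------------------------------------------------
// Editorial sketch: the And/AndC/Or trio above is exactly what a select is made
// of - (V1 & ~Control) | (V2 & Control) - which is how the comparison control
// vectors produced earlier in this file are typically consumed. This is
// functionally equivalent to the library's XMVectorSelect; the name below is
// hypothetical.
XMFINLINE XMVECTOR XMVectorSelectViaBitwise(FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR Control)
{
    XMVECTOR vTemp1 = XMVectorAndCInt(V1, Control);   // V1 & ~Control
    XMVECTOR vTemp2 = XMVectorAndInt(V2, Control);    // V2 &  Control
    return XMVectorOrInt(vTemp1, vTemp2);
}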
  2142. //------------------------------------------------------------------------------
  2143. // Computation operations
  2144. //------------------------------------------------------------------------------
  2145. //------------------------------------------------------------------------------
  2146. XMFINLINE XMVECTOR XMVectorNegate
  2147. (
  2148. FXMVECTOR V
  2149. )
  2150. {
  2151. #if defined(_XM_NO_INTRINSICS_)
  2152. XMVECTOR Result;
  2153. Result.vector4_f32[0] = -V.vector4_f32[0];
  2154. Result.vector4_f32[1] = -V.vector4_f32[1];
  2155. Result.vector4_f32[2] = -V.vector4_f32[2];
  2156. Result.vector4_f32[3] = -V.vector4_f32[3];
  2157. return Result;
  2158. #elif defined(_XM_SSE_INTRINSICS_)
  2159. XMVECTOR Z;
  2160. Z = _mm_setzero_ps();
  2161. return _mm_sub_ps( Z, V );
  2162. #else // _XM_VMX128_INTRINSICS_
  2163. #endif // _XM_VMX128_INTRINSICS_
  2164. }
  2165. //------------------------------------------------------------------------------
  2166. XMFINLINE XMVECTOR XMVectorAdd
  2167. (
  2168. FXMVECTOR V1,
  2169. FXMVECTOR V2
  2170. )
  2171. {
  2172. #if defined(_XM_NO_INTRINSICS_)
  2173. XMVECTOR Result;
  2174. Result.vector4_f32[0] = V1.vector4_f32[0] + V2.vector4_f32[0];
  2175. Result.vector4_f32[1] = V1.vector4_f32[1] + V2.vector4_f32[1];
  2176. Result.vector4_f32[2] = V1.vector4_f32[2] + V2.vector4_f32[2];
  2177. Result.vector4_f32[3] = V1.vector4_f32[3] + V2.vector4_f32[3];
  2178. return Result;
  2179. #elif defined(_XM_SSE_INTRINSICS_)
  2180. return _mm_add_ps( V1, V2 );
  2181. #else // _XM_VMX128_INTRINSICS_
  2182. #endif // _XM_VMX128_INTRINSICS_
  2183. }
  2184. //------------------------------------------------------------------------------
  2185. XMFINLINE XMVECTOR XMVectorAddAngles
  2186. (
  2187. FXMVECTOR V1,
  2188. FXMVECTOR V2
  2189. )
  2190. {
  2191. #if defined(_XM_NO_INTRINSICS_)
  2192. XMVECTOR Mask;
  2193. XMVECTOR Offset;
  2194. XMVECTOR Result;
  2195. CONST XMVECTOR Zero = XMVectorZero();
  2196. // Add the given angles together. If the range of V1 is such
  2197. // that -Pi <= V1 < Pi and the range of V2 is such that
  2198. // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
  2199. // will be -Pi <= Result < Pi.
  2200. Result = XMVectorAdd(V1, V2);
  2201. Mask = XMVectorLess(Result, g_XMNegativePi.v);
  2202. Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
  2203. Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
  2204. Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
  2205. Result = XMVectorAdd(Result, Offset);
  2206. return Result;
  2207. #elif defined(_XM_SSE_INTRINSICS_)
  2208. // Adjust the angles
  2209. XMVECTOR vResult = _mm_add_ps(V1,V2);
2210. // Less than -Pi?
  2211. XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
  2212. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
  2213. // Add 2Pi to all entries less than -Pi
  2214. vResult = _mm_add_ps(vResult,vOffset);
  2215. // Greater than or equal to Pi?
  2216. vOffset = _mm_cmpge_ps(vResult,g_XMPi);
  2217. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2218. // Subtract 2Pi from all entries greater than or equal to Pi
  2219. vResult = _mm_sub_ps(vResult,vOffset);
  2220. return vResult;
  2221. #else // _XM_VMX128_INTRINSICS_
  2222. #endif // _XM_VMX128_INTRINSICS_
  2223. }
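//------------------------------------------------------------------------------
// Editorial worked example: with V1.x = 3.0f and V2.x = 1.0f the raw sum 4.0f
// lands at or above Pi, so 2*Pi is subtracted: 4.0f - 6.2831853f ~= -2.2831853f,
// keeping the result inside the documented [-Pi, Pi) range.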
  2224. //------------------------------------------------------------------------------
  2225. XMFINLINE XMVECTOR XMVectorSubtract
  2226. (
  2227. FXMVECTOR V1,
  2228. FXMVECTOR V2
  2229. )
  2230. {
  2231. #if defined(_XM_NO_INTRINSICS_)
  2232. XMVECTOR Result;
  2233. Result.vector4_f32[0] = V1.vector4_f32[0] - V2.vector4_f32[0];
  2234. Result.vector4_f32[1] = V1.vector4_f32[1] - V2.vector4_f32[1];
  2235. Result.vector4_f32[2] = V1.vector4_f32[2] - V2.vector4_f32[2];
  2236. Result.vector4_f32[3] = V1.vector4_f32[3] - V2.vector4_f32[3];
  2237. return Result;
  2238. #elif defined(_XM_SSE_INTRINSICS_)
  2239. return _mm_sub_ps( V1, V2 );
  2240. #else // _XM_VMX128_INTRINSICS_
  2241. #endif // _XM_VMX128_INTRINSICS_
  2242. }
  2243. //------------------------------------------------------------------------------
  2244. XMFINLINE XMVECTOR XMVectorSubtractAngles
  2245. (
  2246. FXMVECTOR V1,
  2247. FXMVECTOR V2
  2248. )
  2249. {
  2250. #if defined(_XM_NO_INTRINSICS_)
  2251. XMVECTOR Mask;
  2252. XMVECTOR Offset;
  2253. XMVECTOR Result;
  2254. CONST XMVECTOR Zero = XMVectorZero();
  2255. // Subtract the given angles. If the range of V1 is such
  2256. // that -Pi <= V1 < Pi and the range of V2 is such that
  2257. // -2Pi <= V2 <= 2Pi, then the range of the resulting angle
  2258. // will be -Pi <= Result < Pi.
  2259. Result = XMVectorSubtract(V1, V2);
  2260. Mask = XMVectorLess(Result, g_XMNegativePi.v);
  2261. Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask);
  2262. Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v);
  2263. Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask);
  2264. Result = XMVectorAdd(Result, Offset);
  2265. return Result;
  2266. #elif defined(_XM_SSE_INTRINSICS_)
  2267. // Adjust the angles
  2268. XMVECTOR vResult = _mm_sub_ps(V1,V2);
2269. // Less than -Pi?
  2270. XMVECTOR vOffset = _mm_cmplt_ps(vResult,g_XMNegativePi);
  2271. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
  2272. // Add 2Pi to all entries less than -Pi
  2273. vResult = _mm_add_ps(vResult,vOffset);
  2274. // Greater than or equal to Pi?
  2275. vOffset = _mm_cmpge_ps(vResult,g_XMPi);
  2276. vOffset = _mm_and_ps(vOffset,g_XMTwoPi);
2277. // Subtract 2Pi from all entries greater than or equal to Pi
  2278. vResult = _mm_sub_ps(vResult,vOffset);
  2279. return vResult;
  2280. #else // _XM_VMX128_INTRINSICS_
  2281. #endif // _XM_VMX128_INTRINSICS_
  2282. }
  2283. //------------------------------------------------------------------------------
  2284. XMFINLINE XMVECTOR XMVectorMultiply
  2285. (
  2286. FXMVECTOR V1,
  2287. FXMVECTOR V2
  2288. )
  2289. {
  2290. #if defined(_XM_NO_INTRINSICS_)
  2291. XMVECTOR Result = {
  2292. V1.vector4_f32[0] * V2.vector4_f32[0],
  2293. V1.vector4_f32[1] * V2.vector4_f32[1],
  2294. V1.vector4_f32[2] * V2.vector4_f32[2],
  2295. V1.vector4_f32[3] * V2.vector4_f32[3]
  2296. };
  2297. return Result;
  2298. #elif defined(_XM_SSE_INTRINSICS_)
  2299. return _mm_mul_ps( V1, V2 );
  2300. #else // _XM_VMX128_INTRINSICS_
  2301. #endif // _XM_VMX128_INTRINSICS_
  2302. }
  2303. //------------------------------------------------------------------------------
  2304. XMFINLINE XMVECTOR XMVectorMultiplyAdd
  2305. (
  2306. FXMVECTOR V1,
  2307. FXMVECTOR V2,
  2308. FXMVECTOR V3
  2309. )
  2310. {
  2311. #if defined(_XM_NO_INTRINSICS_)
  2312. XMVECTOR vResult = {
  2313. (V1.vector4_f32[0] * V2.vector4_f32[0]) + V3.vector4_f32[0],
  2314. (V1.vector4_f32[1] * V2.vector4_f32[1]) + V3.vector4_f32[1],
  2315. (V1.vector4_f32[2] * V2.vector4_f32[2]) + V3.vector4_f32[2],
  2316. (V1.vector4_f32[3] * V2.vector4_f32[3]) + V3.vector4_f32[3]
  2317. };
  2318. return vResult;
  2319. #elif defined(_XM_SSE_INTRINSICS_)
  2320. XMVECTOR vResult = _mm_mul_ps( V1, V2 );
  2321. return _mm_add_ps(vResult, V3 );
  2322. #else // _XM_VMX128_INTRINSICS_
  2323. #endif // _XM_VMX128_INTRINSICS_
  2324. }
  2325. //------------------------------------------------------------------------------
  2326. XMFINLINE XMVECTOR XMVectorDivide
  2327. (
  2328. FXMVECTOR V1,
  2329. FXMVECTOR V2
  2330. )
  2331. {
  2332. #if defined(_XM_NO_INTRINSICS_)
  2333. XMVECTOR Result;
  2334. Result.vector4_f32[0] = V1.vector4_f32[0] / V2.vector4_f32[0];
  2335. Result.vector4_f32[1] = V1.vector4_f32[1] / V2.vector4_f32[1];
  2336. Result.vector4_f32[2] = V1.vector4_f32[2] / V2.vector4_f32[2];
  2337. Result.vector4_f32[3] = V1.vector4_f32[3] / V2.vector4_f32[3];
  2338. return Result;
  2339. #elif defined(_XM_SSE_INTRINSICS_)
  2340. return _mm_div_ps( V1, V2 );
  2341. #else // _XM_VMX128_INTRINSICS_
  2342. #endif // _XM_VMX128_INTRINSICS_
  2343. }
  2344. //------------------------------------------------------------------------------
  2345. XMFINLINE XMVECTOR XMVectorNegativeMultiplySubtract
  2346. (
  2347. FXMVECTOR V1,
  2348. FXMVECTOR V2,
  2349. FXMVECTOR V3
  2350. )
  2351. {
  2352. #if defined(_XM_NO_INTRINSICS_)
  2353. XMVECTOR vResult = {
  2354. V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]),
  2355. V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]),
  2356. V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]),
  2357. V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])
  2358. };
  2359. return vResult;
  2360. #elif defined(_XM_SSE_INTRINSICS_)
  2361. XMVECTOR R = _mm_mul_ps( V1, V2 );
  2362. return _mm_sub_ps( V3, R );
  2363. #else // _XM_VMX128_INTRINSICS_
  2364. #endif // _XM_VMX128_INTRINSICS_
  2365. }
  2366. //------------------------------------------------------------------------------
  2367. XMFINLINE XMVECTOR XMVectorScale
  2368. (
  2369. FXMVECTOR V,
  2370. FLOAT ScaleFactor
  2371. )
  2372. {
  2373. #if defined(_XM_NO_INTRINSICS_)
  2374. XMVECTOR vResult = {
  2375. V.vector4_f32[0] * ScaleFactor,
  2376. V.vector4_f32[1] * ScaleFactor,
  2377. V.vector4_f32[2] * ScaleFactor,
  2378. V.vector4_f32[3] * ScaleFactor
  2379. };
  2380. return vResult;
  2381. #elif defined(_XM_SSE_INTRINSICS_)
  2382. XMVECTOR vResult = _mm_set_ps1(ScaleFactor);
  2383. return _mm_mul_ps(vResult,V);
2384. #else // _XM_VMX128_INTRINSICS_
  2385. #endif // _XM_VMX128_INTRINSICS_
  2386. }
  2387. //------------------------------------------------------------------------------
  2388. XMFINLINE XMVECTOR XMVectorReciprocalEst
  2389. (
  2390. FXMVECTOR V
  2391. )
  2392. {
  2393. #if defined(_XM_NO_INTRINSICS_)
  2394. XMVECTOR Result;
  2395. UINT i;
  2396. // Avoid C4701
  2397. Result.vector4_f32[0] = 0.0f;
  2398. for (i = 0; i < 4; i++)
  2399. {
  2400. if (XMISNAN(V.vector4_f32[i]))
  2401. {
  2402. Result.vector4_u32[i] = 0x7FC00000;
  2403. }
  2404. else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f)
  2405. {
  2406. Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000);
  2407. }
  2408. else
  2409. {
  2410. Result.vector4_f32[i] = 1.f / V.vector4_f32[i];
  2411. }
  2412. }
  2413. return Result;
  2414. #elif defined(_XM_SSE_INTRINSICS_)
  2415. return _mm_rcp_ps(V);
  2416. #else // _XM_VMX128_INTRINSICS_
  2417. #endif // _XM_VMX128_INTRINSICS_
  2418. }
  2419. //------------------------------------------------------------------------------
  2420. XMFINLINE XMVECTOR XMVectorReciprocal
  2421. (
  2422. FXMVECTOR V
  2423. )
  2424. {
  2425. #if defined(_XM_NO_INTRINSICS_)
  2426. return XMVectorReciprocalEst(V);
  2427. #elif defined(_XM_SSE_INTRINSICS_)
  2428. return _mm_div_ps(g_XMOne,V);
  2429. #else // _XM_VMX128_INTRINSICS_
  2430. #endif // _XM_VMX128_INTRINSICS_
  2431. }
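//------------------------------------------------------------------------------
// Editorial sketch: between the ~12-bit _mm_rcp_ps estimate and the exact (but
// slower) divide above sits one Newton-Raphson step, r' = r*(2 - V*r). SSE-only
// illustration; the function name is hypothetical.
#if defined(_XM_SSE_INTRINSICS_)
XMFINLINE XMVECTOR XMVectorReciprocalNR(FXMVECTOR V)
{
    XMVECTOR vRcp  = _mm_rcp_ps(V);                    // ~12-bit estimate
    XMVECTOR vTemp = _mm_mul_ps(V, vRcp);              // V*r
    vTemp = _mm_sub_ps(_mm_set_ps1(2.0f), vTemp);      // 2 - V*r
    return _mm_mul_ps(vRcp, vTemp);                    // r*(2 - V*r)
}
#endif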
  2432. //------------------------------------------------------------------------------
  2433. // Return an estimated square root
  2434. XMFINLINE XMVECTOR XMVectorSqrtEst
  2435. (
  2436. FXMVECTOR V
  2437. )
  2438. {
  2439. #if defined(_XM_NO_INTRINSICS_)
  2440. XMVECTOR Select;
  2441. // if (x == +Infinity) sqrt(x) = +Infinity
  2442. // if (x == +0.0f) sqrt(x) = +0.0f
  2443. // if (x == -0.0f) sqrt(x) = -0.0f
  2444. // if (x < 0.0f) sqrt(x) = QNaN
  2445. XMVECTOR Result = XMVectorReciprocalSqrtEst(V);
  2446. XMVECTOR Zero = XMVectorZero();
  2447. XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
  2448. XMVECTOR VEqualsZero = XMVectorEqual(V, Zero);
  2449. Result = XMVectorMultiply(V, Result);
  2450. Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
  2451. Result = XMVectorSelect(V, Result, Select);
  2452. return Result;
  2453. #elif defined(_XM_SSE_INTRINSICS_)
  2454. return _mm_sqrt_ps(V);
  2455. #else // _XM_VMX128_INTRINSICS_
  2456. #endif // _XM_VMX128_INTRINSICS_
  2457. }
  2458. //------------------------------------------------------------------------------
  2459. XMFINLINE XMVECTOR XMVectorSqrt
  2460. (
  2461. FXMVECTOR V
  2462. )
  2463. {
  2464. #if defined(_XM_NO_INTRINSICS_)
  2465. XMVECTOR Zero;
  2466. XMVECTOR VEqualsInfinity, VEqualsZero;
  2467. XMVECTOR Select;
  2468. XMVECTOR Result;
  2469. // if (x == +Infinity) sqrt(x) = +Infinity
  2470. // if (x == +0.0f) sqrt(x) = +0.0f
  2471. // if (x == -0.0f) sqrt(x) = -0.0f
  2472. // if (x < 0.0f) sqrt(x) = QNaN
  2473. Result = XMVectorReciprocalSqrt(V);
  2474. Zero = XMVectorZero();
  2475. VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v);
  2476. VEqualsZero = XMVectorEqual(V, Zero);
  2477. Result = XMVectorMultiply(V, Result);
  2478. Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero);
  2479. Result = XMVectorSelect(V, Result, Select);
  2480. return Result;
  2481. #elif defined(_XM_SSE_INTRINSICS_)
  2482. return _mm_sqrt_ps(V);
  2483. #else // _XM_VMX128_INTRINSICS_
  2484. #endif // _XM_VMX128_INTRINSICS_
  2485. }
  2486. //------------------------------------------------------------------------------
  2487. XMFINLINE XMVECTOR XMVectorReciprocalSqrtEst
  2488. (
  2489. FXMVECTOR V
  2490. )
  2491. {
  2492. #if defined(_XM_NO_INTRINSICS_)
  2493. // if (x == +Infinity) rsqrt(x) = 0
  2494. // if (x == +0.0f) rsqrt(x) = +Infinity
  2495. // if (x == -0.0f) rsqrt(x) = -Infinity
  2496. // if (x < 0.0f) rsqrt(x) = QNaN
  2497. XMVECTOR Result;
  2498. UINT i;
  2499. // Avoid C4701
  2500. Result.vector4_f32[0] = 0.0f;
  2501. for (i = 0; i < 4; i++)
  2502. {
  2503. if (XMISNAN(V.vector4_f32[i]))
  2504. {
  2505. Result.vector4_u32[i] = 0x7FC00000;
  2506. }
  2507. else if (V.vector4_f32[i] == 0.0f || V.vector4_f32[i] == -0.0f)
  2508. {
  2509. Result.vector4_u32[i] = 0x7F800000 | (V.vector4_u32[i] & 0x80000000);
  2510. }
  2511. else if (V.vector4_f32[i] < 0.0f)
  2512. {
  2513. Result.vector4_u32[i] = 0x7FFFFFFF;
  2514. }
  2515. else if (XMISINF(V.vector4_f32[i]))
  2516. {
  2517. Result.vector4_f32[i] = 0.0f;
  2518. }
  2519. else
  2520. {
  2521. Result.vector4_f32[i] = 1.0f / sqrtf(V.vector4_f32[i]);
  2522. }
  2523. }
  2524. return Result;
  2525. #elif defined(_XM_SSE_INTRINSICS_)
  2526. return _mm_rsqrt_ps(V);
  2527. #else // _XM_VMX128_INTRINSICS_
  2528. #endif // _XM_VMX128_INTRINSICS_
  2529. }
  2530. //------------------------------------------------------------------------------
  2531. XMFINLINE XMVECTOR XMVectorReciprocalSqrt
  2532. (
  2533. FXMVECTOR V
  2534. )
  2535. {
  2536. #if defined(_XM_NO_INTRINSICS_)
  2537. return XMVectorReciprocalSqrtEst(V);
  2538. #elif defined(_XM_SSE_INTRINSICS_)
  2539. XMVECTOR vResult = _mm_sqrt_ps(V);
  2540. vResult = _mm_div_ps(g_XMOne,vResult);
  2541. return vResult;
  2542. #else // _XM_VMX128_INTRINSICS_
  2543. #endif // _XM_VMX128_INTRINSICS_
  2544. }
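//------------------------------------------------------------------------------
// Editorial sketch: the exact path above pays for a sqrt plus a divide; the
// usual faster alternative is one Newton-Raphson step on the _mm_rsqrt_ps
// estimate, r' = r*(1.5 - 0.5*V*r*r). SSE-only; the name is hypothetical.
#if defined(_XM_SSE_INTRINSICS_)
XMFINLINE XMVECTOR XMVectorReciprocalSqrtNR(FXMVECTOR V)
{
    XMVECTOR vRsq  = _mm_rsqrt_ps(V);                                  // ~12-bit estimate
    XMVECTOR vTemp = _mm_mul_ps(_mm_mul_ps(V, _mm_set_ps1(0.5f)),
                                _mm_mul_ps(vRsq, vRsq));               // 0.5*V*r*r
    return _mm_mul_ps(vRsq, _mm_sub_ps(_mm_set_ps1(1.5f), vTemp));     // r*(1.5 - 0.5*V*r*r)
}
#endif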
  2545. //------------------------------------------------------------------------------
  2546. XMFINLINE XMVECTOR XMVectorExpEst
  2547. (
  2548. FXMVECTOR V
  2549. )
  2550. {
  2551. #if defined(_XM_NO_INTRINSICS_)
  2552. XMVECTOR Result;
  2553. Result.vector4_f32[0] = powf(2.0f, V.vector4_f32[0]);
  2554. Result.vector4_f32[1] = powf(2.0f, V.vector4_f32[1]);
  2555. Result.vector4_f32[2] = powf(2.0f, V.vector4_f32[2]);
  2556. Result.vector4_f32[3] = powf(2.0f, V.vector4_f32[3]);
  2557. return Result;
  2558. #elif defined(_XM_SSE_INTRINSICS_)
  2559. XMVECTOR vResult = _mm_setr_ps(
  2560. powf(2.0f,XMVectorGetX(V)),
  2561. powf(2.0f,XMVectorGetY(V)),
  2562. powf(2.0f,XMVectorGetZ(V)),
  2563. powf(2.0f,XMVectorGetW(V)));
  2564. return vResult;
  2565. #else // _XM_VMX128_INTRINSICS_
  2566. #endif // _XM_VMX128_INTRINSICS_
  2567. }
  2568. //------------------------------------------------------------------------------
  2569. XMINLINE XMVECTOR XMVectorExp
  2570. (
  2571. FXMVECTOR V
  2572. )
  2573. {
  2574. #if defined(_XM_NO_INTRINSICS_)
  2575. XMVECTOR E, S;
  2576. XMVECTOR R, R2, R3, R4;
  2577. XMVECTOR V0, V1;
  2578. XMVECTOR C0X, C0Y, C0Z, C0W;
  2579. XMVECTOR C1X, C1Y, C1Z, C1W;
  2580. XMVECTOR Result;
  2581. static CONST XMVECTOR C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
  2582. static CONST XMVECTOR C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
  2583. R = XMVectorFloor(V);
  2584. E = XMVectorExpEst(R);
  2585. R = XMVectorSubtract(V, R);
  2586. R2 = XMVectorMultiply(R, R);
  2587. R3 = XMVectorMultiply(R, R2);
  2588. R4 = XMVectorMultiply(R2, R2);
  2589. C0X = XMVectorSplatX(C0);
  2590. C0Y = XMVectorSplatY(C0);
  2591. C0Z = XMVectorSplatZ(C0);
  2592. C0W = XMVectorSplatW(C0);
  2593. C1X = XMVectorSplatX(C1);
  2594. C1Y = XMVectorSplatY(C1);
  2595. C1Z = XMVectorSplatZ(C1);
  2596. C1W = XMVectorSplatW(C1);
  2597. V0 = XMVectorMultiplyAdd(R, C0Y, C0X);
  2598. V0 = XMVectorMultiplyAdd(R2, C0Z, V0);
  2599. V0 = XMVectorMultiplyAdd(R3, C0W, V0);
  2600. V1 = XMVectorMultiplyAdd(R, C1Y, C1X);
  2601. V1 = XMVectorMultiplyAdd(R2, C1Z, V1);
  2602. V1 = XMVectorMultiplyAdd(R3, C1W, V1);
  2603. S = XMVectorMultiplyAdd(R4, V1, V0);
  2604. S = XMVectorReciprocal(S);
  2605. Result = XMVectorMultiply(E, S);
  2606. return Result;
  2607. #elif defined(_XM_SSE_INTRINSICS_)
  2608. static CONST XMVECTORF32 C0 = {1.0f, -6.93147182e-1f, 2.40226462e-1f, -5.55036440e-2f};
  2609. static CONST XMVECTORF32 C1 = {9.61597636e-3f, -1.32823968e-3f, 1.47491097e-4f, -1.08635004e-5f};
  2610. // Get the integer of the input
  2611. XMVECTOR R = XMVectorFloor(V);
  2612. // Get the exponent estimate
  2613. XMVECTOR E = XMVectorExpEst(R);
  2614. // Get the fractional only
  2615. R = _mm_sub_ps(V,R);
  2616. // Get R^2
  2617. XMVECTOR R2 = _mm_mul_ps(R,R);
  2618. // And R^3
  2619. XMVECTOR R3 = _mm_mul_ps(R,R2);
  2620. XMVECTOR V0 = _mm_load_ps1(&C0.f[1]);
  2621. V0 = _mm_mul_ps(V0,R);
  2622. XMVECTOR vConstants = _mm_load_ps1(&C0.f[0]);
  2623. V0 = _mm_add_ps(V0,vConstants);
  2624. vConstants = _mm_load_ps1(&C0.f[2]);
  2625. vConstants = _mm_mul_ps(vConstants,R2);
  2626. V0 = _mm_add_ps(V0,vConstants);
  2627. vConstants = _mm_load_ps1(&C0.f[3]);
  2628. vConstants = _mm_mul_ps(vConstants,R3);
  2629. V0 = _mm_add_ps(V0,vConstants);
  2630. XMVECTOR V1 = _mm_load_ps1(&C1.f[1]);
  2631. V1 = _mm_mul_ps(V1,R);
  2632. vConstants = _mm_load_ps1(&C1.f[0]);
  2633. V1 = _mm_add_ps(V1,vConstants);
  2634. vConstants = _mm_load_ps1(&C1.f[2]);
  2635. vConstants = _mm_mul_ps(vConstants,R2);
  2636. V1 = _mm_add_ps(V1,vConstants);
  2637. vConstants = _mm_load_ps1(&C1.f[3]);
  2638. vConstants = _mm_mul_ps(vConstants,R3);
  2639. V1 = _mm_add_ps(V1,vConstants);
  2640. // R2 = R^4
  2641. R2 = _mm_mul_ps(R2,R2);
  2642. R2 = _mm_mul_ps(R2,V1);
  2643. R2 = _mm_add_ps(R2,V0);
  2644. E = _mm_div_ps(E,R2);
  2645. return E;
  2646. #else // _XM_VMX128_INTRINSICS_
  2647. #endif // _XM_VMX128_INTRINSICS_
  2648. }
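//------------------------------------------------------------------------------
// Editorial sketch: XMVectorExp and XMVectorExpEst compute 2^V, not e^V. The
// natural exponential follows from the identity e^x = 2^(x*log2(e)); the
// wrapper name below is hypothetical.
XMFINLINE XMVECTOR XMVectorExpE(FXMVECTOR V)
{
    // log2(e) = 1/ln(2) ~= 1.442695041f
    return XMVectorExp(XMVectorScale(V, 1.442695041f));
}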
  2649. //------------------------------------------------------------------------------
  2650. XMFINLINE XMVECTOR XMVectorLogEst
  2651. (
  2652. FXMVECTOR V
  2653. )
  2654. {
  2655. #if defined(_XM_NO_INTRINSICS_)
  2656. FLOAT fScale = (1.0f / logf(2.0f));
  2657. XMVECTOR Result;
  2658. Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
  2659. Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
  2660. Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
  2661. Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
  2662. return Result;
  2663. #elif defined(_XM_SSE_INTRINSICS_)
  2664. XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
  2665. XMVECTOR vResult = _mm_setr_ps(
  2666. logf(XMVectorGetX(V)),
  2667. logf(XMVectorGetY(V)),
  2668. logf(XMVectorGetZ(V)),
  2669. logf(XMVectorGetW(V)));
  2670. vResult = _mm_mul_ps(vResult,vScale);
  2671. return vResult;
  2672. #else // _XM_VMX128_INTRINSICS_
  2673. #endif // _XM_VMX128_INTRINSICS_
  2674. }
  2675. //------------------------------------------------------------------------------
  2676. XMINLINE XMVECTOR XMVectorLog
  2677. (
  2678. FXMVECTOR V
  2679. )
  2680. {
  2681. #if defined(_XM_NO_INTRINSICS_)
  2682. FLOAT fScale = (1.0f / logf(2.0f));
  2683. XMVECTOR Result;
  2684. Result.vector4_f32[0] = logf(V.vector4_f32[0])*fScale;
  2685. Result.vector4_f32[1] = logf(V.vector4_f32[1])*fScale;
  2686. Result.vector4_f32[2] = logf(V.vector4_f32[2])*fScale;
  2687. Result.vector4_f32[3] = logf(V.vector4_f32[3])*fScale;
  2688. return Result;
  2689. #elif defined(_XM_SSE_INTRINSICS_)
  2690. XMVECTOR vScale = _mm_set_ps1(1.0f / logf(2.0f));
  2691. XMVECTOR vResult = _mm_setr_ps(
  2692. logf(XMVectorGetX(V)),
  2693. logf(XMVectorGetY(V)),
  2694. logf(XMVectorGetZ(V)),
  2695. logf(XMVectorGetW(V)));
  2696. vResult = _mm_mul_ps(vResult,vScale);
  2697. return vResult;
  2698. #else // _XM_VMX128_INTRINSICS_
  2699. #endif // _XM_VMX128_INTRINSICS_
  2700. }
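//------------------------------------------------------------------------------
// Editorial sketch: likewise XMVectorLog returns log2(V) (note the 1/ln(2)
// scale applied above). The natural log follows from ln(x) = log2(x)*ln(2);
// the wrapper name is hypothetical.
XMFINLINE XMVECTOR XMVectorLogE(FXMVECTOR V)
{
    // ln(2) ~= 0.693147181f
    return XMVectorScale(XMVectorLog(V), 0.693147181f);
}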
  2701. //------------------------------------------------------------------------------
  2702. XMFINLINE XMVECTOR XMVectorPowEst
  2703. (
  2704. FXMVECTOR V1,
  2705. FXMVECTOR V2
  2706. )
  2707. {
  2708. #if defined(_XM_NO_INTRINSICS_)
  2709. XMVECTOR Result;
  2710. Result.vector4_f32[0] = powf(V1.vector4_f32[0], V2.vector4_f32[0]);
  2711. Result.vector4_f32[1] = powf(V1.vector4_f32[1], V2.vector4_f32[1]);
  2712. Result.vector4_f32[2] = powf(V1.vector4_f32[2], V2.vector4_f32[2]);
  2713. Result.vector4_f32[3] = powf(V1.vector4_f32[3], V2.vector4_f32[3]);
  2714. return Result;
  2715. #elif defined(_XM_SSE_INTRINSICS_)
  2716. XMVECTOR vResult = _mm_setr_ps(
  2717. powf(XMVectorGetX(V1),XMVectorGetX(V2)),
  2718. powf(XMVectorGetY(V1),XMVectorGetY(V2)),
  2719. powf(XMVectorGetZ(V1),XMVectorGetZ(V2)),
  2720. powf(XMVectorGetW(V1),XMVectorGetW(V2)));
  2721. return vResult;
  2722. #else // _XM_VMX128_INTRINSICS_
  2723. #endif // _XM_VMX128_INTRINSICS_
  2724. }
  2725. //------------------------------------------------------------------------------
  2726. XMFINLINE XMVECTOR XMVectorPow
  2727. (
  2728. FXMVECTOR V1,
  2729. FXMVECTOR V2
  2730. )
  2731. {
  2732. #if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_)
  2733. return XMVectorPowEst(V1, V2);
  2734. #else // _XM_VMX128_INTRINSICS_
  2735. #endif // _XM_VMX128_INTRINSICS_
  2736. }
  2737. //------------------------------------------------------------------------------
  2738. XMFINLINE XMVECTOR XMVectorAbs
  2739. (
  2740. FXMVECTOR V
  2741. )
  2742. {
  2743. #if defined(_XM_NO_INTRINSICS_)
  2744. XMVECTOR vResult = {
  2745. fabsf(V.vector4_f32[0]),
  2746. fabsf(V.vector4_f32[1]),
  2747. fabsf(V.vector4_f32[2]),
  2748. fabsf(V.vector4_f32[3])
  2749. };
  2750. return vResult;
  2751. #elif defined(_XM_SSE_INTRINSICS_)
  2752. XMVECTOR vResult = _mm_setzero_ps();
  2753. vResult = _mm_sub_ps(vResult,V);
  2754. vResult = _mm_max_ps(vResult,V);
  2755. return vResult;
  2756. #else // _XM_VMX128_INTRINSICS_
  2757. #endif // _XM_VMX128_INTRINSICS_
  2758. }
  2759. //------------------------------------------------------------------------------
  2760. XMFINLINE XMVECTOR XMVectorMod
  2761. (
  2762. FXMVECTOR V1,
  2763. FXMVECTOR V2
  2764. )
  2765. {
  2766. #if defined(_XM_NO_INTRINSICS_)
  2767. XMVECTOR Reciprocal;
  2768. XMVECTOR Quotient;
  2769. XMVECTOR Result;
  2770. // V1 % V2 = V1 - V2 * truncate(V1 / V2)
  2771. Reciprocal = XMVectorReciprocal(V2);
  2772. Quotient = XMVectorMultiply(V1, Reciprocal);
  2773. Quotient = XMVectorTruncate(Quotient);
  2774. Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1);
  2775. return Result;
  2776. #elif defined(_XM_SSE_INTRINSICS_)
  2777. XMVECTOR vResult = _mm_div_ps(V1, V2);
  2778. vResult = XMVectorTruncate(vResult);
  2779. vResult = _mm_mul_ps(vResult,V2);
  2780. vResult = _mm_sub_ps(V1,vResult);
  2781. return vResult;
  2782. #else // _XM_VMX128_INTRINSICS_
  2783. #endif // _XM_VMX128_INTRINSICS_
  2784. }
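//------------------------------------------------------------------------------
// Editorial worked example: because the quotient is truncated toward zero, the
// result keeps the dividend's sign, matching fmodf:
//   5.3 mod 2.0 ->  5.3 - 2.0*trunc( 2.65) =  5.3 - 4.0 =  1.3
//  -5.3 mod 2.0 -> -5.3 - 2.0*trunc(-2.65) = -5.3 + 4.0 = -1.3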
  2785. //------------------------------------------------------------------------------
  2786. XMFINLINE XMVECTOR XMVectorModAngles
  2787. (
  2788. FXMVECTOR Angles
  2789. )
  2790. {
  2791. #if defined(_XM_NO_INTRINSICS_)
  2792. XMVECTOR V;
  2793. XMVECTOR Result;
  2794. // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
  2795. V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v);
  2796. V = XMVectorRound(V);
  2797. Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles);
  2798. return Result;
  2799. #elif defined(_XM_SSE_INTRINSICS_)
  2800. // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI
  2801. XMVECTOR vResult = _mm_mul_ps(Angles,g_XMReciprocalTwoPi);
2802. // Use the inline XMVectorRound, since correct rounding needs its masking logic
  2803. vResult = XMVectorRound(vResult);
  2804. vResult = _mm_mul_ps(vResult,g_XMTwoPi);
  2805. vResult = _mm_sub_ps(Angles,vResult);
  2806. return vResult;
  2807. #else // _XM_VMX128_INTRINSICS_
  2808. #endif // _XM_VMX128_INTRINSICS_
  2809. }
  2810. //------------------------------------------------------------------------------
  2811. XMINLINE XMVECTOR XMVectorSin
  2812. (
  2813. FXMVECTOR V
  2814. )
  2815. {
  2816. #if defined(_XM_NO_INTRINSICS_)
  2817. XMVECTOR V1, V2, V3, V5, V7, V9, V11, V13, V15, V17, V19, V21, V23;
  2818. XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
  2819. XMVECTOR Result;
  2820. V1 = XMVectorModAngles(V);
  2821. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
  2822. // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
  2823. V2 = XMVectorMultiply(V1, V1);
  2824. V3 = XMVectorMultiply(V2, V1);
  2825. V5 = XMVectorMultiply(V3, V2);
  2826. V7 = XMVectorMultiply(V5, V2);
  2827. V9 = XMVectorMultiply(V7, V2);
  2828. V11 = XMVectorMultiply(V9, V2);
  2829. V13 = XMVectorMultiply(V11, V2);
  2830. V15 = XMVectorMultiply(V13, V2);
  2831. V17 = XMVectorMultiply(V15, V2);
  2832. V19 = XMVectorMultiply(V17, V2);
  2833. V21 = XMVectorMultiply(V19, V2);
  2834. V23 = XMVectorMultiply(V21, V2);
  2835. S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
  2836. S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
  2837. S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
  2838. S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
  2839. S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
  2840. S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
  2841. S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
  2842. S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
  2843. S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
  2844. S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
  2845. S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
  2846. Result = XMVectorMultiplyAdd(S1, V3, V1);
  2847. Result = XMVectorMultiplyAdd(S2, V5, Result);
  2848. Result = XMVectorMultiplyAdd(S3, V7, Result);
  2849. Result = XMVectorMultiplyAdd(S4, V9, Result);
  2850. Result = XMVectorMultiplyAdd(S5, V11, Result);
  2851. Result = XMVectorMultiplyAdd(S6, V13, Result);
  2852. Result = XMVectorMultiplyAdd(S7, V15, Result);
  2853. Result = XMVectorMultiplyAdd(S8, V17, Result);
  2854. Result = XMVectorMultiplyAdd(S9, V19, Result);
  2855. Result = XMVectorMultiplyAdd(S10, V21, Result);
  2856. Result = XMVectorMultiplyAdd(S11, V23, Result);
  2857. return Result;
  2858. #elif defined(_XM_SSE_INTRINSICS_)
  2859. // Force the value within the bounds of pi
  2860. XMVECTOR vResult = XMVectorModAngles(V);
2861. // Each one is V raised to the "num" power
  2862. // V2 = V1^2
  2863. XMVECTOR V2 = _mm_mul_ps(vResult,vResult);
  2864. // V1^3
  2865. XMVECTOR vPower = _mm_mul_ps(vResult,V2);
  2866. XMVECTOR vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
  2867. vConstants = _mm_mul_ps(vConstants,vPower);
  2868. vResult = _mm_add_ps(vResult,vConstants);
  2869. // V^5
  2870. vPower = _mm_mul_ps(vPower,V2);
  2871. vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
  2872. vConstants = _mm_mul_ps(vConstants,vPower);
  2873. vResult = _mm_add_ps(vResult,vConstants);
  2874. // V^7
  2875. vPower = _mm_mul_ps(vPower,V2);
  2876. vConstants = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
  2877. vConstants = _mm_mul_ps(vConstants,vPower);
  2878. vResult = _mm_add_ps(vResult,vConstants);
  2879. // V^9
  2880. vPower = _mm_mul_ps(vPower,V2);
  2881. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
  2882. vConstants = _mm_mul_ps(vConstants,vPower);
  2883. vResult = _mm_add_ps(vResult,vConstants);
  2884. // V^11
  2885. vPower = _mm_mul_ps(vPower,V2);
  2886. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
  2887. vConstants = _mm_mul_ps(vConstants,vPower);
  2888. vResult = _mm_add_ps(vResult,vConstants);
  2889. // V^13
  2890. vPower = _mm_mul_ps(vPower,V2);
  2891. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
  2892. vConstants = _mm_mul_ps(vConstants,vPower);
  2893. vResult = _mm_add_ps(vResult,vConstants);
  2894. // V^15
  2895. vPower = _mm_mul_ps(vPower,V2);
  2896. vConstants = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
  2897. vConstants = _mm_mul_ps(vConstants,vPower);
  2898. vResult = _mm_add_ps(vResult,vConstants);
  2899. // V^17
  2900. vPower = _mm_mul_ps(vPower,V2);
  2901. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
  2902. vConstants = _mm_mul_ps(vConstants,vPower);
  2903. vResult = _mm_add_ps(vResult,vConstants);
  2904. // V^19
  2905. vPower = _mm_mul_ps(vPower,V2);
  2906. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
  2907. vConstants = _mm_mul_ps(vConstants,vPower);
  2908. vResult = _mm_add_ps(vResult,vConstants);
  2909. // V^21
  2910. vPower = _mm_mul_ps(vPower,V2);
  2911. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
  2912. vConstants = _mm_mul_ps(vConstants,vPower);
  2913. vResult = _mm_add_ps(vResult,vConstants);
  2914. // V^23
  2915. vPower = _mm_mul_ps(vPower,V2);
  2916. vConstants = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
  2917. vConstants = _mm_mul_ps(vConstants,vPower);
  2918. vResult = _mm_add_ps(vResult,vConstants);
  2919. return vResult;
  2920. #else // _XM_VMX128_INTRINSICS_
  2921. #endif // _XM_VMX128_INTRINSICS_
  2922. }
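// Usage sketch (illustrative, not part of the original source): evaluating
// four independent angles in one call. XMVectorSin wraps its input into
// [-Pi, Pi) via XMVectorModAngles before evaluating the polynomial, so the
// (arbitrary) angle values here need no pre-wrapping.
// XMVECTOR vAngles = XMVectorSet( 0.0f, XM_PIDIV4, XM_PIDIV2, XM_PI );
// XMVECTOR vSines = XMVectorSin( vAngles );
// // vSines ~= < 0.0f, 0.70710678f, 1.0f, 0.0f >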
  2923. //------------------------------------------------------------------------------
  2924. XMINLINE XMVECTOR XMVectorCos
  2925. (
  2926. FXMVECTOR V
  2927. )
  2928. {
  2929. #if defined(_XM_NO_INTRINSICS_)
  2930. XMVECTOR V1, V2, V4, V6, V8, V10, V12, V14, V16, V18, V20, V22;
  2931. XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
  2932. XMVECTOR Result;
  2933. V1 = XMVectorModAngles(V);
  2934. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
  2935. // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
  2936. V2 = XMVectorMultiply(V1, V1);
  2937. V4 = XMVectorMultiply(V2, V2);
  2938. V6 = XMVectorMultiply(V4, V2);
  2939. V8 = XMVectorMultiply(V4, V4);
  2940. V10 = XMVectorMultiply(V6, V4);
  2941. V12 = XMVectorMultiply(V6, V6);
  2942. V14 = XMVectorMultiply(V8, V6);
  2943. V16 = XMVectorMultiply(V8, V8);
  2944. V18 = XMVectorMultiply(V10, V8);
  2945. V20 = XMVectorMultiply(V10, V10);
  2946. V22 = XMVectorMultiply(V12, V10);
  2947. C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
  2948. C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
  2949. C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
  2950. C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
  2951. C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
  2952. C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
  2953. C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
  2954. C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
  2955. C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
  2956. C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
  2957. C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
  2958. Result = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
  2959. Result = XMVectorMultiplyAdd(C2, V4, Result);
  2960. Result = XMVectorMultiplyAdd(C3, V6, Result);
  2961. Result = XMVectorMultiplyAdd(C4, V8, Result);
  2962. Result = XMVectorMultiplyAdd(C5, V10, Result);
  2963. Result = XMVectorMultiplyAdd(C6, V12, Result);
  2964. Result = XMVectorMultiplyAdd(C7, V14, Result);
  2965. Result = XMVectorMultiplyAdd(C8, V16, Result);
  2966. Result = XMVectorMultiplyAdd(C9, V18, Result);
  2967. Result = XMVectorMultiplyAdd(C10, V20, Result);
  2968. Result = XMVectorMultiplyAdd(C11, V22, Result);
  2969. return Result;
  2970. #elif defined(_XM_SSE_INTRINSICS_)
  2971. // Force the value within the bounds of pi
  2972. XMVECTOR V2 = XMVectorModAngles(V);
2973. // Each one is V raised to the "num" power
2974. // V2 starts as the wrapped angle and is squared in place: V2 = V1^2
  2975. V2 = _mm_mul_ps(V2,V2);
  2976. // V^2
  2977. XMVECTOR vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
  2978. vConstants = _mm_mul_ps(vConstants,V2);
  2979. XMVECTOR vResult = _mm_add_ps(vConstants,g_XMOne);
  2980. // V^4
  2981. XMVECTOR vPower = _mm_mul_ps(V2,V2);
  2982. vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
  2983. vConstants = _mm_mul_ps(vConstants,vPower);
  2984. vResult = _mm_add_ps(vResult,vConstants);
  2985. // V^6
  2986. vPower = _mm_mul_ps(vPower,V2);
  2987. vConstants = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
  2988. vConstants = _mm_mul_ps(vConstants,vPower);
  2989. vResult = _mm_add_ps(vResult,vConstants);
  2990. // V^8
  2991. vPower = _mm_mul_ps(vPower,V2);
  2992. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
  2993. vConstants = _mm_mul_ps(vConstants,vPower);
  2994. vResult = _mm_add_ps(vResult,vConstants);
  2995. // V^10
  2996. vPower = _mm_mul_ps(vPower,V2);
  2997. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
  2998. vConstants = _mm_mul_ps(vConstants,vPower);
  2999. vResult = _mm_add_ps(vResult,vConstants);
  3000. // V^12
  3001. vPower = _mm_mul_ps(vPower,V2);
  3002. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
  3003. vConstants = _mm_mul_ps(vConstants,vPower);
  3004. vResult = _mm_add_ps(vResult,vConstants);
  3005. // V^14
  3006. vPower = _mm_mul_ps(vPower,V2);
  3007. vConstants = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
  3008. vConstants = _mm_mul_ps(vConstants,vPower);
  3009. vResult = _mm_add_ps(vResult,vConstants);
  3010. // V^16
  3011. vPower = _mm_mul_ps(vPower,V2);
  3012. vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
  3013. vConstants = _mm_mul_ps(vConstants,vPower);
  3014. vResult = _mm_add_ps(vResult,vConstants);
  3015. // V^18
  3016. vPower = _mm_mul_ps(vPower,V2);
  3017. vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
  3018. vConstants = _mm_mul_ps(vConstants,vPower);
  3019. vResult = _mm_add_ps(vResult,vConstants);
  3020. // V^20
  3021. vPower = _mm_mul_ps(vPower,V2);
  3022. vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
  3023. vConstants = _mm_mul_ps(vConstants,vPower);
  3024. vResult = _mm_add_ps(vResult,vConstants);
  3025. // V^22
  3026. vPower = _mm_mul_ps(vPower,V2);
  3027. vConstants = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
  3028. vConstants = _mm_mul_ps(vConstants,vPower);
  3029. vResult = _mm_add_ps(vResult,vConstants);
  3030. return vResult;
  3031. #else // _XM_VMX128_INTRINSICS_
  3032. #endif // _XM_VMX128_INTRINSICS_
  3033. }
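// Scalar reference for the truncated series above (CosSeries is a
// hypothetical helper shown only for clarity; the vector path evaluates
// the same polynomial per component, with the 1/(2n)! magnitudes and
// alternating signs folded into g_XMCosCoefficients0..2):
// float CosSeries( float x ) // x assumed pre-wrapped to [-XM_PI, XM_PI)
// {
//     float x2 = x * x;
//     float r = 1.0f, power = 1.0f, factorial = 1.0f;
//     for ( int n = 1; n <= 11; ++n ) // terms through x^22 / 22!
//     {
//         power *= x2;
//         factorial *= (float)( 2 * n - 1 ) * (float)( 2 * n );
//         r += ( n & 1 ) ? -power / factorial : power / factorial;
//     }
//     return r;
// }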
  3034. //------------------------------------------------------------------------------
  3035. XMINLINE VOID XMVectorSinCos
  3036. (
  3037. XMVECTOR* pSin,
  3038. XMVECTOR* pCos,
  3039. FXMVECTOR V
  3040. )
  3041. {
  3042. #if defined(_XM_NO_INTRINSICS_)
  3043. XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
  3044. XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
  3045. XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
  3046. XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
  3047. XMVECTOR Sin, Cos;
  3048. XMASSERT(pSin);
  3049. XMASSERT(pCos);
  3050. V1 = XMVectorModAngles(V);
  3051. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
  3052. // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
  3053. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
  3054. // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
  3055. V2 = XMVectorMultiply(V1, V1);
  3056. V3 = XMVectorMultiply(V2, V1);
  3057. V4 = XMVectorMultiply(V2, V2);
  3058. V5 = XMVectorMultiply(V3, V2);
  3059. V6 = XMVectorMultiply(V3, V3);
  3060. V7 = XMVectorMultiply(V4, V3);
  3061. V8 = XMVectorMultiply(V4, V4);
  3062. V9 = XMVectorMultiply(V5, V4);
  3063. V10 = XMVectorMultiply(V5, V5);
  3064. V11 = XMVectorMultiply(V6, V5);
  3065. V12 = XMVectorMultiply(V6, V6);
  3066. V13 = XMVectorMultiply(V7, V6);
  3067. V14 = XMVectorMultiply(V7, V7);
  3068. V15 = XMVectorMultiply(V8, V7);
  3069. V16 = XMVectorMultiply(V8, V8);
  3070. V17 = XMVectorMultiply(V9, V8);
  3071. V18 = XMVectorMultiply(V9, V9);
  3072. V19 = XMVectorMultiply(V10, V9);
  3073. V20 = XMVectorMultiply(V10, V10);
  3074. V21 = XMVectorMultiply(V11, V10);
  3075. V22 = XMVectorMultiply(V11, V11);
  3076. V23 = XMVectorMultiply(V12, V11);
  3077. S1 = XMVectorSplatY(g_XMSinCoefficients0.v);
  3078. S2 = XMVectorSplatZ(g_XMSinCoefficients0.v);
  3079. S3 = XMVectorSplatW(g_XMSinCoefficients0.v);
  3080. S4 = XMVectorSplatX(g_XMSinCoefficients1.v);
  3081. S5 = XMVectorSplatY(g_XMSinCoefficients1.v);
  3082. S6 = XMVectorSplatZ(g_XMSinCoefficients1.v);
  3083. S7 = XMVectorSplatW(g_XMSinCoefficients1.v);
  3084. S8 = XMVectorSplatX(g_XMSinCoefficients2.v);
  3085. S9 = XMVectorSplatY(g_XMSinCoefficients2.v);
  3086. S10 = XMVectorSplatZ(g_XMSinCoefficients2.v);
  3087. S11 = XMVectorSplatW(g_XMSinCoefficients2.v);
  3088. C1 = XMVectorSplatY(g_XMCosCoefficients0.v);
  3089. C2 = XMVectorSplatZ(g_XMCosCoefficients0.v);
  3090. C3 = XMVectorSplatW(g_XMCosCoefficients0.v);
  3091. C4 = XMVectorSplatX(g_XMCosCoefficients1.v);
  3092. C5 = XMVectorSplatY(g_XMCosCoefficients1.v);
  3093. C6 = XMVectorSplatZ(g_XMCosCoefficients1.v);
  3094. C7 = XMVectorSplatW(g_XMCosCoefficients1.v);
  3095. C8 = XMVectorSplatX(g_XMCosCoefficients2.v);
  3096. C9 = XMVectorSplatY(g_XMCosCoefficients2.v);
  3097. C10 = XMVectorSplatZ(g_XMCosCoefficients2.v);
  3098. C11 = XMVectorSplatW(g_XMCosCoefficients2.v);
  3099. Sin = XMVectorMultiplyAdd(S1, V3, V1);
  3100. Sin = XMVectorMultiplyAdd(S2, V5, Sin);
  3101. Sin = XMVectorMultiplyAdd(S3, V7, Sin);
  3102. Sin = XMVectorMultiplyAdd(S4, V9, Sin);
  3103. Sin = XMVectorMultiplyAdd(S5, V11, Sin);
  3104. Sin = XMVectorMultiplyAdd(S6, V13, Sin);
  3105. Sin = XMVectorMultiplyAdd(S7, V15, Sin);
  3106. Sin = XMVectorMultiplyAdd(S8, V17, Sin);
  3107. Sin = XMVectorMultiplyAdd(S9, V19, Sin);
  3108. Sin = XMVectorMultiplyAdd(S10, V21, Sin);
  3109. Sin = XMVectorMultiplyAdd(S11, V23, Sin);
  3110. Cos = XMVectorMultiplyAdd(C1, V2, g_XMOne.v);
  3111. Cos = XMVectorMultiplyAdd(C2, V4, Cos);
  3112. Cos = XMVectorMultiplyAdd(C3, V6, Cos);
  3113. Cos = XMVectorMultiplyAdd(C4, V8, Cos);
  3114. Cos = XMVectorMultiplyAdd(C5, V10, Cos);
  3115. Cos = XMVectorMultiplyAdd(C6, V12, Cos);
  3116. Cos = XMVectorMultiplyAdd(C7, V14, Cos);
  3117. Cos = XMVectorMultiplyAdd(C8, V16, Cos);
  3118. Cos = XMVectorMultiplyAdd(C9, V18, Cos);
  3119. Cos = XMVectorMultiplyAdd(C10, V20, Cos);
  3120. Cos = XMVectorMultiplyAdd(C11, V22, Cos);
  3121. *pSin = Sin;
  3122. *pCos = Cos;
  3123. #elif defined(_XM_SSE_INTRINSICS_)
  3124. XMASSERT(pSin);
  3125. XMASSERT(pCos);
  3126. XMVECTOR V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13;
  3127. XMVECTOR V14, V15, V16, V17, V18, V19, V20, V21, V22, V23;
  3128. XMVECTOR S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11;
  3129. XMVECTOR C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
  3130. XMVECTOR Sin, Cos;
  3131. V1 = XMVectorModAngles(V);
  3132. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! + V^9 / 9! - V^11 / 11! + V^13 / 13! -
  3133. // V^15 / 15! + V^17 / 17! - V^19 / 19! + V^21 / 21! - V^23 / 23! (for -PI <= V < PI)
  3134. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! + V^8 / 8! - V^10 / 10! + V^12 / 12! -
  3135. // V^14 / 14! + V^16 / 16! - V^18 / 18! + V^20 / 20! - V^22 / 22! (for -PI <= V < PI)
  3136. V2 = XMVectorMultiply(V1, V1);
  3137. V3 = XMVectorMultiply(V2, V1);
  3138. V4 = XMVectorMultiply(V2, V2);
  3139. V5 = XMVectorMultiply(V3, V2);
  3140. V6 = XMVectorMultiply(V3, V3);
  3141. V7 = XMVectorMultiply(V4, V3);
  3142. V8 = XMVectorMultiply(V4, V4);
  3143. V9 = XMVectorMultiply(V5, V4);
  3144. V10 = XMVectorMultiply(V5, V5);
  3145. V11 = XMVectorMultiply(V6, V5);
  3146. V12 = XMVectorMultiply(V6, V6);
  3147. V13 = XMVectorMultiply(V7, V6);
  3148. V14 = XMVectorMultiply(V7, V7);
  3149. V15 = XMVectorMultiply(V8, V7);
  3150. V16 = XMVectorMultiply(V8, V8);
  3151. V17 = XMVectorMultiply(V9, V8);
  3152. V18 = XMVectorMultiply(V9, V9);
  3153. V19 = XMVectorMultiply(V10, V9);
  3154. V20 = XMVectorMultiply(V10, V10);
  3155. V21 = XMVectorMultiply(V11, V10);
  3156. V22 = XMVectorMultiply(V11, V11);
  3157. V23 = XMVectorMultiply(V12, V11);
  3158. S1 = _mm_load_ps1(&g_XMSinCoefficients0.f[1]);
  3159. S2 = _mm_load_ps1(&g_XMSinCoefficients0.f[2]);
  3160. S3 = _mm_load_ps1(&g_XMSinCoefficients0.f[3]);
  3161. S4 = _mm_load_ps1(&g_XMSinCoefficients1.f[0]);
  3162. S5 = _mm_load_ps1(&g_XMSinCoefficients1.f[1]);
  3163. S6 = _mm_load_ps1(&g_XMSinCoefficients1.f[2]);
  3164. S7 = _mm_load_ps1(&g_XMSinCoefficients1.f[3]);
  3165. S8 = _mm_load_ps1(&g_XMSinCoefficients2.f[0]);
  3166. S9 = _mm_load_ps1(&g_XMSinCoefficients2.f[1]);
  3167. S10 = _mm_load_ps1(&g_XMSinCoefficients2.f[2]);
  3168. S11 = _mm_load_ps1(&g_XMSinCoefficients2.f[3]);
  3169. C1 = _mm_load_ps1(&g_XMCosCoefficients0.f[1]);
  3170. C2 = _mm_load_ps1(&g_XMCosCoefficients0.f[2]);
  3171. C3 = _mm_load_ps1(&g_XMCosCoefficients0.f[3]);
  3172. C4 = _mm_load_ps1(&g_XMCosCoefficients1.f[0]);
  3173. C5 = _mm_load_ps1(&g_XMCosCoefficients1.f[1]);
  3174. C6 = _mm_load_ps1(&g_XMCosCoefficients1.f[2]);
  3175. C7 = _mm_load_ps1(&g_XMCosCoefficients1.f[3]);
  3176. C8 = _mm_load_ps1(&g_XMCosCoefficients2.f[0]);
  3177. C9 = _mm_load_ps1(&g_XMCosCoefficients2.f[1]);
  3178. C10 = _mm_load_ps1(&g_XMCosCoefficients2.f[2]);
  3179. C11 = _mm_load_ps1(&g_XMCosCoefficients2.f[3]);
  3180. S1 = _mm_mul_ps(S1,V3);
  3181. Sin = _mm_add_ps(S1,V1);
  3182. Sin = XMVectorMultiplyAdd(S2, V5, Sin);
  3183. Sin = XMVectorMultiplyAdd(S3, V7, Sin);
  3184. Sin = XMVectorMultiplyAdd(S4, V9, Sin);
  3185. Sin = XMVectorMultiplyAdd(S5, V11, Sin);
  3186. Sin = XMVectorMultiplyAdd(S6, V13, Sin);
  3187. Sin = XMVectorMultiplyAdd(S7, V15, Sin);
  3188. Sin = XMVectorMultiplyAdd(S8, V17, Sin);
  3189. Sin = XMVectorMultiplyAdd(S9, V19, Sin);
  3190. Sin = XMVectorMultiplyAdd(S10, V21, Sin);
  3191. Sin = XMVectorMultiplyAdd(S11, V23, Sin);
  3192. Cos = _mm_mul_ps(C1,V2);
  3193. Cos = _mm_add_ps(Cos,g_XMOne);
  3194. Cos = XMVectorMultiplyAdd(C2, V4, Cos);
  3195. Cos = XMVectorMultiplyAdd(C3, V6, Cos);
  3196. Cos = XMVectorMultiplyAdd(C4, V8, Cos);
  3197. Cos = XMVectorMultiplyAdd(C5, V10, Cos);
  3198. Cos = XMVectorMultiplyAdd(C6, V12, Cos);
  3199. Cos = XMVectorMultiplyAdd(C7, V14, Cos);
  3200. Cos = XMVectorMultiplyAdd(C8, V16, Cos);
  3201. Cos = XMVectorMultiplyAdd(C9, V18, Cos);
  3202. Cos = XMVectorMultiplyAdd(C10, V20, Cos);
  3203. Cos = XMVectorMultiplyAdd(C11, V22, Cos);
  3204. *pSin = Sin;
  3205. *pCos = Cos;
  3206. #else // _XM_VMX128_INTRINSICS_
  3207. #endif // _XM_VMX128_INTRINSICS_
  3208. }
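// Usage sketch (illustrative): when both values are needed, one
// XMVectorSinCos call is cheaper than separate XMVectorSin and
// XMVectorCos calls because the shared power table V2..V23 is built once.
// XMVECTOR vSin, vCos;
// XMVectorSinCos( &vSin, &vCos, XMVectorReplicate( XM_PIDIV4 ) );
// // each component of vSin and vCos ~= 0.70710678f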
  3209. //------------------------------------------------------------------------------
  3210. XMINLINE XMVECTOR XMVectorTan
  3211. (
  3212. FXMVECTOR V
  3213. )
  3214. {
  3215. #if defined(_XM_NO_INTRINSICS_)
  3216. // Cody and Waite algorithm to compute tangent.
  3217. XMVECTOR VA, VB, VC, VC2;
  3218. XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
  3219. XMVECTOR C0, C1, TwoDivPi, Epsilon;
  3220. XMVECTOR N, D;
  3221. XMVECTOR R0, R1;
  3222. XMVECTOR VIsZero, VCNearZero, VBIsEven;
  3223. XMVECTOR Zero;
  3224. XMVECTOR Result;
  3225. UINT i;
  3226. static CONST XMVECTOR TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
  3227. static CONST XMVECTOR TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
  3228. static CONST XMVECTOR TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
  3229. static CONST XMVECTORU32 Mask = {0x1, 0x1, 0x1, 0x1};
  3230. TwoDivPi = XMVectorSplatW(TanConstants);
  3231. Zero = XMVectorZero();
  3232. C0 = XMVectorSplatX(TanConstants);
  3233. C1 = XMVectorSplatY(TanConstants);
  3234. Epsilon = XMVectorSplatZ(TanConstants);
  3235. VA = XMVectorMultiply(V, TwoDivPi);
  3236. VA = XMVectorRound(VA);
  3237. VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
  3238. VB = XMVectorAbs(VA);
  3239. VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
  3240. for (i = 0; i < 4; i++)
  3241. {
  3242. VB.vector4_u32[i] = (UINT)VB.vector4_f32[i];
  3243. }
  3244. VC2 = XMVectorMultiply(VC, VC);
  3245. T7 = XMVectorSplatW(TanCoefficients1);
  3246. T6 = XMVectorSplatZ(TanCoefficients1);
  3247. T4 = XMVectorSplatX(TanCoefficients1);
  3248. T3 = XMVectorSplatW(TanCoefficients0);
  3249. T5 = XMVectorSplatY(TanCoefficients1);
  3250. T2 = XMVectorSplatZ(TanCoefficients0);
  3251. T1 = XMVectorSplatY(TanCoefficients0);
  3252. T0 = XMVectorSplatX(TanCoefficients0);
  3253. VBIsEven = XMVectorAndInt(VB, Mask.v);
  3254. VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
  3255. N = XMVectorMultiplyAdd(VC2, T7, T6);
  3256. D = XMVectorMultiplyAdd(VC2, T4, T3);
  3257. N = XMVectorMultiplyAdd(VC2, N, T5);
  3258. D = XMVectorMultiplyAdd(VC2, D, T2);
  3259. N = XMVectorMultiply(VC2, N);
  3260. D = XMVectorMultiplyAdd(VC2, D, T1);
  3261. N = XMVectorMultiplyAdd(VC, N, VC);
  3262. VCNearZero = XMVectorInBounds(VC, Epsilon);
  3263. D = XMVectorMultiplyAdd(VC2, D, T0);
  3264. N = XMVectorSelect(N, VC, VCNearZero);
  3265. D = XMVectorSelect(D, g_XMOne.v, VCNearZero);
  3266. R0 = XMVectorNegate(N);
  3267. R1 = XMVectorReciprocal(D);
  3268. R0 = XMVectorReciprocal(R0);
  3269. R1 = XMVectorMultiply(N, R1);
  3270. R0 = XMVectorMultiply(D, R0);
  3271. VIsZero = XMVectorEqual(V, Zero);
  3272. Result = XMVectorSelect(R0, R1, VBIsEven);
  3273. Result = XMVectorSelect(Result, Zero, VIsZero);
  3274. return Result;
  3275. #elif defined(_XM_SSE_INTRINSICS_)
  3276. // Cody and Waite algorithm to compute tangent.
  3277. XMVECTOR VA, VB, VC, VC2;
  3278. XMVECTOR T0, T1, T2, T3, T4, T5, T6, T7;
  3279. XMVECTOR C0, C1, TwoDivPi, Epsilon;
  3280. XMVECTOR N, D;
  3281. XMVECTOR R0, R1;
  3282. XMVECTOR VIsZero, VCNearZero, VBIsEven;
  3283. XMVECTOR Zero;
  3284. XMVECTOR Result;
  3285. static CONST XMVECTORF32 TanCoefficients0 = {1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f};
  3286. static CONST XMVECTORF32 TanCoefficients1 = {4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, -1.786170734e-5f};
  3287. static CONST XMVECTORF32 TanConstants = {1.570796371f, 6.077100628e-11f, 0.000244140625f, 2.0f / XM_PI};
  3288. static CONST XMVECTORI32 Mask = {0x1, 0x1, 0x1, 0x1};
  3289. TwoDivPi = XMVectorSplatW(TanConstants);
  3290. Zero = XMVectorZero();
  3291. C0 = XMVectorSplatX(TanConstants);
  3292. C1 = XMVectorSplatY(TanConstants);
  3293. Epsilon = XMVectorSplatZ(TanConstants);
  3294. VA = XMVectorMultiply(V, TwoDivPi);
  3295. VA = XMVectorRound(VA);
  3296. VC = XMVectorNegativeMultiplySubtract(VA, C0, V);
  3297. VB = XMVectorAbs(VA);
  3298. VC = XMVectorNegativeMultiplySubtract(VA, C1, VC);
  3299. reinterpret_cast<__m128i *>(&VB)[0] = _mm_cvttps_epi32(VB);
  3300. VC2 = XMVectorMultiply(VC, VC);
  3301. T7 = XMVectorSplatW(TanCoefficients1);
  3302. T6 = XMVectorSplatZ(TanCoefficients1);
  3303. T4 = XMVectorSplatX(TanCoefficients1);
  3304. T3 = XMVectorSplatW(TanCoefficients0);
  3305. T5 = XMVectorSplatY(TanCoefficients1);
  3306. T2 = XMVectorSplatZ(TanCoefficients0);
  3307. T1 = XMVectorSplatY(TanCoefficients0);
  3308. T0 = XMVectorSplatX(TanCoefficients0);
  3309. VBIsEven = XMVectorAndInt(VB,Mask);
  3310. VBIsEven = XMVectorEqualInt(VBIsEven, Zero);
  3311. N = XMVectorMultiplyAdd(VC2, T7, T6);
  3312. D = XMVectorMultiplyAdd(VC2, T4, T3);
  3313. N = XMVectorMultiplyAdd(VC2, N, T5);
  3314. D = XMVectorMultiplyAdd(VC2, D, T2);
  3315. N = XMVectorMultiply(VC2, N);
  3316. D = XMVectorMultiplyAdd(VC2, D, T1);
  3317. N = XMVectorMultiplyAdd(VC, N, VC);
  3318. VCNearZero = XMVectorInBounds(VC, Epsilon);
  3319. D = XMVectorMultiplyAdd(VC2, D, T0);
  3320. N = XMVectorSelect(N, VC, VCNearZero);
  3321. D = XMVectorSelect(D, g_XMOne, VCNearZero);
  3322. R0 = XMVectorNegate(N);
  3323. R1 = _mm_div_ps(N,D);
  3324. R0 = _mm_div_ps(D,R0);
  3325. VIsZero = XMVectorEqual(V, Zero);
  3326. Result = XMVectorSelect(R0, R1, VBIsEven);
  3327. Result = XMVectorSelect(Result, Zero, VIsZero);
  3328. return Result;
  3329. #else // _XM_VMX128_INTRINSICS_
  3330. #endif // _XM_VMX128_INTRINSICS_
  3331. }
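// The reduction above is the Cody & Waite scheme: VA = round(V * 2/Pi)
// counts quadrants, and the remainder VC = (V - VA*C0) - VA*C1 splits
// Pi/2 into a high part C0 and a tiny correction C1 so the subtraction
// stays accurate in single precision. A scalar sketch of the same
// reduction (constants taken from TanConstants above):
// float n = floorf( x * ( 2.0f / XM_PI ) + 0.5f ); // ~round to nearest
// float f = ( x - n * 1.570796371f ) - n * 6.077100628e-11f;
// tan(f) then comes from the rational approximation N(f)/D(f); for odd
// quadrant counts the cotangent identity applies instead, which is why
// the code selects between R1 = N/D and R0 = D/(-N) with VBIsEven.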
  3332. //------------------------------------------------------------------------------
  3333. XMINLINE XMVECTOR XMVectorSinH
  3334. (
  3335. FXMVECTOR V
  3336. )
  3337. {
  3338. #if defined(_XM_NO_INTRINSICS_)
  3339. XMVECTOR V1, V2;
  3340. XMVECTOR E1, E2;
  3341. XMVECTOR Result;
  3342. static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
  3343. V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
  3344. V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
  3345. E1 = XMVectorExp(V1);
  3346. E2 = XMVectorExp(V2);
  3347. Result = XMVectorSubtract(E1, E2);
  3348. return Result;
  3349. #elif defined(_XM_SSE_INTRINSICS_)
  3350. XMVECTOR V1, V2;
  3351. XMVECTOR E1, E2;
  3352. XMVECTOR Result;
  3353. static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
  3354. V1 = _mm_mul_ps(V, Scale);
  3355. V1 = _mm_add_ps(V1,g_XMNegativeOne);
  3356. V2 = _mm_mul_ps(V, Scale);
  3357. V2 = _mm_sub_ps(g_XMNegativeOne,V2);
  3358. E1 = XMVectorExp(V1);
  3359. E2 = XMVectorExp(V2);
  3360. Result = _mm_sub_ps(E1, E2);
  3361. return Result;
  3362. #else // _XM_VMX128_INTRINSICS_
  3363. #endif // _XM_VMX128_INTRINSICS_
  3364. }
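// Why the Scale and the -1.0f bias: XMVectorExp is a base-2 exponential,
// so with Scale = 1/ln(2),
// E1 = 2^(V/ln2 - 1) = e^V / 2 and E2 = 2^(-V/ln2 - 1) = e^-V / 2,
// giving E1 - E2 = (e^V - e^-V) / 2 = sinh(V). XMVectorCosH below uses
// the identical setup and simply adds instead: E1 + E2 = cosh(V).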
  3365. //------------------------------------------------------------------------------
  3366. XMINLINE XMVECTOR XMVectorCosH
  3367. (
  3368. FXMVECTOR V
  3369. )
  3370. {
  3371. #if defined(_XM_NO_INTRINSICS_)
  3372. XMVECTOR V1, V2;
  3373. XMVECTOR E1, E2;
  3374. XMVECTOR Result;
  3375. static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
  3376. V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
  3377. V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
  3378. E1 = XMVectorExp(V1);
  3379. E2 = XMVectorExp(V2);
  3380. Result = XMVectorAdd(E1, E2);
  3381. return Result;
  3382. #elif defined(_XM_SSE_INTRINSICS_)
  3383. XMVECTOR V1, V2;
  3384. XMVECTOR E1, E2;
  3385. XMVECTOR Result;
  3386. static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
  3387. V1 = _mm_mul_ps(V,Scale);
  3388. V1 = _mm_add_ps(V1,g_XMNegativeOne);
  3389. V2 = _mm_mul_ps(V, Scale);
  3390. V2 = _mm_sub_ps(g_XMNegativeOne,V2);
  3391. E1 = XMVectorExp(V1);
  3392. E2 = XMVectorExp(V2);
  3393. Result = _mm_add_ps(E1, E2);
  3394. return Result;
  3395. #else // _XM_VMX128_INTRINSICS_
  3396. #endif // _XM_VMX128_INTRINSICS_
  3397. }
  3398. //------------------------------------------------------------------------------
  3399. XMINLINE XMVECTOR XMVectorTanH
  3400. (
  3401. FXMVECTOR V
  3402. )
  3403. {
  3404. #if defined(_XM_NO_INTRINSICS_)
  3405. XMVECTOR E;
  3406. XMVECTOR Result;
  3407. static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
  3408. E = XMVectorMultiply(V, Scale.v);
  3409. E = XMVectorExp(E);
  3410. E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
  3411. E = XMVectorReciprocal(E);
  3412. Result = XMVectorSubtract(g_XMOne.v, E);
  3413. return Result;
  3414. #elif defined(_XM_SSE_INTRINSICS_)
  3415. static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
  3416. XMVECTOR E = _mm_mul_ps(V, Scale);
  3417. E = XMVectorExp(E);
  3418. E = _mm_mul_ps(E,g_XMOneHalf);
  3419. E = _mm_add_ps(E,g_XMOneHalf);
  3420. E = XMVectorReciprocal(E);
  3421. E = _mm_sub_ps(g_XMOne, E);
  3422. return E;
  3423. #else // _XM_VMX128_INTRINSICS_
  3424. #endif // _XM_VMX128_INTRINSICS_
  3425. }
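// Derivation of the form above (again with base-2 XMVectorExp, and
// Scale = 2/ln(2)):
// E = 2^(2V/ln2) = e^(2V)
// E * 0.5f + 0.5f = (e^(2V) + 1) / 2
// 1 - 2/(e^(2V) + 1) = (e^(2V) - 1)/(e^(2V) + 1) = tanh(V)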
  3426. //------------------------------------------------------------------------------
  3427. XMINLINE XMVECTOR XMVectorASin
  3428. (
  3429. FXMVECTOR V
  3430. )
  3431. {
  3432. #if defined(_XM_NO_INTRINSICS_)
  3433. XMVECTOR V2, V3, AbsV;
  3434. XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
  3435. XMVECTOR R0, R1, R2, R3, R4;
  3436. XMVECTOR OneMinusAbsV;
  3437. XMVECTOR Rsq;
  3438. XMVECTOR Result;
  3439. static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
  3440. // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
  3441. // V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
  3442. AbsV = XMVectorAbs(V);
  3443. V2 = XMVectorMultiply(V, V);
  3444. V3 = XMVectorMultiply(V2, AbsV);
  3445. R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
  3446. OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
  3447. Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
  3448. C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
  3449. C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
  3450. C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
  3451. C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
  3452. C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
  3453. C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
  3454. C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
  3455. C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
  3456. C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
  3457. C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
  3458. C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
  3459. C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
  3460. R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
  3461. R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
  3462. R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
  3463. R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
  3464. R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
  3465. R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
  3466. R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
  3467. R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
  3468. R0 = XMVectorMultiplyAdd(R2, V3, R0);
  3469. R1 = XMVectorMultiplyAdd(R3, V3, R1);
  3470. R0 = XMVectorMultiply(V, R0);
  3471. R1 = XMVectorMultiply(R4, R1);
  3472. Result = XMVectorMultiplyAdd(R1, Rsq, R0);
  3473. return Result;
  3474. #elif defined(_XM_SSE_INTRINSICS_)
  3475. static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
  3476. // asin(V) = V * (C0 + C1 * V + C2 * V^2 + C3 * V^3 + C4 * V^4 + C5 * V^5) + (1 - V) * rsq(1 - V) *
  3477. // V * (C6 + C7 * V + C8 * V^2 + C9 * V^3 + C10 * V^4 + C11 * V^5)
  3478. // Get abs(V)
  3479. XMVECTOR vAbsV = _mm_setzero_ps();
  3480. vAbsV = _mm_sub_ps(vAbsV,V);
  3481. vAbsV = _mm_max_ps(vAbsV,V);
  3482. XMVECTOR R0 = vAbsV;
  3483. XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
  3484. R0 = _mm_mul_ps(R0,vConstants);
  3485. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
  3486. R0 = _mm_add_ps(R0,vConstants);
  3487. XMVECTOR R1 = vAbsV;
  3488. vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
  3489. R1 = _mm_mul_ps(R1,vConstants);
  3490. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
  3491. R1 = _mm_add_ps(R1, vConstants);
  3492. XMVECTOR R2 = vAbsV;
  3493. vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
  3494. R2 = _mm_mul_ps(R2,vConstants);
  3495. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
  3496. R2 = _mm_add_ps(R2, vConstants);
  3497. XMVECTOR R3 = vAbsV;
  3498. vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
  3499. R3 = _mm_mul_ps(R3,vConstants);
  3500. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
  3501. R3 = _mm_add_ps(R3, vConstants);
  3502. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
  3503. R0 = _mm_mul_ps(R0,vAbsV);
  3504. R0 = _mm_add_ps(R0,vConstants);
  3505. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
  3506. R1 = _mm_mul_ps(R1,vAbsV);
  3507. R1 = _mm_add_ps(R1,vConstants);
  3508. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
  3509. R2 = _mm_mul_ps(R2,vAbsV);
  3510. R2 = _mm_add_ps(R2,vConstants);
  3511. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
  3512. R3 = _mm_mul_ps(R3,vAbsV);
  3513. R3 = _mm_add_ps(R3,vConstants);
  3514. // V3 = V^3
  3515. vConstants = _mm_mul_ps(V,V);
  3516. vConstants = _mm_mul_ps(vConstants, vAbsV);
  3517. // Mul by V^3
  3518. R2 = _mm_mul_ps(R2,vConstants);
  3519. R3 = _mm_mul_ps(R3,vConstants);
  3520. // Merge the results
  3521. R0 = _mm_add_ps(R0,R2);
  3522. R1 = _mm_add_ps(R1,R3);
  3523. R0 = _mm_mul_ps(R0,V);
  3524. // vConstants = V-(V^2 retaining sign)
  3525. vConstants = _mm_mul_ps(vAbsV, V);
  3526. vConstants = _mm_sub_ps(V,vConstants);
  3527. R1 = _mm_mul_ps(R1,vConstants);
  3528. vConstants = _mm_sub_ps(OnePlusEpsilon,vAbsV);
  3529. // Do NOT use rsqrt/mul. This needs the precision
  3530. vConstants = _mm_sqrt_ps(vConstants);
  3531. R1 = _mm_div_ps(R1,vConstants);
  3532. R0 = _mm_add_ps(R0,R1);
  3533. return R0;
  3534. #else // _XM_VMX128_INTRINSICS_
  3535. #endif // _XM_VMX128_INTRINSICS_
  3536. }
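// Usage sketch (illustrative): inputs are expected in [-1, 1]; the
// OnePlusEpsilon constant keeps the sqrt argument positive when |V| == 1
// so asin(+/-1) still yields +/-Pi/2.
// XMVECTOR vAngle = XMVectorASin( XMVectorReplicate( 0.5f ) );
// // each component ~= XM_PI / 6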
  3537. //------------------------------------------------------------------------------
  3538. XMINLINE XMVECTOR XMVectorACos
  3539. (
  3540. FXMVECTOR V
  3541. )
  3542. {
  3543. #if defined(_XM_NO_INTRINSICS_)
  3544. XMVECTOR V2, V3, AbsV;
  3545. XMVECTOR C0, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10, C11;
  3546. XMVECTOR R0, R1, R2, R3, R4;
  3547. XMVECTOR OneMinusAbsV;
  3548. XMVECTOR Rsq;
  3549. XMVECTOR Result;
  3550. static CONST XMVECTOR OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
  3551. // acos(V) = PI / 2 - asin(V)
  3552. AbsV = XMVectorAbs(V);
  3553. V2 = XMVectorMultiply(V, V);
  3554. V3 = XMVectorMultiply(V2, AbsV);
  3555. R4 = XMVectorNegativeMultiplySubtract(AbsV, V, V);
  3556. OneMinusAbsV = XMVectorSubtract(OnePlusEpsilon, AbsV);
  3557. Rsq = XMVectorReciprocalSqrt(OneMinusAbsV);
  3558. C0 = XMVectorSplatX(g_XMASinCoefficients0.v);
  3559. C1 = XMVectorSplatY(g_XMASinCoefficients0.v);
  3560. C2 = XMVectorSplatZ(g_XMASinCoefficients0.v);
  3561. C3 = XMVectorSplatW(g_XMASinCoefficients0.v);
  3562. C4 = XMVectorSplatX(g_XMASinCoefficients1.v);
  3563. C5 = XMVectorSplatY(g_XMASinCoefficients1.v);
  3564. C6 = XMVectorSplatZ(g_XMASinCoefficients1.v);
  3565. C7 = XMVectorSplatW(g_XMASinCoefficients1.v);
  3566. C8 = XMVectorSplatX(g_XMASinCoefficients2.v);
  3567. C9 = XMVectorSplatY(g_XMASinCoefficients2.v);
  3568. C10 = XMVectorSplatZ(g_XMASinCoefficients2.v);
  3569. C11 = XMVectorSplatW(g_XMASinCoefficients2.v);
  3570. R0 = XMVectorMultiplyAdd(C3, AbsV, C7);
  3571. R1 = XMVectorMultiplyAdd(C1, AbsV, C5);
  3572. R2 = XMVectorMultiplyAdd(C2, AbsV, C6);
  3573. R3 = XMVectorMultiplyAdd(C0, AbsV, C4);
  3574. R0 = XMVectorMultiplyAdd(R0, AbsV, C11);
  3575. R1 = XMVectorMultiplyAdd(R1, AbsV, C9);
  3576. R2 = XMVectorMultiplyAdd(R2, AbsV, C10);
  3577. R3 = XMVectorMultiplyAdd(R3, AbsV, C8);
  3578. R0 = XMVectorMultiplyAdd(R2, V3, R0);
  3579. R1 = XMVectorMultiplyAdd(R3, V3, R1);
  3580. R0 = XMVectorMultiply(V, R0);
  3581. R1 = XMVectorMultiply(R4, R1);
  3582. Result = XMVectorMultiplyAdd(R1, Rsq, R0);
  3583. Result = XMVectorSubtract(g_XMHalfPi.v, Result);
  3584. return Result;
  3585. #elif defined(_XM_SSE_INTRINSICS_)
  3586. static CONST XMVECTORF32 OnePlusEpsilon = {1.00000011921f, 1.00000011921f, 1.00000011921f, 1.00000011921f};
  3587. // Uses only 6 registers for good code on x86 targets
  3588. // acos(V) = PI / 2 - asin(V)
  3589. // Get abs(V)
  3590. XMVECTOR vAbsV = _mm_setzero_ps();
  3591. vAbsV = _mm_sub_ps(vAbsV,V);
  3592. vAbsV = _mm_max_ps(vAbsV,V);
  3593. // Perform the series in precision groups to
  3594. // retain precision across 20 bits. (3 bits of imprecision due to operations)
  3595. XMVECTOR R0 = vAbsV;
  3596. XMVECTOR vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[3]);
  3597. R0 = _mm_mul_ps(R0,vConstants);
  3598. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[3]);
  3599. R0 = _mm_add_ps(R0,vConstants);
  3600. R0 = _mm_mul_ps(R0,vAbsV);
  3601. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[3]);
  3602. R0 = _mm_add_ps(R0,vConstants);
  3603. XMVECTOR R1 = vAbsV;
  3604. vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[1]);
  3605. R1 = _mm_mul_ps(R1,vConstants);
  3606. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[1]);
  3607. R1 = _mm_add_ps(R1,vConstants);
  3608. R1 = _mm_mul_ps(R1, vAbsV);
  3609. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[1]);
  3610. R1 = _mm_add_ps(R1,vConstants);
  3611. XMVECTOR R2 = vAbsV;
  3612. vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[2]);
  3613. R2 = _mm_mul_ps(R2,vConstants);
  3614. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[2]);
  3615. R2 = _mm_add_ps(R2,vConstants);
  3616. R2 = _mm_mul_ps(R2, vAbsV);
  3617. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[2]);
  3618. R2 = _mm_add_ps(R2,vConstants);
  3619. XMVECTOR R3 = vAbsV;
  3620. vConstants = _mm_load_ps1(&g_XMASinCoefficients0.f[0]);
  3621. R3 = _mm_mul_ps(R3,vConstants);
  3622. vConstants = _mm_load_ps1(&g_XMASinCoefficients1.f[0]);
  3623. R3 = _mm_add_ps(R3,vConstants);
  3624. R3 = _mm_mul_ps(R3, vAbsV);
  3625. vConstants = _mm_load_ps1(&g_XMASinCoefficients2.f[0]);
  3626. R3 = _mm_add_ps(R3,vConstants);
  3627. // vConstants = V^3
  3628. vConstants = _mm_mul_ps(V,V);
  3629. vConstants = _mm_mul_ps(vConstants,vAbsV);
  3630. R2 = _mm_mul_ps(R2,vConstants);
  3631. R3 = _mm_mul_ps(R3,vConstants);
  3632. // Add the pair of values together here to retain
  3633. // as much precision as possible
  3634. R0 = _mm_add_ps(R0,R2);
  3635. R1 = _mm_add_ps(R1,R3);
  3636. R0 = _mm_mul_ps(R0,V);
  3637. // vConstants = V-(V*abs(V))
  3638. vConstants = _mm_mul_ps(V,vAbsV);
  3639. vConstants = _mm_sub_ps(V,vConstants);
  3640. R1 = _mm_mul_ps(R1,vConstants);
3641. // Epsilon exists to allow 1.0f as an answer
  3642. vConstants = _mm_sub_ps(OnePlusEpsilon, vAbsV);
  3643. // Use sqrt instead of rsqrt for precision
  3644. vConstants = _mm_sqrt_ps(vConstants);
  3645. R1 = _mm_div_ps(R1,vConstants);
  3646. R1 = _mm_add_ps(R1,R0);
  3647. vConstants = _mm_sub_ps(g_XMHalfPi,R1);
  3648. return vConstants;
  3649. #else // _XM_VMX128_INTRINSICS_
  3650. #endif // _XM_VMX128_INTRINSICS_
  3651. }
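// Sanity check (illustrative): since acos(V) = Pi/2 - asin(V),
// XMVECTOR vAngle = XMVectorACos( XMVectorReplicate( 0.0f ) );
// // each component ~= XM_PIDIV2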
  3652. //------------------------------------------------------------------------------
  3653. XMINLINE XMVECTOR XMVectorATan
  3654. (
  3655. FXMVECTOR V
  3656. )
  3657. {
  3658. #if defined(_XM_NO_INTRINSICS_)
  3659. // Cody and Waite algorithm to compute inverse tangent.
  3660. XMVECTOR N, D;
  3661. XMVECTOR VF, G, ReciprocalF, AbsF, FA, FB;
  3662. XMVECTOR Sqrt3, Sqrt3MinusOne, TwoMinusSqrt3;
  3663. XMVECTOR HalfPi, OneThirdPi, OneSixthPi, Epsilon, MinV, MaxV;
  3664. XMVECTOR Zero;
  3665. XMVECTOR NegativeHalfPi;
  3666. XMVECTOR Angle1, Angle2;
  3667. XMVECTOR F_GT_One, F_GT_TwoMinusSqrt3, AbsF_LT_Epsilon, V_LT_Zero, V_GT_MaxV, V_LT_MinV;
  3668. XMVECTOR NegativeResult, Result;
  3669. XMVECTOR P0, P1, P2, P3, Q0, Q1, Q2, Q3;
  3670. static CONST XMVECTOR ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
  3671. static CONST XMVECTOR ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
  3672. static CONST XMVECTOR ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
  3673. static CONST XMVECTOR ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
  3674. Zero = XMVectorZero();
  3675. P0 = XMVectorSplatX(ATanConstants0);
  3676. P1 = XMVectorSplatY(ATanConstants0);
  3677. P2 = XMVectorSplatZ(ATanConstants0);
  3678. P3 = XMVectorSplatW(ATanConstants0);
  3679. Q0 = XMVectorSplatX(ATanConstants1);
  3680. Q1 = XMVectorSplatY(ATanConstants1);
  3681. Q2 = XMVectorSplatZ(ATanConstants1);
  3682. Q3 = XMVectorSplatW(ATanConstants1);
  3683. Sqrt3 = XMVectorSplatX(ATanConstants2);
  3684. Sqrt3MinusOne = XMVectorSplatY(ATanConstants2);
  3685. TwoMinusSqrt3 = XMVectorSplatZ(ATanConstants2);
  3686. Epsilon = XMVectorSplatW(ATanConstants2);
  3687. HalfPi = XMVectorSplatX(ATanConstants3);
  3688. OneThirdPi = XMVectorSplatY(ATanConstants3);
  3689. OneSixthPi = XMVectorSplatZ(ATanConstants3);
  3690. MaxV = XMVectorSplatW(ATanConstants3);
  3691. VF = XMVectorAbs(V);
  3692. ReciprocalF = XMVectorReciprocal(VF);
  3693. F_GT_One = XMVectorGreater(VF, g_XMOne.v);
  3694. VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
  3695. Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
  3696. Angle2 = XMVectorSelect(OneSixthPi, OneThirdPi, F_GT_One);
  3697. F_GT_TwoMinusSqrt3 = XMVectorGreater(VF, TwoMinusSqrt3);
  3698. FA = XMVectorMultiplyAdd(Sqrt3MinusOne, VF, VF);
  3699. FA = XMVectorAdd(FA, g_XMNegativeOne.v);
  3700. FB = XMVectorAdd(VF, Sqrt3);
  3701. FB = XMVectorReciprocal(FB);
  3702. FA = XMVectorMultiply(FA, FB);
  3703. VF = XMVectorSelect(VF, FA, F_GT_TwoMinusSqrt3);
  3704. Angle1 = XMVectorSelect(Angle1, Angle2, F_GT_TwoMinusSqrt3);
  3705. AbsF = XMVectorAbs(VF);
  3706. AbsF_LT_Epsilon = XMVectorLess(AbsF, Epsilon);
  3707. G = XMVectorMultiply(VF, VF);
  3708. D = XMVectorAdd(G, Q3);
  3709. D = XMVectorMultiplyAdd(D, G, Q2);
  3710. D = XMVectorMultiplyAdd(D, G, Q1);
  3711. D = XMVectorMultiplyAdd(D, G, Q0);
  3712. D = XMVectorReciprocal(D);
  3713. N = XMVectorMultiplyAdd(P3, G, P2);
  3714. N = XMVectorMultiplyAdd(N, G, P1);
  3715. N = XMVectorMultiplyAdd(N, G, P0);
  3716. N = XMVectorMultiply(N, G);
  3717. Result = XMVectorMultiply(N, D);
  3718. Result = XMVectorMultiplyAdd(Result, VF, VF);
  3719. Result = XMVectorSelect(Result, VF, AbsF_LT_Epsilon);
  3720. NegativeResult = XMVectorNegate(Result);
  3721. Result = XMVectorSelect(Result, NegativeResult, F_GT_One);
  3722. Result = XMVectorAdd(Result, Angle1);
  3723. V_LT_Zero = XMVectorLess(V, Zero);
  3724. NegativeResult = XMVectorNegate(Result);
  3725. Result = XMVectorSelect(Result, NegativeResult, V_LT_Zero);
  3726. MinV = XMVectorNegate(MaxV);
  3727. NegativeHalfPi = XMVectorNegate(HalfPi);
  3728. V_GT_MaxV = XMVectorGreater(V, MaxV);
  3729. V_LT_MinV = XMVectorLess(V, MinV);
  3730. Result = XMVectorSelect(Result, g_XMHalfPi.v, V_GT_MaxV);
  3731. Result = XMVectorSelect(Result, NegativeHalfPi, V_LT_MinV);
  3732. return Result;
  3733. #elif defined(_XM_SSE_INTRINSICS_)
  3734. static CONST XMVECTORF32 ATanConstants0 = {-1.3688768894e+1f, -2.0505855195e+1f, -8.4946240351f, -8.3758299368e-1f};
  3735. static CONST XMVECTORF32 ATanConstants1 = {4.1066306682e+1f, 8.6157349597e+1f, 5.9578436142e+1f, 1.5024001160e+1f};
  3736. static CONST XMVECTORF32 ATanConstants2 = {1.732050808f, 7.320508076e-1f, 2.679491924e-1f, 0.000244140625f}; // <sqrt(3), sqrt(3) - 1, 2 - sqrt(3), Epsilon>
  3737. static CONST XMVECTORF32 ATanConstants3 = {XM_PIDIV2, XM_PI / 3.0f, XM_PI / 6.0f, 8.507059173e+37f}; // <Pi / 2, Pi / 3, Pi / 6, MaxV>
  3738. XMVECTOR VF = XMVectorAbs(V);
  3739. XMVECTOR F_GT_One = _mm_cmpgt_ps(VF,g_XMOne);
  3740. XMVECTOR ReciprocalF = XMVectorReciprocal(VF);
  3741. VF = XMVectorSelect(VF, ReciprocalF, F_GT_One);
  3742. XMVECTOR Zero = XMVectorZero();
  3743. XMVECTOR HalfPi = _mm_load_ps1(&ATanConstants3.f[0]);
  3744. XMVECTOR Angle1 = XMVectorSelect(Zero, HalfPi, F_GT_One);
  3745. // Pi/3
  3746. XMVECTOR vConstants = _mm_load_ps1(&ATanConstants3.f[1]);
  3747. // Pi/6
  3748. XMVECTOR Angle2 = _mm_load_ps1(&ATanConstants3.f[2]);
  3749. Angle2 = XMVectorSelect(Angle2, vConstants, F_GT_One);
3750. // sqrt(3)-1
  3751. XMVECTOR FA = _mm_load_ps1(&ATanConstants2.f[1]);
  3752. FA = _mm_mul_ps(FA,VF);
  3753. FA = _mm_add_ps(FA,VF);
  3754. FA = _mm_add_ps(FA,g_XMNegativeOne);
  3755. // sqrt(3)
  3756. vConstants = _mm_load_ps1(&ATanConstants2.f[0]);
  3757. vConstants = _mm_add_ps(vConstants,VF);
  3758. FA = _mm_div_ps(FA,vConstants);
  3759. // 2-sqrt(3)
  3760. vConstants = _mm_load_ps1(&ATanConstants2.f[2]);
  3761. // >2-sqrt(3)?
  3762. vConstants = _mm_cmpgt_ps(VF,vConstants);
  3763. VF = XMVectorSelect(VF, FA, vConstants);
  3764. Angle1 = XMVectorSelect(Angle1, Angle2, vConstants);
  3765. XMVECTOR AbsF = XMVectorAbs(VF);
  3766. XMVECTOR G = _mm_mul_ps(VF,VF);
  3767. XMVECTOR D = _mm_load_ps1(&ATanConstants1.f[3]);
  3768. D = _mm_add_ps(D,G);
  3769. D = _mm_mul_ps(D,G);
  3770. vConstants = _mm_load_ps1(&ATanConstants1.f[2]);
  3771. D = _mm_add_ps(D,vConstants);
  3772. D = _mm_mul_ps(D,G);
  3773. vConstants = _mm_load_ps1(&ATanConstants1.f[1]);
  3774. D = _mm_add_ps(D,vConstants);
  3775. D = _mm_mul_ps(D,G);
  3776. vConstants = _mm_load_ps1(&ATanConstants1.f[0]);
  3777. D = _mm_add_ps(D,vConstants);
  3778. XMVECTOR N = _mm_load_ps1(&ATanConstants0.f[3]);
  3779. N = _mm_mul_ps(N,G);
  3780. vConstants = _mm_load_ps1(&ATanConstants0.f[2]);
  3781. N = _mm_add_ps(N,vConstants);
  3782. N = _mm_mul_ps(N,G);
  3783. vConstants = _mm_load_ps1(&ATanConstants0.f[1]);
  3784. N = _mm_add_ps(N,vConstants);
  3785. N = _mm_mul_ps(N,G);
  3786. vConstants = _mm_load_ps1(&ATanConstants0.f[0]);
  3787. N = _mm_add_ps(N,vConstants);
  3788. N = _mm_mul_ps(N,G);
  3789. XMVECTOR Result = _mm_div_ps(N,D);
  3790. Result = _mm_mul_ps(Result,VF);
  3791. Result = _mm_add_ps(Result,VF);
  3792. // Epsilon
  3793. vConstants = _mm_load_ps1(&ATanConstants2.f[3]);
  3794. vConstants = _mm_cmpge_ps(vConstants,AbsF);
  3795. Result = XMVectorSelect(Result,VF,vConstants);
  3796. XMVECTOR NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
  3797. Result = XMVectorSelect(Result,NegativeResult,F_GT_One);
  3798. Result = _mm_add_ps(Result,Angle1);
  3799. Zero = _mm_cmpge_ps(Zero,V);
  3800. NegativeResult = _mm_mul_ps(Result,g_XMNegativeOne);
  3801. Result = XMVectorSelect(Result,NegativeResult,Zero);
  3802. XMVECTOR MaxV = _mm_load_ps1(&ATanConstants3.f[3]);
  3803. XMVECTOR MinV = _mm_mul_ps(MaxV,g_XMNegativeOne);
  3804. // Negate HalfPi
  3805. HalfPi = _mm_mul_ps(HalfPi,g_XMNegativeOne);
  3806. MaxV = _mm_cmple_ps(MaxV,V);
  3807. MinV = _mm_cmpge_ps(MinV,V);
  3808. Result = XMVectorSelect(Result,g_XMHalfPi,MaxV);
  3809. // HalfPi = -HalfPi
  3810. Result = XMVectorSelect(Result,HalfPi,MinV);
  3811. return Result;
  3812. #else // _XM_VMX128_INTRINSICS_
  3813. #endif // _XM_VMX128_INTRINSICS_
  3814. }
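// The two reductions above are the standard Cody & Waite ones:
// |x| > 1       : atan(x) = Pi/2 - atan(1/x)
// x > 2-sqrt(3) : atan(x) = Pi/6 + atan((sqrt(3)*x - 1) / (x + sqrt(3)))
// FA is exactly (sqrt(3)*x - 1) / (x + sqrt(3)), built as
// ((sqrt(3)-1)*x + x - 1) / (x + sqrt(3)) to limit rounding error.
// A rational minimax N(g)/D(g) in g = x^2 then handles the reduced
// argument, and Angle1 restores the Pi/2 or Pi/6 offset.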
  3815. //------------------------------------------------------------------------------
  3816. XMINLINE XMVECTOR XMVectorATan2
  3817. (
  3818. FXMVECTOR Y,
  3819. FXMVECTOR X
  3820. )
  3821. {
  3822. #if defined(_XM_NO_INTRINSICS_)
  3823. // Return the inverse tangent of Y / X in the range of -Pi to Pi with the following exceptions:
  3824. // Y == 0 and X is Negative -> Pi with the sign of Y
3825. // Y == 0 and X is Positive -> 0 with the sign of Y
3826. // Y != 0 and X == 0 -> Pi / 2 with the sign of Y
3827. // Y != 0 and X is Negative -> atan(Y/X) + (Pi with the sign of Y)
  3828. // X == -Infinity and Finite Y -> Pi with the sign of Y
  3829. // X == +Infinity and Finite Y -> 0 with the sign of Y
  3830. // Y == Infinity and X is Finite -> Pi / 2 with the sign of Y
  3831. // Y == Infinity and X == -Infinity -> 3Pi / 4 with the sign of Y
  3832. // Y == Infinity and X == +Infinity -> Pi / 4 with the sign of Y
  3833. XMVECTOR Reciprocal;
  3834. XMVECTOR V;
  3835. XMVECTOR YSign;
  3836. XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
  3837. XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
  3838. XMVECTOR ATanResultValid;
  3839. XMVECTOR R0, R1, R2, R3, R4, R5;
  3840. XMVECTOR Zero;
  3841. XMVECTOR Result;
  3842. static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
  3843. Zero = XMVectorZero();
  3844. ATanResultValid = XMVectorTrueInt();
  3845. Pi = XMVectorSplatX(ATan2Constants);
  3846. PiOverTwo = XMVectorSplatY(ATan2Constants);
  3847. PiOverFour = XMVectorSplatZ(ATan2Constants);
  3848. ThreePiOverFour = XMVectorSplatW(ATan2Constants);
  3849. YEqualsZero = XMVectorEqual(Y, Zero);
  3850. XEqualsZero = XMVectorEqual(X, Zero);
  3851. XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
  3852. XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
  3853. YEqualsInfinity = XMVectorIsInfinite(Y);
  3854. XEqualsInfinity = XMVectorIsInfinite(X);
  3855. YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
  3856. Pi = XMVectorOrInt(Pi, YSign);
  3857. PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
  3858. PiOverFour = XMVectorOrInt(PiOverFour, YSign);
  3859. ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
  3860. R1 = XMVectorSelect(Pi, YSign, XIsPositive);
  3861. R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
  3862. R3 = XMVectorSelect(R2, R1, YEqualsZero);
  3863. R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
  3864. R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
  3865. Result = XMVectorSelect(R3, R5, YEqualsInfinity);
  3866. ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
  3867. Reciprocal = XMVectorReciprocal(X);
  3868. V = XMVectorMultiply(Y, Reciprocal);
  3869. R0 = XMVectorATan(V);
  3870. R1 = XMVectorSelect( Pi, Zero, XIsPositive );
  3871. R2 = XMVectorAdd(R0, R1);
  3872. Result = XMVectorSelect(Result, R2, ATanResultValid);
  3873. return Result;
  3874. #elif defined(_XM_SSE_INTRINSICS_)
  3875. static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
3876. // Mask where Y is +/-infinity
3877. XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
3878. // Isolate the sign bit of Y (Y & 0x80000000)
  3879. XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
  3880. // Get the sign bits of X
  3881. XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
  3882. // Change them to masks
  3883. XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
  3884. // Get Pi
  3885. XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
  3886. // Copy the sign of Y
  3887. Pi = _mm_or_ps(Pi,YSign);
  3888. XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
  3889. // Mask for X==0
  3890. XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
3891. // Get Pi/2 with the sign of Y
  3892. XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
  3893. PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
  3894. XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
  3895. // Mask for Y==0
  3896. vConstants = _mm_cmpeq_ps(Y,g_XMZero);
  3897. R2 = XMVectorSelect(R2,R1,vConstants);
  3898. // Get Pi/4 with sign of Y
  3899. XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
  3900. PiOverFour = _mm_or_ps(PiOverFour,YSign);
  3901. // Get (Pi*3)/4 with sign of Y
  3902. XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
  3903. ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
  3904. vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
  3905. XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
  3906. vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
  3907. XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
  3908. vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
3909. // At this point, any entry still flagged 0xFFFFFFFF will get the
3910. // result from XMVectorATan(); the rest keep their failsafe values
3911. vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
3912. // Any entries not 0xFFFFFFFF are considered precalculated
  3913. XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
  3914. // Let's do the ATan2 function
  3915. vConstants = _mm_div_ps(Y,X);
  3916. vConstants = XMVectorATan(vConstants);
  3917. // Discard entries that have been declared void
  3918. XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive );
  3919. vConstants = _mm_add_ps( vConstants, R3 );
  3920. vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
  3921. return vResult;
  3922. #else // _XM_VMX128_INTRINSICS_
  3923. #endif // _XM_VMX128_INTRINSICS_
  3924. }
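// Usage sketch (illustrative): full-quadrant angle of a 2D direction,
// with Y passed first and X second, as in the CRT's atan2().
// XMVECTOR vAngle = XMVectorATan2( XMVectorReplicate( 1.0f ),    // Y
//                                  XMVectorReplicate( -1.0f ) ); // X
// // each component ~= 3.0f * XM_PIDIV4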
  3925. //------------------------------------------------------------------------------
  3926. XMFINLINE XMVECTOR XMVectorSinEst
  3927. (
  3928. FXMVECTOR V
  3929. )
  3930. {
  3931. #if defined(_XM_NO_INTRINSICS_)
  3932. XMVECTOR V2, V3, V5, V7;
  3933. XMVECTOR S1, S2, S3;
  3934. XMVECTOR Result;
  3935. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
  3936. V2 = XMVectorMultiply(V, V);
  3937. V3 = XMVectorMultiply(V2, V);
  3938. V5 = XMVectorMultiply(V3, V2);
  3939. V7 = XMVectorMultiply(V5, V2);
  3940. S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
  3941. S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
  3942. S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
  3943. Result = XMVectorMultiplyAdd(S1, V3, V);
  3944. Result = XMVectorMultiplyAdd(S2, V5, Result);
  3945. Result = XMVectorMultiplyAdd(S3, V7, Result);
  3946. return Result;
  3947. #elif defined(_XM_SSE_INTRINSICS_)
  3948. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
  3949. XMVECTOR V2 = _mm_mul_ps(V,V);
  3950. XMVECTOR V3 = _mm_mul_ps(V2,V);
  3951. XMVECTOR vResult = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
  3952. vResult = _mm_mul_ps(vResult,V3);
  3953. vResult = _mm_add_ps(vResult,V);
  3954. XMVECTOR vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
  3955. // V^5
  3956. V3 = _mm_mul_ps(V3,V2);
  3957. vConstants = _mm_mul_ps(vConstants,V3);
  3958. vResult = _mm_add_ps(vResult,vConstants);
  3959. vConstants = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
  3960. // V^7
  3961. V3 = _mm_mul_ps(V3,V2);
  3962. vConstants = _mm_mul_ps(vConstants,V3);
  3963. vResult = _mm_add_ps(vResult,vConstants);
  3964. return vResult;
  3965. #else // _XM_VMX128_INTRINSICS_
  3966. #endif // _XM_VMX128_INTRINSICS_
  3967. }
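// The *Est variants trade accuracy for speed: the series stops at V^7
// and, unlike XMVectorSin, no XMVectorModAngles wrapping is performed,
// so the caller must supply angles already in [-Pi, Pi). A sketch:
// XMVECTOR vQuick = XMVectorSinEst( XMVectorReplicate( XM_PIDIV4 ) );
// // each component roughly 0.7071f, at reduced precision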
  3968. //------------------------------------------------------------------------------
  3969. XMFINLINE XMVECTOR XMVectorCosEst
  3970. (
  3971. FXMVECTOR V
  3972. )
  3973. {
  3974. #if defined(_XM_NO_INTRINSICS_)
  3975. XMVECTOR V2, V4, V6;
  3976. XMVECTOR C0, C1, C2, C3;
  3977. XMVECTOR Result;
  3978. V2 = XMVectorMultiply(V, V);
  3979. V4 = XMVectorMultiply(V2, V2);
  3980. V6 = XMVectorMultiply(V4, V2);
  3981. C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
  3982. C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
  3983. C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
  3984. C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
  3985. Result = XMVectorMultiplyAdd(C1, V2, C0);
  3986. Result = XMVectorMultiplyAdd(C2, V4, Result);
  3987. Result = XMVectorMultiplyAdd(C3, V6, Result);
  3988. return Result;
  3989. #elif defined(_XM_SSE_INTRINSICS_)
  3990. // Get V^2
  3991. XMVECTOR V2 = _mm_mul_ps(V,V);
  3992. XMVECTOR vResult = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
  3993. vResult = _mm_mul_ps(vResult,V2);
  3994. XMVECTOR vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
  3995. vResult = _mm_add_ps(vResult,vConstants);
  3996. vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
  3997. // Get V^4
  3998. XMVECTOR V4 = _mm_mul_ps(V2, V2);
  3999. vConstants = _mm_mul_ps(vConstants,V4);
  4000. vResult = _mm_add_ps(vResult,vConstants);
  4001. vConstants = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
  4002. // It's really V^6
  4003. V4 = _mm_mul_ps(V4,V2);
  4004. vConstants = _mm_mul_ps(vConstants,V4);
  4005. vResult = _mm_add_ps(vResult,vConstants);
  4006. return vResult;
  4007. #else // _XM_VMX128_INTRINSICS_
  4008. #endif // _XM_VMX128_INTRINSICS_
  4009. }
  4010. //------------------------------------------------------------------------------
  4011. XMFINLINE VOID XMVectorSinCosEst
  4012. (
  4013. XMVECTOR* pSin,
  4014. XMVECTOR* pCos,
  4015. FXMVECTOR V
  4016. )
  4017. {
  4018. #if defined(_XM_NO_INTRINSICS_)
  4019. XMVECTOR V2, V3, V4, V5, V6, V7;
  4020. XMVECTOR S1, S2, S3;
  4021. XMVECTOR C0, C1, C2, C3;
  4022. XMVECTOR Sin, Cos;
  4023. XMASSERT(pSin);
  4024. XMASSERT(pCos);
  4025. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
  4026. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
  4027. V2 = XMVectorMultiply(V, V);
  4028. V3 = XMVectorMultiply(V2, V);
  4029. V4 = XMVectorMultiply(V2, V2);
  4030. V5 = XMVectorMultiply(V3, V2);
  4031. V6 = XMVectorMultiply(V3, V3);
  4032. V7 = XMVectorMultiply(V4, V3);
  4033. S1 = XMVectorSplatY(g_XMSinEstCoefficients.v);
  4034. S2 = XMVectorSplatZ(g_XMSinEstCoefficients.v);
  4035. S3 = XMVectorSplatW(g_XMSinEstCoefficients.v);
  4036. C0 = XMVectorSplatX(g_XMCosEstCoefficients.v);
  4037. C1 = XMVectorSplatY(g_XMCosEstCoefficients.v);
  4038. C2 = XMVectorSplatZ(g_XMCosEstCoefficients.v);
  4039. C3 = XMVectorSplatW(g_XMCosEstCoefficients.v);
  4040. Sin = XMVectorMultiplyAdd(S1, V3, V);
  4041. Sin = XMVectorMultiplyAdd(S2, V5, Sin);
  4042. Sin = XMVectorMultiplyAdd(S3, V7, Sin);
  4043. Cos = XMVectorMultiplyAdd(C1, V2, C0);
  4044. Cos = XMVectorMultiplyAdd(C2, V4, Cos);
  4045. Cos = XMVectorMultiplyAdd(C3, V6, Cos);
  4046. *pSin = Sin;
  4047. *pCos = Cos;
  4048. #elif defined(_XM_SSE_INTRINSICS_)
  4049. XMASSERT(pSin);
  4050. XMASSERT(pCos);
  4051. XMVECTOR V2, V3, V4, V5, V6, V7;
  4052. XMVECTOR S1, S2, S3;
  4053. XMVECTOR C0, C1, C2, C3;
  4054. XMVECTOR Sin, Cos;
  4055. // sin(V) ~= V - V^3 / 3! + V^5 / 5! - V^7 / 7! (for -PI <= V < PI)
  4056. // cos(V) ~= 1 - V^2 / 2! + V^4 / 4! - V^6 / 6! (for -PI <= V < PI)
  4057. V2 = XMVectorMultiply(V, V);
  4058. V3 = XMVectorMultiply(V2, V);
  4059. V4 = XMVectorMultiply(V2, V2);
  4060. V5 = XMVectorMultiply(V3, V2);
  4061. V6 = XMVectorMultiply(V3, V3);
  4062. V7 = XMVectorMultiply(V4, V3);
  4063. S1 = _mm_load_ps1(&g_XMSinEstCoefficients.f[1]);
  4064. S2 = _mm_load_ps1(&g_XMSinEstCoefficients.f[2]);
  4065. S3 = _mm_load_ps1(&g_XMSinEstCoefficients.f[3]);
  4066. C0 = _mm_load_ps1(&g_XMCosEstCoefficients.f[0]);
  4067. C1 = _mm_load_ps1(&g_XMCosEstCoefficients.f[1]);
  4068. C2 = _mm_load_ps1(&g_XMCosEstCoefficients.f[2]);
  4069. C3 = _mm_load_ps1(&g_XMCosEstCoefficients.f[3]);
  4070. Sin = XMVectorMultiplyAdd(S1, V3, V);
  4071. Sin = XMVectorMultiplyAdd(S2, V5, Sin);
  4072. Sin = XMVectorMultiplyAdd(S3, V7, Sin);
  4073. Cos = XMVectorMultiplyAdd(C1, V2, C0);
  4074. Cos = XMVectorMultiplyAdd(C2, V4, Cos);
  4075. Cos = XMVectorMultiplyAdd(C3, V6, Cos);
  4076. *pSin = Sin;
  4077. *pCos = Cos;
  4078. #else // _XM_VMX128_INTRINSICS_
  4079. #endif // _XM_VMX128_INTRINSICS_
  4080. }
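// Usage sketch (illustrative): as with XMVectorSinCos, both results come
// from one shared power table, but only through V^7 and V^6 and with no
// angle wrapping, so V must already lie in [-Pi, Pi).
// XMVECTOR vSinE, vCosE;
// XMVectorSinCosEst( &vSinE, &vCosE, XMVectorReplicate( 0.5f ) );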
  4081. //------------------------------------------------------------------------------
  4082. XMFINLINE XMVECTOR XMVectorTanEst
  4083. (
  4084. FXMVECTOR V
  4085. )
  4086. {
  4087. #if defined(_XM_NO_INTRINSICS_)
  4088. XMVECTOR V1, V2, V1T0, V1T1, V2T2;
  4089. XMVECTOR T0, T1, T2;
  4090. XMVECTOR N, D;
  4091. XMVECTOR OneOverPi;
  4092. XMVECTOR Result;
  4093. OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v);
  4094. V1 = XMVectorMultiply(V, OneOverPi);
  4095. V1 = XMVectorRound(V1);
  4096. V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V);
  4097. T0 = XMVectorSplatX(g_XMTanEstCoefficients.v);
  4098. T1 = XMVectorSplatY(g_XMTanEstCoefficients.v);
  4099. T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v);
  4100. V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
  4101. V2 = XMVectorMultiply(V1, V1);
  4102. V1T0 = XMVectorMultiply(V1, T0);
  4103. V1T1 = XMVectorMultiply(V1, T1);
  4104. D = XMVectorReciprocalEst(V2T2);
  4105. N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
  4106. Result = XMVectorMultiply(N, D);
  4107. return Result;
  4108. #elif defined(_XM_SSE_INTRINSICS_)
  4109. XMVECTOR V1, V2, V1T0, V1T1, V2T2;
  4110. XMVECTOR T0, T1, T2;
  4111. XMVECTOR N, D;
  4112. XMVECTOR OneOverPi;
  4113. XMVECTOR Result;
  4114. OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients);
  4115. V1 = XMVectorMultiply(V, OneOverPi);
  4116. V1 = XMVectorRound(V1);
  4117. V1 = XMVectorNegativeMultiplySubtract(g_XMPi, V1, V);
  4118. T0 = XMVectorSplatX(g_XMTanEstCoefficients);
  4119. T1 = XMVectorSplatY(g_XMTanEstCoefficients);
  4120. T2 = XMVectorSplatZ(g_XMTanEstCoefficients);
  4121. V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2);
  4122. V2 = XMVectorMultiply(V1, V1);
  4123. V1T0 = XMVectorMultiply(V1, T0);
  4124. V1T1 = XMVectorMultiply(V1, T1);
  4125. D = XMVectorReciprocalEst(V2T2);
  4126. N = XMVectorMultiplyAdd(V2, V1T1, V1T0);
  4127. Result = XMVectorMultiply(N, D);
  4128. return Result;
  4129. #else // _XM_VMX128_INTRINSICS_
  4130. #endif // _XM_VMX128_INTRINSICS_
  4131. }
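// TanEst reduces by whole multiples of Pi (tangent has period Pi) via
// V1 = V - Pi * round(V * OneOverPi), then applies the small rational
// form tan(x) ~= x * (T0 + T1 * x^2) / (T2 - x^2), taking the divide
// with XMVectorReciprocalEst, hence the Est suffix.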
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorSinHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
    V1 = XMVectorMultiplyAdd(V, Scale.v, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale.v, g_XMNegativeOne.v);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = XMVectorSubtract(E1, E2);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V,Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_sub_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
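//------------------------------------------------------------------------------
// Illustrative derivation (not part of the original library): the 1/ln(2)
// scale and the -1 bias suggest XMVectorExpEst is a base-2 exponential
// estimate; under that reading,
//
//     exp2(V*Scale - 1) = e^V / 2    and    exp2(-V*Scale - 1) = e^-V / 2,
//
// so the difference of the two is (e^V - e^-V) / 2 = sinh(V).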
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorCosHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
    V1 = XMVectorMultiplyAdd(V, Scale, g_XMNegativeOne.v);
    V2 = XMVectorNegativeMultiplySubtract(V, Scale, g_XMNegativeOne.v);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = XMVectorAdd(E1, E2);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1, V2;
    XMVECTOR E1, E2;
    XMVECTOR Result;
    static CONST XMVECTORF32 Scale = {1.442695040888963f, 1.442695040888963f, 1.442695040888963f, 1.442695040888963f}; // 1.0f / ln(2.0f)
    V1 = _mm_mul_ps(V,Scale);
    V1 = _mm_add_ps(V1,g_XMNegativeOne);
    V2 = _mm_mul_ps(V, Scale);
    V2 = _mm_sub_ps(g_XMNegativeOne,V2);
    E1 = XMVectorExpEst(V1);
    E2 = XMVectorExpEst(V2);
    Result = _mm_add_ps(E1, E2);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorTanHEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR E;
    XMVECTOR Result;
    static CONST XMVECTOR Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
    E = XMVectorMultiply(V, Scale);
    E = XMVectorExpEst(E);
    E = XMVectorMultiplyAdd(E, g_XMOneHalf.v, g_XMOneHalf.v);
    E = XMVectorReciprocalEst(E);
    Result = XMVectorSubtract(g_XMOne.v, E);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 Scale = {2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f}; // 2.0f / ln(2.0f)
    XMVECTOR E = _mm_mul_ps(V, Scale);
    E = XMVectorExpEst(E);
    E = _mm_mul_ps(E,g_XMOneHalf);
    E = _mm_add_ps(E,g_XMOneHalf);
    E = XMVectorReciprocalEst(E);
    E = _mm_sub_ps(g_XMOne, E);
    return E;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
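//------------------------------------------------------------------------------
// Illustrative derivation (not part of the original library): with
// Scale = 2/ln(2), E starts out as exp2(V*Scale) = e^(2V). Then
//
//     E*0.5 + 0.5           = (e^(2V) + 1) / 2
//     1 - 1/(E*0.5 + 0.5)   = 1 - 2/(e^(2V) + 1) = tanh(V),
//
// which is what both code paths return (with estimated reciprocals).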
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorASinEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps;
    XMVECTOR Result;
    AbsV = XMVectorAbs(V);
    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
    D = XMVectorSubtract(OnePlusEps, AbsV);
    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);
    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);
    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // Since this is an estimate, rsqrt is okay
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 retaining sign
    XMVECTOR V2 = _mm_mul_ps(V,vAbsV);
    D = _mm_mul_ps(D,vAbsV);
    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
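//------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original library; names are
// hypothetical). The approximation expects inputs in [-1, 1]; out-of-range
// values make D = OnePlusEps - abs(V) negative, so clamp first:
//
//     XMVECTOR vSine = XMVectorClamp(vValue, g_XMNegativeOne.v, g_XMOne.v);
//     XMVECTOR vAngle = XMVectorASinEst(vSine); // radians, about [-pi/2, pi/2]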
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorACosEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR AbsV, V2, VD, VC0, V2C3;
    XMVECTOR C0, C1, C2, C3;
    XMVECTOR D, Rsq, SqrtD;
    XMVECTOR OnePlusEps, HalfPi;
    XMVECTOR Result;
    // acos(V) = PI / 2 - asin(V)
    AbsV = XMVectorAbs(V);
    OnePlusEps = XMVectorSplatX(g_XMASinEstConstants.v);
    HalfPi = XMVectorSplatY(g_XMASinEstConstants.v);
    C0 = XMVectorSplatX(g_XMASinEstCoefficients.v);
    C1 = XMVectorSplatY(g_XMASinEstCoefficients.v);
    C2 = XMVectorSplatZ(g_XMASinEstCoefficients.v);
    C3 = XMVectorSplatW(g_XMASinEstCoefficients.v);
    D = XMVectorSubtract(OnePlusEps, AbsV);
    Rsq = XMVectorReciprocalSqrtEst(D);
    SqrtD = XMVectorMultiply(D, Rsq);
    V2 = XMVectorMultiply(V, AbsV);
    V2C3 = XMVectorMultiply(V2, C3);
    VD = XMVectorMultiply(D, AbsV);
    VC0 = XMVectorMultiply(V, C0);
    Result = XMVectorMultiply(V, C1);
    Result = XMVectorMultiplyAdd(V2, C2, Result);
    Result = XMVectorMultiplyAdd(V2C3, VD, Result);
    Result = XMVectorMultiplyAdd(VC0, SqrtD, Result);
    Result = XMVectorSubtract(HalfPi, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // acos(V) = PI / 2 - asin(V)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    // Calc D
    XMVECTOR D = _mm_load_ps1(&g_XMASinEstConstants.f[0]);
    D = _mm_sub_ps(D,vAbsV);
    // SqrtD = sqrt(OnePlusEps - abs(V)), estimated via rsqrt
    XMVECTOR vConstants = _mm_rsqrt_ps(D);
    XMVECTOR SqrtD = _mm_mul_ps(D,vConstants);
    // V2 = V^2 while retaining sign
    XMVECTOR V2 = _mm_mul_ps(V, vAbsV);
    // Drop vAbsV here. D = (Const-abs(V))*abs(V)
    D = _mm_mul_ps(D, vAbsV);
    XMVECTOR vResult = _mm_load_ps1(&g_XMASinEstCoefficients.f[1]);
    vResult = _mm_mul_ps(vResult,V);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[2]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[3]);
    vConstants = _mm_mul_ps(vConstants,V2);
    vConstants = _mm_mul_ps(vConstants,D);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinEstCoefficients.f[0]);
    vConstants = _mm_mul_ps(vConstants,V);
    vConstants = _mm_mul_ps(vConstants,SqrtD);
    vResult = _mm_add_ps(vResult,vConstants);
    vConstants = _mm_load_ps1(&g_XMASinEstConstants.f[1]);
    vResult = _mm_sub_ps(vConstants,vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorATanEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR AbsV, V2S2, N, D;
    XMVECTOR S0, S1, S2;
    XMVECTOR HalfPi;
    XMVECTOR Result;
    S0 = XMVectorSplatX(g_XMATanEstCoefficients.v);
    S1 = XMVectorSplatY(g_XMATanEstCoefficients.v);
    S2 = XMVectorSplatZ(g_XMATanEstCoefficients.v);
    HalfPi = XMVectorSplatW(g_XMATanEstCoefficients.v);
    AbsV = XMVectorAbs(V);
    V2S2 = XMVectorMultiplyAdd(V, V, S2);
    N = XMVectorMultiplyAdd(AbsV, HalfPi, S0);
    D = XMVectorMultiplyAdd(AbsV, S1, V2S2);
    N = XMVectorMultiply(N, V);
    D = XMVectorReciprocalEst(D);
    Result = XMVectorMultiply(N, D);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Get abs(V)
    XMVECTOR vAbsV = _mm_setzero_ps();
    vAbsV = _mm_sub_ps(vAbsV,V);
    vAbsV = _mm_max_ps(vAbsV,V);
    XMVECTOR vResult = _mm_load_ps1(&g_XMATanEstCoefficients.f[3]);
    vResult = _mm_mul_ps(vResult,vAbsV);
    XMVECTOR vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[0]);
    vResult = _mm_add_ps(vResult,vConstants);
    vResult = _mm_mul_ps(vResult,V);
    XMVECTOR D = _mm_mul_ps(V,V);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[2]);
    D = _mm_add_ps(D,vConstants);
    vConstants = _mm_load_ps1(&g_XMATanEstCoefficients.f[1]);
    vConstants = _mm_mul_ps(vConstants,vAbsV);
    D = _mm_add_ps(D,vConstants);
    vResult = _mm_div_ps(vResult,D);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
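//------------------------------------------------------------------------------
// Illustrative note (not part of the original library): both paths above
// evaluate atan as the same rational approximation,
//
//     atan(V) ~= V*(S0 + HalfPi*|V|) / (S2 + S1*|V| + V^2),
//
// where S0..S2 and HalfPi come from g_XMATanEstCoefficients; the SSE path
// simply trades XMVectorReciprocalEst for a true divide.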
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorATan2Est
(
    FXMVECTOR Y,
    FXMVECTOR X
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Reciprocal;
    XMVECTOR V;
    XMVECTOR YSign;
    XMVECTOR Pi, PiOverTwo, PiOverFour, ThreePiOverFour;
    XMVECTOR YEqualsZero, XEqualsZero, XIsPositive, YEqualsInfinity, XEqualsInfinity;
    XMVECTOR ATanResultValid;
    XMVECTOR R0, R1, R2, R3, R4, R5;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTOR ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    Zero = XMVectorZero();
    ATanResultValid = XMVectorTrueInt();
    Pi = XMVectorSplatX(ATan2Constants);
    PiOverTwo = XMVectorSplatY(ATan2Constants);
    PiOverFour = XMVectorSplatZ(ATan2Constants);
    ThreePiOverFour = XMVectorSplatW(ATan2Constants);
    YEqualsZero = XMVectorEqual(Y, Zero);
    XEqualsZero = XMVectorEqual(X, Zero);
    XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v);
    XIsPositive = XMVectorEqualInt(XIsPositive, Zero);
    YEqualsInfinity = XMVectorIsInfinite(Y);
    XEqualsInfinity = XMVectorIsInfinite(X);
    YSign = XMVectorAndInt(Y, g_XMNegativeZero.v);
    Pi = XMVectorOrInt(Pi, YSign);
    PiOverTwo = XMVectorOrInt(PiOverTwo, YSign);
    PiOverFour = XMVectorOrInt(PiOverFour, YSign);
    ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign);
    R1 = XMVectorSelect(Pi, YSign, XIsPositive);
    R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero);
    R3 = XMVectorSelect(R2, R1, YEqualsZero);
    R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity);
    Result = XMVectorSelect(R3, R5, YEqualsInfinity);
    ATanResultValid = XMVectorEqualInt(Result, ATanResultValid);
    Reciprocal = XMVectorReciprocalEst(X);
    V = XMVectorMultiply(Y, Reciprocal);
    R0 = XMVectorATanEst(V);
    R1 = XMVectorSelect( Pi, Zero, XIsPositive );
    R2 = XMVectorAdd(R0, R1);
    Result = XMVectorSelect(Result, R2, ATanResultValid);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    static CONST XMVECTORF32 ATan2Constants = {XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f};
    // Mask for Y == +/-infinity
    XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y);
    // Get the sign of Y (Y & 0x80000000)
    XMVECTOR YSign = _mm_and_ps(Y, g_XMNegativeZero);
    // Get the sign bits of X
    XMVECTOR XIsPositive = _mm_and_ps(X,g_XMNegativeZero);
    // Change them to masks
    XIsPositive = XMVectorEqualInt(XIsPositive,g_XMZero);
    // Get Pi
    XMVECTOR Pi = _mm_load_ps1(&ATan2Constants.f[0]);
    // Copy the sign of Y
    Pi = _mm_or_ps(Pi,YSign);
    XMVECTOR R1 = XMVectorSelect(Pi,YSign,XIsPositive);
    // Mask for X==0
    XMVECTOR vConstants = _mm_cmpeq_ps(X,g_XMZero);
    // Get Pi/2 with the sign of Y
    XMVECTOR PiOverTwo = _mm_load_ps1(&ATan2Constants.f[1]);
    PiOverTwo = _mm_or_ps(PiOverTwo,YSign);
    XMVECTOR R2 = XMVectorSelect(g_XMNegOneMask,PiOverTwo,vConstants);
    // Mask for Y==0
    vConstants = _mm_cmpeq_ps(Y,g_XMZero);
    R2 = XMVectorSelect(R2,R1,vConstants);
    // Get Pi/4 with sign of Y
    XMVECTOR PiOverFour = _mm_load_ps1(&ATan2Constants.f[2]);
    PiOverFour = _mm_or_ps(PiOverFour,YSign);
    // Get (Pi*3)/4 with sign of Y
    XMVECTOR ThreePiOverFour = _mm_load_ps1(&ATan2Constants.f[3]);
    ThreePiOverFour = _mm_or_ps(ThreePiOverFour,YSign);
    vConstants = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive);
    XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X);
    vConstants = XMVectorSelect(PiOverTwo,vConstants,XEqualsInfinity);
    XMVECTOR vResult = XMVectorSelect(R2,vConstants,YEqualsInfinity);
    vConstants = XMVectorSelect(R1,vResult,YEqualsInfinity);
    // At this point, any entry that's zero will get the result from
    // XMVectorATanEst(); otherwise, the precalculated failsafe value is used
    vResult = XMVectorSelect(vResult,vConstants,XEqualsInfinity);
    // Any entries not 0xFFFFFFFF are considered precalculated
    XMVECTOR ATanResultValid = XMVectorEqualInt(vResult,g_XMNegOneMask);
    // Let's do the ATan2 function
    XMVECTOR Reciprocal = _mm_rcp_ps(X);
    vConstants = _mm_mul_ps(Y, Reciprocal);
    vConstants = XMVectorATanEst(vConstants);
    // Discard entries that have been declared void
    XMVECTOR R3 = XMVectorSelect( Pi, g_XMZero, XIsPositive );
    vConstants = _mm_add_ps( vConstants, R3 );
    vResult = XMVectorSelect(vResult,vConstants,ATanResultValid);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
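//------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original library; names are
// hypothetical): converting a 2D direction vector into a heading angle.
//
//     XMVECTOR vAngle = XMVectorATan2Est(XMVectorSplatY(vDir),
//                                        XMVectorSplatX(vDir));
//     FLOAT fHeading = XMVectorGetX(vAngle); // radians in (-pi, pi]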
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorLerp
(
    FXMVECTOR V0,
    FXMVECTOR V1,
    FLOAT t
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Scale;
    XMVECTOR Length;
    XMVECTOR Result;
    // V0 + t * (V1 - V0)
    Scale = XMVectorReplicate(t);
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, Scale, V0);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L, S;
    XMVECTOR Result;
    L = _mm_sub_ps( V1, V0 );
    S = _mm_set_ps1( t );
    Result = _mm_mul_ps( L, S );
    return _mm_add_ps( Result, V0 );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
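//------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original library; names are
// hypothetical): blending between two positions with t in [0, 1]. Note that t
// is not clamped, so values outside [0, 1] extrapolate.
//
//     XMVECTOR vStart = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f);
//     XMVECTOR vEnd   = XMVectorSet(2.0f, 4.0f, 6.0f, 8.0f);
//     XMVECTOR vMid   = XMVectorLerp(vStart, vEnd, 0.5f); // (1, 2, 3, 4)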
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorLerpV
(
    FXMVECTOR V0,
    FXMVECTOR V1,
    FXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Length;
    XMVECTOR Result;
    // V0 + T * (V1 - V0)
    Length = XMVectorSubtract(V1, V0);
    Result = XMVectorMultiplyAdd(Length, T, V0);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Length;
    XMVECTOR Result;
    Length = _mm_sub_ps( V1, V0 );
    Result = _mm_mul_ps( Length, T );
    return _mm_add_ps( Result, V0 );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorHermite
(
    FXMVECTOR Position0,
    FXMVECTOR Tangent0,
    FXMVECTOR Position1,
    CXMVECTOR Tangent1,
    FLOAT t
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    FLOAT t2;
    FLOAT t3;
    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    t2 = t * t;
    t3 = t * t2;
    P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f);
    T0 = XMVectorReplicate(t3 - 2.0f * t2 + t);
    P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2);
    T1 = XMVectorReplicate(t3 - t2);
    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;
    XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f);
    XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t);
    XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2);
    XMVECTOR T1 = _mm_set_ps1(t3 - t2);
    XMVECTOR vResult = _mm_mul_ps(P0, Position0);
    XMVECTOR vTemp = _mm_mul_ps(T0, Tangent0);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(P1, Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_mul_ps(T1, Tangent1);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
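//------------------------------------------------------------------------------
// Illustrative check (not part of the original library): the four Hermite
// basis weights above satisfy the curve's boundary conditions. At t = 0 the
// weights are (1, 0, 0, 0), so the result is Position0; at t = 1 they are
// (0, 0, 1, 0), so the result is Position1. The two tangent weights control
// the derivative at each endpoint.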
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorHermiteV
(
    FXMVECTOR Position0,
    FXMVECTOR Tangent0,
    FXMVECTOR Position1,
    CXMVECTOR Tangent1,
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR P0;
    XMVECTOR T0;
    XMVECTOR P1;
    XMVECTOR T1;
    XMVECTOR Result;
    XMVECTOR T2;
    XMVECTOR T3;
    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    T2 = XMVectorMultiply(T, T);
    T3 = XMVectorMultiply(T , T2);
    P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - 3.0f * T2.vector4_f32[0] + 1.0f);
    T0 = XMVectorReplicate(T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]);
    P1 = XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]);
    T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]);
    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(T0, Tangent0, Result);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(T1, Tangent1, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 CatMulT2 = {-3.0f,-2.0f,3.0f,-1.0f};
    static const XMVECTORF32 CatMulT3 = {2.0f,1.0f,-2.0f,1.0f};
    // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 +
    //          (t^3 - 2 * t^2 + t) * Tangent0 +
    //          (-2 * t^3 + 3 * t^2) * Position1 +
    //          (t^3 - t^2) * Tangent1
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Mul by the constants against t^2
    T2 = _mm_mul_ps(T2,CatMulT2);
    // Mul by the constants against t^3
    T3 = _mm_mul_ps(T3,CatMulT3);
    // T3 now has the pre-result.
    T3 = _mm_add_ps(T3,T2);
    // I need to add t.y only
    T2 = _mm_and_ps(T,g_XMMaskY);
    T3 = _mm_add_ps(T3,T2);
    // Add 1.0f to x
    T3 = _mm_add_ps(T3,g_XMIdentityR0);
    // Now, I have the constants created
    // Mul the x constant to Position0
    XMVECTOR vResult = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,Position0);
    // Mul the y constant to Tangent0
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(1,1,1,1));
    T2 = _mm_mul_ps(T2,Tangent0);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the z constant to Position1
    T2 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(2,2,2,2));
    T2 = _mm_mul_ps(T2,Position1);
    vResult = _mm_add_ps(vResult,T2);
    // Mul the w constant to Tangent1
    T3 = _mm_shuffle_ps(T3,T3,_MM_SHUFFLE(3,3,3,3));
    T3 = _mm_mul_ps(T3,Tangent1);
    vResult = _mm_add_ps(vResult,T3);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorCatmullRom
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR Position3,
    FLOAT t
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR P0;
    XMVECTOR P1;
    XMVECTOR P2;
    XMVECTOR P3;
    XMVECTOR Result;
    FLOAT t2;
    FLOAT t3;
    // Result = ((-t^3 + 2 * t^2 - t) * Position0 +
    //           (3 * t^3 - 5 * t^2 + 2) * Position1 +
    //           (-3 * t^3 + 4 * t^2 + t) * Position2 +
    //           (t^3 - t^2) * Position3) * 0.5
    t2 = t * t;
    t3 = t * t2;
    P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f);
    P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    P3 = XMVectorReplicate((t3 - t2) * 0.5f);
    Result = XMVectorMultiply(P0, Position0);
    Result = XMVectorMultiplyAdd(P1, Position1, Result);
    Result = XMVectorMultiplyAdd(P2, Position2, Result);
    Result = XMVectorMultiplyAdd(P3, Position3, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    FLOAT t2 = t * t;
    FLOAT t3 = t * t2;
    XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f);
    XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f);
    XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f);
    XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f);
    P0 = _mm_mul_ps(P0, Position0);
    P1 = _mm_mul_ps(P1, Position1);
    P2 = _mm_mul_ps(P2, Position2);
    P3 = _mm_mul_ps(P3, Position3);
    P0 = _mm_add_ps(P0,P1);
    P2 = _mm_add_ps(P2,P3);
    P0 = _mm_add_ps(P0,P2);
    return P0;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
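//------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original library; names are
// hypothetical): a Catmull-Rom segment interpolates from Position1 (t = 0) to
// Position2 (t = 1), using Position0 and Position3 only to shape the tangents.
//
//     // Smoothly walk the segment between waypoints P1 and P2:
//     XMVECTOR vPos = XMVectorCatmullRom(P0, P1, P2, P3, fT); // fT in [0, 1]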
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorCatmullRomV
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR Position3,
    CXMVECTOR T
)
{
#if defined(_XM_NO_INTRINSICS_)
    float fx = T.vector4_f32[0];
    float fy = T.vector4_f32[1];
    float fz = T.vector4_f32[2];
    float fw = T.vector4_f32[3];
    XMVECTOR vResult = {
        0.5f*((-fx*fx*fx+2*fx*fx-fx)*Position0.vector4_f32[0]+
            (3*fx*fx*fx-5*fx*fx+2)*Position1.vector4_f32[0]+
            (-3*fx*fx*fx+4*fx*fx+fx)*Position2.vector4_f32[0]+
            (fx*fx*fx-fx*fx)*Position3.vector4_f32[0]),
        0.5f*((-fy*fy*fy+2*fy*fy-fy)*Position0.vector4_f32[1]+
            (3*fy*fy*fy-5*fy*fy+2)*Position1.vector4_f32[1]+
            (-3*fy*fy*fy+4*fy*fy+fy)*Position2.vector4_f32[1]+
            (fy*fy*fy-fy*fy)*Position3.vector4_f32[1]),
        0.5f*((-fz*fz*fz+2*fz*fz-fz)*Position0.vector4_f32[2]+
            (3*fz*fz*fz-5*fz*fz+2)*Position1.vector4_f32[2]+
            (-3*fz*fz*fz+4*fz*fz+fz)*Position2.vector4_f32[2]+
            (fz*fz*fz-fz*fz)*Position3.vector4_f32[2]),
        0.5f*((-fw*fw*fw+2*fw*fw-fw)*Position0.vector4_f32[3]+
            (3*fw*fw*fw-5*fw*fw+2)*Position1.vector4_f32[3]+
            (-3*fw*fw*fw+4*fw*fw+fw)*Position2.vector4_f32[3]+
            (fw*fw*fw-fw*fw)*Position3.vector4_f32[3])
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Catmul2 = {2.0f,2.0f,2.0f,2.0f};
    static const XMVECTORF32 Catmul3 = {3.0f,3.0f,3.0f,3.0f};
    static const XMVECTORF32 Catmul4 = {4.0f,4.0f,4.0f,4.0f};
    static const XMVECTORF32 Catmul5 = {5.0f,5.0f,5.0f,5.0f};
    // Cache T^2 and T^3
    XMVECTOR T2 = _mm_mul_ps(T,T);
    XMVECTOR T3 = _mm_mul_ps(T,T2);
    // Perform the Position0 term
    XMVECTOR vResult = _mm_add_ps(T2,T2);
    vResult = _mm_sub_ps(vResult,T);
    vResult = _mm_sub_ps(vResult,T3);
    vResult = _mm_mul_ps(vResult,Position0);
    // Perform the Position1 term and add
    XMVECTOR vTemp = _mm_mul_ps(T3,Catmul3);
    XMVECTOR vTemp2 = _mm_mul_ps(T2,Catmul5);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,Catmul2);
    vTemp = _mm_mul_ps(vTemp,Position1);
    vResult = _mm_add_ps(vResult,vTemp);
    // Perform the Position2 term and add
    vTemp = _mm_mul_ps(T2,Catmul4);
    vTemp2 = _mm_mul_ps(T3,Catmul3);
    vTemp = _mm_sub_ps(vTemp,vTemp2);
    vTemp = _mm_add_ps(vTemp,T);
    vTemp = _mm_mul_ps(vTemp,Position2);
    vResult = _mm_add_ps(vResult,vTemp);
    // Position3 is the last term
    T3 = _mm_sub_ps(T3,T2);
    T3 = _mm_mul_ps(T3,Position3);
    vResult = _mm_add_ps(vResult,T3);
    // Multiply by 0.5f and exit
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorBaryCentric
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    FLOAT f,
    FLOAT g
)
{
#if defined(_XM_NO_INTRINSICS_)
    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR ScaleF;
    XMVECTOR ScaleG;
    XMVECTOR Result;
    P10 = XMVectorSubtract(Position1, Position0);
    ScaleF = XMVectorReplicate(f);
    P20 = XMVectorSubtract(Position2, Position0);
    ScaleG = XMVectorReplicate(g);
    Result = XMVectorMultiplyAdd(P10, ScaleF, Position0);
    Result = XMVectorMultiplyAdd(P20, ScaleG, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR SF = _mm_set_ps1(f);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    XMVECTOR SG = _mm_set_ps1(g);
    R1 = _mm_mul_ps(R1,SF);
    R2 = _mm_mul_ps(R2,SG);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
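//------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the original library; names are
// hypothetical): f and g are barycentric weights for Position1 and Position2
// relative to Position0. The result lies inside the triangle exactly when
// f >= 0, g >= 0, and f + g <= 1.
//
//     // Centroid of a triangle (f = g = 1/3):
//     XMVECTOR vCentroid = XMVectorBaryCentric(A, B, C, 1.0f/3.0f, 1.0f/3.0f);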
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVectorBaryCentricV
(
    FXMVECTOR Position0,
    FXMVECTOR Position1,
    FXMVECTOR Position2,
    CXMVECTOR F,
    CXMVECTOR G
)
{
#if defined(_XM_NO_INTRINSICS_)
    // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - Position0)
    XMVECTOR P10;
    XMVECTOR P20;
    XMVECTOR Result;
    P10 = XMVectorSubtract(Position1, Position0);
    P20 = XMVectorSubtract(Position2, Position0);
    Result = XMVectorMultiplyAdd(P10, F, Position0);
    Result = XMVectorMultiplyAdd(P20, G, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR R1 = _mm_sub_ps(Position1,Position0);
    XMVECTOR R2 = _mm_sub_ps(Position2,Position0);
    R1 = _mm_mul_ps(R1,F);
    R2 = _mm_mul_ps(R2,G);
    R1 = _mm_add_ps(R1,Position0);
    R1 = _mm_add_ps(R1,R2);
    return R1;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
/****************************************************************************
 *
 * 2D Vector
 *
 ****************************************************************************/
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector2EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2EqualIntR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector2EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy;
    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    return ((dx <= Epsilon.vector4_f32[0]) &&
        (dy <= Epsilon.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&3)!=3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector2EqualIntR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    // z and w are don't care
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector2GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest==3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector2GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&3;
    UINT CR = 0;
    if (iTest == 3)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterR(V2, V1));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&3)==3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector2GreaterOrEqualR(V2, V1));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return (((_mm_movemask_ps(vTemp1)&0x3)==0x3) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllInBounds(XMVector2InBoundsR(V, Bounds));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector2InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x and y in bounds? (z and w are don't care)
    return ((_mm_movemask_ps(vTemp1)&0x3)==0x3) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
        XMISNAN(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x or y are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector2IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
        XMISINF(V.vector4_f32[1]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x or y are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&3) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.vector4_f32[0] =
    Result.vector4_f32[1] =
    Result.vector4_f32[2] =
    Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1];
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V1,V2);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]);
    XMVECTOR vResult = {
        fCross,
        fCross,
        fCross,
        fCross
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Swap x and y
    XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(0,1,0,1));
    // Perform the muls
    vResult = _mm_mul_ps(vResult,V1);
    // Splat y
    XMVECTOR vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(1,1,1,1));
    // Sub the values
    vResult = _mm_sub_ss(vResult,vTemp);
    // Splat the cross product
    vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,0,0,0));
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
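//------------------------------------------------------------------------------
// Illustrative note (not part of the original library): the scalar
// V1.x*V2.y - V1.y*V2.x is the 2D analogue of the cross product (the z
// component of the 3D cross of the two vectors lifted into the plane). Its
// sign gives the winding: positive when V2 lies counterclockwise of V1,
// negative when clockwise, and zero when the vectors are parallel.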
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2LengthSq
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return XMVector2Dot(V, V);
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else
    return XMVector2Dot(V, V);
#endif
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_div_ss(g_XMOne,vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_sqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2LengthSq(V);
    Result = XMVectorSqrt(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// XMVector2NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.
XMFINLINE XMVECTOR XMVector2NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector2ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has y splatted
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    // x+y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_rsqrt_ss(vLengthSq);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    vLengthSq = _mm_mul_ps(vLengthSq,V);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;
    vResult = XMVector2Length( V );
    fLength = vResult.vector4_f32[0];
    // Prevent divide by zero
    if (fLength > 0) {
        fLength = 1.0f/fLength;
    }
    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x and y only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    XMVECTOR vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
    // Failsafe on zero (Or epsilon) length planes
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vZeroMask);
    // Select qnan or result based on infinite length
    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
    vResult = _mm_or_ps(vTemp1,vTemp2);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
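//------------------------------------------------------------------------------
// Illustrative note (not part of the original library): the SSE path above
// also defines the degenerate cases. A zero-length input divides 0/0 to NaN,
// but the cmpneq zero mask then clears it to a zero vector; an infinite-length
// input selects QNaN instead. A hypothetical caller can test for both:
//
//     XMVECTOR vN = XMVector2Normalize(vDir);
//     if (XMVector2Equal(vN, XMVectorZero())) { /* degenerate direction */ }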
  5525. //------------------------------------------------------------------------------
  5526. XMFINLINE XMVECTOR XMVector2ClampLength
  5527. (
  5528. FXMVECTOR V,
  5529. FLOAT LengthMin,
  5530. FLOAT LengthMax
  5531. )
  5532. {
  5533. #if defined(_XM_NO_INTRINSICS_)
  5534. XMVECTOR ClampMax;
  5535. XMVECTOR ClampMin;
  5536. ClampMax = XMVectorReplicate(LengthMax);
  5537. ClampMin = XMVectorReplicate(LengthMin);
  5538. return XMVector2ClampLengthV(V, ClampMin, ClampMax);
  5539. #elif defined(_XM_SSE_INTRINSICS_)
  5540. XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
  5541. XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
  5542. return XMVector2ClampLengthV(V, ClampMin, ClampMax);
  5543. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  5544. #endif // _XM_VMX128_INTRINSICS_
  5545. }
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2ClampLengthV
(
    FXMVECTOR V,
    FXMVECTOR LengthMin,
    FXMVECTOR LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR Zero;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;
    XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]));
    XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, XMVectorZero()));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
    LengthSq = XMVector2LengthSq(V);
    Zero = XMVectorZero();
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
    ZeroLength = XMVectorEqual(LengthSq, Zero);
    Length = XMVectorMultiply(LengthSq, RcpLength);
    Normal = XMVectorMultiply(V, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = XMVectorMultiply(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampLength;
    XMVECTOR LengthSq;
    XMVECTOR RcpLength;
    XMVECTOR Length;
    XMVECTOR Normal;
    XMVECTOR InfiniteLength;
    XMVECTOR ZeroLength;
    XMVECTOR Select;
    XMVECTOR ControlMax;
    XMVECTOR ControlMin;
    XMVECTOR Control;
    XMVECTOR Result;
    XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)));
    XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)));
    XMASSERT(XMVector2GreaterOrEqual(LengthMin, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, g_XMZero));
    XMASSERT(XMVector2GreaterOrEqual(LengthMax, LengthMin));
    LengthSq = XMVector2LengthSq(V);
    RcpLength = XMVectorReciprocalSqrt(LengthSq);
    InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
    ZeroLength = XMVectorEqual(LengthSq, g_XMZero);
    Length = _mm_mul_ps(LengthSq, RcpLength);
    Normal = _mm_mul_ps(V, RcpLength);
    Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
    Length = XMVectorSelect(LengthSq, Length, Select);
    Normal = XMVectorSelect(LengthSq, Normal, Select);
    ControlMax = XMVectorGreater(Length, LengthMax);
    ControlMin = XMVectorLess(Length, LengthMin);
    ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
    ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
    Result = _mm_mul_ps(Normal, ClampLength);
    // Preserve the original vector (with no precision loss) if the length falls within the given range
    Control = XMVectorEqualInt(ControlMax, ControlMin);
    Result = XMVectorSelect(Result, V, Control);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
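//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): clamping a 2D
// velocity to a speed range with XMVector2ClampLength. The helper name and
// sample values below are assumptions chosen for demonstration only.
XMFINLINE XMVECTOR XMExampleClampSpeed2D()
{
    // A vector of length 10 (6-8-10 triangle) clamped into [1, 5]: the
    // direction is preserved and the length becomes 5, yielding (3, 4).
    XMVECTOR Velocity = XMVectorSet(6.0f, 8.0f, 0.0f, 0.0f);
    return XMVector2ClampLength(Velocity, 1.0f, 5.0f);
}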
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Reflect
(
    FXMVECTOR Incident,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector2Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector2Dot(Incident,Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
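//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): bouncing a 2D
// direction off a surface with XMVector2Reflect. The helper name and values
// are assumptions for demonstration only.
XMFINLINE XMVECTOR XMExampleBounce2D()
{
    // A ray travelling down-right hits a floor whose unit normal points up.
    XMVECTOR Incident = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);
    XMVECTOR Normal = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
    // dot(I,N) = -1, so Result = I - 2*(-1)*N = (1, 1): the Y component flips.
    return XMVector2Reflect(Incident, Normal);
}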
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Refract
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FLOAT RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector2RefractV(Incident, Normal, Index);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector2RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Return the refraction of a 2D vector
XMFINLINE XMVECTOR XMVector2RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    float IDotN;
    float RX,RY;
    XMVECTOR vResult;
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    IDotN = (Incident.vector4_f32[0]*Normal.vector4_f32[0])+(Incident.vector4_f32[1]*Normal.vector4_f32[1]);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    RY = 1.0f-(IDotN*IDotN);
    RX = 1.0f-(RY*RefractionIndex.vector4_f32[0]*RefractionIndex.vector4_f32[0]);
    RY = 1.0f-(RY*RefractionIndex.vector4_f32[1]*RefractionIndex.vector4_f32[1]);
    if (RX>=0.0f) {
        RX = (RefractionIndex.vector4_f32[0]*Incident.vector4_f32[0])-(Normal.vector4_f32[0]*((RefractionIndex.vector4_f32[0]*IDotN)+sqrtf(RX)));
    } else {
        RX = 0.0f;
    }
    if (RY>=0.0f) {
        RY = (RefractionIndex.vector4_f32[1]*Incident.vector4_f32[1])-(Normal.vector4_f32[1]*((RefractionIndex.vector4_f32[1]*IDotN)+sqrtf(RY)));
    } else {
        RY = 0.0f;
    }
    vResult.vector4_f32[0] = RX;
    vResult.vector4_f32[1] = RY;
    vResult.vector4_f32[2] = 0.0f;
    vResult.vector4_f32[3] = 0.0f;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    // Get the 2D Dot product of Incident-Normal
    XMVECTOR IDotN = _mm_mul_ps(Incident,Normal);
    XMVECTOR vTemp = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(1,1,1,1));
    IDotN = _mm_add_ss(IDotN,vTemp);
    IDotN = _mm_shuffle_ps(IDotN,IDotN,_MM_SHUFFLE(0,0,0,0));
    // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    vTemp = _mm_mul_ps(IDotN,IDotN);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_mul_ps(vTemp,RefractionIndex);
    vTemp = _mm_sub_ps(g_XMOne,vTemp);
    // If any term is <= 0 (total internal reflection), sqrt() would produce
    // a NaN, so punt those lanes to zero
    XMVECTOR vMask = _mm_cmpgt_ps(vTemp,g_XMZero);
    // R = RefractionIndex * IDotN + sqrt(R)
    vTemp = _mm_sqrt_ps(vTemp);
    XMVECTOR vResult = _mm_mul_ps(RefractionIndex,IDotN);
    vTemp = _mm_add_ps(vTemp,vResult);
    // Result = RefractionIndex * Incident - Normal * R
    vResult = _mm_mul_ps(RefractionIndex,Incident);
    vTemp = _mm_mul_ps(vTemp,Normal);
    vResult = _mm_sub_ps(vResult,vTemp);
    vResult = _mm_and_ps(vResult,vMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
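//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): bending a ray as it
// enters a denser medium with XMVector2Refract. The helper name and constants
// are assumptions; 1.0f/1.5f approximates an air-to-glass index ratio.
XMFINLINE XMVECTOR XMExampleRefractIntoGlass2D()
{
    // Unit-length incident direction 45 degrees off the surface normal.
    XMVECTOR Incident = XMVectorSet(0.7071f, -0.7071f, 0.0f, 0.0f);
    XMVECTOR Normal = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
    // The transmitted ray bends toward the normal; a zero result would mean
    // total internal reflection (impossible when entering a denser medium).
    return XMVector2Refract(Incident, Normal, 1.0f/1.5f);
}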
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result.vector4_f32[0] = -V.vector4_f32[1];
    Result.vector4_f32[1] = V.vector4_f32[0];
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,2,0,1));
    vResult = _mm_mul_ps(vResult,g_XMNegateX);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
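//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): XMVector2Orthogonal
// rotates a 2D vector 90 degrees, (x, y) -> (-y, x), so the result is always
// perpendicular to the input. Helper name and values are assumptions for
// demonstration only.
XMFINLINE XMVECTOR XMExampleOrthogonal2D()
{
    XMVECTOR V = XMVectorSet(3.0f, 4.0f, 0.0f, 0.0f);
    // Perp = (-4, 3); dot(V, Perp) = 3*-4 + 4*3 = 0, splatted by XMVector2Dot.
    XMVECTOR Perp = XMVector2Orthogonal(V);
    return XMVector2Dot(V, Perp);
}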
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2AngleBetweenNormalsEst
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;
    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2AngleBetweenNormals
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;
    Result = XMVector2Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector2Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2AngleBetweenVectors
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;
    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);
    Dot = XMVector2Dot(V1, V2);
    L1 = XMVectorMultiply(L1, L2);
    CosAngle = XMVectorMultiply(Dot, L1);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
    Result = XMVectorACos(CosAngle);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;
    L1 = XMVector2ReciprocalLength(V1);
    L2 = XMVector2ReciprocalLength(V2);
    Dot = XMVector2Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
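//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): unlike the
// *AngleBetweenNormals variants, XMVector2AngleBetweenVectors divides by both
// lengths first, so the inputs need not be normalized. Helper name and values
// are assumptions for demonstration only.
XMFINLINE XMVECTOR XMExampleAngle2D()
{
    // Perpendicular, non-unit vectors: every component of the result holds
    // the angle in radians, here pi/2 (~1.5708f).
    XMVECTOR V1 = XMVectorSet(2.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR V2 = XMVectorSet(0.0f, 5.0f, 0.0f, 0.0f);
    return XMVector2AngleBetweenVectors(V1, V2);
}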
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2LinePointDistance
(
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2,
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;
    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)
    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
    ReciprocalLengthSq = XMVector2LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
    PointProjectionScale = XMVector2Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
    Result = XMVector2Length(DistanceVector);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    // This path divides by the length squared directly rather than
    // multiplying by its reciprocal
    XMVECTOR LengthSq = XMVector2LengthSq(LineVector);
    XMVECTOR vResult = XMVector2Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,LengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector2Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
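//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): distance from a
// point to the infinite line through two points. Helper name and values are
// assumptions for demonstration only.
XMFINLINE XMVECTOR XMExampleLinePointDistance2D()
{
    // The X axis as a line, and a point 5 units above it: distance is 5.
    XMVECTOR LinePoint1 = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR LinePoint2 = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR Point = XMVectorSet(7.0f, 5.0f, 0.0f, 0.0f);
    return XMVector2LinePointDistance(LinePoint1, LinePoint2, Point);
}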
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2IntersectLine
(
    FXMVECTOR Line1Point1,
    FXMVECTOR Line1Point2,
    FXMVECTOR Line2Point1,
    CXMVECTOR Line2Point2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V1;
    XMVECTOR V2;
    XMVECTOR V3;
    XMVECTOR C1;
    XMVECTOR C2;
    XMVECTOR Result;
    CONST XMVECTOR Zero = XMVectorZero();
    V1 = XMVectorSubtract(Line1Point2, Line1Point1);
    V2 = XMVectorSubtract(Line2Point2, Line2Point1);
    V3 = XMVectorSubtract(Line1Point1, Line2Point1);
    C1 = XMVector2Cross(V1, V2);
    C2 = XMVector2Cross(V2, V3);
    if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v))
    {
        if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v))
        {
            // Coincident
            Result = g_XMInfinity.v;
        }
        else
        {
            // Parallel
            Result = g_XMQNaN.v;
        }
    }
    else
    {
        // Intersection point = Line1Point1 + V1 * (C2 / C1)
        XMVECTOR Scale;
        Scale = XMVectorReciprocal(C1);
        Scale = XMVectorMultiply(C2, Scale);
        Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1);
    }
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1);
    XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1);
    XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1);
    // Generate the cross products
    XMVECTOR C1 = XMVector2Cross(V1, V2);
    XMVECTOR C2 = XMVector2Cross(V2, V3);
    // If C1 is not close to epsilon, use the calculated value
    XMVECTOR vResultMask = _mm_setzero_ps();
    vResultMask = _mm_sub_ps(vResultMask,C1);
    vResultMask = _mm_max_ps(vResultMask,C1);
    // 0xFFFFFFFF if the calculated value is to be used
    vResultMask = _mm_cmpgt_ps(vResultMask,g_XMEpsilon);
    // If C1 is close to epsilon, which fail type is it? INFINITY or NAN?
    XMVECTOR vFailMask = _mm_setzero_ps();
    vFailMask = _mm_sub_ps(vFailMask,C2);
    vFailMask = _mm_max_ps(vFailMask,C2);
    vFailMask = _mm_cmple_ps(vFailMask,g_XMEpsilon);
    XMVECTOR vFail = _mm_and_ps(vFailMask,g_XMInfinity);
    vFailMask = _mm_andnot_ps(vFailMask,g_XMQNaN);
    // vFail is NAN or INF
    vFail = _mm_or_ps(vFail,vFailMask);
    // Intersection point = Line1Point1 + V1 * (C2 / C1)
    XMVECTOR vResult = _mm_div_ps(C2,C1);
    vResult = _mm_mul_ps(vResult,V1);
    vResult = _mm_add_ps(vResult,Line1Point1);
    // Use result, or failure value
    vResult = _mm_and_ps(vResult,vResultMask);
    vResultMask = _mm_andnot_ps(vResultMask,vFail);
    vResult = _mm_or_ps(vResult,vResultMask);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
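//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): intersecting two 2D
// lines. Coincident lines return infinity and parallel lines return QNaN, so
// callers should test the result before using it as a point. Helper name and
// values are assumptions for demonstration only.
XMFINLINE XMVECTOR XMExampleIntersectAxes2D()
{
    // The X axis and the vertical line x == 3 intersect at (3, 0).
    XMVECTOR XAxisP1 = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR XAxisP2 = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR VertP1 = XMVectorSet(3.0f, -1.0f, 0.0f, 0.0f);
    XMVECTOR VertP2 = XMVectorSet(3.0f, 1.0f, 0.0f, 0.0f);
    return XMVector2IntersectLine(XAxisP1, XAxisP2, VertP1, VertP2);
}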
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);
    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
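//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): XMVector2Transform
// computes x*M.r[0] + y*M.r[1] + M.r[3], i.e. it treats the input as
// (x, y, 0, 1), so the last row acts as a translation. Helper name and values
// are assumptions, and this assumes the 16-float XMMATRIX constructor
// declared in xnamath.h.
XMFINLINE XMVECTOR XMExampleTranslatePoint2D()
{
    // Row-major identity with a translation of (5, 6) in the last row.
    XMMATRIX M(1.0f, 0.0f, 0.0f, 0.0f,
               0.0f, 1.0f, 0.0f, 0.0f,
               0.0f, 0.0f, 1.0f, 0.0f,
               5.0f, 6.0f, 0.0f, 1.0f);
    XMVECTOR P = XMVectorSet(1.0f, 2.0f, 0.0f, 0.0f);
    // Result: (6, 8, 0, 1).
    return XMVector2Transform(P, M);
}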
//------------------------------------------------------------------------------
XMINLINE XMFLOAT4* XMVector2TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
        // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
        // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float*>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMINLINE XMFLOAT4* XMVector2TransformStreamNC
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector2TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);
    Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);
    Result = XMVectorMultiply(Result, InverseW);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
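//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): the difference from
// XMVector2Transform is the final divide by w, which makes
// XMVector2TransformCoord suitable for projective matrices. Helper name and
// values are assumptions; the 16-float XMMATRIX constructor from xnamath.h is
// assumed.
XMFINLINE XMVECTOR XMExampleProjectPoint2D()
{
    // A matrix whose last row sets w = 2: the pre-divide result for (2, 4)
    // is (2, 4, 0, 2), and the divide yields (1, 2, 0, 1).
    XMMATRIX M(1.0f, 0.0f, 0.0f, 0.0f,
               0.0f, 1.0f, 0.0f, 0.0f,
               0.0f, 0.0f, 1.0f, 0.0f,
               0.0f, 0.0f, 0.0f, 2.0f);
    XMVECTOR P = XMVectorSet(2.0f, 4.0f, 0.0f, 0.0f);
    return XMVector2TransformCoord(P, M);
}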
//------------------------------------------------------------------------------
XMINLINE XMFLOAT2* XMVector2TransformCoordStream
(
    XMFLOAT2* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
        // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
        // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
        Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);
        Result = XMVectorMultiply(Result, InverseW);
        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2*>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double *>(pOutputVector),reinterpret_cast<__m128d *>(&vResult)[0]);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector2TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);
    Result = XMVectorMultiply(Y, M.r[1]);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMINLINE XMFLOAT2* XMVector2TransformNormalStream
(
    XMFLOAT2* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT2* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat2((XMFLOAT2*)pInputVector);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
        // Y = XMVectorReplicate(((XMFLOAT2*)pInputVector)->y);
        // X = XMVectorReplicate(((XMFLOAT2*)pInputVector)->x);
        Result = XMVectorMultiply(Y, M.r[1]);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
        XMStoreFloat2((XMFLOAT2*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->x);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT2 *>(pInputVector)->y);
        vResult = _mm_mul_ps(vResult,M.r[1]);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_sd(reinterpret_cast<double*>(pOutputVector),reinterpret_cast<const __m128d *>(&vResult)[0]);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
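//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): the *Stream
// functions walk interleaved arrays by byte stride, so sizeof(element type)
// works for tightly packed arrays and a larger stride skips interleaved
// attributes. Helper name and the in-place, tightly packed layout are
// assumptions for demonstration only.
XMINLINE void XMExampleTransformNormalArray2D(XMFLOAT2* pNormals, UINT Count, CXMMATRIX M)
{
    // In-place transform of a tightly packed XMFLOAT2 array; each element is
    // fully read before its output slot is written, so aliasing is safe here.
    XMVector2TransformNormalStream(pNormals, sizeof(XMFLOAT2),
                                   pNormals, sizeof(XMFLOAT2),
                                   Count, M);
}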
/****************************************************************************
 *
 * 3D Vector
 *
 ****************************************************************************/
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector3EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp)&7;
    UINT CR = 0;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
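//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): the R-suffixed
// comparisons return a CR6-style mask instead of a BOOL, letting one call
// distinguish "all equal", "none equal" and mixed results via the
// XMComparison* macros. Helper name and values are assumptions for
// demonstration only.
XMFINLINE BOOL XMExampleAllComponentsEqual3()
{
    XMVECTOR V1 = XMVectorSet(1.0f, 2.0f, 3.0f, 0.0f);
    XMVECTOR V2 = XMVectorSet(1.0f, 2.0f, 3.0f, 99.0f);
    // w is ignored by the 3D comparison, so this reports TRUE.
    UINT CR = XMVector3EqualR(V1, V2);
    return XMComparisonAllTrue(CR);
}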
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3EqualIntR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector3EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_u32[0] == V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] == V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] == V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) &&
        (V1.vector4_u32[1] != V2.vector4_u32[1]) &&
        (V1.vector4_u32[2] != V2.vector4_u32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTemp = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7;
    UINT CR = 0;
    if (iTemp==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTemp)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz;
    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
        (dy <= Epsilon.vector4_f32[1]) &&
        (dz <= Epsilon.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    // w is don't care
    return (((_mm_movemask_ps(vTemp)&7)==0x7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
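//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): vectors produced by
// different instruction orderings should be compared with a per-component
// epsilon rather than exact equality. Helper name and tolerance are
// assumptions for demonstration only.
XMFINLINE BOOL XMExampleNearlyEqual3(FXMVECTOR V1, FXMVECTOR V2)
{
    // Tolerate up to 1/1024 of absolute error in each of x, y and z.
    XMVECTOR Epsilon = XMVectorReplicate(1.0f/1024.0f);
    return XMVector3NearEqual(V1, V2, Epsilon);
}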
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return (((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])&7)!=7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAnyFalse(XMVector3EqualIntR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector3GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] > V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] > V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] > V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] <= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] <= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V1, V2));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector3GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    UINT CR = 0;
    int iTest = _mm_movemask_ps(vTemp)&7;
    if (iTest==7)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterR(V2, V1));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return (((_mm_movemask_ps(vTemp)&7)==7) != 0);
#else // _XM_VMX128_INTRINSICS_
    return XMComparisonAllTrue(XMVector3GreaterOrEqualR(V2, V1));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return (((_mm_movemask_ps(vTemp1)&0x7)==0x7) != 0);
#else
    return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds));
#endif
}
//------------------------------------------------------------------------------
XMFINLINE UINT XMVector3InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // x,y and z in bounds? (w is don't care)
    return ((_mm_movemask_ps(vTemp1)&0x7)==0x7) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
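//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): XMVector3InBounds
// tests -Bounds <= V <= Bounds component-wise, which amounts to a symmetric
// box-containment test around the origin. Helper name and values are
// assumptions for demonstration only.
XMFINLINE BOOL XMExampleInsideUnitBox(FXMVECTOR Position)
{
    // TRUE when |x|, |y| and |z| are all at most 1; w is ignored.
    XMVECTOR Bounds = XMVectorReplicate(1.0f);
    return XMVector3InBounds(Position, Bounds);
}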
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3IsNaN
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISNAN(V.vector4_f32[0]) ||
        XMISNAN(V.vector4_f32[1]) ||
        XMISNAN(V.vector4_f32[2]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the exponent
    __m128i vTempInf = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMInfinity);
    // Mask off the mantissa
    __m128i vTempNan = _mm_and_si128(reinterpret_cast<const __m128i *>(&V)[0],g_XMQNaNTest);
    // Are any of the exponents == 0x7F800000?
    vTempInf = _mm_cmpeq_epi32(vTempInf,g_XMInfinity);
    // Are any of the mantissas zero? (SSE2 doesn't have a neq test)
    vTempNan = _mm_cmpeq_epi32(vTempNan,g_XMZero);
    // Perform a not on the NaN test to be true on NON-zero mantissas
    vTempNan = _mm_andnot_si128(vTempNan,vTempInf);
    // If x, y or z are NaN, the signs are true after the merge above
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTempNan)[0])&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE BOOL XMVector3IsInfinite
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (XMISINF(V.vector4_f32[0]) ||
        XMISINF(V.vector4_f32[1]) ||
        XMISINF(V.vector4_f32[2]));
#elif defined(_XM_SSE_INTRINSICS_)
    // Mask off the sign bit
    __m128 vTemp = _mm_and_ps(V,g_XMAbsMask);
    // Compare to infinity
    vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
    // If x,y or z are infinity, the signs are true.
    return ((_mm_movemask_ps(vTemp)&7) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3Dot
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2];
    XMVECTOR vResult = {
        fValue,
        fValue,
        fValue,
        fValue
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V1,V2);
    // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2]
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.vector4_f32[0] = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.vector4_f32[2]
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.vector4_f32[0] = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    return _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
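//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): like all XNA math
// dot products, XMVector3Dot splats the scalar result into every component so
// it can feed further vector math without a scalar round-trip. Helper name
// and values are assumptions for demonstration only.
XMFINLINE FLOAT XMExampleDot3()
{
    XMVECTOR V1 = XMVectorSet(1.0f, 2.0f, 3.0f, 0.0f);
    XMVECTOR V2 = XMVectorSet(4.0f, -5.0f, 6.0f, 0.0f);
    // 1*4 + 2*-5 + 3*6 = 12, replicated across x, y, z and w.
    return XMVectorGetX(XMVector3Dot(V1, V2));
}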
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3Cross
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR vResult = {
        (V1.vector4_f32[1] * V2.vector4_f32[2]) - (V1.vector4_f32[2] * V2.vector4_f32[1]),
        (V1.vector4_f32[2] * V2.vector4_f32[0]) - (V1.vector4_f32[0] * V2.vector4_f32[2]),
        (V1.vector4_f32[0] * V2.vector4_f32[1]) - (V1.vector4_f32[1] * V2.vector4_f32[0]),
        0.0f
    };
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // y1,z1,x1,w1
    XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(3,0,2,1));
    // z2,x2,y2,w2
    XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(3,1,0,2));
    // Perform the left operation
    XMVECTOR vResult = _mm_mul_ps(vTemp1,vTemp2);
    // z1,x1,y1,w1
    vTemp1 = _mm_shuffle_ps(vTemp1,vTemp1,_MM_SHUFFLE(3,0,2,1));
    // y2,z2,x2,w2
    vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(3,1,0,2));
    // Perform the right operation
    vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
    // Subtract the right from the left, and return the answer
    vResult = _mm_sub_ps(vResult,vTemp1);
    // Set w to zero
    return _mm_and_ps(vResult,g_XMMask3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
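//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): the cross product
// of two axes yields the third in a right-handed basis, which is a cheap
// sanity check for coordinate conventions. Helper name and values are
// assumptions for demonstration only.
XMFINLINE XMVECTOR XMExampleCrossAxes()
{
    XMVECTOR XAxis = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
    XMVECTOR YAxis = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
    // Returns (0, 0, 1, 0): the Z axis, with w forced to zero.
    return XMVector3Cross(XAxis, YAxis);
}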
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3LengthSq
(
    FXMVECTOR V
)
{
    return XMVector3Dot(V, V);
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3ReciprocalLengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal
    vLengthSq = _mm_rsqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3ReciprocalLength
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector3LengthSq(V);
    Result = XMVectorReciprocalSqrt(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vDot = _mm_sqrt_ps(vDot);
    // Invert the length to get the reciprocal
    vDot = _mm_div_ps(g_XMOne,vDot);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3LengthEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrtEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3Length
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector3LengthSq(V);
    Result = XMVectorSqrt(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    // vTemp has z and y
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,2,1,2));
    // x+z, y
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // y,y,y,y
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // x+z+y,??,??,??
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    // Splat the length squared
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Get the length
    vLengthSq = _mm_sqrt_ps(vLengthSq);
    return vLengthSq;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
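//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): XMVector3Length
// splats the length into all four components; fetch one lane when a scalar
// is needed. Helper name and values are assumptions for demonstration only.
XMFINLINE FLOAT XMExampleLength3()
{
    // sqrt(1 + 4 + 4) = 3; w does not participate in the 3D length.
    XMVECTOR V = XMVectorSet(1.0f, 2.0f, 2.0f, 100.0f);
    return XMVectorGetX(XMVector3Length(V));
}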
//------------------------------------------------------------------------------
// XMVector3NormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.
XMFINLINE XMVECTOR XMVector3NormalizeEst
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    Result = XMVector3ReciprocalLength(V);
    Result = XMVectorMultiply(V, Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(V,V);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = _mm_shuffle_ps(vDot,vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal
    vDot = _mm_rsqrt_ps(vDot);
    // Perform the normalization
    vDot = _mm_mul_ps(vDot,V);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3Normalize
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT fLength;
    XMVECTOR vResult;
    vResult = XMVector3Length( V );
    fLength = vResult.vector4_f32[0];
    // Prevent divide by zero
    if (fLength > 0) {
        fLength = 1.0f/fLength;
    }
    vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
    vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
    vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
    vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(V,V);
    XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp = _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Create zero with a single instruction
    XMVECTOR vZeroMask = _mm_setzero_ps();
    // Test for a divide by zero (Must be FP to detect -0.0)
    vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
    // Failsafe on zero (Or epsilon) length vectors
    // If the length is infinity, set the elements to zero
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide to perform the normalization
    vResult = _mm_div_ps(V,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vZeroMask);
    // Select qnan or result based on infinite length
    XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
    XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
    vResult = _mm_or_ps(vTemp1,vTemp2);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
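//------------------------------------------------------------------------------
// Illustrative sketch (not part of the original library): the SSE path above
// maps a zero vector to zero and an infinite-length vector to QNaN, so the
// edge cases can be tested explicitly. Helper name is an assumption for
// demonstration only.
XMFINLINE BOOL XMExampleNormalizeZeroIsZero()
{
    // Normalizing the zero vector returns zero rather than dividing by zero.
    XMVECTOR N = XMVector3Normalize(XMVectorZero());
    return XMVector3Equal(N, XMVectorZero());
}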
//------------------------------------------------------------------------------
XMFINLINE XMVECTOR XMVector3ClampLength
(
    FXMVECTOR V,
    FLOAT LengthMin,
    FLOAT LengthMax
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR ClampMax;
    XMVECTOR ClampMin;
    ClampMax = XMVectorReplicate(LengthMax);
    ClampMin = XMVectorReplicate(LengthMin);
    return XMVector3ClampLengthV(V, ClampMin, ClampMax);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
    XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
    return XMVector3ClampLengthV(V,ClampMin,ClampMax);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
  6974. //------------------------------------------------------------------------------
  6975. XMFINLINE XMVECTOR XMVector3ClampLengthV
  6976. (
  6977. FXMVECTOR V,
  6978. FXMVECTOR LengthMin,
  6979. FXMVECTOR LengthMax
  6980. )
  6981. {
  6982. #if defined(_XM_NO_INTRINSICS_)
  6983. XMVECTOR ClampLength;
  6984. XMVECTOR LengthSq;
  6985. XMVECTOR RcpLength;
  6986. XMVECTOR Length;
  6987. XMVECTOR Normal;
  6988. XMVECTOR Zero;
  6989. XMVECTOR InfiniteLength;
  6990. XMVECTOR ZeroLength;
  6991. XMVECTOR Select;
  6992. XMVECTOR ControlMax;
  6993. XMVECTOR ControlMin;
  6994. XMVECTOR Control;
  6995. XMVECTOR Result;
  6996. XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]));
  6997. XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]));
  6998. XMASSERT(XMVector3GreaterOrEqual(LengthMin, XMVectorZero()));
  6999. XMASSERT(XMVector3GreaterOrEqual(LengthMax, XMVectorZero()));
  7000. XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
  7001. LengthSq = XMVector3LengthSq(V);
  7002. Zero = XMVectorZero();
  7003. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  7004. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  7005. ZeroLength = XMVectorEqual(LengthSq, Zero);
  7006. Normal = XMVectorMultiply(V, RcpLength);
  7007. Length = XMVectorMultiply(LengthSq, RcpLength);
  7008. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  7009. Length = XMVectorSelect(LengthSq, Length, Select);
  7010. Normal = XMVectorSelect(LengthSq, Normal, Select);
  7011. ControlMax = XMVectorGreater(Length, LengthMax);
  7012. ControlMin = XMVectorLess(Length, LengthMin);
  7013. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  7014. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  7015. Result = XMVectorMultiply(Normal, ClampLength);
  7016. // Preserve the original vector (with no precision loss) if the length falls within the given range
  7017. Control = XMVectorEqualInt(ControlMax, ControlMin);
  7018. Result = XMVectorSelect(Result, V, Control);
  7019. return Result;
  7020. #elif defined(_XM_SSE_INTRINSICS_)
  7021. XMVECTOR ClampLength;
  7022. XMVECTOR LengthSq;
  7023. XMVECTOR RcpLength;
  7024. XMVECTOR Length;
  7025. XMVECTOR Normal;
  7026. XMVECTOR InfiniteLength;
  7027. XMVECTOR ZeroLength;
  7028. XMVECTOR Select;
  7029. XMVECTOR ControlMax;
  7030. XMVECTOR ControlMin;
  7031. XMVECTOR Control;
  7032. XMVECTOR Result;
  7033. XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)));
  7034. XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)));
  7035. XMASSERT(XMVector3GreaterOrEqual(LengthMin, g_XMZero));
  7036. XMASSERT(XMVector3GreaterOrEqual(LengthMax, g_XMZero));
  7037. XMASSERT(XMVector3GreaterOrEqual(LengthMax, LengthMin));
  7038. LengthSq = XMVector3LengthSq(V);
  7039. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  7040. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
  7041. ZeroLength = XMVectorEqual(LengthSq,g_XMZero);
  7042. Normal = _mm_mul_ps(V, RcpLength);
  7043. Length = _mm_mul_ps(LengthSq, RcpLength);
  7044. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  7045. Length = XMVectorSelect(LengthSq, Length, Select);
  7046. Normal = XMVectorSelect(LengthSq, Normal, Select);
  7047. ControlMax = XMVectorGreater(Length, LengthMax);
  7048. ControlMin = XMVectorLess(Length, LengthMin);
  7049. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  7050. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  7051. Result = _mm_mul_ps(Normal, ClampLength);
  7052. // Preserve the original vector (with no precision loss) if the length falls within the given range
  7053. Control = XMVectorEqualInt(ControlMax, ControlMin);
  7054. Result = XMVectorSelect(Result, V, Control);
  7055. return Result;
  7056. #else // _XM_VMX128_INTRINSICS_
  7057. #endif // _XM_VMX128_INTRINSICS_
  7058. }
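
// Example (illustrative sketch, not part of the original library): clamping a
// velocity to a maximum speed while leaving slower vectors untouched. The
// values are assumptions chosen to make the arithmetic obvious.
//
//     XMVECTOR vVelocity = XMVectorSet(30.0f, 40.0f, 0.0f, 0.0f); // length 50
//     XMVECTOR vLimited = XMVector3ClampLength(vVelocity, 0.0f, 10.0f);
//     // vLimited keeps the direction of vVelocity with length 10: (6, 8, 0, 0)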
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Reflect
(
    FXMVECTOR Incident,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    Result = XMVector3Dot(Incident, Normal);
    Result = XMVectorAdd(Result, Result);
    Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = Incident - (2 * dot(Incident, Normal)) * Normal
    XMVECTOR Result = XMVector3Dot(Incident, Normal);
    Result = _mm_add_ps(Result, Result);
    Result = _mm_mul_ps(Result, Normal);
    Result = _mm_sub_ps(Incident,Result);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
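
// Example (illustrative sketch, not part of the original library): bouncing a
// ray off the XZ plane. The formula assumes Normal is unit length.
//
//     XMVECTOR vDown = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);
//     XMVECTOR vUp = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR vBounce = XMVector3Reflect(vDown, vUp);
//     // vBounce == (1, 1, 0, 0): the Y component is mirrored about the plane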
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Refract
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FLOAT RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Index;
    Index = XMVectorReplicate(RefractionIndex);
    return XMVector3RefractV(Incident, Normal, Index);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR Index = _mm_set_ps1(RefractionIndex);
    return XMVector3RefractV(Incident,Normal,Index);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3RefractV
(
    FXMVECTOR Incident,
    FXMVECTOR Normal,
    FXMVECTOR RefractionIndex
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR IDotN;
    XMVECTOR R;
    CONST XMVECTOR Zero = XMVectorZero();

    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))

    IDotN = XMVector3Dot(Incident, Normal);

    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
    R = XMVectorMultiply(R, RefractionIndex);
    R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);

    if (XMVector4LessOrEqual(R, Zero))
    {
        // Total internal reflection
        return Zero;
    }
    else
    {
        XMVECTOR Result;
        // R = RefractionIndex * IDotN + sqrt(R)
        R = XMVectorSqrt(R);
        R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
        // Result = RefractionIndex * Incident - Normal * R
        Result = XMVectorMultiply(RefractionIndex, Incident);
        Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
        return Result;
    }
#elif defined(_XM_SSE_INTRINSICS_)
    // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
    // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
    XMVECTOR IDotN = XMVector3Dot(Incident, Normal);
    // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
    XMVECTOR R = _mm_mul_ps(IDotN, IDotN);
    R = _mm_sub_ps(g_XMOne,R);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_mul_ps(R, RefractionIndex);
    R = _mm_sub_ps(g_XMOne,R);

    XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
    if (_mm_movemask_ps(vResult)==0x0f)
    {
        // Total internal reflection
        vResult = g_XMZero;
    }
    else
    {
        // R = RefractionIndex * IDotN + sqrt(R)
        R = _mm_sqrt_ps(R);
        vResult = _mm_mul_ps(RefractionIndex,IDotN);
        R = _mm_add_ps(R,vResult);
        // Result = RefractionIndex * Incident - Normal * R
        vResult = _mm_mul_ps(RefractionIndex, Incident);
        R = _mm_mul_ps(R,Normal);
        vResult = _mm_sub_ps(vResult,R);
    }
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
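
// Example (illustrative sketch, not part of the original library): bending a
// ray entering water. RefractionIndex is the ratio n1/n2; 0.75 here is an
// assumption standing in for roughly air-to-water (1.0/1.33). With a ratio
// greater than 1 (dense-to-light) a grazing angle can make the radicand go
// negative, and the function then returns the zero vector (total internal
// reflection).
//
//     XMVECTOR vIncident = XMVector3Normalize(XMVectorSet(0.0f, -1.0f, 1.0f, 0.0f));
//     XMVECTOR vSurfaceUp = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR vBent = XMVector3Refract(vIncident, vSurfaceUp, 0.75f);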
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Orthogonal
(
    FXMVECTOR V
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORU32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORU32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y.v);
    NegativeV = XMVectorSubtract(Zero, V);
    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);
    S = XMVectorAdd(YZYY, Z);
    D = XMVectorSubtract(YZYY, Z);
    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X.v);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X.v);
    Result = XMVectorSelect(R1, R0, Select);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR NegativeV;
    XMVECTOR Z, YZYY;
    XMVECTOR ZIsNegative, YZYYIsNegative;
    XMVECTOR S, D;
    XMVECTOR R0, R1;
    XMVECTOR Select;
    XMVECTOR Zero;
    XMVECTOR Result;
    static CONST XMVECTORI32 Permute1X0X0X0X = {XM_PERMUTE_1X, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_0X};
    static CONST XMVECTORI32 Permute0Y0Z0Y0Y = {XM_PERMUTE_0Y, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_0Y};

    Zero = XMVectorZero();
    Z = XMVectorSplatZ(V);
    YZYY = XMVectorPermute(V, V, Permute0Y0Z0Y0Y);
    NegativeV = _mm_sub_ps(Zero, V);
    ZIsNegative = XMVectorLess(Z, Zero);
    YZYYIsNegative = XMVectorLess(YZYY, Zero);
    S = _mm_add_ps(YZYY, Z);
    D = _mm_sub_ps(YZYY, Z);
    Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative);
    R0 = XMVectorPermute(NegativeV, S, Permute1X0X0X0X);
    R1 = XMVectorPermute(V, D, Permute1X0X0X0X);
    Result = XMVectorSelect(R1, R0, Select);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
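
// Example (illustrative sketch, not part of the original library): the returned
// vector is perpendicular to the input, but it is not normalized and its exact
// direction is unspecified, so only the zero dot product should be relied on.
//
//     XMVECTOR vAxis = XMVectorSet(1.0f, 2.0f, 3.0f, 0.0f);
//     XMVECTOR vPerp = XMVector3Orthogonal(vAxis);
//     XMVECTOR vDot = XMVector3Dot(vAxis, vPerp); // expected ~0 in every component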
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormalsEst
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACosEst(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACosEst(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenNormals
(
    FXMVECTOR N1,
    FXMVECTOR N2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    Result = XMVector3Dot(N1, N2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    Result = XMVectorClamp(Result, NegativeOne, One);
    Result = XMVectorACos(Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = XMVector3Dot(N1,N2);
    // Clamp to -1.0f to 1.0f
    vResult = _mm_max_ps(vResult,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = XMVectorACos(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3AngleBetweenVectors
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR NegativeOne;
    XMVECTOR One;
    XMVECTOR Result;
    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);
    Dot = XMVector3Dot(V1, V2);
    L1 = XMVectorMultiply(L1, L2);
    NegativeOne = XMVectorSplatConstant(-1, 0);
    One = XMVectorSplatOne();
    CosAngle = XMVectorMultiply(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
    Result = XMVectorACos(CosAngle);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR L1;
    XMVECTOR L2;
    XMVECTOR Dot;
    XMVECTOR CosAngle;
    XMVECTOR Result;
    L1 = XMVector3ReciprocalLength(V1);
    L2 = XMVector3ReciprocalLength(V2);
    Dot = XMVector3Dot(V1, V2);
    L1 = _mm_mul_ps(L1, L2);
    CosAngle = _mm_mul_ps(Dot, L1);
    CosAngle = XMVectorClamp(CosAngle,g_XMNegativeOne,g_XMOne);
    Result = XMVectorACos(CosAngle);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
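
// Example (illustrative sketch, not part of the original library): the angle
// comes back in radians, replicated into every component. Unlike the Normals
// variants, this one normalizes internally, so the inputs need not be unit length.
//
//     XMVECTOR vX = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
//     XMVECTOR vY = XMVectorSet(0.0f, 2.0f, 0.0f, 0.0f);
//     FLOAT fAngle = XMVectorGetX(XMVector3AngleBetweenVectors(vX, vY));
//     // fAngle is XM_PIDIV2 (90 degrees), up to floating point error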
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3LinePointDistance
(
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2,
    FXMVECTOR Point
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR PointVector;
    XMVECTOR LineVector;
    XMVECTOR ReciprocalLengthSq;
    XMVECTOR PointProjectionScale;
    XMVECTOR DistanceVector;
    XMVECTOR Result;

    // Given a vector PointVector from LinePoint1 to Point and a vector
    // LineVector from LinePoint1 to LinePoint2, the scaled distance
    // PointProjectionScale from LinePoint1 to the perpendicular projection
    // of PointVector onto the line is defined as:
    //
    //     PointProjectionScale = dot(PointVector, LineVector) / LengthSq(LineVector)

    PointVector = XMVectorSubtract(Point, LinePoint1);
    LineVector = XMVectorSubtract(LinePoint2, LinePoint1);
    ReciprocalLengthSq = XMVector3LengthSq(LineVector);
    ReciprocalLengthSq = XMVectorReciprocal(ReciprocalLengthSq);
    PointProjectionScale = XMVector3Dot(PointVector, LineVector);
    PointProjectionScale = XMVectorMultiply(PointProjectionScale, ReciprocalLengthSq);
    DistanceVector = XMVectorMultiply(LineVector, PointProjectionScale);
    DistanceVector = XMVectorSubtract(PointVector, DistanceVector);
    Result = XMVector3Length(DistanceVector);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR PointVector = _mm_sub_ps(Point,LinePoint1);
    XMVECTOR LineVector = _mm_sub_ps(LinePoint2,LinePoint1);
    // Unlike the scalar path, the squared length is kept as-is here and the
    // reciprocal is folded into the division below.
    XMVECTOR LengthSq = XMVector3LengthSq(LineVector);
    XMVECTOR vResult = XMVector3Dot(PointVector,LineVector);
    vResult = _mm_div_ps(vResult,LengthSq);
    vResult = _mm_mul_ps(vResult,LineVector);
    vResult = _mm_sub_ps(PointVector,vResult);
    vResult = XMVector3Length(vResult);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
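
// Example (illustrative sketch, not part of the original library): distance
// from a point to the infinite line through two points (here, the X axis).
//
//     XMVECTOR vA = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f);
//     XMVECTOR vB = XMVectorSet(1.0f, 0.0f, 0.0f, 0.0f);
//     XMVECTOR vP = XMVectorSet(5.0f, 3.0f, 4.0f, 0.0f);
//     FLOAT fDist = XMVectorGetX(XMVector3LinePointDistance(vA, vB, vP));
//     // fDist == 5 (sqrt(3*3 + 4*4)); the offset along the line is ignored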
//------------------------------------------------------------------------------

XMFINLINE VOID XMVector3ComponentsFromNormal
(
    XMVECTOR* pParallel,
    XMVECTOR* pPerpendicular,
    FXMVECTOR V,
    FXMVECTOR Normal
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Parallel;
    XMVECTOR Scale;

    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);

    Scale = XMVector3Dot(V, Normal);
    Parallel = XMVectorMultiply(Normal, Scale);

    *pParallel = Parallel;
    *pPerpendicular = XMVectorSubtract(V, Parallel);
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pParallel);
    XMASSERT(pPerpendicular);
    XMVECTOR Scale = XMVector3Dot(V, Normal);
    XMVECTOR Parallel = _mm_mul_ps(Normal,Scale);
    *pParallel = Parallel;
    *pPerpendicular = _mm_sub_ps(V,Parallel);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
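
// Example (illustrative sketch, not part of the original library): splitting a
// velocity against a ground normal, e.g. to remove the into-the-ground part
// when sliding. The decomposition assumes Normal is unit length.
//
//     XMVECTOR vPar, vPerp;
//     XMVECTOR vVel = XMVectorSet(3.0f, -2.0f, 0.0f, 0.0f);
//     XMVECTOR vUp = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMVector3ComponentsFromNormal(&vPar, &vPerp, vVel, vUp);
//     // vPar == (0, -2, 0, 0), vPerp == (3, 0, 0, 0), and vPar + vPerp == vVel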
//------------------------------------------------------------------------------
// Transform a vector using a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3Rotate
(
    FXMVECTOR V,
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    // Select the xyz of V and force w to zero, treating V as a pure quaternion
    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    // Mask off w, treating V as a pure quaternion
    A = _mm_and_ps(V,g_XMMask3);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Q, A);
    Result = XMQuaternionMultiply(Result, RotationQuaternion);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Transform a vector using the inverse of a rotation expressed as a unit quaternion

XMFINLINE XMVECTOR XMVector3InverseRotate
(
    FXMVECTOR V,
    FXMVECTOR RotationQuaternion
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    // Select the xyz of V and force w to zero, treating V as a pure quaternion
    A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR A;
    XMVECTOR Q;
    XMVECTOR Result;
    // Mask off w, treating V as a pure quaternion
    A = _mm_and_ps(V,g_XMMask3);
    Result = XMQuaternionMultiply(RotationQuaternion, A);
    Q = XMQuaternionConjugate(RotationQuaternion);
    Result = XMQuaternionMultiply(Result, Q);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
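
// Example (illustrative sketch, not part of the original library): rotating a
// point 90 degrees about +Y and then undoing it with the inverse rotation.
//
//     XMVECTOR vAxisY = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR qRot = XMQuaternionRotationAxis(vAxisY, XM_PIDIV2);
//     XMVECTOR vFwd = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f);
//     XMVECTOR vTurned = XMVector3Rotate(vFwd, qRot);         // ~(1, 0, 0, 0)
//     XMVECTOR vBack = XMVector3InverseRotate(vTurned, qRot); // ~vFwd again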
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Transform
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);
    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
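
// Example (illustrative sketch, not part of the original library): the input w
// is ignored and treated as 1, so the translation row always applies.
//
//     XMMATRIX mMove = XMMatrixTranslation(10.0f, 0.0f, 0.0f);
//     XMVECTOR vPos = XMVectorSet(1.0f, 2.0f, 3.0f, 0.0f);
//     XMVECTOR vOut = XMVector3Transform(vPos, mMove);
//     // vOut == (11, 2, 3, 1); compare XMVector3TransformNormal below, which
//     // drops the translation row entirely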
//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStream
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
        XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE* pInputVector = (const BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMINLINE XMFLOAT4* XMVector3TransformStreamNC
(
    XMFLOAT4* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) || defined(_XM_SSE_INTRINSICS_)
    return XMVector3TransformStream( pOutputStream, OutputStride, pInputStream, InputStride, VectorCount, M );
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformCoord
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;
    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);
    Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    InverseW = XMVectorSplatW(Result);
    InverseW = XMVectorReciprocal(InverseW);
    Result = XMVectorMultiply(Result, InverseW);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    vResult = _mm_add_ps(vResult,M.r[3]);
    // Divide through by the resulting w
    vTemp = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
    vResult = _mm_div_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
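
// Example (illustrative sketch, not part of the original library): unlike
// XMVector3Transform, the result here is divided through by w, which is what a
// projection matrix needs. The parameter values below are arbitrary assumptions.
//
//     XMMATRIX mProj = XMMatrixPerspectiveFovLH(XM_PIDIV4, 16.0f / 9.0f, 0.1f, 100.0f);
//     XMVECTOR vView = XMVectorSet(0.0f, 0.0f, 10.0f, 0.0f);
//     XMVECTOR vClip = XMVector3TransformCoord(vView, mProj);
//     // vClip holds normalized device coordinates with w == 1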
//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformCoordStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR InverseW;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//      Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
        Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
        InverseW = XMVectorSplatW(Result);
        InverseW = XMVectorReciprocal(InverseW);
        Result = XMVectorMultiply(Result, InverseW);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        vResult = _mm_add_ps(vResult,M.r[3]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        X = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(3,3,3,3));
        vResult = _mm_div_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3TransformNormal
(
    FXMVECTOR V,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    Z = XMVectorSplatZ(V);
    Y = XMVectorSplatY(V);
    X = XMVectorSplatX(V);
    Result = XMVectorMultiply(Z, M.r[2]);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
    vResult = _mm_mul_ps(vResult,M.r[0]);
    XMVECTOR vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
    vTemp = _mm_mul_ps(vTemp,M.r[1]);
    vResult = _mm_add_ps(vResult,vTemp);
    vTemp = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
    vTemp = _mm_mul_ps(vTemp,M.r[2]);
    vResult = _mm_add_ps(vResult,vTemp);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
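
// Example (illustrative sketch, not part of the original library): normals
// ignore translation, so moving an object leaves its normals unchanged.
//
//     XMMATRIX mMove = XMMatrixTranslation(5.0f, 5.0f, 5.0f);
//     XMVECTOR vN = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
//     XMVECTOR vOut = XMVector3TransformNormal(vN, mMove);
//     // vOut == (0, 1, 0, 0); for non-uniform scale, transform by the
//     // inverse-transpose instead to keep normals perpendicular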
//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3TransformNormalStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR V;
    XMVECTOR X;
    XMVECTOR Y;
    XMVECTOR Z;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Z = XMVectorSplatZ(V);
        Y = XMVectorSplatY(V);
        X = XMVectorSplatX(V);
//      Z = XMVectorReplicate(((XMFLOAT3*)pInputVector)->z);
//      Y = XMVectorReplicate(((XMFLOAT3*)pInputVector)->y);
//      X = XMVectorReplicate(((XMFLOAT3*)pInputVector)->x);
        Result = XMVectorMultiply(Z, M.r[2]);
        Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
        Result = XMVectorMultiplyAdd(X, M.r[0], Result);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    UINT i;
    const BYTE *pInputVector = (BYTE*)pInputStream;
    BYTE *pOutputVector = (BYTE*)pOutputStream;
    for (i = 0; i < VectorCount; i++)
    {
        XMVECTOR X = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->x);
        XMVECTOR Y = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->y);
        XMVECTOR vResult = _mm_load_ps1(&reinterpret_cast<const XMFLOAT3 *>(pInputVector)->z);
        vResult = _mm_mul_ps(vResult,M.r[2]);
        Y = _mm_mul_ps(Y,M.r[1]);
        vResult = _mm_add_ps(vResult,Y);
        X = _mm_mul_ps(X,M.r[0]);
        vResult = _mm_add_ps(vResult,X);
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->x,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->y,vResult);
        vResult = _mm_shuffle_ps(vResult,vResult,_MM_SHUFFLE(0,3,2,1));
        _mm_store_ss(&reinterpret_cast<XMFLOAT3 *>(pOutputVector)->z,vResult);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMINLINE XMVECTOR XMVector3Project
(
    FXMVECTOR V,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);
    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    Result = XMVector3TransformCoord(V, Transform);
    Result = XMVectorMultiplyAdd(Result, Scale, Offset);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        0.0f);
    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Result = XMVector3TransformCoord(V, Transform);
    Result = _mm_mul_ps(Result,Scale);
    Result = _mm_add_ps(Result,Offset);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
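
// Example (illustrative sketch, not part of the original library): mapping a
// world-space point to pixel coordinates for a 1280x720 viewport. The point
// and matrices (vWorldPos, mProjection, mView, mWorld) are assumed to come
// from the caller's camera setup.
//
//     XMVECTOR vScreen = XMVector3Project(vWorldPos,
//         0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f,
//         mProjection, mView, mWorld);
//     // x and y of vScreen are in pixels; z is depth in the viewport range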
//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3ProjectStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT i;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Result = XMVector3TransformCoord(V, Transform);
        Result = XMVectorMultiplyAdd(Result, Scale, Offset);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR V;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Result;
    UINT i;
    FLOAT HalfViewportWidth = ViewportWidth * 0.5f;
    FLOAT HalfViewportHeight = ViewportHeight * 0.5f;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    Scale = XMVectorSet(HalfViewportWidth,
                        -HalfViewportHeight,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Offset = XMVectorSet(ViewportX + HalfViewportWidth,
                         ViewportY + HalfViewportHeight,
                         ViewportMinZ,
                         0.0f);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Result = XMVector3TransformCoord(V, Transform);
        Result = _mm_mul_ps(Result,Scale);
        Result = _mm_add_ps(Result,Offset);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE XMVECTOR XMVector3Unproject
(
    FXMVECTOR V,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Determinant;
    XMVECTOR Result;
    CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);
    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    Result = XMVectorMultiplyAdd(V, Scale, Offset);
    Result = XMVector3TransformCoord(Result, Transform);
    return Result;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR Determinant;
    XMVECTOR Result;
    CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);
    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);
    Result = _mm_mul_ps(V,Scale);
    Result = _mm_add_ps(Result,Offset);
    Result = XMVector3TransformCoord(Result, Transform);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
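
// Example (illustrative sketch, not part of the original library): classic
// mouse picking. Unprojecting the same pixel at the near and far ends of the
// depth range yields a world-space ray through that pixel. fMouseX, fMouseY
// and the matrices are assumptions supplied by the caller.
//
//     XMVECTOR vNear = XMVector3Unproject(XMVectorSet(fMouseX, fMouseY, 0.0f, 0.0f),
//         0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f, mProjection, mView, mWorld);
//     XMVECTOR vFar = XMVector3Unproject(XMVectorSet(fMouseX, fMouseY, 1.0f, 0.0f),
//         0.0f, 0.0f, 1280.0f, 720.0f, 0.0f, 1.0f, mProjection, mView, mWorld);
//     XMVECTOR vRayDir = XMVector3Normalize(XMVectorSubtract(vFar, vNear));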
//------------------------------------------------------------------------------

XMINLINE XMFLOAT3* XMVector3UnprojectStream
(
    XMFLOAT3* pOutputStream,
    UINT OutputStride,
    CONST XMFLOAT3* pInputStream,
    UINT InputStride,
    UINT VectorCount,
    FLOAT ViewportX,
    FLOAT ViewportY,
    FLOAT ViewportWidth,
    FLOAT ViewportHeight,
    FLOAT ViewportMinZ,
    FLOAT ViewportMaxZ,
    CXMMATRIX Projection,
    CXMMATRIX View,
    CXMMATRIX World)
{
#if defined(_XM_NO_INTRINSICS_)
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR V;
    XMVECTOR Determinant;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTOR D = XMVectorSet(-1.0f, 1.0f, 0.0f, 0.0f);

    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);

    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);
    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = XMVectorMultiplyAdd(Scale, Offset, D);

    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);

    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Result = XMVectorMultiplyAdd(V, Scale, Offset);
        Result = XMVector3TransformCoord(Result, Transform);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(_XM_SSE_INTRINSICS_)
    XMASSERT(pOutputStream);
    XMASSERT(pInputStream);
    XMMATRIX Transform;
    XMVECTOR Scale;
    XMVECTOR Offset;
    XMVECTOR V;
    XMVECTOR Determinant;
    XMVECTOR Result;
    UINT i;
    BYTE* pInputVector = (BYTE*)pInputStream;
    BYTE* pOutputVector = (BYTE*)pOutputStream;
    CONST XMVECTORF32 D = {-1.0f, 1.0f, 0.0f, 0.0f};
    Scale = XMVectorSet(ViewportWidth * 0.5f,
                        -ViewportHeight * 0.5f,
                        ViewportMaxZ - ViewportMinZ,
                        1.0f);
    Scale = XMVectorReciprocal(Scale);
    Offset = XMVectorSet(-ViewportX,
                         -ViewportY,
                         -ViewportMinZ,
                         0.0f);
    Offset = _mm_mul_ps(Offset,Scale);
    Offset = _mm_add_ps(Offset,D);
    Transform = XMMatrixMultiply(World, View);
    Transform = XMMatrixMultiply(Transform, Projection);
    Transform = XMMatrixInverse(&Determinant, Transform);
    for (i = 0; i < VectorCount; i++)
    {
        V = XMLoadFloat3((XMFLOAT3*)pInputVector);
        Result = XMVectorMultiplyAdd(V, Scale, Offset);
        Result = XMVector3TransformCoord(Result, Transform);
        XMStoreFloat3((XMFLOAT3*)pOutputVector, Result);
        pInputVector += InputStride;
        pOutputVector += OutputStride;
    }
    return pOutputStream;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
/****************************************************************************
*
* 4D Vector
*
****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Equal
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && (V1.vector4_f32[1] == V2.vector4_f32[1]) && (V1.vector4_f32[2] == V2.vector4_f32[2]) && (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] == V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] == V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] == V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] == V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] != V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] != V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] != V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpeq_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
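
// Example (illustrative sketch, not part of the original library): the R
// variants return a CR6-style comparison record that distinguishes "all true"
// from "all false" from "mixed" in a single call. vA and vB are assumed inputs.
//
//     UINT uCR = XMVector4EqualR(vA, vB);
//     if (XMComparisonAllTrue(uCR))  { /* every component equal */ }
//     if (XMComparisonAllFalse(uCR)) { /* no component equal */ }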
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4EqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && (V1.vector4_u32[1] == V2.vector4_u32[1]) && (V1.vector4_u32[2] == V2.vector4_u32[2]) && (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])==0xf) != 0);
#else
    return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4EqualIntR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_u32[0] == V2.vector4_u32[0] &&
        V1.vector4_u32[1] == V2.vector4_u32[1] &&
        V1.vector4_u32[2] == V2.vector4_u32[2] &&
        V1.vector4_u32[3] == V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_u32[0] != V2.vector4_u32[0] &&
        V1.vector4_u32[1] != V2.vector4_u32[1] &&
        V1.vector4_u32[2] != V2.vector4_u32[2] &&
        V1.vector4_u32[3] != V2.vector4_u32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    int iTest = _mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0]);
    UINT CR = 0;
    if (iTest==0xf)     // All equal?
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (iTest==0)  // All not equal?
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NearEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2,
    FXMVECTOR Epsilon
)
{
#if defined(_XM_NO_INTRINSICS_)
    FLOAT dx, dy, dz, dw;
    dx = fabsf(V1.vector4_f32[0]-V2.vector4_f32[0]);
    dy = fabsf(V1.vector4_f32[1]-V2.vector4_f32[1]);
    dz = fabsf(V1.vector4_f32[2]-V2.vector4_f32[2]);
    dw = fabsf(V1.vector4_f32[3]-V2.vector4_f32[3]);
    return (((dx <= Epsilon.vector4_f32[0]) &&
        (dy <= Epsilon.vector4_f32[1]) &&
        (dz <= Epsilon.vector4_f32[2]) &&
        (dw <= Epsilon.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Get the difference
    XMVECTOR vDelta = _mm_sub_ps(V1,V2);
    // Get the absolute value of the difference
    XMVECTOR vTemp = _mm_setzero_ps();
    vTemp = _mm_sub_ps(vTemp,vDelta);
    vTemp = _mm_max_ps(vTemp,vDelta);
    vTemp = _mm_cmple_ps(vTemp,Epsilon);
    return ((_mm_movemask_ps(vTemp)==0xf) != 0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
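
// Example (illustrative sketch, not part of the original library): tolerant
// comparison after computation that accumulates rounding error. The tolerance
// value and the vComputed/vExpected inputs are assumptions.
//
//     XMVECTOR vEps = XMVectorReplicate(1.0e-4f);
//     if (XMVector4NearEqual(vComputed, vExpected, vEps))
//     {
//         // Close enough, component by component
//     }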
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || (V1.vector4_f32[1] != V2.vector4_f32[1]) || (V1.vector4_f32[2] != V2.vector4_f32[2]) || (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpneq_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4NotEqualInt
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || (V1.vector4_u32[1] != V2.vector4_u32[1]) || (V1.vector4_u32[2] != V2.vector4_u32[2]) || (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    __m128i vTemp = _mm_cmpeq_epi32(reinterpret_cast<const __m128i *>(&V1)[0],reinterpret_cast<const __m128i *>(&V2)[0]);
    return ((_mm_movemask_ps(reinterpret_cast<const __m128 *>(&vTemp)[0])!=0xF) != 0);
#else
    return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2));
#endif
}
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Greater
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && (V1.vector4_f32[1] > V2.vector4_f32[1]) && (V1.vector4_f32[2] > V2.vector4_f32[2]) && (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if (V1.vector4_f32[0] > V2.vector4_f32[0] &&
        V1.vector4_f32[1] > V2.vector4_f32[1] &&
        V1.vector4_f32[2] > V2.vector4_f32[2] &&
        V1.vector4_f32[3] > V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (V1.vector4_f32[0] <= V2.vector4_f32[0] &&
        V1.vector4_f32[1] <= V2.vector4_f32[1] &&
        V1.vector4_f32[2] <= V2.vector4_f32[2] &&
        V1.vector4_f32[3] <= V2.vector4_f32[3])
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpgt_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0xf)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4GreaterOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && (V1.vector4_f32[1] >= V2.vector4_f32[1]) && (V1.vector4_f32[2] >= V2.vector4_f32[2]) && (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4GreaterOrEqualR
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] >= V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] >= V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] >= V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) &&
        (V1.vector4_f32[1] < V2.vector4_f32[1]) &&
        (V1.vector4_f32[2] < V2.vector4_f32[2]) &&
        (V1.vector4_f32[3] < V2.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    UINT CR = 0;
    XMVECTOR vTemp = _mm_cmpge_ps(V1,V2);
    int iTest = _mm_movemask_ps(vTemp);
    if (iTest==0x0f)
    {
        CR = XM_CRMASK_CR6TRUE;
    }
    else if (!iTest)
    {
        CR = XM_CRMASK_CR6FALSE;
    }
    return CR;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4Less
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && (V1.vector4_f32[1] < V2.vector4_f32[1]) && (V1.vector4_f32[2] < V2.vector4_f32[2]) && (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmplt_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterR(V2, V1));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4LessOrEqual
(
    FXMVECTOR V1,
    FXMVECTOR V2
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && (V1.vector4_f32[1] <= V2.vector4_f32[1]) && (V1.vector4_f32[2] <= V2.vector4_f32[2]) && (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vTemp = _mm_cmple_ps(V1,V2);
    return ((_mm_movemask_ps(vTemp)==0x0f) != 0);
#else
    return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1));
#endif
}
//------------------------------------------------------------------------------

XMFINLINE BOOL XMVector4InBounds
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return ((_mm_movemask_ps(vTemp1)==0x0f) != 0);
#else
    return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds));
#endif
}

//------------------------------------------------------------------------------

XMFINLINE UINT XMVector4InBoundsR
(
    FXMVECTOR V,
    FXMVECTOR Bounds
)
{
#if defined(_XM_NO_INTRINSICS_)
    UINT CR = 0;
    if ((V.vector4_f32[0] <= Bounds.vector4_f32[0] && V.vector4_f32[0] >= -Bounds.vector4_f32[0]) &&
        (V.vector4_f32[1] <= Bounds.vector4_f32[1] && V.vector4_f32[1] >= -Bounds.vector4_f32[1]) &&
        (V.vector4_f32[2] <= Bounds.vector4_f32[2] && V.vector4_f32[2] >= -Bounds.vector4_f32[2]) &&
        (V.vector4_f32[3] <= Bounds.vector4_f32[3] && V.vector4_f32[3] >= -Bounds.vector4_f32[3]))
    {
        CR = XM_CRMASK_CR6BOUNDS;
    }
    return CR;
#elif defined(_XM_SSE_INTRINSICS_)
    // Test if less than or equal
    XMVECTOR vTemp1 = _mm_cmple_ps(V,Bounds);
    // Negate the bounds
    XMVECTOR vTemp2 = _mm_mul_ps(Bounds,g_XMNegativeOne);
    // Test if greater or equal (Reversed)
    vTemp2 = _mm_cmple_ps(vTemp2,V);
    // Blend answers
    vTemp1 = _mm_and_ps(vTemp1,vTemp2);
    // All in bounds?
    return (_mm_movemask_ps(vTemp1)==0x0f) ? XM_CRMASK_CR6BOUNDS : 0;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
  8462. //------------------------------------------------------------------------------
  8463. XMFINLINE BOOL XMVector4IsNaN
  8464. (
  8465. FXMVECTOR V
  8466. )
  8467. {
  8468. #if defined(_XM_NO_INTRINSICS_)
  8469. return (XMISNAN(V.vector4_f32[0]) ||
  8470. XMISNAN(V.vector4_f32[1]) ||
  8471. XMISNAN(V.vector4_f32[2]) ||
  8472. XMISNAN(V.vector4_f32[3]));
  8473. #elif defined(_XM_SSE_INTRINSICS_)
  8474. // Test against itself. NaN is always not equal
  8475. XMVECTOR vTempNan = _mm_cmpneq_ps(V,V);
  8476. // If any are NaN, the mask is non-zero
  8477. return (_mm_movemask_ps(vTempNan)!=0);
  8478. #else // _XM_VMX128_INTRINSICS_
  8479. #endif // _XM_VMX128_INTRINSICS_
  8480. }
  8481. //------------------------------------------------------------------------------
  8482. XMFINLINE BOOL XMVector4IsInfinite
  8483. (
  8484. FXMVECTOR V
  8485. )
  8486. {
  8487. #if defined(_XM_NO_INTRINSICS_)
  8488. return (XMISINF(V.vector4_f32[0]) ||
  8489. XMISINF(V.vector4_f32[1]) ||
  8490. XMISINF(V.vector4_f32[2]) ||
  8491. XMISINF(V.vector4_f32[3]));
  8492. #elif defined(_XM_SSE_INTRINSICS_)
  8493. // Mask off the sign bit
  8494. XMVECTOR vTemp = _mm_and_ps(V,g_XMAbsMask);
  8495. // Compare to infinity
  8496. vTemp = _mm_cmpeq_ps(vTemp,g_XMInfinity);
8498. // If any are infinity, the mask is non-zero
  8498. return (_mm_movemask_ps(vTemp) != 0);
  8499. #else // _XM_VMX128_INTRINSICS_
  8500. #endif // _XM_VMX128_INTRINSICS_
  8501. }
  8502. //------------------------------------------------------------------------------
  8503. // Computation operations
  8504. //------------------------------------------------------------------------------
  8505. //------------------------------------------------------------------------------
  8506. XMFINLINE XMVECTOR XMVector4Dot
  8507. (
  8508. FXMVECTOR V1,
  8509. FXMVECTOR V2
  8510. )
  8511. {
  8512. #if defined(_XM_NO_INTRINSICS_)
  8513. XMVECTOR Result;
  8514. Result.vector4_f32[0] =
  8515. Result.vector4_f32[1] =
  8516. Result.vector4_f32[2] =
  8517. Result.vector4_f32[3] = V1.vector4_f32[0] * V2.vector4_f32[0] + V1.vector4_f32[1] * V2.vector4_f32[1] + V1.vector4_f32[2] * V2.vector4_f32[2] + V1.vector4_f32[3] * V2.vector4_f32[3];
  8518. return Result;
  8519. #elif defined(_XM_SSE_INTRINSICS_)
  8520. XMVECTOR vTemp2 = V2;
  8521. XMVECTOR vTemp = _mm_mul_ps(V1,vTemp2);
  8522. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp,_MM_SHUFFLE(1,0,0,0)); // Copy X to the Z position and Y to the W position
  8523. vTemp2 = _mm_add_ps(vTemp2,vTemp); // Add Z = X+Z; W = Y+W;
  8524. vTemp = _mm_shuffle_ps(vTemp,vTemp2,_MM_SHUFFLE(0,3,0,0)); // Copy W to the Z position
  8525. vTemp = _mm_add_ps(vTemp,vTemp2); // Add Z and W together
  8526. return _mm_shuffle_ps(vTemp,vTemp,_MM_SHUFFLE(2,2,2,2)); // Splat Z and return
  8527. #else // _XM_VMX128_INTRINSICS_
  8528. #endif // _XM_VMX128_INTRINSICS_
  8529. }
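// Minimal usage sketch (values illustrative): the dot product is splatted
// into all four lanes, so any component accessor yields the scalar result.
//
// XMVECTOR A = XMVectorSet(1.0f, 2.0f, 3.0f, 4.0f);
// XMVECTOR B = XMVectorSet(5.0f, 6.0f, 7.0f, 8.0f);
// FLOAT fDot = XMVectorGetX(XMVector4Dot(A, B)); // 5+12+21+32 = 70.0f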
  8530. //------------------------------------------------------------------------------
  8531. XMFINLINE XMVECTOR XMVector4Cross
  8532. (
  8533. FXMVECTOR V1,
  8534. FXMVECTOR V2,
  8535. FXMVECTOR V3
  8536. )
  8537. {
  8538. #if defined(_XM_NO_INTRINSICS_)
  8539. XMVECTOR Result;
  8540. Result.vector4_f32[0] = (((V2.vector4_f32[2]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[2]))*V1.vector4_f32[1])-(((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[2])+(((V2.vector4_f32[1]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[1]))*V1.vector4_f32[3]);
  8541. Result.vector4_f32[1] = (((V2.vector4_f32[3]*V3.vector4_f32[2])-(V2.vector4_f32[2]*V3.vector4_f32[3]))*V1.vector4_f32[0])-(((V2.vector4_f32[3]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[3]))*V1.vector4_f32[2])+(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[3]);
  8542. Result.vector4_f32[2] = (((V2.vector4_f32[1]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[1]))*V1.vector4_f32[0])-(((V2.vector4_f32[0]*V3.vector4_f32[3])-(V2.vector4_f32[3]*V3.vector4_f32[0]))*V1.vector4_f32[1])+(((V2.vector4_f32[0]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[0]))*V1.vector4_f32[3]);
  8543. Result.vector4_f32[3] = (((V2.vector4_f32[2]*V3.vector4_f32[1])-(V2.vector4_f32[1]*V3.vector4_f32[2]))*V1.vector4_f32[0])-(((V2.vector4_f32[2]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[2]))*V1.vector4_f32[1])+(((V2.vector4_f32[1]*V3.vector4_f32[0])-(V2.vector4_f32[0]*V3.vector4_f32[1]))*V1.vector4_f32[2]);
  8544. return Result;
  8545. #elif defined(_XM_SSE_INTRINSICS_)
  8546. // V2zwyz * V3wzwy
  8547. XMVECTOR vResult = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,1,3,2));
  8548. XMVECTOR vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,3,2,3));
  8549. vResult = _mm_mul_ps(vResult,vTemp3);
  8550. // - V2wzwy * V3zwyz
  8551. XMVECTOR vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,3,2,3));
  8552. vTemp3 = _mm_shuffle_ps(vTemp3,vTemp3,_MM_SHUFFLE(1,3,0,1));
  8553. vTemp2 = _mm_mul_ps(vTemp2,vTemp3);
  8554. vResult = _mm_sub_ps(vResult,vTemp2);
  8555. // term1 * V1yxxx
  8556. XMVECTOR vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(0,0,0,1));
  8557. vResult = _mm_mul_ps(vResult,vTemp1);
  8558. // V2ywxz * V3wxwx
  8559. vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(2,0,3,1));
  8560. vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,3,0,3));
  8561. vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
  8562. // - V2wxwx * V3ywxz
  8563. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,1,2,1));
  8564. vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(2,0,3,1));
  8565. vTemp2 = _mm_mul_ps(vTemp2,vTemp1);
  8566. vTemp3 = _mm_sub_ps(vTemp3,vTemp2);
  8567. // vResult - temp * V1zzyy
  8568. vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(1,1,2,2));
  8569. vTemp1 = _mm_mul_ps(vTemp1,vTemp3);
  8570. vResult = _mm_sub_ps(vResult,vTemp1);
  8571. // V2yzxy * V3zxyx
  8572. vTemp2 = _mm_shuffle_ps(V2,V2,_MM_SHUFFLE(1,0,2,1));
  8573. vTemp3 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(0,1,0,2));
  8574. vTemp3 = _mm_mul_ps(vTemp3,vTemp2);
  8575. // - V2zxyx * V3yzxy
  8576. vTemp2 = _mm_shuffle_ps(vTemp2,vTemp2,_MM_SHUFFLE(2,0,2,1));
  8577. vTemp1 = _mm_shuffle_ps(V3,V3,_MM_SHUFFLE(1,0,2,1));
  8578. vTemp1 = _mm_mul_ps(vTemp1,vTemp2);
  8579. vTemp3 = _mm_sub_ps(vTemp3,vTemp1);
  8580. // vResult + term * V1wwwz
  8581. vTemp1 = _mm_shuffle_ps(V1,V1,_MM_SHUFFLE(2,3,3,3));
  8582. vTemp3 = _mm_mul_ps(vTemp3,vTemp1);
  8583. vResult = _mm_add_ps(vResult,vTemp3);
  8584. return vResult;
  8585. #else // _XM_VMX128_INTRINSICS_
  8586. #endif // _XM_VMX128_INTRINSICS_
  8587. }
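// Note: this 4D "cross product" is the generalized cross product of three
// 4-vectors; each component is a 3x3 cofactor of the matrix whose rows are
// V1, V2 and V3, so the result is orthogonal to all three inputs. A quick
// sanity check (illustrative):
//
// XMVECTOR C = XMVector4Cross(V1, V2, V3);
// // XMVectorGetX(XMVector4Dot(C, V1)) is 0.0f up to rounding,
// // and likewise for V2 and V3.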
  8588. //------------------------------------------------------------------------------
  8589. XMFINLINE XMVECTOR XMVector4LengthSq
  8590. (
  8591. FXMVECTOR V
  8592. )
  8593. {
  8594. return XMVector4Dot(V, V);
  8595. }
  8596. //------------------------------------------------------------------------------
  8597. XMFINLINE XMVECTOR XMVector4ReciprocalLengthEst
  8598. (
  8599. FXMVECTOR V
  8600. )
  8601. {
  8602. #if defined(_XM_NO_INTRINSICS_)
  8603. XMVECTOR Result;
  8604. Result = XMVector4LengthSq(V);
  8605. Result = XMVectorReciprocalSqrtEst(Result);
  8606. return Result;
  8607. #elif defined(_XM_SSE_INTRINSICS_)
  8608. // Perform the dot product on x,y,z and w
  8609. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8610. // vTemp has z and w
  8611. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8612. // x+z, y+w
  8613. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8614. // x+z,x+z,x+z,y+w
  8615. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8616. // ??,??,y+w,y+w
  8617. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8618. // ??,??,x+z+y+w,??
  8619. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8620. // Splat the length
  8621. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
  8622. // Get the reciprocal
  8623. vLengthSq = _mm_rsqrt_ps(vLengthSq);
  8624. return vLengthSq;
  8625. #else // _XM_VMX128_INTRINSICS_
  8626. #endif // _XM_VMX128_INTRINSICS_
  8627. }
  8628. //------------------------------------------------------------------------------
  8629. XMFINLINE XMVECTOR XMVector4ReciprocalLength
  8630. (
  8631. FXMVECTOR V
  8632. )
  8633. {
  8634. #if defined(_XM_NO_INTRINSICS_)
  8635. XMVECTOR Result;
  8636. Result = XMVector4LengthSq(V);
  8637. Result = XMVectorReciprocalSqrt(Result);
  8638. return Result;
  8639. #elif defined(_XM_SSE_INTRINSICS_)
  8640. // Perform the dot product on x,y,z and w
  8641. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8642. // vTemp has z and w
  8643. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8644. // x+z, y+w
  8645. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8646. // x+z,x+z,x+z,y+w
  8647. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8648. // ??,??,y+w,y+w
  8649. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8650. // ??,??,x+z+y+w,??
  8651. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8652. // Splat the length
  8653. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
8654. // Get the length
8655. vLengthSq = _mm_sqrt_ps(vLengthSq);
8656. // Full-precision reciprocal via divide (accurate, unlike _mm_rsqrt_ps)
8657. vLengthSq = _mm_div_ps(g_XMOne,vLengthSq);
  8658. return vLengthSq;
  8659. #else // _XM_VMX128_INTRINSICS_
  8660. #endif // _XM_VMX128_INTRINSICS_
  8661. }
  8662. //------------------------------------------------------------------------------
  8663. XMFINLINE XMVECTOR XMVector4LengthEst
  8664. (
  8665. FXMVECTOR V
  8666. )
  8667. {
  8668. #if defined(_XM_NO_INTRINSICS_)
  8669. XMVECTOR Result;
  8670. Result = XMVector4LengthSq(V);
  8671. Result = XMVectorSqrtEst(Result);
  8672. return Result;
  8673. #elif defined(_XM_SSE_INTRINSICS_)
  8674. // Perform the dot product on x,y,z and w
  8675. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8676. // vTemp has z and w
  8677. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8678. // x+z, y+w
  8679. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8680. // x+z,x+z,x+z,y+w
  8681. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8682. // ??,??,y+w,y+w
  8683. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8684. // ??,??,x+z+y+w,??
  8685. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8686. // Splat the length
  8687. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
8688. // Get the length
  8689. vLengthSq = _mm_sqrt_ps(vLengthSq);
  8690. return vLengthSq;
  8691. #else // _XM_VMX128_INTRINSICS_
  8692. #endif // _XM_VMX128_INTRINSICS_
  8693. }
  8694. //------------------------------------------------------------------------------
  8695. XMFINLINE XMVECTOR XMVector4Length
  8696. (
  8697. FXMVECTOR V
  8698. )
  8699. {
  8700. #if defined(_XM_NO_INTRINSICS_)
  8701. XMVECTOR Result;
  8702. Result = XMVector4LengthSq(V);
  8703. Result = XMVectorSqrt(Result);
  8704. return Result;
  8705. #elif defined(_XM_SSE_INTRINSICS_)
  8706. // Perform the dot product on x,y,z and w
  8707. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8708. // vTemp has z and w
  8709. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8710. // x+z, y+w
  8711. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8712. // x+z,x+z,x+z,y+w
  8713. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8714. // ??,??,y+w,y+w
  8715. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8716. // ??,??,x+z+y+w,??
  8717. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8718. // Splat the length
  8719. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
8720. // Get the length
  8721. vLengthSq = _mm_sqrt_ps(vLengthSq);
  8722. return vLengthSq;
  8723. #else // _XM_VMX128_INTRINSICS_
  8724. #endif // _XM_VMX128_INTRINSICS_
  8725. }
  8726. //------------------------------------------------------------------------------
  8727. // XMVector4NormalizeEst uses a reciprocal estimate and
  8728. // returns QNaN on zero and infinite vectors.
  8729. XMFINLINE XMVECTOR XMVector4NormalizeEst
  8730. (
  8731. FXMVECTOR V
  8732. )
  8733. {
  8734. #if defined(_XM_NO_INTRINSICS_)
  8735. XMVECTOR Result;
  8736. Result = XMVector4ReciprocalLength(V);
  8737. Result = XMVectorMultiply(V, Result);
  8738. return Result;
  8739. #elif defined(_XM_SSE_INTRINSICS_)
  8740. // Perform the dot product on x,y,z and w
  8741. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8742. // vTemp has z and w
  8743. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8744. // x+z, y+w
  8745. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8746. // x+z,x+z,x+z,y+w
  8747. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8748. // ??,??,y+w,y+w
  8749. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8750. // ??,??,x+z+y+w,??
  8751. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8752. // Splat the length
  8753. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
  8754. // Get the reciprocal
  8755. XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq);
  8756. // Reciprocal mul to perform the normalization
  8757. vResult = _mm_mul_ps(vResult,V);
  8758. return vResult;
  8759. #else // _XM_VMX128_INTRINSICS_
  8760. #endif // _XM_VMX128_INTRINSICS_
  8761. }
  8762. //------------------------------------------------------------------------------
  8763. XMFINLINE XMVECTOR XMVector4Normalize
  8764. (
  8765. FXMVECTOR V
  8766. )
  8767. {
  8768. #if defined(_XM_NO_INTRINSICS_)
  8769. FLOAT fLength;
  8770. XMVECTOR vResult;
  8771. vResult = XMVector4Length( V );
  8772. fLength = vResult.vector4_f32[0];
  8773. // Prevent divide by zero
  8774. if (fLength > 0) {
  8775. fLength = 1.0f/fLength;
  8776. }
  8777. vResult.vector4_f32[0] = V.vector4_f32[0]*fLength;
  8778. vResult.vector4_f32[1] = V.vector4_f32[1]*fLength;
  8779. vResult.vector4_f32[2] = V.vector4_f32[2]*fLength;
  8780. vResult.vector4_f32[3] = V.vector4_f32[3]*fLength;
  8781. return vResult;
  8782. #elif defined(_XM_SSE_INTRINSICS_)
  8783. // Perform the dot product on x,y,z and w
  8784. XMVECTOR vLengthSq = _mm_mul_ps(V,V);
  8785. // vTemp has z and w
  8786. XMVECTOR vTemp = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(3,2,3,2));
  8787. // x+z, y+w
  8788. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8789. // x+z,x+z,x+z,y+w
  8790. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(1,0,0,0));
  8791. // ??,??,y+w,y+w
  8792. vTemp = _mm_shuffle_ps(vTemp,vLengthSq,_MM_SHUFFLE(3,3,0,0));
  8793. // ??,??,x+z+y+w,??
  8794. vLengthSq = _mm_add_ps(vLengthSq,vTemp);
  8795. // Splat the length
  8796. vLengthSq = _mm_shuffle_ps(vLengthSq,vLengthSq,_MM_SHUFFLE(2,2,2,2));
  8797. // Prepare for the division
  8798. XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
  8799. // Create zero with a single instruction
  8800. XMVECTOR vZeroMask = _mm_setzero_ps();
  8801. // Test for a divide by zero (Must be FP to detect -0.0)
  8802. vZeroMask = _mm_cmpneq_ps(vZeroMask,vResult);
8803. // Failsafe on zero (Or epsilon) length vectors
  8804. // If the length is infinity, set the elements to zero
  8805. vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
  8806. // Divide to perform the normalization
  8807. vResult = _mm_div_ps(V,vResult);
  8808. // Any that are infinity, set to zero
  8809. vResult = _mm_and_ps(vResult,vZeroMask);
  8810. // Select qnan or result based on infinite length
  8811. XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq,g_XMQNaN);
  8812. XMVECTOR vTemp2 = _mm_and_ps(vResult,vLengthSq);
  8813. vResult = _mm_or_ps(vTemp1,vTemp2);
  8814. return vResult;
  8815. #else // _XM_VMX128_INTRINSICS_
  8816. #endif // _XM_VMX128_INTRINSICS_
  8817. }
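// Usage sketch contrasting the two normalization routines (illustrative):
// XMVector4Normalize divides by the true length and maps zero-length input
// to zero and infinite-length input to QNaN as coded above, while
// XMVector4NormalizeEst trades that handling for the faster _mm_rsqrt_ps
// estimate.
//
// XMVECTOR vUnit = XMVector4Normalize(V); // full precision
// XMVECTOR vFast = XMVector4NormalizeEst(V); // ~12-bit estimate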
  8818. //------------------------------------------------------------------------------
  8819. XMFINLINE XMVECTOR XMVector4ClampLength
  8820. (
  8821. FXMVECTOR V,
  8822. FLOAT LengthMin,
  8823. FLOAT LengthMax
  8824. )
  8825. {
  8826. #if defined(_XM_NO_INTRINSICS_)
  8827. XMVECTOR ClampMax;
  8828. XMVECTOR ClampMin;
  8829. ClampMax = XMVectorReplicate(LengthMax);
  8830. ClampMin = XMVectorReplicate(LengthMin);
  8831. return XMVector4ClampLengthV(V, ClampMin, ClampMax);
  8832. #elif defined(_XM_SSE_INTRINSICS_)
  8833. XMVECTOR ClampMax = _mm_set_ps1(LengthMax);
  8834. XMVECTOR ClampMin = _mm_set_ps1(LengthMin);
  8835. return XMVector4ClampLengthV(V, ClampMin, ClampMax);
  8836. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  8837. #endif // _XM_VMX128_INTRINSICS_
  8838. }
  8839. //------------------------------------------------------------------------------
  8840. XMFINLINE XMVECTOR XMVector4ClampLengthV
  8841. (
  8842. FXMVECTOR V,
  8843. FXMVECTOR LengthMin,
  8844. FXMVECTOR LengthMax
  8845. )
  8846. {
  8847. #if defined(_XM_NO_INTRINSICS_)
  8848. XMVECTOR ClampLength;
  8849. XMVECTOR LengthSq;
  8850. XMVECTOR RcpLength;
  8851. XMVECTOR Length;
  8852. XMVECTOR Normal;
  8853. XMVECTOR Zero;
  8854. XMVECTOR InfiniteLength;
  8855. XMVECTOR ZeroLength;
  8856. XMVECTOR Select;
  8857. XMVECTOR ControlMax;
  8858. XMVECTOR ControlMin;
  8859. XMVECTOR Control;
  8860. XMVECTOR Result;
  8861. XMASSERT((LengthMin.vector4_f32[1] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[2] == LengthMin.vector4_f32[0]) && (LengthMin.vector4_f32[3] == LengthMin.vector4_f32[0]));
  8862. XMASSERT((LengthMax.vector4_f32[1] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[2] == LengthMax.vector4_f32[0]) && (LengthMax.vector4_f32[3] == LengthMax.vector4_f32[0]));
  8863. XMASSERT(XMVector4GreaterOrEqual(LengthMin, XMVectorZero()));
  8864. XMASSERT(XMVector4GreaterOrEqual(LengthMax, XMVectorZero()));
  8865. XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
  8866. LengthSq = XMVector4LengthSq(V);
  8867. Zero = XMVectorZero();
  8868. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  8869. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v);
  8870. ZeroLength = XMVectorEqual(LengthSq, Zero);
  8871. Normal = XMVectorMultiply(V, RcpLength);
  8872. Length = XMVectorMultiply(LengthSq, RcpLength);
  8873. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  8874. Length = XMVectorSelect(LengthSq, Length, Select);
  8875. Normal = XMVectorSelect(LengthSq, Normal, Select);
  8876. ControlMax = XMVectorGreater(Length, LengthMax);
  8877. ControlMin = XMVectorLess(Length, LengthMin);
  8878. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  8879. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  8880. Result = XMVectorMultiply(Normal, ClampLength);
  8881. // Preserve the original vector (with no precision loss) if the length falls within the given range
  8882. Control = XMVectorEqualInt(ControlMax, ControlMin);
  8883. Result = XMVectorSelect(Result, V, Control);
  8884. return Result;
  8885. #elif defined(_XM_SSE_INTRINSICS_)
  8886. XMVECTOR ClampLength;
  8887. XMVECTOR LengthSq;
  8888. XMVECTOR RcpLength;
  8889. XMVECTOR Length;
  8890. XMVECTOR Normal;
  8891. XMVECTOR Zero;
  8892. XMVECTOR InfiniteLength;
  8893. XMVECTOR ZeroLength;
  8894. XMVECTOR Select;
  8895. XMVECTOR ControlMax;
  8896. XMVECTOR ControlMin;
  8897. XMVECTOR Control;
  8898. XMVECTOR Result;
  8899. XMASSERT((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin)));
  8900. XMASSERT((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax)));
  8901. XMASSERT(XMVector4GreaterOrEqual(LengthMin, g_XMZero));
  8902. XMASSERT(XMVector4GreaterOrEqual(LengthMax, g_XMZero));
  8903. XMASSERT(XMVector4GreaterOrEqual(LengthMax, LengthMin));
  8904. LengthSq = XMVector4LengthSq(V);
  8905. Zero = XMVectorZero();
  8906. RcpLength = XMVectorReciprocalSqrt(LengthSq);
  8907. InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity);
  8908. ZeroLength = XMVectorEqual(LengthSq, Zero);
  8909. Normal = _mm_mul_ps(V, RcpLength);
  8910. Length = _mm_mul_ps(LengthSq, RcpLength);
  8911. Select = XMVectorEqualInt(InfiniteLength, ZeroLength);
  8912. Length = XMVectorSelect(LengthSq, Length, Select);
  8913. Normal = XMVectorSelect(LengthSq, Normal, Select);
  8914. ControlMax = XMVectorGreater(Length, LengthMax);
  8915. ControlMin = XMVectorLess(Length, LengthMin);
  8916. ClampLength = XMVectorSelect(Length, LengthMax, ControlMax);
  8917. ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin);
  8918. Result = _mm_mul_ps(Normal, ClampLength);
  8919. // Preserve the original vector (with no precision loss) if the length falls within the given range
  8920. Control = XMVectorEqualInt(ControlMax,ControlMin);
  8921. Result = XMVectorSelect(Result,V,Control);
  8922. return Result;
  8923. #else // _XM_VMX128_INTRINSICS_
  8924. #endif // _XM_VMX128_INTRINSICS_
  8925. }
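// Minimal sketch of the scalar wrapper versus the vector form (values
// illustrative): both clamp only the magnitude, preserving direction, and
// the vector variant expects each bound splatted across all four lanes, as
// the XMASSERTs above enforce.
//
// XMVECTOR vClamped = XMVector4ClampLength(V, 1.0f, 10.0f);
// // equivalent to:
// // XMVector4ClampLengthV(V, XMVectorReplicate(1.0f), XMVectorReplicate(10.0f));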
  8926. //------------------------------------------------------------------------------
  8927. XMFINLINE XMVECTOR XMVector4Reflect
  8928. (
  8929. FXMVECTOR Incident,
  8930. FXMVECTOR Normal
  8931. )
  8932. {
  8933. #if defined(_XM_NO_INTRINSICS_)
  8934. XMVECTOR Result;
  8935. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  8936. Result = XMVector4Dot(Incident, Normal);
  8937. Result = XMVectorAdd(Result, Result);
  8938. Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident);
  8939. return Result;
  8940. #elif defined(_XM_SSE_INTRINSICS_)
  8941. // Result = Incident - (2 * dot(Incident, Normal)) * Normal
  8942. XMVECTOR Result = XMVector4Dot(Incident,Normal);
  8943. Result = _mm_add_ps(Result,Result);
  8944. Result = _mm_mul_ps(Result,Normal);
  8945. Result = _mm_sub_ps(Incident,Result);
  8946. return Result;
  8947. #else // _XM_VMX128_INTRINSICS_
  8948. #endif // _XM_VMX128_INTRINSICS_
  8949. }
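// Worked example (illustrative values): reflecting about a unit normal
// with the formula from the comments above, R = I - 2*dot(I,N)*N.
//
// XMVECTOR I = XMVectorSet(1.0f, -1.0f, 0.0f, 0.0f);
// XMVECTOR N = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
// XMVECTOR R = XMVector4Reflect(I, N); // (1.0f, 1.0f, 0.0f, 0.0f)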
  8950. //------------------------------------------------------------------------------
  8951. XMFINLINE XMVECTOR XMVector4Refract
  8952. (
  8953. FXMVECTOR Incident,
  8954. FXMVECTOR Normal,
  8955. FLOAT RefractionIndex
  8956. )
  8957. {
  8958. #if defined(_XM_NO_INTRINSICS_)
  8959. XMVECTOR Index;
  8960. Index = XMVectorReplicate(RefractionIndex);
  8961. return XMVector4RefractV(Incident, Normal, Index);
  8962. #elif defined(_XM_SSE_INTRINSICS_)
  8963. XMVECTOR Index = _mm_set_ps1(RefractionIndex);
  8964. return XMVector4RefractV(Incident,Normal,Index);
  8965. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  8966. #endif // _XM_VMX128_INTRINSICS_
  8967. }
  8968. //------------------------------------------------------------------------------
  8969. XMFINLINE XMVECTOR XMVector4RefractV
  8970. (
  8971. FXMVECTOR Incident,
  8972. FXMVECTOR Normal,
  8973. FXMVECTOR RefractionIndex
  8974. )
  8975. {
  8976. #if defined(_XM_NO_INTRINSICS_)
  8977. XMVECTOR IDotN;
  8978. XMVECTOR R;
  8979. CONST XMVECTOR Zero = XMVectorZero();
  8980. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  8981. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  8982. IDotN = XMVector4Dot(Incident, Normal);
  8983. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  8984. R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v);
  8985. R = XMVectorMultiply(R, RefractionIndex);
  8986. R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v);
  8987. if (XMVector4LessOrEqual(R, Zero))
  8988. {
  8989. // Total internal reflection
  8990. return Zero;
  8991. }
  8992. else
  8993. {
  8994. XMVECTOR Result;
  8995. // R = RefractionIndex * IDotN + sqrt(R)
  8996. R = XMVectorSqrt(R);
  8997. R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R);
  8998. // Result = RefractionIndex * Incident - Normal * R
  8999. Result = XMVectorMultiply(RefractionIndex, Incident);
  9000. Result = XMVectorNegativeMultiplySubtract(Normal, R, Result);
  9001. return Result;
  9002. }
  9003. #elif defined(_XM_SSE_INTRINSICS_)
  9004. // Result = RefractionIndex * Incident - Normal * (RefractionIndex * dot(Incident, Normal) +
  9005. // sqrt(1 - RefractionIndex * RefractionIndex * (1 - dot(Incident, Normal) * dot(Incident, Normal))))
  9006. XMVECTOR IDotN = XMVector4Dot(Incident,Normal);
  9007. // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN)
  9008. XMVECTOR R = _mm_mul_ps(IDotN,IDotN);
  9009. R = _mm_sub_ps(g_XMOne,R);
  9010. R = _mm_mul_ps(R, RefractionIndex);
  9011. R = _mm_mul_ps(R, RefractionIndex);
  9012. R = _mm_sub_ps(g_XMOne,R);
  9013. XMVECTOR vResult = _mm_cmple_ps(R,g_XMZero);
  9014. if (_mm_movemask_ps(vResult)==0x0f)
  9015. {
  9016. // Total internal reflection
  9017. vResult = g_XMZero;
  9018. }
  9019. else
  9020. {
  9021. // R = RefractionIndex * IDotN + sqrt(R)
  9022. R = _mm_sqrt_ps(R);
  9023. vResult = _mm_mul_ps(RefractionIndex, IDotN);
  9024. R = _mm_add_ps(R,vResult);
  9025. // Result = RefractionIndex * Incident - Normal * R
  9026. vResult = _mm_mul_ps(RefractionIndex, Incident);
  9027. R = _mm_mul_ps(R,Normal);
  9028. vResult = _mm_sub_ps(vResult,R);
  9029. }
  9030. return vResult;
  9031. #else // _XM_VMX128_INTRINSICS_
  9032. #endif // _XM_VMX128_INTRINSICS_
  9033. }
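// Note on the parameters (an interpretation, not from the original
// comments): RefractionIndex is the ratio n1/n2 of the refractive indices
// on the incident and transmitted sides of the surface (Snell's law), and
// both Incident and Normal are assumed normalized. When the radicand R
// above is non-positive the ray is totally internally reflected and the
// zero vector is returned.
//
// XMVECTOR T = XMVector4Refract(I, N, 1.0f / 1.33f); // e.g. air into water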
  9034. //------------------------------------------------------------------------------
  9035. XMFINLINE XMVECTOR XMVector4Orthogonal
  9036. (
  9037. FXMVECTOR V
  9038. )
  9039. {
  9040. #if defined(_XM_NO_INTRINSICS_)
  9041. XMVECTOR Result;
  9042. Result.vector4_f32[0] = V.vector4_f32[2];
  9043. Result.vector4_f32[1] = V.vector4_f32[3];
  9044. Result.vector4_f32[2] = -V.vector4_f32[0];
  9045. Result.vector4_f32[3] = -V.vector4_f32[1];
  9046. return Result;
  9047. #elif defined(_XM_SSE_INTRINSICS_)
  9048. static const XMVECTORF32 FlipZW = {1.0f,1.0f,-1.0f,-1.0f};
  9049. XMVECTOR vResult = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,0,3,2));
  9050. vResult = _mm_mul_ps(vResult,FlipZW);
  9051. return vResult;
  9052. #else // _XM_VMX128_INTRINSICS_
  9053. #endif // _XM_VMX128_INTRINSICS_
  9054. }
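// Sanity note: the swizzle-and-negate above returns (z, w, -x, -y), and
// dot(V, XMVector4Orthogonal(V)) = x*z + y*w - z*x - w*y = 0 for any V,
// so the result is always perpendicular to the input.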
  9055. //------------------------------------------------------------------------------
  9056. XMFINLINE XMVECTOR XMVector4AngleBetweenNormalsEst
  9057. (
  9058. FXMVECTOR N1,
  9059. FXMVECTOR N2
  9060. )
  9061. {
  9062. #if defined(_XM_NO_INTRINSICS_)
  9063. XMVECTOR NegativeOne;
  9064. XMVECTOR One;
  9065. XMVECTOR Result;
  9066. Result = XMVector4Dot(N1, N2);
  9067. NegativeOne = XMVectorSplatConstant(-1, 0);
  9068. One = XMVectorSplatOne();
  9069. Result = XMVectorClamp(Result, NegativeOne, One);
  9070. Result = XMVectorACosEst(Result);
  9071. return Result;
  9072. #elif defined(_XM_SSE_INTRINSICS_)
  9073. XMVECTOR vResult = XMVector4Dot(N1,N2);
  9074. // Clamp to -1.0f to 1.0f
  9075. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
9076. vResult = _mm_min_ps(vResult,g_XMOne);
  9077. vResult = XMVectorACosEst(vResult);
  9078. return vResult;
  9079. #else // _XM_VMX128_INTRINSICS_
  9080. #endif // _XM_VMX128_INTRINSICS_
  9081. }
  9082. //------------------------------------------------------------------------------
  9083. XMFINLINE XMVECTOR XMVector4AngleBetweenNormals
  9084. (
  9085. FXMVECTOR N1,
  9086. FXMVECTOR N2
  9087. )
  9088. {
  9089. #if defined(_XM_NO_INTRINSICS_)
  9090. XMVECTOR NegativeOne;
  9091. XMVECTOR One;
  9092. XMVECTOR Result;
  9093. Result = XMVector4Dot(N1, N2);
  9094. NegativeOne = XMVectorSplatConstant(-1, 0);
  9095. One = XMVectorSplatOne();
  9096. Result = XMVectorClamp(Result, NegativeOne, One);
  9097. Result = XMVectorACos(Result);
  9098. return Result;
  9099. #elif defined(_XM_SSE_INTRINSICS_)
  9100. XMVECTOR vResult = XMVector4Dot(N1,N2);
  9101. // Clamp to -1.0f to 1.0f
  9102. vResult = _mm_max_ps(vResult,g_XMNegativeOne);
9103. vResult = _mm_min_ps(vResult,g_XMOne);
  9104. vResult = XMVectorACos(vResult);
  9105. return vResult;
  9106. #else // _XM_VMX128_INTRINSICS_
  9107. #endif // _XM_VMX128_INTRINSICS_
  9108. }
  9109. //------------------------------------------------------------------------------
  9110. XMFINLINE XMVECTOR XMVector4AngleBetweenVectors
  9111. (
  9112. FXMVECTOR V1,
  9113. FXMVECTOR V2
  9114. )
  9115. {
  9116. #if defined(_XM_NO_INTRINSICS_)
  9117. XMVECTOR L1;
  9118. XMVECTOR L2;
  9119. XMVECTOR Dot;
  9120. XMVECTOR CosAngle;
  9121. XMVECTOR NegativeOne;
  9122. XMVECTOR One;
  9123. XMVECTOR Result;
  9124. L1 = XMVector4ReciprocalLength(V1);
  9125. L2 = XMVector4ReciprocalLength(V2);
  9126. Dot = XMVector4Dot(V1, V2);
  9127. L1 = XMVectorMultiply(L1, L2);
  9128. CosAngle = XMVectorMultiply(Dot, L1);
  9129. NegativeOne = XMVectorSplatConstant(-1, 0);
  9130. One = XMVectorSplatOne();
  9131. CosAngle = XMVectorClamp(CosAngle, NegativeOne, One);
  9132. Result = XMVectorACos(CosAngle);
  9133. return Result;
  9134. #elif defined(_XM_SSE_INTRINSICS_)
  9135. XMVECTOR L1;
  9136. XMVECTOR L2;
  9137. XMVECTOR Dot;
  9138. XMVECTOR CosAngle;
  9139. XMVECTOR Result;
  9140. L1 = XMVector4ReciprocalLength(V1);
  9141. L2 = XMVector4ReciprocalLength(V2);
  9142. Dot = XMVector4Dot(V1, V2);
  9143. L1 = _mm_mul_ps(L1,L2);
  9144. CosAngle = _mm_mul_ps(Dot,L1);
  9145. CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne, g_XMOne);
  9146. Result = XMVectorACos(CosAngle);
  9147. return Result;
  9148. #else // _XM_VMX128_INTRINSICS_
  9149. #endif // _XM_VMX128_INTRINSICS_
  9150. }
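// Usage sketch for the angle helpers (illustrative): the *Normals variants
// skip the reciprocal-length scaling, so they are only valid for unit
// inputs; this general form normalizes internally as coded above.
//
// XMVECTOR vAngle = XMVector4AngleBetweenVectors(V1, V2);
// FLOAT fRadians = XMVectorGetX(vAngle); // angle splatted in all lanes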
  9151. //------------------------------------------------------------------------------
  9152. XMFINLINE XMVECTOR XMVector4Transform
  9153. (
  9154. FXMVECTOR V,
  9155. CXMMATRIX M
  9156. )
  9157. {
  9158. #if defined(_XM_NO_INTRINSICS_)
  9159. FLOAT fX = (M.m[0][0]*V.vector4_f32[0])+(M.m[1][0]*V.vector4_f32[1])+(M.m[2][0]*V.vector4_f32[2])+(M.m[3][0]*V.vector4_f32[3]);
  9160. FLOAT fY = (M.m[0][1]*V.vector4_f32[0])+(M.m[1][1]*V.vector4_f32[1])+(M.m[2][1]*V.vector4_f32[2])+(M.m[3][1]*V.vector4_f32[3]);
  9161. FLOAT fZ = (M.m[0][2]*V.vector4_f32[0])+(M.m[1][2]*V.vector4_f32[1])+(M.m[2][2]*V.vector4_f32[2])+(M.m[3][2]*V.vector4_f32[3]);
  9162. FLOAT fW = (M.m[0][3]*V.vector4_f32[0])+(M.m[1][3]*V.vector4_f32[1])+(M.m[2][3]*V.vector4_f32[2])+(M.m[3][3]*V.vector4_f32[3]);
  9163. XMVECTOR vResult = {
  9164. fX,
  9165. fY,
  9166. fZ,
  9167. fW
  9168. };
  9169. return vResult;
  9170. #elif defined(_XM_SSE_INTRINSICS_)
  9171. // Splat x,y,z and w
  9172. XMVECTOR vTempX = _mm_shuffle_ps(V,V,_MM_SHUFFLE(0,0,0,0));
  9173. XMVECTOR vTempY = _mm_shuffle_ps(V,V,_MM_SHUFFLE(1,1,1,1));
  9174. XMVECTOR vTempZ = _mm_shuffle_ps(V,V,_MM_SHUFFLE(2,2,2,2));
  9175. XMVECTOR vTempW = _mm_shuffle_ps(V,V,_MM_SHUFFLE(3,3,3,3));
  9176. // Mul by the matrix
  9177. vTempX = _mm_mul_ps(vTempX,M.r[0]);
  9178. vTempY = _mm_mul_ps(vTempY,M.r[1]);
  9179. vTempZ = _mm_mul_ps(vTempZ,M.r[2]);
  9180. vTempW = _mm_mul_ps(vTempW,M.r[3]);
  9181. // Add them all together
  9182. vTempX = _mm_add_ps(vTempX,vTempY);
  9183. vTempZ = _mm_add_ps(vTempZ,vTempW);
  9184. vTempX = _mm_add_ps(vTempX,vTempZ);
  9185. return vTempX;
  9186. #else // _XM_VMX128_INTRINSICS_
  9187. #endif // _XM_VMX128_INTRINSICS_
  9188. }
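// Usage sketch (names illustrative): XNA math treats V as a row vector, so
// the splat-and-add sequence above computes V * M, e.g. carrying a
// homogeneous point into clip space:
//
// XMVECTOR vClip = XMVector4Transform(XMVectorSet(fX, fY, fZ, 1.0f), matWVP);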
  9189. //------------------------------------------------------------------------------
  9190. XMINLINE XMFLOAT4* XMVector4TransformStream
  9191. (
  9192. XMFLOAT4* pOutputStream,
  9193. UINT OutputStride,
  9194. CONST XMFLOAT4* pInputStream,
  9195. UINT InputStride,
  9196. UINT VectorCount,
  9197. CXMMATRIX M
  9198. )
  9199. {
  9200. #if defined(_XM_NO_INTRINSICS_)
  9201. XMVECTOR V;
  9202. XMVECTOR X;
  9203. XMVECTOR Y;
  9204. XMVECTOR Z;
  9205. XMVECTOR W;
  9206. XMVECTOR Result;
  9207. UINT i;
  9208. BYTE* pInputVector = (BYTE*)pInputStream;
  9209. BYTE* pOutputVector = (BYTE*)pOutputStream;
  9210. XMASSERT(pOutputStream);
  9211. XMASSERT(pInputStream);
  9212. for (i = 0; i < VectorCount; i++)
  9213. {
  9214. V = XMLoadFloat4((XMFLOAT4*)pInputVector);
  9215. W = XMVectorSplatW(V);
  9216. Z = XMVectorSplatZ(V);
  9217. Y = XMVectorSplatY(V);
  9218. X = XMVectorSplatX(V);
  9219. // W = XMVectorReplicate(((XMFLOAT4*)pInputVector)->w);
  9220. // Z = XMVectorReplicate(((XMFLOAT4*)pInputVector)->z);
  9221. // Y = XMVectorReplicate(((XMFLOAT4*)pInputVector)->y);
  9222. // X = XMVectorReplicate(((XMFLOAT4*)pInputVector)->x);
  9223. Result = XMVectorMultiply(W, M.r[3]);
  9224. Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
  9225. Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
  9226. Result = XMVectorMultiplyAdd(X, M.r[0], Result);
  9227. XMStoreFloat4((XMFLOAT4*)pOutputVector, Result);
  9228. pInputVector += InputStride;
  9229. pOutputVector += OutputStride;
  9230. }
  9231. return pOutputStream;
  9232. #elif defined(_XM_SSE_INTRINSICS_)
  9233. UINT i;
  9234. XMASSERT(pOutputStream);
  9235. XMASSERT(pInputStream);
9236. const BYTE* pInputVector = reinterpret_cast<const BYTE *>(pInputStream);
  9237. BYTE* pOutputVector = reinterpret_cast<BYTE *>(pOutputStream);
  9238. for (i = 0; i < VectorCount; i++)
  9239. {
  9240. // Fetch the row and splat it
  9241. XMVECTOR vTempx = _mm_loadu_ps(reinterpret_cast<const float *>(pInputVector));
  9242. XMVECTOR vTempy = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(1,1,1,1));
  9243. XMVECTOR vTempz = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(2,2,2,2));
  9244. XMVECTOR vTempw = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(3,3,3,3));
  9245. vTempx = _mm_shuffle_ps(vTempx,vTempx,_MM_SHUFFLE(0,0,0,0));
  9246. vTempx = _mm_mul_ps(vTempx,M.r[0]);
  9247. vTempy = _mm_mul_ps(vTempy,M.r[1]);
  9248. vTempz = _mm_mul_ps(vTempz,M.r[2]);
  9249. vTempw = _mm_mul_ps(vTempw,M.r[3]);
  9250. vTempx = _mm_add_ps(vTempx,vTempy);
  9251. vTempw = _mm_add_ps(vTempw,vTempz);
  9252. vTempw = _mm_add_ps(vTempw,vTempx);
  9253. // Store the transformed vector
  9254. _mm_storeu_ps(reinterpret_cast<float *>(pOutputVector),vTempw);
  9255. pInputVector += InputStride;
  9256. pOutputVector += OutputStride;
  9257. }
  9258. return pOutputStream;
  9259. #elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
  9260. #endif // _XM_VMX128_INTRINSICS_
  9261. }
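// Minimal sketch of the stream form (names illustrative): both strides are
// in bytes, which lets the vectors live inside larger vertex structures;
// sizeof(XMFLOAT4) gives a tightly packed array.
//
// XMFLOAT4 In[64], Out[64];
// XMVector4TransformStream(Out, sizeof(XMFLOAT4),
// In, sizeof(XMFLOAT4), 64, M);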
  9262. #ifdef __cplusplus
  9263. /****************************************************************************
  9264. *
  9265. * XMVECTOR operators
  9266. *
  9267. ****************************************************************************/
  9268. #ifndef XM_NO_OPERATOR_OVERLOADS
  9269. //------------------------------------------------------------------------------
  9270. XMFINLINE XMVECTOR operator+ (FXMVECTOR V)
  9271. {
  9272. return V;
  9273. }
  9274. //------------------------------------------------------------------------------
  9275. XMFINLINE XMVECTOR operator- (FXMVECTOR V)
  9276. {
  9277. return XMVectorNegate(V);
  9278. }
  9279. //------------------------------------------------------------------------------
  9280. XMFINLINE XMVECTOR& operator+=
  9281. (
  9282. XMVECTOR& V1,
  9283. FXMVECTOR V2
  9284. )
  9285. {
  9286. V1 = XMVectorAdd(V1, V2);
  9287. return V1;
  9288. }
  9289. //------------------------------------------------------------------------------
  9290. XMFINLINE XMVECTOR& operator-=
  9291. (
  9292. XMVECTOR& V1,
  9293. FXMVECTOR V2
  9294. )
  9295. {
  9296. V1 = XMVectorSubtract(V1, V2);
  9297. return V1;
  9298. }
  9299. //------------------------------------------------------------------------------
  9300. XMFINLINE XMVECTOR& operator*=
  9301. (
  9302. XMVECTOR& V1,
  9303. FXMVECTOR V2
  9304. )
  9305. {
  9306. V1 = XMVectorMultiply(V1, V2);
  9307. return V1;
  9308. }
  9309. //------------------------------------------------------------------------------
  9310. XMFINLINE XMVECTOR& operator/=
  9311. (
  9312. XMVECTOR& V1,
  9313. FXMVECTOR V2
  9314. )
  9315. {
  9316. V1 = XMVectorDivide(V1,V2);
  9317. return V1;
  9318. }
  9319. //------------------------------------------------------------------------------
  9320. XMFINLINE XMVECTOR& operator*=
  9321. (
  9322. XMVECTOR& V,
  9323. CONST FLOAT S
  9324. )
  9325. {
  9326. V = XMVectorScale(V, S);
  9327. return V;
  9328. }
  9329. //------------------------------------------------------------------------------
  9330. XMFINLINE XMVECTOR& operator/=
  9331. (
  9332. XMVECTOR& V,
  9333. CONST FLOAT S
  9334. )
  9335. {
  9336. V = XMVectorScale(V, 1.0f / S);
  9337. return V;
  9338. }
  9339. //------------------------------------------------------------------------------
  9340. XMFINLINE XMVECTOR operator+
  9341. (
  9342. FXMVECTOR V1,
  9343. FXMVECTOR V2
  9344. )
  9345. {
  9346. return XMVectorAdd(V1, V2);
  9347. }
  9348. //------------------------------------------------------------------------------
  9349. XMFINLINE XMVECTOR operator-
  9350. (
  9351. FXMVECTOR V1,
  9352. FXMVECTOR V2
  9353. )
  9354. {
  9355. return XMVectorSubtract(V1, V2);
  9356. }
  9357. //------------------------------------------------------------------------------
  9358. XMFINLINE XMVECTOR operator*
  9359. (
  9360. FXMVECTOR V1,
  9361. FXMVECTOR V2
  9362. )
  9363. {
  9364. return XMVectorMultiply(V1, V2);
  9365. }
  9366. //------------------------------------------------------------------------------
  9367. XMFINLINE XMVECTOR operator/
  9368. (
  9369. FXMVECTOR V1,
  9370. FXMVECTOR V2
  9371. )
  9372. {
  9373. return XMVectorDivide(V1,V2);
  9374. }
  9375. //------------------------------------------------------------------------------
  9376. XMFINLINE XMVECTOR operator*
  9377. (
  9378. FXMVECTOR V,
  9379. CONST FLOAT S
  9380. )
  9381. {
  9382. return XMVectorScale(V, S);
  9383. }
  9384. //------------------------------------------------------------------------------
  9385. XMFINLINE XMVECTOR operator/
  9386. (
  9387. FXMVECTOR V,
  9388. CONST FLOAT S
  9389. )
  9390. {
  9391. return XMVectorScale(V, 1.0f / S);
  9392. }
  9393. //------------------------------------------------------------------------------
  9394. XMFINLINE XMVECTOR operator*
  9395. (
  9396. FLOAT S,
  9397. FXMVECTOR V
  9398. )
  9399. {
  9400. return XMVectorScale(V, S);
  9401. }
  9402. #endif // !XM_NO_OPERATOR_OVERLOADS
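// With the overloads above, vector expressions can be written infix instead
// of through the named functions; a short equivalence sketch (illustrative):
//
// XMVECTOR V3 = V1 + V2 * 2.0f;
// // same as: XMVECTOR V3 = XMVectorAdd(V1, XMVectorScale(V2, 2.0f));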
  9403. /****************************************************************************
  9404. *
  9405. * XMFLOAT2 operators
  9406. *
  9407. ****************************************************************************/
  9408. //------------------------------------------------------------------------------
  9409. XMFINLINE _XMFLOAT2::_XMFLOAT2
  9410. (
  9411. CONST FLOAT* pArray
  9412. )
  9413. {
  9414. x = pArray[0];
  9415. y = pArray[1];
  9416. }
  9417. //------------------------------------------------------------------------------
  9418. XMFINLINE _XMFLOAT2& _XMFLOAT2::operator=
  9419. (
  9420. CONST _XMFLOAT2& Float2
  9421. )
  9422. {
  9423. x = Float2.x;
  9424. y = Float2.y;
  9425. return *this;
  9426. }
  9427. //------------------------------------------------------------------------------
  9428. XMFINLINE XMFLOAT2A& XMFLOAT2A::operator=
  9429. (
  9430. CONST XMFLOAT2A& Float2
  9431. )
  9432. {
  9433. x = Float2.x;
  9434. y = Float2.y;
  9435. return *this;
  9436. }
  9437. /****************************************************************************
  9438. *
  9439. * XMHALF2 operators
  9440. *
  9441. ****************************************************************************/
  9442. //------------------------------------------------------------------------------
  9443. XMFINLINE _XMHALF2::_XMHALF2
  9444. (
  9445. CONST HALF* pArray
  9446. )
  9447. {
  9448. x = pArray[0];
  9449. y = pArray[1];
  9450. }
  9451. //------------------------------------------------------------------------------
  9452. XMFINLINE _XMHALF2::_XMHALF2
  9453. (
  9454. FLOAT _x,
  9455. FLOAT _y
  9456. )
  9457. {
  9458. x = XMConvertFloatToHalf(_x);
  9459. y = XMConvertFloatToHalf(_y);
  9460. }
  9461. //------------------------------------------------------------------------------
  9462. XMFINLINE _XMHALF2::_XMHALF2
  9463. (
  9464. CONST FLOAT* pArray
  9465. )
  9466. {
  9467. x = XMConvertFloatToHalf(pArray[0]);
  9468. y = XMConvertFloatToHalf(pArray[1]);
  9469. }
  9470. //------------------------------------------------------------------------------
  9471. XMFINLINE _XMHALF2& _XMHALF2::operator=
  9472. (
  9473. CONST _XMHALF2& Half2
  9474. )
  9475. {
  9476. x = Half2.x;
  9477. y = Half2.y;
  9478. return *this;
  9479. }
  9480. /****************************************************************************
  9481. *
  9482. * XMSHORTN2 operators
  9483. *
  9484. ****************************************************************************/
  9485. //------------------------------------------------------------------------------
  9486. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9487. (
  9488. CONST SHORT* pArray
  9489. )
  9490. {
  9491. x = pArray[0];
  9492. y = pArray[1];
  9493. }
  9494. //------------------------------------------------------------------------------
  9495. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9496. (
  9497. FLOAT _x,
  9498. FLOAT _y
  9499. )
  9500. {
  9501. XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9502. }
  9503. //------------------------------------------------------------------------------
  9504. XMFINLINE _XMSHORTN2::_XMSHORTN2
  9505. (
  9506. CONST FLOAT* pArray
  9507. )
  9508. {
  9509. XMStoreShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9510. }
  9511. //------------------------------------------------------------------------------
  9512. XMFINLINE _XMSHORTN2& _XMSHORTN2::operator=
  9513. (
  9514. CONST _XMSHORTN2& ShortN2
  9515. )
  9516. {
  9517. x = ShortN2.x;
  9518. y = ShortN2.y;
  9519. return *this;
  9520. }
  9521. /****************************************************************************
  9522. *
  9523. * XMSHORT2 operators
  9524. *
  9525. ****************************************************************************/
  9526. //------------------------------------------------------------------------------
  9527. XMFINLINE _XMSHORT2::_XMSHORT2
  9528. (
  9529. CONST SHORT* pArray
  9530. )
  9531. {
  9532. x = pArray[0];
  9533. y = pArray[1];
  9534. }
  9535. //------------------------------------------------------------------------------
  9536. XMFINLINE _XMSHORT2::_XMSHORT2
  9537. (
  9538. FLOAT _x,
  9539. FLOAT _y
  9540. )
  9541. {
  9542. XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9543. }
  9544. //------------------------------------------------------------------------------
  9545. XMFINLINE _XMSHORT2::_XMSHORT2
  9546. (
  9547. CONST FLOAT* pArray
  9548. )
  9549. {
  9550. XMStoreShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9551. }
  9552. //------------------------------------------------------------------------------
  9553. XMFINLINE _XMSHORT2& _XMSHORT2::operator=
  9554. (
  9555. CONST _XMSHORT2& Short2
  9556. )
  9557. {
  9558. x = Short2.x;
  9559. y = Short2.y;
  9560. return *this;
  9561. }
  9562. /****************************************************************************
  9563. *
  9564. * XMUSHORTN2 operators
  9565. *
  9566. ****************************************************************************/
  9567. //------------------------------------------------------------------------------
  9568. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9569. (
  9570. CONST USHORT* pArray
  9571. )
  9572. {
  9573. x = pArray[0];
  9574. y = pArray[1];
  9575. }
  9576. //------------------------------------------------------------------------------
  9577. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9578. (
  9579. FLOAT _x,
  9580. FLOAT _y
  9581. )
  9582. {
  9583. XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9584. }
  9585. //------------------------------------------------------------------------------
  9586. XMFINLINE _XMUSHORTN2::_XMUSHORTN2
  9587. (
  9588. CONST FLOAT* pArray
  9589. )
  9590. {
  9591. XMStoreUShortN2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9592. }
  9593. //------------------------------------------------------------------------------
  9594. XMFINLINE _XMUSHORTN2& _XMUSHORTN2::operator=
  9595. (
  9596. CONST _XMUSHORTN2& UShortN2
  9597. )
  9598. {
  9599. x = UShortN2.x;
  9600. y = UShortN2.y;
  9601. return *this;
  9602. }
  9603. /****************************************************************************
  9604. *
  9605. * XMUSHORT2 operators
  9606. *
  9607. ****************************************************************************/
  9608. //------------------------------------------------------------------------------
  9609. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9610. (
  9611. CONST USHORT* pArray
  9612. )
  9613. {
  9614. x = pArray[0];
  9615. y = pArray[1];
  9616. }
  9617. //------------------------------------------------------------------------------
  9618. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9619. (
  9620. FLOAT _x,
  9621. FLOAT _y
  9622. )
  9623. {
  9624. XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
  9625. }
  9626. //------------------------------------------------------------------------------
  9627. XMFINLINE _XMUSHORT2::_XMUSHORT2
  9628. (
  9629. CONST FLOAT* pArray
  9630. )
  9631. {
  9632. XMStoreUShort2(this, XMLoadFloat2((XMFLOAT2*)pArray));
  9633. }
  9634. //------------------------------------------------------------------------------
  9635. XMFINLINE _XMUSHORT2& _XMUSHORT2::operator=
  9636. (
  9637. CONST _XMUSHORT2& UShort2
  9638. )
  9639. {
  9640. x = UShort2.x;
  9641. y = UShort2.y;
  9642. return *this;
  9643. }
  9644. /****************************************************************************
  9645. *
  9646. * XMFLOAT3 operators
  9647. *
  9648. ****************************************************************************/
  9649. //------------------------------------------------------------------------------
  9650. XMFINLINE _XMFLOAT3::_XMFLOAT3
  9651. (
  9652. CONST FLOAT* pArray
  9653. )
  9654. {
  9655. x = pArray[0];
  9656. y = pArray[1];
  9657. z = pArray[2];
  9658. }
  9659. //------------------------------------------------------------------------------
  9660. XMFINLINE _XMFLOAT3& _XMFLOAT3::operator=
  9661. (
  9662. CONST _XMFLOAT3& Float3
  9663. )
  9664. {
  9665. x = Float3.x;
  9666. y = Float3.y;
  9667. z = Float3.z;
  9668. return *this;
  9669. }
  9670. //------------------------------------------------------------------------------
  9671. XMFINLINE XMFLOAT3A& XMFLOAT3A::operator=
  9672. (
  9673. CONST XMFLOAT3A& Float3
  9674. )
  9675. {
  9676. x = Float3.x;
  9677. y = Float3.y;
  9678. z = Float3.z;
  9679. return *this;
  9680. }
  9681. /****************************************************************************
  9682. *
  9683. * XMHENDN3 operators
  9684. *
  9685. ****************************************************************************/
  9686. //------------------------------------------------------------------------------
  9687. XMFINLINE _XMHENDN3::_XMHENDN3
  9688. (
  9689. FLOAT _x,
  9690. FLOAT _y,
  9691. FLOAT _z
  9692. )
  9693. {
  9694. XMStoreHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9695. }
  9696. //------------------------------------------------------------------------------
  9697. XMFINLINE _XMHENDN3::_XMHENDN3
  9698. (
  9699. CONST FLOAT* pArray
  9700. )
  9701. {
  9702. XMStoreHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9703. }
  9704. //------------------------------------------------------------------------------
  9705. XMFINLINE _XMHENDN3& _XMHENDN3::operator=
  9706. (
  9707. CONST _XMHENDN3& HenDN3
  9708. )
  9709. {
  9710. v = HenDN3.v;
  9711. return *this;
  9712. }
  9713. //------------------------------------------------------------------------------
  9714. XMFINLINE _XMHENDN3& _XMHENDN3::operator=
  9715. (
  9716. CONST UINT Packed
  9717. )
  9718. {
  9719. v = Packed;
  9720. return *this;
  9721. }
  9722. /****************************************************************************
  9723. *
  9724. * XMHEND3 operators
  9725. *
  9726. ****************************************************************************/
  9727. //------------------------------------------------------------------------------
  9728. XMFINLINE _XMHEND3::_XMHEND3
  9729. (
  9730. FLOAT _x,
  9731. FLOAT _y,
  9732. FLOAT _z
  9733. )
  9734. {
  9735. XMStoreHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9736. }
  9737. //------------------------------------------------------------------------------
  9738. XMFINLINE _XMHEND3::_XMHEND3
  9739. (
  9740. CONST FLOAT* pArray
  9741. )
  9742. {
  9743. XMStoreHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9744. }
  9745. //------------------------------------------------------------------------------
  9746. XMFINLINE _XMHEND3& _XMHEND3::operator=
  9747. (
  9748. CONST _XMHEND3& HenD3
  9749. )
  9750. {
  9751. v = HenD3.v;
  9752. return *this;
  9753. }
  9754. //------------------------------------------------------------------------------
  9755. XMFINLINE _XMHEND3& _XMHEND3::operator=
  9756. (
  9757. CONST UINT Packed
  9758. )
  9759. {
  9760. v = Packed;
  9761. return *this;
  9762. }
  9763. /****************************************************************************
  9764. *
  9765. * XMUHENDN3 operators
  9766. *
  9767. ****************************************************************************/
  9768. //------------------------------------------------------------------------------
  9769. XMFINLINE _XMUHENDN3::_XMUHENDN3
  9770. (
  9771. FLOAT _x,
  9772. FLOAT _y,
  9773. FLOAT _z
  9774. )
  9775. {
  9776. XMStoreUHenDN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9777. }
  9778. //------------------------------------------------------------------------------
  9779. XMFINLINE _XMUHENDN3::_XMUHENDN3
  9780. (
  9781. CONST FLOAT* pArray
  9782. )
  9783. {
  9784. XMStoreUHenDN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9785. }
  9786. //------------------------------------------------------------------------------
  9787. XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
  9788. (
  9789. CONST _XMUHENDN3& UHenDN3
  9790. )
  9791. {
  9792. v = UHenDN3.v;
  9793. return *this;
  9794. }
  9795. //------------------------------------------------------------------------------
  9796. XMFINLINE _XMUHENDN3& _XMUHENDN3::operator=
  9797. (
  9798. CONST UINT Packed
  9799. )
  9800. {
  9801. v = Packed;
  9802. return *this;
  9803. }
  9804. /****************************************************************************
  9805. *
  9806. * XMUHEND3 operators
  9807. *
  9808. ****************************************************************************/
  9809. //------------------------------------------------------------------------------
  9810. XMFINLINE _XMUHEND3::_XMUHEND3
  9811. (
  9812. FLOAT _x,
  9813. FLOAT _y,
  9814. FLOAT _z
  9815. )
  9816. {
  9817. XMStoreUHenD3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9818. }
  9819. //------------------------------------------------------------------------------
  9820. XMFINLINE _XMUHEND3::_XMUHEND3
  9821. (
  9822. CONST FLOAT* pArray
  9823. )
  9824. {
  9825. XMStoreUHenD3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9826. }
  9827. //------------------------------------------------------------------------------
  9828. XMFINLINE _XMUHEND3& _XMUHEND3::operator=
  9829. (
  9830. CONST _XMUHEND3& UHenD3
  9831. )
  9832. {
  9833. v = UHenD3.v;
  9834. return *this;
  9835. }
  9836. //------------------------------------------------------------------------------
  9837. XMFINLINE _XMUHEND3& _XMUHEND3::operator=
  9838. (
  9839. CONST UINT Packed
  9840. )
  9841. {
  9842. v = Packed;
  9843. return *this;
  9844. }
  9845. /****************************************************************************
  9846. *
  9847. * XMDHENN3 operators
  9848. *
  9849. ****************************************************************************/
  9850. //------------------------------------------------------------------------------
  9851. XMFINLINE _XMDHENN3::_XMDHENN3
  9852. (
  9853. FLOAT _x,
  9854. FLOAT _y,
  9855. FLOAT _z
  9856. )
  9857. {
  9858. XMStoreDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9859. }
  9860. //------------------------------------------------------------------------------
  9861. XMFINLINE _XMDHENN3::_XMDHENN3
  9862. (
  9863. CONST FLOAT* pArray
  9864. )
  9865. {
  9866. XMStoreDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9867. }
  9868. //------------------------------------------------------------------------------
  9869. XMFINLINE _XMDHENN3& _XMDHENN3::operator=
  9870. (
  9871. CONST _XMDHENN3& DHenN3
  9872. )
  9873. {
  9874. v = DHenN3.v;
  9875. return *this;
  9876. }
  9877. //------------------------------------------------------------------------------
  9878. XMFINLINE _XMDHENN3& _XMDHENN3::operator=
  9879. (
  9880. CONST UINT Packed
  9881. )
  9882. {
  9883. v = Packed;
  9884. return *this;
  9885. }
  9886. /****************************************************************************
  9887. *
  9888. * XMDHEN3 operators
  9889. *
  9890. ****************************************************************************/
  9891. //------------------------------------------------------------------------------
  9892. XMFINLINE _XMDHEN3::_XMDHEN3
  9893. (
  9894. FLOAT _x,
  9895. FLOAT _y,
  9896. FLOAT _z
  9897. )
  9898. {
  9899. XMStoreDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9900. }
  9901. //------------------------------------------------------------------------------
  9902. XMFINLINE _XMDHEN3::_XMDHEN3
  9903. (
  9904. CONST FLOAT* pArray
  9905. )
  9906. {
  9907. XMStoreDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9908. }
  9909. //------------------------------------------------------------------------------
  9910. XMFINLINE _XMDHEN3& _XMDHEN3::operator=
  9911. (
  9912. CONST _XMDHEN3& DHen3
  9913. )
  9914. {
  9915. v = DHen3.v;
  9916. return *this;
  9917. }
  9918. //------------------------------------------------------------------------------
  9919. XMFINLINE _XMDHEN3& _XMDHEN3::operator=
  9920. (
  9921. CONST UINT Packed
  9922. )
  9923. {
  9924. v = Packed;
  9925. return *this;
  9926. }
  9927. /****************************************************************************
  9928. *
  9929. * XMUDHENN3 operators
  9930. *
  9931. ****************************************************************************/
  9932. //------------------------------------------------------------------------------
  9933. XMFINLINE _XMUDHENN3::_XMUDHENN3
  9934. (
  9935. FLOAT _x,
  9936. FLOAT _y,
  9937. FLOAT _z
  9938. )
  9939. {
  9940. XMStoreUDHenN3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9941. }
  9942. //------------------------------------------------------------------------------
  9943. XMFINLINE _XMUDHENN3::_XMUDHENN3
  9944. (
  9945. CONST FLOAT* pArray
  9946. )
  9947. {
  9948. XMStoreUDHenN3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9949. }
  9950. //------------------------------------------------------------------------------
  9951. XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
  9952. (
  9953. CONST _XMUDHENN3& UDHenN3
  9954. )
  9955. {
  9956. v = UDHenN3.v;
  9957. return *this;
  9958. }
  9959. //------------------------------------------------------------------------------
  9960. XMFINLINE _XMUDHENN3& _XMUDHENN3::operator=
  9961. (
  9962. CONST UINT Packed
  9963. )
  9964. {
  9965. v = Packed;
  9966. return *this;
  9967. }
  9968. /****************************************************************************
  9969. *
  9970. * XMUDHEN3 operators
  9971. *
  9972. ****************************************************************************/
  9973. //------------------------------------------------------------------------------
  9974. XMFINLINE _XMUDHEN3::_XMUDHEN3
  9975. (
  9976. FLOAT _x,
  9977. FLOAT _y,
  9978. FLOAT _z
  9979. )
  9980. {
  9981. XMStoreUDHen3(this, XMVectorSet(_x, _y, _z, 0.0f));
  9982. }
  9983. //------------------------------------------------------------------------------
  9984. XMFINLINE _XMUDHEN3::_XMUDHEN3
  9985. (
  9986. CONST FLOAT* pArray
  9987. )
  9988. {
  9989. XMStoreUDHen3(this, XMLoadFloat3((XMFLOAT3*)pArray));
  9990. }
  9991. //------------------------------------------------------------------------------
  9992. XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
  9993. (
  9994. CONST _XMUDHEN3& UDHen3
  9995. )
  9996. {
  9997. v = UDHen3.v;
  9998. return *this;
  9999. }
  10000. //------------------------------------------------------------------------------
  10001. XMFINLINE _XMUDHEN3& _XMUDHEN3::operator=
  10002. (
  10003. CONST UINT Packed
  10004. )
  10005. {
  10006. v = Packed;
  10007. return *this;
  10008. }
  10009. /****************************************************************************
  10010. *
  10011. * XMU565 operators
  10012. *
  10013. ****************************************************************************/
//------------------------------------------------------------------------------
XMFINLINE _XMU565::_XMU565
(
    CONST CHAR *pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
}

//------------------------------------------------------------------------------
XMFINLINE _XMU565::_XMU565
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMU565::_XMU565
(
    CONST FLOAT *pArray
)
{
    XMStoreU565(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMU565& _XMU565::operator=
(
    CONST _XMU565& U565
)
{
    v = U565.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMU565& _XMU565::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT3PK operators
 *
 ****************************************************************************/
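// Note: _XMFLOAT3PK is the packed 11:11:10 float format (5-bit exponent with
// a 6-bit mantissa for x and y, 5-bit mantissa for z), matching
// DXGI_FORMAT_R11G11B10_FLOAT.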
//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3PK::_XMFLOAT3PK
(
    CONST FLOAT *pArray
)
{
    XMStoreFloat3PK(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
(
    CONST _XMFLOAT3PK& float3pk
)
{
    v = float3pk.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3PK& _XMFLOAT3PK::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT3SE operators
 *
 ****************************************************************************/
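// Note: _XMFLOAT3SE is the shared-exponent format: three 9-bit mantissas with
// a single 5-bit exponent, matching DXGI_FORMAT_R9G9B9E5_SHAREDEXP.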
//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z
)
{
    XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3SE::_XMFLOAT3SE
(
    CONST FLOAT *pArray
)
{
    XMStoreFloat3SE(this, XMLoadFloat3((XMFLOAT3*)pArray ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
(
    CONST _XMFLOAT3SE& float3se
)
{
    v = float3se.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT3SE& _XMFLOAT3SE::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMFLOAT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT4::_XMFLOAT4
(
    CONST FLOAT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMFLOAT4& _XMFLOAT4::operator=
(
    CONST _XMFLOAT4& Float4
)
{
    x = Float4.x;
    y = Float4.y;
    z = Float4.z;
    w = Float4.w;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE XMFLOAT4A& XMFLOAT4A::operator=
(
    CONST XMFLOAT4A& Float4
)
{
    x = Float4.x;
    y = Float4.y;
    z = Float4.z;
    w = Float4.w;
    return *this;
}

/****************************************************************************
 *
 * XMHALF4 operators
 *
 ****************************************************************************/
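// Note: _XMHALF4 holds four 16-bit half-precision floats; the scalar
// constructor converts via XMConvertFloatToHalf, and the FLOAT* constructor
// converts all four elements in one call to XMConvertFloatToHalfStream.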
//------------------------------------------------------------------------------
XMFINLINE _XMHALF4::_XMHALF4
(
    CONST HALF* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMHALF4::_XMHALF4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    x = XMConvertFloatToHalf(_x);
    y = XMConvertFloatToHalf(_y);
    z = XMConvertFloatToHalf(_z);
    w = XMConvertFloatToHalf(_w);
}

//------------------------------------------------------------------------------
XMFINLINE _XMHALF4::_XMHALF4
(
    CONST FLOAT* pArray
)
{
    XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(FLOAT), 4);
}

//------------------------------------------------------------------------------
XMFINLINE _XMHALF4& _XMHALF4::operator=
(
    CONST _XMHALF4& Half4
)
{
    x = Half4.x;
    y = Half4.y;
    z = Half4.z;
    w = Half4.w;
    return *this;
}

/****************************************************************************
 *
 * XMSHORTN4 operators
 *
 ****************************************************************************/
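// Note: _XMSHORTN4 and the three variants that follow (_XMSHORT4,
// _XMUSHORTN4, _XMUSHORT4) store four 16-bit integers; the 'N' forms are
// normalized (floats in [-1, 1] or [0, 1] scaled to the full 16-bit range),
// the others clamp to the raw integer range.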
//------------------------------------------------------------------------------
XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMSHORTN4::_XMSHORTN4
(
    CONST FLOAT* pArray
)
{
    XMStoreShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMSHORTN4& _XMSHORTN4::operator=
(
    CONST _XMSHORTN4& ShortN4
)
{
    x = ShortN4.x;
    y = ShortN4.y;
    z = ShortN4.z;
    w = ShortN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMSHORT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMSHORT4::_XMSHORT4
(
    CONST SHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMSHORT4::_XMSHORT4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMSHORT4::_XMSHORT4
(
    CONST FLOAT* pArray
)
{
    XMStoreShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMSHORT4& _XMSHORT4::operator=
(
    CONST _XMSHORT4& Short4
)
{
    x = Short4.x;
    y = Short4.y;
    z = Short4.z;
    w = Short4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORTN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORTN4::_XMUSHORTN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUShortN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORTN4& _XMUSHORTN4::operator=
(
    CONST _XMUSHORTN4& UShortN4
)
{
    x = UShortN4.x;
    y = UShortN4.y;
    z = UShortN4.z;
    w = UShortN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUSHORT4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    CONST USHORT* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORT4::_XMUSHORT4
(
    CONST FLOAT* pArray
)
{
    XMStoreUShort4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUSHORT4& _XMUSHORT4::operator=
(
    CONST _XMUSHORT4& UShort4
)
{
    x = UShort4.x;
    y = UShort4.y;
    z = UShort4.z;
    w = UShort4.w;
    return *this;
}

/****************************************************************************
 *
 * XMXDECN4 operators
 *
 ****************************************************************************/
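// Note: _XMXDECN4 packs a UINT as 10:10:10:2 bit fields; per the XDecN4
// format, x/y/z are signed normalized 10-bit fields and w is an unsigned
// 2-bit field (the 'X' prefix marks this mixed signed/unsigned layout).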
//------------------------------------------------------------------------------
XMFINLINE _XMXDECN4::_XMXDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXDECN4::_XMXDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXDECN4& _XMXDECN4::operator=
(
    CONST _XMXDECN4& XDecN4
)
{
    v = XDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMXDECN4& _XMXDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMXDEC4::_XMXDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXDEC4::_XMXDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreXDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST _XMXDEC4& XDec4
)
{
    v = XDec4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMXDEC4& _XMXDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDECN4 operators
 *
 ****************************************************************************/
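// Note: _XMDECN4/_XMDEC4 and the U-prefixed pair further below use the same
// 10:10:10:2 packing with all four fields signed (DEC) or unsigned (UDEC),
// normalized ('N') or integer-valued.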
//------------------------------------------------------------------------------
XMFINLINE _XMDECN4::_XMDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMDECN4::_XMDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST _XMDECN4& DecN4
)
{
    v = DecN4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMDECN4& _XMDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMDEC4::_XMDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMDEC4::_XMDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST _XMDEC4& Dec4
)
{
    v = Dec4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMDEC4& _XMDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDECN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUDECN4::_XMUDECN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUDECN4::_XMUDECN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDecN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST _XMUDECN4& UDecN4
)
{
    v = UDecN4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMUDECN4& _XMUDECN4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUDEC4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUDEC4::_XMUDEC4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUDEC4::_XMUDEC4
(
    CONST FLOAT* pArray
)
{
    XMStoreUDec4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST _XMUDEC4& UDec4
)
{
    v = UDec4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMUDEC4& _XMUDEC4::operator=
(
    CONST UINT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXICON4 operators
 *
 ****************************************************************************/
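// Note: the six Ico types that follow pack 20:20:20:4 bit fields into a
// UINT64; as with the Dec family, 'X' mixes signed xyz with an unsigned w,
// 'U' is all-unsigned, and 'N' selects the normalized forms.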
//------------------------------------------------------------------------------
XMFINLINE _XMXICON4::_XMXICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXICON4::_XMXICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST _XMXICON4& XIcoN4
)
{
    v = XIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMXICON4& _XMXICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMXICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMXICO4::_XMXICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreXIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXICO4::_XMXICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreXIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST _XMXICO4& XIco4
)
{
    v = XIco4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMXICO4& _XMXICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMICON4::_XMICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMICON4::_XMICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST _XMICON4& IcoN4
)
{
    v = IcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMICON4& _XMICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMICO4::_XMICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMICO4::_XMICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST _XMICO4& Ico4
)
{
    v = Ico4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMICO4& _XMICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUICON4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUICON4::_XMUICON4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIcoN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUICON4::_XMUICON4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIcoN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST _XMUICON4& UIcoN4
)
{
    v = UIcoN4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMUICON4& _XMUICON4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMUICO4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUICO4::_XMUICO4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUIco4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUICO4::_XMUICO4
(
    CONST FLOAT* pArray
)
{
    XMStoreUIco4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST _XMUICO4& UIco4
)
{
    v = UIco4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMUICO4& _XMUICO4::operator=
(
    CONST UINT64 Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMCOLOR operators
 *
 ****************************************************************************/
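// Note: _XMCOLOR is a 32-bit ARGB color with 8 bits per channel (alpha in
// the high byte); XMStoreColor clamps each channel to [0, 1] before packing.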
//------------------------------------------------------------------------------
XMFINLINE _XMCOLOR::_XMCOLOR
(
    FLOAT _r,
    FLOAT _g,
    FLOAT _b,
    FLOAT _a
)
{
    XMStoreColor(this, XMVectorSet(_r, _g, _b, _a));
}

//------------------------------------------------------------------------------
XMFINLINE _XMCOLOR::_XMCOLOR
(
    CONST FLOAT* pArray
)
{
    XMStoreColor(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST _XMCOLOR& Color
)
{
    c = Color.c;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMCOLOR& _XMCOLOR::operator=
(
    CONST UINT Color
)
{
    c = Color;
    return *this;
}
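// Illustrative usage sketch (not part of the original file): round-tripping
// a color through the packed representation. The values are arbitrary.
//
//     XMCOLOR Color( 1.0f, 0.5f, 0.25f, 1.0f ); // r, g, b, a in [0, 1]
//     UINT Packed = Color.c;                    // raw 0xAARRGGBB-style DWORD
//     XMVECTOR V = XMLoadColor( &Color );       // expand back to floats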
/****************************************************************************
 *
 * XMBYTEN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMBYTEN4::_XMBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMBYTEN4& _XMBYTEN4::operator=
(
    CONST _XMBYTEN4& ByteN4
)
{
    x = ByteN4.x;
    y = ByteN4.y;
    z = ByteN4.z;
    w = ByteN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMBYTE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST CHAR* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMBYTE4::_XMBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMBYTE4::_XMBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMBYTE4& _XMBYTE4::operator=
(
    CONST _XMBYTE4& Byte4
)
{
    x = Byte4.x;
    y = Byte4.y;
    z = Byte4.z;
    w = Byte4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUBYTEN4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTEN4::_XMUBYTEN4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByteN4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTEN4& _XMUBYTEN4::operator=
(
    CONST _XMUBYTEN4& UByteN4
)
{
    x = UByteN4.x;
    y = UByteN4.y;
    z = UByteN4.z;
    w = UByteN4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUBYTE4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST BYTE* pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTE4::_XMUBYTE4
(
    CONST FLOAT* pArray
)
{
    XMStoreUByte4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUBYTE4& _XMUBYTE4::operator=
(
    CONST _XMUBYTE4& UByte4
)
{
    x = UByte4.x;
    y = UByte4.y;
    z = UByte4.z;
    w = UByte4.w;
    return *this;
}

/****************************************************************************
 *
 * XMUNIBBLE4 operators
 *
 ****************************************************************************/
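// Note: _XMUNIBBLE4 packs four unsigned 4-bit fields (4:4:4:4) into a USHORT.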
//------------------------------------------------------------------------------
XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    CONST CHAR *pArray
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = pArray[3];
}

//------------------------------------------------------------------------------
XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    FLOAT _w
)
{
    XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUNIBBLE4::_XMUNIBBLE4
(
    CONST FLOAT *pArray
)
{
    XMStoreUNibble4(this, XMLoadFloat4((XMFLOAT4*)pArray));
}

//------------------------------------------------------------------------------
XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
(
    CONST _XMUNIBBLE4& UNibble4
)
{
    v = UNibble4.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMUNIBBLE4& _XMUNIBBLE4::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

/****************************************************************************
 *
 * XMU555 operators
 *
 ****************************************************************************/
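// Note: _XMU555 packs unsigned 5:5:5 fields plus a 1-bit w into a USHORT,
// which is why the constructors below take w as a BOOL.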
//------------------------------------------------------------------------------
XMFINLINE _XMU555::_XMU555
(
    CONST CHAR *pArray,
    BOOL _w
)
{
    x = pArray[0];
    y = pArray[1];
    z = pArray[2];
    w = _w;
}

//------------------------------------------------------------------------------
XMFINLINE _XMU555::_XMU555
(
    FLOAT _x,
    FLOAT _y,
    FLOAT _z,
    BOOL _w
)
{
    XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMU555::_XMU555
(
    CONST FLOAT *pArray,
    BOOL _w
)
{
    XMVECTOR V = XMLoadFloat3((XMFLOAT3*)pArray);
    XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) ));
}

//------------------------------------------------------------------------------
XMFINLINE _XMU555& _XMU555::operator=
(
    CONST _XMU555& U555
)
{
    v = U555.v;
    return *this;
}

//------------------------------------------------------------------------------
XMFINLINE _XMU555& _XMU555::operator=
(
    CONST USHORT Packed
)
{
    v = Packed;
    return *this;
}

#endif // __cplusplus

#if defined(_XM_NO_INTRINSICS_)
#undef XMISNAN
#undef XMISINF
#endif

#endif // __XNAMATHVECTOR_INL__